412 lines
15 KiB
Python
412 lines
15 KiB
Python
# -*- coding: utf-8 -*- #
|
|
# Copyright 2022 Google Inc. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Client for interaction with Datascan API CRUD DATAPLEX."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import unicode_literals
|
|
|
|
from googlecloudsdk.api_lib.dataplex import util as dataplex_api
|
|
from googlecloudsdk.api_lib.util import messages as messages_util
|
|
from googlecloudsdk.calliope import parser_extensions
|
|
from googlecloudsdk.command_lib.iam import iam_util
|
|
from googlecloudsdk.generated_clients.apis.dataplex.v1 import dataplex_v1_messages
|
|
|
|
|
|
def GenerateData(args: parser_extensions.Namespace):
  """Builds the DataSource message identifying what the scan reads.

  Args:
    args: The parsed arguments of the command.

  Returns:
    A GoogleCloudDataplexV1DataSource whose `entity` is set when
    data_source_entity was specified; otherwise one whose `resource` is taken
    from args.data_source_resource.
  """
  messages = dataplex_api.GetMessageModule()
  if not args.IsKnownAndSpecified('data_source_entity'):
    # No entity given: fall back to the resource form of the data source.
    return messages.GoogleCloudDataplexV1DataSource(
        resource=args.data_source_resource
    )
  return messages.GoogleCloudDataplexV1DataSource(
      entity=args.data_source_entity
  )
|
|
|
|
|
|
def GenerateDataQualitySpec(args):
  """Builds the DataQualitySpec message for a data quality scan.

  Args:
    args: The parsed arguments of the command.

  Returns:
    An empty GoogleCloudDataplexV1DataQualitySpec when data_quality_spec_file
    was not specified. Otherwise the object read from that file converted to
    a GoogleCloudDataplexV1DataQualitySpec, or None when reading the file
    yields None.
  """
  messages = dataplex_api.GetMessageModule()

  if not args.IsSpecified('data_quality_spec_file'):
    return messages.GoogleCloudDataplexV1DataQualitySpec()

  raw_spec = dataplex_api.ReadObject(args.data_quality_spec_file)
  if raw_spec is None:
    return None
  # The file is converted with SnakeToCamelDict before being turned into a
  # message.
  return messages_util.DictToMessageWithErrorCheck(
      dataplex_api.SnakeToCamelDict(raw_spec),
      messages.GoogleCloudDataplexV1DataQualitySpec,
  )
|
|
|
|
|
|
def GenerateDataProfileSpec(args: parser_extensions.Namespace):
  """Builds the DataProfileSpec message for a data profile scan.

  When data_profile_spec_file is specified the spec comes entirely from that
  file. Otherwise it is assembled from the individual profile flags
  (exclude_field_names, include_field_names, sampling_percent, row_filter,
  export_results_table), each of which is only applied when specified.

  Args:
    args: The parsed arguments of the command.

  Returns:
    A GoogleCloudDataplexV1DataProfileSpec, or None when the spec file was
    specified but reading it yields None.
  """
  messages = dataplex_api.GetMessageModule()

  if args.IsSpecified('data_profile_spec_file'):
    raw_spec = dataplex_api.ReadObject(args.data_profile_spec_file)
    if raw_spec is None:
      return None
    return messages_util.DictToMessageWithErrorCheck(
        dataplex_api.SnakeToCamelDict(raw_spec),
        messages.GoogleCloudDataplexV1DataProfileSpec,
    )

  def _SelectedFields(comma_separated_names):
    """Wraps a comma-separated list of names in a SelectedFields message."""
    return messages.GoogleCloudDataplexV1DataProfileSpecSelectedFields(
        fieldNames=[
            name.strip() for name in comma_separated_names.split(',')
        ]
    )

  excluded = (
      _SelectedFields(args.exclude_field_names)
      if args.IsKnownAndSpecified('exclude_field_names')
      else None
  )
  included = (
      _SelectedFields(args.include_field_names)
      if args.IsKnownAndSpecified('include_field_names')
      else None
  )
  percent = (
      float(args.sampling_percent)
      if args.IsKnownAndSpecified('sampling_percent')
      else None
  )
  filter_expression = (
      args.row_filter if args.IsKnownAndSpecified('row_filter') else None
  )

  profile_spec = messages.GoogleCloudDataplexV1DataProfileSpec(
      excludeFields=excluded,
      includeFields=included,
      samplingPercent=percent,
      rowFilter=filter_expression,
  )

  # NOTE(review): the flattened source does not show nesting; the export
  # handling is kept on the flag-built path only - confirm against upstream.
  if args.IsKnownAndSpecified('export_results_table'):
    bigquery_export = messages.GoogleCloudDataplexV1DataProfileSpecPostScanActionsBigQueryExport(
        resultsTable=args.export_results_table
    )
    profile_spec.postScanActions = (
        messages.GoogleCloudDataplexV1DataProfileSpecPostScanActions(
            bigqueryExport=bigquery_export
        )
    )
  return profile_spec
|
|
|
|
|
|
def _CopySpecifiedFlags(args, target, flag_to_field):
  """Copies the value of each specified flag onto the named field of target."""
  for flag_name, field_name in flag_to_field:
    if args.IsKnownAndSpecified(flag_name):
      setattr(target, field_name, getattr(args, flag_name))


def GenerateDataDiscoverySpec(args: parser_extensions.Namespace):
  """Builds the DataDiscoverySpec message for a data discovery scan.

  The returned spec always carries a BigQuery publishing config and a storage
  config with CSV and JSON options attached; individual fields on them are
  only populated for flags that were specified.

  Args:
    args: The parsed arguments of the command.

  Returns:
    A GoogleCloudDataplexV1DataDiscoverySpec.

  Raises:
    ValueError: If csv_header_row_count is specified but cannot be converted
      to an integer.
  """
  messages = dataplex_api.GetMessageModule()

  # BigQuery publishing config.
  publishing_type = (
      messages.GoogleCloudDataplexV1DataDiscoverySpecBigQueryPublishingConfig
  )
  publishing_config = publishing_type()
  _CopySpecifiedFlags(
      args,
      publishing_config,
      (
          ('bigquery_publishing_connection', 'connection'),
          ('bigquery_publishing_dataset_project', 'project'),
          ('bigquery_publishing_dataset_location', 'location'),
      ),
  )
  if args.IsKnownAndSpecified('bigquery_publishing_table_type'):
    # The table type flag is converted to the message's enum type.
    publishing_config.tableType = publishing_type.TableTypeValueValuesEnum(
        args.bigquery_publishing_table_type
    )

  # CSV options.
  csv_options = (
      messages.GoogleCloudDataplexV1DataDiscoverySpecStorageConfigCsvOptions()
  )
  _CopySpecifiedFlags(
      args,
      csv_options,
      (
          ('csv_delimiter', 'delimiter'),
          ('csv_quote_character', 'quote'),
          ('csv_encoding', 'encoding'),
          ('csv_disable_type_inference', 'typeInferenceDisabled'),
      ),
  )
  if args.IsKnownAndSpecified('csv_header_row_count'):
    # Only the conversion sits in the try block, and the original error is
    # chained so the root cause stays visible in tracebacks.
    try:
      header_rows = int(args.csv_header_row_count)
    except ValueError as err:
      raise ValueError(
          'csv_header_row_count must be an integer, but got'
          f' {args.csv_header_row_count}'
      ) from err
    csv_options.headerRows = header_rows

  # JSON options.
  json_options = (
      messages.GoogleCloudDataplexV1DataDiscoverySpecStorageConfigJsonOptions()
  )
  _CopySpecifiedFlags(
      args,
      json_options,
      (
          ('json_encoding', 'encoding'),
          ('json_disable_type_inference', 'typeInferenceDisabled'),
      ),
  )

  # Storage config, assembled from the parts built above.
  storage_config = (
      messages.GoogleCloudDataplexV1DataDiscoverySpecStorageConfig()
  )
  _CopySpecifiedFlags(
      args,
      storage_config,
      (
          ('storage_include_patterns', 'includePatterns'),
          ('storage_exclude_patterns', 'excludePatterns'),
      ),
  )
  storage_config.csvOptions = csv_options
  storage_config.jsonOptions = json_options

  discovery_spec = messages.GoogleCloudDataplexV1DataDiscoverySpec()
  discovery_spec.bigqueryPublishingConfig = publishing_config
  discovery_spec.storageConfig = storage_config
  return discovery_spec
|
|
|
|
|
|
def GenerateDataDocumentationSpec():
  """Returns an empty GoogleCloudDataplexV1DataDocumentationSpec message."""
  messages = dataplex_api.GetMessageModule()
  documentation_spec = messages.GoogleCloudDataplexV1DataDocumentationSpec()
  return documentation_spec
|
|
|
|
|
|
def GenerateSchedule(args):
  """Builds the TriggerSchedule message from the schedule argument.

  Args:
    args: The parsed arguments of the command; args.schedule is used as the
      cron value.

  Returns:
    A GoogleCloudDataplexV1TriggerSchedule with `cron` set to args.schedule.
  """
  messages = dataplex_api.GetMessageModule()
  return messages.GoogleCloudDataplexV1TriggerSchedule(cron=args.schedule)
|
|
|
|
|
|
def GenerateTrigger(
    args: parser_extensions.Namespace,
) -> dataplex_v1_messages.GoogleCloudDataplexV1Trigger:
  """Builds the Trigger message for a data scan from the arguments.

  Exactly one trigger kind is set, chosen in this order: a schedule when
  `schedule` is specified, a one-time trigger when `one_time` is specified and
  true, and an on-demand trigger otherwise.

  Args:
    args: The arguments of the command.

  Returns:
    The trigger for the data scan.

  Raises:
    ValueError: If ttl_after_scan_completion is specified without a one-time
      scan being requested.
  """
  messages = dataplex_api.GetMessageModule()
  trigger = messages.GoogleCloudDataplexV1Trigger()

  one_time_requested = args.IsKnownAndSpecified('one_time') and args.one_time
  ttl_requested = args.IsKnownAndSpecified('ttl_after_scan_completion')
  if ttl_requested and not one_time_requested:
    raise ValueError(
        'ttl_after_scan_completion is only supported for one-time scans.'
        ' Provide --one-time to enable one-time scan.'
    )

  if args.IsKnownAndSpecified('schedule'):
    trigger.schedule = GenerateSchedule(args)
    return trigger

  if not one_time_requested:
    trigger.onDemand = messages.GoogleCloudDataplexV1TriggerOnDemand()
    return trigger

  one_time = messages.GoogleCloudDataplexV1TriggerOneTime()
  if ttl_requested:
    one_time.ttlAfterScanCompletion = args.ttl_after_scan_completion
  trigger.oneTime = one_time
  return trigger
|
|
|
|
|
|
def GenerateExecutionSpecForCreateRequest(args):
  """Builds the DataScanExecutionSpec message for a create request.

  Args:
    args: The parsed arguments of the command.

  Returns:
    A GoogleCloudDataplexV1DataScanExecutionSpec whose trigger comes from
    GenerateTrigger and whose `field` is args.field when that attribute
    exists, else args.incremental_field when that exists, else None.
  """
  messages = dataplex_api.GetMessageModule()
  if hasattr(args, 'field'):
    incremental_field = args.field
  else:
    incremental_field = getattr(args, 'incremental_field', None)
  return messages.GoogleCloudDataplexV1DataScanExecutionSpec(
      field=incremental_field,
      trigger=GenerateTrigger(args),
  )
|
|
|
|
|
|
def GenerateExecutionSpecForUpdateRequest(args):
  """Builds the DataScanExecutionSpec message for an update request.

  Unlike the create variant, only the trigger is populated.

  Args:
    args: The parsed arguments of the command.

  Returns:
    A GoogleCloudDataplexV1DataScanExecutionSpec carrying the trigger built
    by GenerateTrigger.
  """
  messages = dataplex_api.GetMessageModule()
  scan_trigger = GenerateTrigger(args)
  return messages.GoogleCloudDataplexV1DataScanExecutionSpec(
      trigger=scan_trigger
  )
|
|
|
|
|
|
def GenerateUpdateMask(args: parser_extensions.Namespace):
  """Builds the update mask for a Datascan update request.

  Args:
    args: The parsed arguments of the command.

  Returns:
    A list of field-path strings, one for each flag that was specified. Paths
    for the flags checked with IsSpecified come first, followed by paths for
    the flags checked with IsKnownAndSpecified, each in table order.
  """
  # Flags checked with args.IsSpecified.
  flags_checked_with_is_specified = (
      ('description', 'description'),
      ('display_name', 'displayName'),
      ('labels', 'labels'),
      ('on_demand', 'executionSpec.trigger.onDemand'),
      ('schedule', 'executionSpec.trigger.schedule'),
  )
  # Flags checked with args.IsKnownAndSpecified.
  flags_checked_with_is_known = (
      ('data_profile_spec_file', 'dataProfileSpec'),
      ('data_quality_spec_file', 'dataQualitySpec'),
      ('row_filter', 'dataProfileSpec.rowFilter'),
      ('sampling_percent', 'dataProfileSpec.samplingPercent'),
      ('include_field_names', 'dataProfileSpec.includeFields'),
      ('exclude_field_names', 'dataProfileSpec.excludeFields'),
      (
          'bigquery_publishing_table_type',
          'dataDiscoverySpec.bigqueryPublishingConfig.tableType',
      ),
      (
          'bigquery_publishing_connection',
          'dataDiscoverySpec.bigqueryPublishingConfig.connection',
      ),
      (
          'bigquery_publishing_dataset_location',
          'dataDiscoverySpec.bigqueryPublishingConfig.location',
      ),
      (
          'bigquery_publishing_dataset_project',
          'dataDiscoverySpec.bigqueryPublishingConfig.project',
      ),
      (
          'storage_include_patterns',
          'dataDiscoverySpec.storageConfig.includePatterns',
      ),
      (
          'storage_exclude_patterns',
          'dataDiscoverySpec.storageConfig.excludePatterns',
      ),
      ('csv_delimiter', 'dataDiscoverySpec.storageConfig.csvOptions.delimiter'),
      (
          'csv_header_row_count',
          'dataDiscoverySpec.storageConfig.csvOptions.headerRows',
      ),
      (
          'csv_quote_character',
          'dataDiscoverySpec.storageConfig.csvOptions.quote',
      ),
      ('csv_encoding', 'dataDiscoverySpec.storageConfig.csvOptions.encoding'),
      (
          'csv_disable_type_inference',
          'dataDiscoverySpec.storageConfig.csvOptions.typeInferenceDisabled',
      ),
      ('json_encoding', 'dataDiscoverySpec.storageConfig.jsonOptions.encoding'),
      (
          'json_disable_type_inference',
          'dataDiscoverySpec.storageConfig.jsonOptions.typeInferenceDisabled',
      ),
  )

  mask_paths = [
      path
      for flag, path in flags_checked_with_is_specified
      if args.IsSpecified(flag)
  ]
  mask_paths.extend(
      path
      for flag, path in flags_checked_with_is_known
      if args.IsKnownAndSpecified(flag)
  )
  return mask_paths
|
|
|
|
|
|
def GenerateDatascanForCreateRequest(args: parser_extensions.Namespace):
  """Builds the DataScan message sent in a create request.

  The common fields (description, display name, labels, data source and
  execution spec) are always populated; the scan-specific spec is chosen by
  args.scan_type ('PROFILE', 'QUALITY', 'DISCOVERY' or 'DOCUMENTATION'). Any
  other scan type leaves every scan-specific spec unset.

  Args:
    args: The parsed arguments of the command.

  Returns:
    The populated GoogleCloudDataplexV1DataScan.

  Raises:
    ValueError: If a spec file for the other scan kind is supplied, or if a
      'QUALITY' scan is requested without data_quality_spec_file.
  """
  messages = dataplex_api.GetMessageModule()
  datascan = messages.GoogleCloudDataplexV1DataScan(
      description=args.description,
      displayName=args.display_name,
      labels=dataplex_api.CreateLabels(
          messages.GoogleCloudDataplexV1DataScan, args
      ),
      data=GenerateData(args),
      executionSpec=GenerateExecutionSpecForCreateRequest(args),
  )

  scan_type = args.scan_type
  if scan_type == 'PROFILE':
    if args.IsKnownAndSpecified('data_quality_spec_file'):
      raise ValueError(
          'Data quality spec file specified for data profile scan.'
      )
    datascan.dataProfileSpec = GenerateDataProfileSpec(args)
  elif scan_type == 'QUALITY':
    if args.IsKnownAndSpecified('data_profile_spec_file'):
      raise ValueError(
          'Data profile spec file specified for data quality scan.'
      )
    if not args.IsSpecified('data_quality_spec_file'):
      raise ValueError(
          'If scan-type="QUALITY" , data-quality-spec-file is a required'
          ' argument.'
      )
    datascan.dataQualitySpec = GenerateDataQualitySpec(args)
  elif scan_type == 'DISCOVERY':
    datascan.dataDiscoverySpec = GenerateDataDiscoverySpec(args)
  elif scan_type == 'DOCUMENTATION':
    datascan.dataDocumentationSpec = GenerateDataDocumentationSpec()
  return datascan
|
|
|
|
|
|
def GenerateDatascanForUpdateRequest(args: parser_extensions.Namespace):
  """Builds the DataScan message sent in an update request.

  The description, display name, labels and execution spec are always
  populated; the scan-specific spec is chosen by args.scan_type ('PROFILE',
  'QUALITY', 'DISCOVERY' or 'DOCUMENTATION'). For a 'QUALITY' scan without
  data_quality_spec_file an empty quality spec is attached.

  Args:
    args: The parsed arguments of the command.

  Returns:
    The populated GoogleCloudDataplexV1DataScan.

  Raises:
    ValueError: If a spec file for the other scan kind is supplied.
  """
  messages = dataplex_api.GetMessageModule()
  datascan = messages.GoogleCloudDataplexV1DataScan(
      description=args.description,
      displayName=args.display_name,
      labels=dataplex_api.CreateLabels(
          messages.GoogleCloudDataplexV1DataScan, args
      ),
      executionSpec=GenerateExecutionSpecForUpdateRequest(args),
  )

  scan_type = args.scan_type
  if scan_type == 'PROFILE':
    if args.IsKnownAndSpecified('data_quality_spec_file'):
      raise ValueError(
          'Data quality spec file specified for data profile scan.'
      )
    datascan.dataProfileSpec = GenerateDataProfileSpec(args)
  elif scan_type == 'QUALITY':
    if args.IsKnownAndSpecified('data_profile_spec_file'):
      raise ValueError(
          'Data profile spec file specified for data quality scan.'
      )
    if args.IsSpecified('data_quality_spec_file'):
      datascan.dataQualitySpec = GenerateDataQualitySpec(args)
    else:
      datascan.dataQualitySpec = (
          messages.GoogleCloudDataplexV1DataQualitySpec()
      )
  elif scan_type == 'DISCOVERY':
    datascan.dataDiscoverySpec = GenerateDataDiscoverySpec(args)
  elif scan_type == 'DOCUMENTATION':
    datascan.dataDocumentationSpec = GenerateDataDocumentationSpec()
  return datascan
|
|
|
|
|
|
def SetIamPolicy(datascan_ref, policy):
  """Sends a SetIamPolicy request for the given datascan.

  Args:
    datascan_ref: Reference to the datascan; its RelativeName() is used as
      the request's resource.
    policy: The IAM policy message to set.

  Returns:
    The result of the dataScans SetIamPolicy call.
  """
  messages = dataplex_api.GetMessageModule()
  request_type = messages.DataplexProjectsLocationsDataScansSetIamPolicyRequest
  request = request_type(
      resource=datascan_ref.RelativeName(),
      googleIamV1SetIamPolicyRequest=messages.GoogleIamV1SetIamPolicyRequest(
          policy=policy
      ),
  )
  data_scans_service = (
      dataplex_api.GetClientInstance().projects_locations_dataScans
  )
  return data_scans_service.SetIamPolicy(request)
|
|
|
|
|
|
def SetIamPolicyFromFile(datascan_ref, policy_file):
  """Sets the datascan's IAM policy from a policy file.

  Args:
    datascan_ref: Reference to the datascan whose policy is being set.
    policy_file: The policy file handed to iam_util.ParsePolicyFile.

  Returns:
    The result of SetIamPolicy for the parsed policy.
  """
  policy_type = dataplex_api.GetMessageModule().GoogleIamV1Policy
  parsed_policy = iam_util.ParsePolicyFile(policy_file, policy_type)
  return SetIamPolicy(datascan_ref, parsed_policy)
|
|
|
|
|
|
def WaitForOperation(operation):
  """Waits for the given google.longrunning.Operation to complete.

  Args:
    operation: The operation to wait on.

  Returns:
    Whatever dataplex_api.WaitForOperation returns when called with the
    dataScans service.
  """
  data_scans_service = (
      dataplex_api.GetClientInstance().projects_locations_dataScans
  )
  return dataplex_api.WaitForOperation(operation, data_scans_service)
|