# -*- coding: utf-8 -*- # # Copyright 2018 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Argument processors for DLP surface arguments.""" from __future__ import absolute_import from __future__ import division from __future__ import unicode_literals import os from googlecloudsdk.api_lib.util import apis from googlecloudsdk.calliope import arg_parsers from googlecloudsdk.calliope import base from googlecloudsdk.command_lib.util.apis import arg_utils from googlecloudsdk.core import exceptions from googlecloudsdk.core import log from googlecloudsdk.core import properties from googlecloudsdk.core import resources from googlecloudsdk.core.util import files from googlecloudsdk.core.util import times import six _DLP_API = 'dlp' _DLP_API_VERSION = 'v2' _COLOR_SPEC_ERROR_SUFFIX = """\ Colors should be specified as a string of `r,g,b` float values in the interval [0,1] representing the amount of red, green, and blue in the color, respectively. For example, `black = 0,0,0`, `red = 1.0,0,0`, `white = 1.0,1.0,1.0`, and so on. """ VALID_IMAGE_EXTENSIONS = { 'n_a': 'IMAGE', '.png': 'IMAGE_PNG', '.jpeg': 'IMAGE_JPEG', '.jpg': 'IMAGE_JPEG', '.svg': 'IMAGE_SVG', '.bmp': 'IMAGE_BMP' } class ImageFileError(exceptions.Error): """Error if an image file is improperly formatted or missing.""" class RedactColorError(exceptions.Error): """Error if a redact color is improperly formatted or missing.""" class BigQueryTableNameError(exceptions.Error): """Error if a BigQuery table name is improperly formatted.""" # Misc/Helper Functions def _GetMessageClass(msg_type_name): """Get API message object for given message type name.""" msg = apis.GetMessagesModule(_DLP_API, _DLP_API_VERSION) return getattr(msg, msg_type_name) def _ValidateExtension(extension): """Validate image file name extension.""" if not extension: # No extension is ok. return True # But if provided it should match expected values return extension and (extension in VALID_IMAGE_EXTENSIONS) def _ConvertColorValue(color): """Convert color value(color) to a float or raise value error.""" j = float(color) if j > 1.0 or j < 0.0: raise ValueError('Invalid Color.') return j def _ValidateAndParseColors(value): """Validates that values has proper format and returns parsed components.""" values = value.split(',') if len(values) == 3: try: return [_ConvertColorValue(x) for x in values] except ValueError: raise RedactColorError('Invalid Color Value(s) [{}]. ' '{}'.format(value, _COLOR_SPEC_ERROR_SUFFIX)) else: raise RedactColorError('You must specify exactly 3 color values [{}]. ' '{}'.format(value, _COLOR_SPEC_ERROR_SUFFIX)) def _ValidateAndParseInputTableName(table_name): """Validate BigQuery table name format and returned parsed components.""" name_parts = table_name.split('.') if len(name_parts) != 3: raise BigQueryTableNameError( 'Invalid BigQuery table name [{}]. BigQuery tables are uniquely ' 'identified by their project_id, dataset_id, and table_id in the ' 'format `..`.'.format(table_name)) return name_parts def _ValidateAndParseOutputTableName(table_name): """Validate BigQuery table name format and returned parsed components.""" # Table id is optional for output tables. name_parts = table_name.split('.') if len(name_parts) != 3 and len(name_parts) != 2: raise BigQueryTableNameError( 'Invalid BigQuery output table name [{}]. BigQuery tables are uniquely ' 'identified by their project_id, dataset_id, and or table_id in the ' 'format `..` or ' '`..'.format(table_name)) return name_parts # Types def InfoType(value): # Defines elment type for infoTypes collection on request """Return GooglePrivacyDlpV2InfoType message for a parsed value.""" infotype = _GetMessageClass('GooglePrivacyDlpV2InfoType') return infotype(name=value) def PrivacyField(value): """Create a GooglePrivacyDlpV2FieldId for value.""" field_id = _GetMessageClass('GooglePrivacyDlpV2FieldId') return field_id(name=value) def BigQueryInputOptions(table_name): """Convert BigQuery table name into GooglePrivacyDlpV2BigQueryOptions. Creates BigQuery input options for a job trigger. Args: table_name: str, BigQuery table name to create input options from in the form `..`. Returns: GooglePrivacyDlpV2BigQueryOptions, input options for job trigger. Raises: BigQueryTableNameError if table_name is improperly formatted. """ project_id, data_set_id, table_id = _ValidateAndParseInputTableName( table_name) big_query_options = _GetMessageClass('GooglePrivacyDlpV2BigQueryOptions') big_query_table = _GetMessageClass('GooglePrivacyDlpV2BigQueryTable') table = big_query_table( datasetId=data_set_id, projectId=project_id, tableId=table_id) options = big_query_options(tableReference=table) return options def GcsInputOptions(url): """Return CloudStorageOptions for given GCS url.""" cloud_storage_options = _GetMessageClass( 'GooglePrivacyDlpV2CloudStorageOptions') file_set = _GetMessageClass('GooglePrivacyDlpV2FileSet') return cloud_storage_options(fileSet=file_set(url=url)) def DatastoreInputOptions(table_name): """Convert Datastore arg value into GooglePrivacyDlpV2DatastoreOptions. Creates Datastore input options for a job trigger from datastore table name. Args: table_name: str, Datastore table name to create options from in the form `namespace:example-kind` or simply `example-kind`. Returns: GooglePrivacyDlpV2Action, output action for job trigger. """ data_store_options = _GetMessageClass('GooglePrivacyDlpV2DatastoreOptions') kind = _GetMessageClass('GooglePrivacyDlpV2KindExpression') partition_id = _GetMessageClass('GooglePrivacyDlpV2PartitionId') project = properties.VALUES.core.project.Get(required=True) split_name = table_name.split(':') if len(split_name) == 2: namespace, table = split_name kind_exp = kind(name=table) partition = partition_id(namespaceId=namespace, projectId=project) else: kind_exp = kind(name=table_name) partition = partition_id(projectId=project) return data_store_options(kind=kind_exp, partitionId=partition) def PubSubTopicAction(topic): """Return PubSub DlpV2Action for given PubSub topic.""" action_msg = _GetMessageClass('GooglePrivacyDlpV2Action') pubsub_action = _GetMessageClass('GooglePrivacyDlpV2PublishToPubSub') return action_msg(pubSub=pubsub_action(topic=topic)) def BigQueryTableAction(table_name): """Convert BigQuery formatted table name into GooglePrivacyDlpV2Action. Creates a BigQuery output action for a job trigger. Args: table_name: str, BigQuery table name to create action from in the form `..` or `.`. Returns: GooglePrivacyDlpV2Action, output action for job trigger. Raises: BigQueryTableNameError if table_name is improperly formatted. """ name_parts = _ValidateAndParseOutputTableName(table_name) project_id = name_parts[0] data_set_id = name_parts[1] table_id = '' if len(name_parts) == 3: table_id = name_parts[2] action_msg = _GetMessageClass('GooglePrivacyDlpV2Action') save_findings_config = _GetMessageClass('GooglePrivacyDlpV2SaveFindings') output_config = _GetMessageClass('GooglePrivacyDlpV2OutputStorageConfig') big_query_table = _GetMessageClass('GooglePrivacyDlpV2BigQueryTable') table = big_query_table( datasetId=data_set_id, projectId=project_id, tableId=table_id) return action_msg( saveFindings=save_findings_config( outputConfig=output_config(table=table))) def DlpTimeStamp(value): return times.FormatDateTime(value, tzinfo=times.UTC) # Request Hooks def SetRequestParent(ref, args, request): """Set parent value for a DlpXXXRequest.""" del ref parent = args.project or properties.VALUES.core.project.Get(required=True) project_ref = resources.REGISTRY.Parse(parent, collection='dlp.projects') request.parent = project_ref.RelativeName() return request def SetCancelRequestHook(ref, args, request): """Set cancel message on DlpProjectsDlpJobsCancelRequest.""" del ref del args cancel_request = _GetMessageClass('GooglePrivacyDlpV2CancelDlpJobRequest') request.googlePrivacyDlpV2CancelDlpJobRequest = cancel_request() return request def UpdateDataStoreOptions(ref, args, request): """Update partitionId.projectId on DatastoreOptions.""" del ref data_store_options = ( request.googlePrivacyDlpV2CreateJobTriggerRequest.jobTrigger.inspectJob .storageConfig.datastoreOptions) if args.project and data_store_options: data_store_options.partitionId.projectId = args.project return request # Required since bigQueryOptions are create by a separate flag so # identifyingFields can't be set until before requests is sent. def UpdateIdentifyingFields(ref, args, request): """Update bigQueryOptions.identifyingFields with parsed fields.""" del ref big_query_options = ( request.googlePrivacyDlpV2CreateDlpJobRequest.inspectJob.storageConfig .bigQueryOptions) if big_query_options and args.identifying_fields: field_id = _GetMessageClass('GooglePrivacyDlpV2FieldId') big_query_options.identifyingFields = [ field_id(name=field) for field in args.identifying_fields ] return request def SetOrderByFromSortBy(ref, args, request): """Set orderBy attribute on message from common --sort-by flag.""" del ref if args.sort_by: order_by_fields = [] for field in args.sort_by: # ~field ==> field desc if field.startswith('~'): field = field.lstrip('~') + ' desc' else: field += ' asc' order_by_fields.append(field) request.orderBy = ','.join(order_by_fields) return request # Argument Processors def ExtractBqTableFromInputConfig(value): """Extracts and returns BigQueryTable from parsed BigQueryOptions message.""" return value.tableReference def GetReplaceTextTransform(value): replace_config = _GetMessageClass('GooglePrivacyDlpV2ReplaceValueConfig') value_holder = _GetMessageClass('GooglePrivacyDlpV2Value') return replace_config(newValue=value_holder(stringValue=value)) def GetInfoTypeTransform(value): del value infotype_config = _GetMessageClass( 'GooglePrivacyDlpV2ReplaceWithInfoTypeConfig') return infotype_config() def GetRedactTransform(value): del value redact_config = _GetMessageClass('GooglePrivacyDlpV2RedactConfig') return redact_config() def GetImageFromFile(path): """Builds a GooglePrivacyDlpV2ByteContentItem message from a path. Will attempt to set message.type from file extension (if present). Args: path: the path arg given to the command. Raises: ImageFileError: if the image path does not exist and does not have a valid extension. Returns: GooglePrivacyDlpV2ByteContentItem: an message containing image data for the API on the image to analyze. """ extension = os.path.splitext(path)[-1].lower() extension = extension or 'n_a' image_item = _GetMessageClass('GooglePrivacyDlpV2ByteContentItem') if os.path.isfile(path) and _ValidateExtension(extension): enum_val = arg_utils.ChoiceToEnum(VALID_IMAGE_EXTENSIONS[extension], image_item.TypeValueValuesEnum) image = image_item(data=files.ReadBinaryFileContents(path), type=enum_val) else: raise ImageFileError( 'The image path [{}] does not exist or has an invalid extension. ' 'Must be one of [jpg, jpeg, png, bmp or svg]. ' 'Please double-check your input and try again.'.format(path)) return image def GetRedactColorFromString(color_string): """Convert color_string into GooglePrivacyDlpV2Color. Creates a GooglePrivacyDlpV2Color message from input string to use for image redaction. Args: color_string: str, string representing red, green and blue color saturation percentages as float values between 0.0 and 1.0. For example, `black = 0,0,0`, `red = 1.0,0,0`, `white = 1.0,1.0,1.0` etc. Returns: GooglePrivacyDlpV2Color, color message. Raises: RedactColorError if color_string is improperly formatted. """ color_msg = _GetMessageClass('GooglePrivacyDlpV2Color') red, green, blue = _ValidateAndParseColors(color_string) return color_msg(red=red, blue=blue, green=green) def GetJobScheduleDurationString(value): """Return API required format for duration specified by value.""" return '{}s'.format(six.text_type(value)) # Additional Arguments Hook def GetIdentifyingFieldsArg(): """Capture identifying fields for BigQuery table.""" help_text = ('Comma separated list of references to field names uniquely ' 'identifying rows within the BigQuery table. Nested fields in ' 'the format `person.birthdate.year` are allowed.') return [ base.Argument( '--identifying-fields', metavar='IDENTIFYING_FIELDS', type=arg_parsers.ArgList(), help=help_text) ] def _PossiblyWriteRedactedResponseToOutputFile(value, parsed_args): """Helper function for writing redacted contents to an output file.""" if not parsed_args.output_file: return with files.BinaryFileWriter(parsed_args.output_file) as outfile: outfile.write(value) log.status.Print('The redacted contents can be viewed in [{}]'.format( parsed_args.output_file)) def PossiblyWriteRedactedTextResponseToOutputFile(response, parsed_args): """Write the contents of the redacted text file to parsed_args.output_file.""" _PossiblyWriteRedactedResponseToOutputFile(response.item.value, parsed_args) return response def PossiblyWriteRedactedImageResponseToOutputFile(response, parsed_args): """Write the redacted image to parsed_args.output_file.""" _PossiblyWriteRedactedResponseToOutputFile(response.redactedImage, parsed_args) return response def AddOutputFileFlag(): """Add --output-file to a redact command.""" return [ base.Argument( '--output-file', help='Path to the file to write redacted contents to.') ]