# -*- coding: utf-8 -*- # # Copyright 2018 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Declarative Hooks for BQ surface arguments.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from __future__ import unicode_literals import datetime import time import uuid from apitools.base.py import encoding from apitools.base.py import exceptions as apitools_exceptions from googlecloudsdk.api_lib.bq import util as api_util from googlecloudsdk.core import exceptions from googlecloudsdk.core import log from googlecloudsdk.core import yaml from googlecloudsdk.core.util import times _BQ_JOB_ID_PREFIX = 'gcloud-bq' _INVALID_SCHEMA_UPDATE_MESSAGE = """\ Invalid Schema change. Only adding additional columns or relaxing `required` fields on existing columns is supported: For more details on BigQuery schemas see: https://cloud.google.com/bigquery/docs/schemas.""" class PermissionsFileError(exceptions.Error): """Error if a permissions file is improperly formatted.""" class SchemaFileError(exceptions.Error): """Error if a schema file is improperly formatted.""" class TableDataFileError(exceptions.Error): """Error if a tabel data file is improperly formatted.""" class SchemaUpdateError(exceptions.Error): """Error if a schema update fails.""" # Argument Processors def JobListProjectionProcessor(show_config): projection_enum = api_util.GetApiMessage( 'BigqueryJobsListRequest' ).ProjectionValueValuesEnum if show_config: return projection_enum.full return projection_enum.minimal def JobIdProcessor(job_id_arg): if job_id_arg: return job_id_arg job_id = '{}-{}'.format(_BQ_JOB_ID_PREFIX, uuid.uuid4().hex) return job_id def PermissionsFileProcessor(input_file): """Builds a bigquery AccessValueListEntry array from input file. Expects YAML or JSON formatted file. Args: input_file: input file contents from argparse namespace. Raises: PermissionsFileError: if the file contents are not a valid JSON or YAML file. Returns: [AccessValueListEntry]: Array of AccessValueListEntry messages specifying access permissions defined input file. """ access_value_msg = api_util.GetApiMessage('Dataset').AccessValueListEntry try: permissions_array = [] permissions_from_file = yaml.load(input_file[0]) permissions_from_file = permissions_from_file.get('access', None) if not permissions_from_file or not isinstance(permissions_from_file, list): raise PermissionsFileError( 'Error parsing permissions file: no access list defined in file' ) for access_yaml in permissions_from_file: permission = encoding.PyValueToMessage(access_value_msg, access_yaml) if _ValidatePermission(permission): permissions_array.append(permission) else: raise PermissionsFileError(( 'Error parsing permissions file:' ' invalid permission definition' ' [{}]'.format(permission) )) return sorted(permissions_array, key=lambda x: x.role) except yaml.YAMLParseError as ype: raise PermissionsFileError( 'Error parsing permissions file [{}]'.format(ype) ) def _ValidatePermission(permission_obj): is_valid = ( permission_obj.domain or permission_obj.userByEmail or permission_obj.specialGroup or permission_obj.view or permission_obj.groupByEmail ) and permission_obj.role return is_valid def ProcessTableExpiration(expire_duration): """Convert commandline duration into epoch timeoffset (in ms).""" t = times.GetDateTimePlusDuration(datetime.datetime.now(), expire_duration) return int(time.mktime(t.timetuple())) * 1000 def BqTableSchemaFileProcessor(file_arg): """Convert Input JSON file into TableSchema message.""" table_schema_type = api_util.GetApiMessage('TableSchema') schema_field_type = api_util.GetApiMessage('TableFieldSchema') try: schema_json = yaml.load(file_arg) schema_json = schema_json.get('schema', None) if not schema_json or not isinstance(schema_json, list): raise SchemaFileError( 'Error parsing schema file: no schema field list defined in file' ) all_fields = [] for field in schema_json: new_field = schema_field_type( name=field['name'], type=field['type'], mode=field.get('mode', 'NULLABLE'), ) all_fields.append(new_field) return table_schema_type(fields=sorted(all_fields, key=lambda x: x.name)) except yaml.YAMLParseError as ype: raise SchemaFileError('Error parsing schema file [{}]'.format(ype)) except (AttributeError, KeyError) as e: raise SchemaFileError( 'Error parsing schema file, invalid field definition [{}]'.format(e) ) def BqTableDataFileProcessor(file_arg): """Convert Input JSON file into TableSchema message.""" data_insert_request_type = api_util.GetApiMessage('TableDataInsertAllRequest') insert_row_type = data_insert_request_type.RowsValueListEntry data_row_type = api_util.GetApiMessage('JsonObject') try: data_json = yaml.load(file_arg) if not data_json or not isinstance(data_json, list): raise TableDataFileError( 'Error parsing data file: no data records defined in file' ) rows = [] for row in data_json: rows.append( insert_row_type(json=encoding.DictToMessage(row, data_row_type)) ) return rows except yaml.YAMLParseError as ype: raise TableDataFileError('Error parsing data file [{}]'.format(ype)) # Request modifiers def SetViewParameters(ref, args, request): """Ensure that view parameters are set properly tables create request.""" del ref # unused if not args.view: request.table.view = None return request def ProcessDatasetOverwrite(ref, args, request): """Process the overwrite flag on datasets create.""" del ref dataset_id = request.dataset.datasetReference.datasetId project_id = request.projectId if args.overwrite: if _DatasetExists(dataset_id, project_id): _TryDeleteDataset(dataset_id, project_id) return request def ProcessTableOverwrite(ref, args, request): """Process the overwrite flag on tables create.""" dataset_id = ref.datasetId table_id = ref.Name() project_id = ref.projectId if args.overwrite: if _TableExists(dataset_id, table_id, project_id): _TryDeleteTable(dataset_id, table_id, project_id) return request def ProcessSchemaUpdate(ref, args, request): """Process schema Updates (additions/mode changes) for the request. Retrieves the current table schema for ref and attempts to merge in the schema provided in the requests. This is necessary since the API backend does not handle PATCH semantics for schema updates (e.g. process the deltas) so we must always send the fully updated schema in the requests. Args: ref: resource reference for table. args: argparse namespace for requests request: BigqueryTablesPatchRequest object Returns: request: updated requests Raises: SchemaUpdateError: table not found or invalid an schema change. """ table = request.table relaxed_columns = args.relax_columns if not table.schema and not relaxed_columns: # if not updating schema, return request # then just return. original_schema = _TryGetCurrentSchema( ref.Parent().Name(), ref.Name(), ref.projectId ) new_schema_columns = table.schema updated_fields = _GetUpdatedSchema( original_schema, new_schema_columns, relaxed_columns ) table_schema_type = api_util.GetApiMessage('TableSchema') request.table.schema = table_schema_type(fields=updated_fields) return request def _TryGetCurrentSchema(dataset_id, table_id, project_id): """Try to retrieve the current BigQuery TableSchema for a table_ref. Tries to fetch the schema of an existing table. Raises SchemaUpdateError if table is not found or if table is not of type 'TABLE'. Args: dataset_id: the dataset id containing the table. table_id: the table id for the table. project_id: the project id containing the dataset and table. Returns: schema: the table schema object Raises: SchemaUpdateError: table not found or invalid table type. """ client = api_util.GetApiClient() service = client.tables get_request_type = api_util.GetApiMessage('BigqueryTablesGetRequest') get_request = get_request_type( datasetId=dataset_id, tableId=table_id, projectId=project_id ) try: table = service.Get(get_request) if not table or table.type != 'TABLE': raise SchemaUpdateError( 'Schema modifications only supported ' 'on TABLE objects received [{}]'.format(table) ) except apitools_exceptions.HttpNotFoundError: raise SchemaUpdateError( 'Table with id [{}:{}:{}] not found.'.format( project_id, dataset_id, table_id ) ) return table.schema def _GetUpdatedSchema(original_schema, new_columns=None, relaxed_columns=None): """Update original_schema by adding and/or relaxing mode on columns.""" orig_field_map = ( {f.name: f for f in original_schema.fields} if original_schema else {} ) if relaxed_columns: orig_field_map = _GetRelaxedCols(relaxed_columns, orig_field_map) if new_columns: orig_field_map = _AddNewColsToSchema(new_columns.fields, orig_field_map) return sorted(orig_field_map.values(), key=lambda x: x.name) def _GetRelaxedCols(relaxed_columns, orig_schema_map): """Change mode to `NULLABLE` for columns in existing schema. Tries set mode on existing columns in orig_schema_map to `NULLABLE`. Raises SchemaUpdateError if column is not found in orig_schema_map. Args: relaxed_columns: [string] the list columns to relax required mode for. orig_schema_map: {string: TableSchemaField} map of field name to TableSchemaField objects representing the original schema. Returns: updated_schema_map: {string: TableSchemaField} map of field name to TableSchemaField objects representing the updated schema. Raises: SchemaUpdateError: if any of the fields to be relaxed are not in the original schema. """ updated_schema_map = orig_schema_map.copy() for col in relaxed_columns: if col in orig_schema_map: updated_schema_map[col].mode = 'NULLABLE' else: raise SchemaUpdateError(_INVALID_SCHEMA_UPDATE_MESSAGE) return updated_schema_map def _AddNewColsToSchema(new_fields, orig_schema_map): """Add new columns to an existing schema. Tries add new fields to an existing schema. Raises SchemaUpdateError if column already exists in the orig_schema_map. Args: new_fields: [TableSchemaField] the list columns add to schema. orig_schema_map: {string: TableSchemaField} map of field name to TableSchemaField objects representing the original schema. Returns: updated_schema_map: {string: TableSchemaField} map of field name to TableSchemaField objects representing the updated schema. Raises: SchemaUpdateError: if any of the fields to be relaxed are not in the original schema. """ updated_schema_map = orig_schema_map.copy() for new_field in new_fields: if new_field.name in orig_schema_map: raise SchemaUpdateError(_INVALID_SCHEMA_UPDATE_MESSAGE) updated_schema_map[new_field.name] = new_field return updated_schema_map def _DatasetExists(dataset_id, project_id): """Validate a resource of the given type with specified ID already exists.""" client = api_util.GetApiClient() service = client.datasets get_request_type = api_util.GetApiMessage('BigqueryDatasetsGetRequest') get_request = get_request_type(datasetId=dataset_id, projectId=project_id) try: service.Get(get_request) return True except apitools_exceptions.HttpNotFoundError: log.info( 'Dataset with id [{}:{}] not found.'.format(project_id, dataset_id) ) return False def _TableExists(dataset_id, table_id, project_id): """Validate a resource of the given type with specified ID already exists.""" client = api_util.GetApiClient() service = client.tables get_request_type = api_util.GetApiMessage('BigqueryTablesGetRequest') get_request = get_request_type( datasetId=dataset_id, tableId=table_id, projectId=project_id ) try: service.Get(get_request) return True except apitools_exceptions.HttpNotFoundError: log.info( 'Table with id [{}:{}:{}] not found.'.format( project_id, dataset_id, table_id ) ) return False def _TryDeleteDataset(dataset_id, project_id): """Try to delete a dataset, propagating error on failure.""" client = api_util.GetApiClient() service = client.datasets delete_request_type = api_util.GetApiMessage('BigqueryDatasetsDeleteRequest') delete_request = delete_request_type( datasetId=dataset_id, projectId=project_id, deleteContents=True ) service.Delete(delete_request) log.info('Deleted dataset [{}:{}]'.format(project_id, dataset_id)) def _TryDeleteTable(dataset_id, table_id, project_id): """Try to delete a dataset, propagating error on failure.""" client = api_util.GetApiClient() service = client.tables delete_request_type = api_util.GetApiMessage('BigqueryTablesDeleteRequest') delete_request = delete_request_type( datasetId=dataset_id, tableId=table_id, projectId=project_id ) service.Delete(delete_request) log.info('Deleted table [{}:{}:{}]'.format(project_id, dataset_id, table_id))