#!/usr/bin/env python
"""The BigQuery CLI dataset client library."""

import datetime
from typing import Any, Dict, List, NamedTuple, Optional

from googleapiclient import discovery

from clients import utils as bq_client_utils
from frontend import utils as frontend_utils
from utils import bq_error
from utils import bq_id_utils
from utils import bq_processor_utils

EXTERNAL_CATALOG_DATASET_OPTIONS_FIELD_NAME = 'externalCatalogDatasetOptions'


def _FormatDatasetResource(reference) -> str:
  """Returns the 'projects/<projectId>/datasets/<datasetId>' resource name."""
  return 'projects/%s/datasets/%s' % (
      reference.projectId,
      reference.datasetId,
  )


def GetDataset(apiclient: discovery.Resource, reference, dataset_view=None):
  """Gets a dataset, optionally passing a dataset_view parameter.

  Args:
    apiclient: The apiclient used to make the request.
    reference: The reference of the dataset to get. It is expanded with dict()
      into the keyword arguments of the get request.
    dataset_view: Optional value sent as the 'datasetView' request parameter.
      The parameter is omitted when this is None.

  Returns:
    The result of executing the datasets().get() request.
  """
  request = dict(reference)
  request['accessPolicyVersion'] = (
      bq_client_utils.MAX_SUPPORTED_IAM_POLICY_VERSION
  )
  if dataset_view is not None:
    request['datasetView'] = dataset_view
  return apiclient.datasets().get(**request).execute()


def ListDatasets(
    apiclient: discovery.Resource,
    id_fallbacks: NamedTuple(
        'IDS',
        [
            ('project_id', Optional[str]),
        ],
    ),
    reference: Optional[bq_id_utils.ApiClientHelper.ProjectReference] = None,
    max_results: Optional[int] = None,
    page_token: Optional[str] = None,
    list_all: Optional[bool] = None,
    filter_expression: Optional[str] = None,
):
  """Lists the datasets associated with this reference.

  This is a thin wrapper around ListDatasetsWithTokenAndUnreachable that
  discards the page token and the unreachable entries.

  Args:
    apiclient: The apiclient used to make the request.
    id_fallbacks: Fallback ids used to normalize the project reference.
    reference: Optional ProjectReference whose datasets are listed.
    max_results: Optional maximum number of datasets to return.
    page_token: Optional token of the page to start listing from.
    list_all: If not None, sent as the 'all' request parameter.
    filter_expression: Optional filter forwarded to the list request.

  Returns:
    The list stored under the 'datasets' key of the combined response.
  """
  return ListDatasetsWithTokenAndUnreachable(
      apiclient,
      id_fallbacks,
      reference,
      max_results,
      page_token,
      list_all,
      filter_expression,
  )['datasets']


def ListDatasetsWithTokenAndUnreachable(
    apiclient: discovery.Resource,
    id_fallbacks: NamedTuple(
        'IDS',
        [
            ('project_id', Optional[str]),
        ],
    ),
    reference: Optional[bq_id_utils.ApiClientHelper.ProjectReference] = None,
    max_results: Optional[int] = None,
    page_token: Optional[str] = None,
    list_all: Optional[bool] = None,
    filter_expression: Optional[str] = None,
):
  """Lists the datasets associated with this reference.

  Args:
    apiclient: The apiclient used to make the request.
    id_fallbacks: Fallback ids used to normalize the project reference.
    reference: Optional ProjectReference whose datasets are listed.
    max_results: Optional maximum number of datasets to return. When set,
      additional pages are fetched until that many datasets were collected or
      no further page token is returned.
    page_token: Optional token of the page to start listing from.
    list_all: If not None, sent as the 'all' request parameter.
    filter_expression: Optional filter forwarded to the list request.

  Returns:
    A dict with the key 'datasets' (list of datasets) and, only when non-empty,
    'token' (the last page token seen) and 'unreachable' (sorted, de-duplicated
    list of the unreachable entries reported across all fetched pages).
  """
  reference = bq_client_utils.NormalizeProjectReference(
      id_fallbacks=id_fallbacks, reference=reference
  )
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.ProjectReference,
      method='ListDatasets',
  )
  request = bq_processor_utils.PrepareListRequest(
      reference, max_results, page_token, filter_expression
  )
  if list_all is not None:
    request['all'] = list_all
  result = apiclient.datasets().list(**request).execute()
  dataset_list = result.get('datasets', [])
  unreachable_set = set(result.get('unreachable', []))
  next_token = result.get('nextPageToken', None)
  if max_results is not None:
    # A single page may hold fewer entries than requested, so keep asking for
    # the remainder while the server reports more pages.
    while 'nextPageToken' in result and len(dataset_list) < max_results:
      request['maxResults'] = max_results - len(dataset_list)
      request['pageToken'] = result['nextPageToken']
      result = apiclient.datasets().list(**request).execute()
      dataset_list.extend(result.get('datasets', []))
      unreachable_set.update(result.get('unreachable', []))
      next_token = result.get('nextPageToken', None)
  response = dict(datasets=dataset_list)
  if next_token:
    response['token'] = next_token
  if unreachable_set:
    # Sorted so that the output does not depend on set iteration order.
    response['unreachable'] = sorted(unreachable_set)
  return response


def GetDatasetIAMPolicy(apiclient, reference):
  """Gets IAM policy for the given dataset resource.

  Arguments:
    apiclient: the apiclient used to make the request.
    reference: the DatasetReference for the dataset resource.

  Returns:
    The IAM policy attached to the given dataset resource.

  Raises:
    BigqueryTypeError: if reference is not a DatasetReference.
  """
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='GetDatasetIAMPolicy',
  )
  body = {
      'options': {
          'requestedPolicyVersion': (
              bq_client_utils.MAX_SUPPORTED_IAM_POLICY_VERSION
          )
      }
  }
  return (
      apiclient.datasets()
      .getIamPolicy(
          resource=_FormatDatasetResource(reference),
          body=body,
      )
      .execute()
  )


def SetDatasetIAMPolicy(apiclient: discovery.Resource, reference, policy):
  """Sets IAM policy for the given dataset resource.

  Arguments:
    apiclient: the apiclient used to make the request.
    reference: the DatasetReference for the dataset resource.
    policy: The policy string in JSON format.

  Returns:
    The updated IAM policy attached to the given dataset resource.

  Raises:
    BigqueryTypeError: if reference is not a DatasetReference.
  """
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='SetDatasetIAMPolicy',
  )
  request = {'policy': policy}
  return (
      apiclient.datasets()
      .setIamPolicy(body=request, resource=_FormatDatasetResource(reference))
      .execute()
  )


def DatasetExists(
    apiclient: discovery.Resource,
    reference: 'bq_id_utils.ApiClientHelper.DatasetReference',
) -> bool:
  """Returns true if a dataset exists.

  Args:
    apiclient: The apiclient used to make the request.
    reference: The DatasetReference to look up.

  Returns:
    True if the get request succeeds, False if it fails with
    BigqueryNotFoundError. Any other error propagates.
  """
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='DatasetExists',
  )
  try:
    apiclient.datasets().get(**dict(reference)).execute()
    return True
  except bq_error.BigqueryNotFoundError:
    return False


def GetDatasetRegion(
    apiclient: discovery.Resource,
    reference: 'bq_id_utils.ApiClientHelper.DatasetReference',
) -> Optional[str]:
  """Returns the region of a dataset as a string.

  Args:
    apiclient: The apiclient used to make the request.
    reference: The DatasetReference to look up.

  Returns:
    The 'location' field of the fetched dataset, or None if the get request
    fails with BigqueryNotFoundError.
  """
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='GetDatasetRegion',
  )
  try:
    return apiclient.datasets().get(**dict(reference)).execute()['location']
  except bq_error.BigqueryNotFoundError:
    return None


# TODO(b/191712821): add tags modification here. For the Preview Tags are not
# modifiable using BigQuery UI/Cli, only using ResourceManager.
def CreateDataset(
    apiclient: discovery.Resource,
    reference,
    ignore_existing=False,
    description=None,
    display_name=None,
    acl=None,
    default_table_expiration_ms=None,
    default_partition_expiration_ms=None,
    data_location=None,
    labels=None,
    default_kms_key=None,
    source_dataset_reference=None,
    external_source=None,
    connection_id=None,
    external_catalog_dataset_options=None,
    max_time_travel_hours=None,
    storage_billing_model=None,
    resource_tags=None,
):
  """Create a dataset corresponding to DatasetReference.

  Args:
    apiclient: The apiclient used to make the request.
    reference: The DatasetReference to create.
    ignore_existing: (boolean, default False) If False, raise an exception if
      the dataset already exists.
    description: An optional dataset description.
    display_name: An optional friendly name for the dataset.
    acl: An optional ACL for the dataset, as a list of dicts.
    default_table_expiration_ms: Default expiration time to apply to new tables
      in this dataset.
    default_partition_expiration_ms: Default partition expiration time to apply
      to new partitioned tables in this dataset.
    data_location: Location where the data in this dataset should be stored.
      Sent as the 'location' field; it is not validated by this function.
    labels: An optional dict of labels.
    default_kms_key: An optional KMS key that will apply to all newly created
      tables in the dataset, if no explicit key is supplied in the creating
      request.
    source_dataset_reference: An optional ApiClientHelper.DatasetReference that
      will be the source of this linked dataset.
    external_source: External source that backs this dataset.
    connection_id: Connection used for accessing the external_source. Only
      sent when external_source is set.
    external_catalog_dataset_options: An optional JSON string or file path
      containing the external catalog dataset options to create.
    max_time_travel_hours: Optional. Define the max time travel in hours. The
      value can be from 48 to 168 hours (2 to 7 days). The default value is 168
      hours if this is not set.
    storage_billing_model: Optional. Sets the storage billing model for the
      dataset.
    resource_tags: An optional dict of tags to attach to the dataset.

  Raises:
    BigqueryTypeError: If reference is not an ApiClientHelper.DatasetReference
      or if source_dataset_reference is provided but is not an
      bq_id_utils.ApiClientHelper.DatasetReference.
    BigqueryDuplicateError: if reference exists and ignore_existing
       is False.
  """
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='CreateDataset',
  )

  body = bq_processor_utils.ConstructObjectInfo(reference)
  if display_name is not None:
    body['friendlyName'] = display_name
  if description is not None:
    body['description'] = description
  if acl is not None:
    body['access'] = acl
  if default_table_expiration_ms is not None:
    body['defaultTableExpirationMs'] = default_table_expiration_ms
  if default_partition_expiration_ms is not None:
    body['defaultPartitionExpirationMs'] = default_partition_expiration_ms
  if default_kms_key is not None:
    body['defaultEncryptionConfiguration'] = {'kmsKeyName': default_kms_key}
  if data_location is not None:
    body['location'] = data_location
  if labels:
    # Copy so that the request body never aliases the caller's dict.
    body['labels'] = dict(labels)
  if source_dataset_reference is not None:
    bq_id_utils.typecheck(
        source_dataset_reference,
        bq_id_utils.ApiClientHelper.DatasetReference,
        method='CreateDataset',
    )
    body['linkedDatasetSource'] = {
        'sourceDataset': bq_processor_utils.ConstructObjectInfo(
            source_dataset_reference
        )['datasetReference']
    }
  # externalDatasetReference can only be specified in case of externals
  # datasets. This option cannot be used in case of regular dataset or linked
  # datasets.
  # So we only set this if an external_source is specified.
  if external_source:
    body['externalDatasetReference'] = {
        'externalSource': external_source,
        'connection': connection_id,
    }
  if external_catalog_dataset_options is not None:
    body[EXTERNAL_CATALOG_DATASET_OPTIONS_FIELD_NAME] = frontend_utils.GetJson(
        external_catalog_dataset_options
    )
  if max_time_travel_hours is not None:
    body['maxTimeTravelHours'] = max_time_travel_hours
  if storage_billing_model is not None:
    body['storageBillingModel'] = storage_billing_model
  if resource_tags is not None:
    body['resourceTags'] = resource_tags

  args = dict(reference.GetProjectReference())
  args['accessPolicyVersion'] = bq_client_utils.MAX_SUPPORTED_IAM_POLICY_VERSION
  try:
    apiclient.datasets().insert(body=body, **args).execute()
  except bq_error.BigqueryDuplicateError:
    if not ignore_existing:
      raise


def UpdateDataset(
    apiclient: discovery.Resource,
    reference: 'bq_id_utils.ApiClientHelper.DatasetReference',
    description: Optional[str] = None,
    display_name: Optional[str] = None,
    acl=None,
    default_table_expiration_ms=None,
    default_partition_expiration_ms=None,
    labels_to_set=None,
    label_keys_to_remove=None,
    etag=None,
    default_kms_key=None,
    max_time_travel_hours=None,
    storage_billing_model=None,
    tags_to_attach: Optional[Dict[str, str]] = None,
    tags_to_remove: Optional[List[str]] = None,
    clear_all_tags: Optional[bool] = False,
    external_catalog_dataset_options: Optional[str] = None,
    update_mode: Optional[bq_client_utils.UpdateMode] = None,
):
  """Updates a dataset.

  The current dataset is fetched first, the requested changes are merged into
  it and the result is sent back as a patch request.

  Args:
    apiclient: The apiclient used to make the request.
    reference: The DatasetReference to update.
    description: An optional dataset description.
    display_name: An optional friendly name for the dataset.
    acl: An optional ACL for the dataset, as a list of dicts.
    default_table_expiration_ms: Optional number of milliseconds for the
      default expiration duration for new tables created in this dataset.
    default_partition_expiration_ms: Optional number of milliseconds for the
      default partition expiration duration for new partitioned tables created
      in this dataset. A value of 0 is sent as None.
    labels_to_set: An optional dict of labels to set on this dataset.
    label_keys_to_remove: An optional list of label keys to remove from this
      dataset.
    etag: If set, checks that etag in the existing dataset matches.
    default_kms_key: An optional KMS key that will apply to all newly created
      tables in the dataset, if no explicit key is supplied in the creating
      request.
    max_time_travel_hours: Optional. Define the max time travel in hours. The
      value can be from 48 to 168 hours (2 to 7 days). The default value is 168
      hours if this is not set.
    storage_billing_model: Optional. Sets the storage billing model for the
      dataset.
    tags_to_attach: An optional dict of tags to attach to the dataset
    tags_to_remove: An optional list of tag keys to remove from the dataset.
      Ignored when clear_all_tags is set and the dataset has resource tags.
    clear_all_tags: If set, clears all the tags attached to the dataset
    external_catalog_dataset_options: An optional JSON string or file path
      containing the external catalog dataset options to update.
    update_mode: An optional flag indicating which datasets fields to update,
      either metadata fields only, ACL fields only, or both metadata and ACL
      fields.

  Raises:
    BigqueryTypeError: If reference is not a DatasetReference.
  """
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='UpdateDataset',
  )

  # Get the existing dataset and associated ETag.
  dataset = _ExecuteGetDatasetRequest(apiclient, reference, etag)

  # Merge in the changes.
  if display_name is not None:
    dataset['friendlyName'] = display_name
  if description is not None:
    dataset['description'] = description
  if acl is not None:
    dataset['access'] = acl
  if default_table_expiration_ms is not None:
    dataset['defaultTableExpirationMs'] = default_table_expiration_ms
  if default_partition_expiration_ms is not None:
    if default_partition_expiration_ms == 0:
      dataset['defaultPartitionExpirationMs'] = None
    else:
      dataset['defaultPartitionExpirationMs'] = default_partition_expiration_ms
  if default_kms_key is not None:
    dataset['defaultEncryptionConfiguration'] = {'kmsKeyName': default_kms_key}

  if 'labels' not in dataset:
    dataset['labels'] = {}
  if labels_to_set:
    dataset['labels'].update(labels_to_set)
  if label_keys_to_remove:
    # A label is removed by sending its key with a None value.
    dataset['labels'].update(dict.fromkeys(label_keys_to_remove))

  if max_time_travel_hours is not None:
    dataset['maxTimeTravelHours'] = max_time_travel_hours
  if storage_billing_model is not None:
    dataset['storageBillingModel'] = storage_billing_model

  # Tags mapped to None are removed; tags_to_attach is applied last so that it
  # wins over a removal of the same key.
  if clear_all_tags and 'resourceTags' in dataset:
    resource_tags = dict.fromkeys(dataset['resourceTags'])
  else:
    resource_tags = dict.fromkeys(tags_to_remove or [])
  if tags_to_attach:
    resource_tags.update(tags_to_attach)
  # resourceTags is used to add a new tag binding, update value of existing
  # tag and also to remove a tag binding
  dataset['resourceTags'] = resource_tags

  if external_catalog_dataset_options is not None:
    current_options = dataset.setdefault(
        EXTERNAL_CATALOG_DATASET_OPTIONS_FIELD_NAME, {}
    )
    dataset[EXTERNAL_CATALOG_DATASET_OPTIONS_FIELD_NAME] = (
        frontend_utils.UpdateExternalCatalogDatasetOptions(
            current_options, external_catalog_dataset_options
        )
    )

  _ExecutePatchDatasetRequest(
      apiclient,
      reference,
      dataset,
      etag,
      update_mode,
  )


def _ExecuteGetDatasetRequest(
    apiclient: discovery.Resource,
    reference,
    etag: Optional[str] = None,
):
  """Executes request to get dataset.

  Args:
    apiclient: the apiclient used to make the request.
    reference: the DatasetReference to get.
    etag: if set, checks that etag in the existing dataset matches.

  Returns:
    The result of executing the request, if it succeeds.
  """
  args = dict(reference)
  args['accessPolicyVersion'] = bq_client_utils.MAX_SUPPORTED_IAM_POLICY_VERSION
  get_request = apiclient.datasets().get(**args)
  if etag:
    get_request.headers['If-Match'] = etag
  return get_request.execute()


def _ExecutePatchDatasetRequest(
    apiclient: discovery.Resource,
    reference,
    dataset,
    etag: Optional[str] = None,
    update_mode: Optional[bq_client_utils.UpdateMode] = None,
):
  """Executes request to patch dataset.

  Args:
    apiclient: the apiclient used to make the request.
    reference: the DatasetReference to patch.
    dataset: the body of request
    etag: if set, checks that etag in the existing dataset matches. When not
      set, the 'etag' entry of dataset is used instead, if there is one.
    update_mode: a flag indicating which datasets fields to update.
  """
  parameters = dict(reference)
  parameters['accessPolicyVersion'] = (
      bq_client_utils.MAX_SUPPORTED_IAM_POLICY_VERSION
  )
  if update_mode is not None:
    parameters['updateMode'] = update_mode.value

  request = apiclient.datasets().patch(body=dataset, **parameters)

  # Perform a conditional update to protect against concurrent
  # modifications to this dataset.  By placing the ETag returned in
  # the get operation into the If-Match header, the API server will
  # make sure the dataset hasn't changed.  If there is a conflicting
  # change, this update will fail with a "Precondition failed"
  # error.
  # .get() rather than indexing: a body without an 'etag' entry must not fail
  # with KeyError before the request is even sent.
  if_match = etag or dataset.get('etag')
  if if_match:
    request.headers['If-Match'] = if_match
  request.execute()


def DeleteDataset(
    apiclient: discovery.Resource,
    reference: bq_id_utils.ApiClientHelper.DatasetReference,
    ignore_not_found: bool = False,
    delete_contents: Optional[bool] = None,
) -> None:
  """Deletes DatasetReference reference.

  Args:
    apiclient: the api client to make the request with.
    reference: the DatasetReference to delete.
    ignore_not_found: Whether to ignore "not found" errors.
    delete_contents: [Boolean] Whether to delete the contents of non-empty
      datasets. If None, the 'deleteContents' parameter is not sent at all and
      the server default applies.

  Raises:
    BigqueryTypeError: if reference is not a DatasetReference.
    bq_error.BigqueryNotFoundError: if reference does not exist and
      ignore_not_found is False.
  """
  bq_id_utils.typecheck(
      reference,
      bq_id_utils.ApiClientHelper.DatasetReference,
      method='DeleteDataset',
  )

  args = dict(reference)

  if delete_contents is not None:
    args['deleteContents'] = delete_contents
  try:
    apiclient.datasets().delete(**args).execute()
  except bq_error.BigqueryNotFoundError:
    if not ignore_not_found:
      raise


def UndeleteDataset(
    apiclient: discovery.Resource,
    dataset_reference: bq_id_utils.ApiClientHelper.DatasetReference,
    timestamp: Optional[datetime.datetime] = None,
) -> Dict[str, Any]:
  """Undeletes a dataset.

  Args:
    apiclient: The api client to make the request with.
    dataset_reference: DatasetReference of the dataset to be undeleted.
    timestamp: Optional timestamp for which dataset version is to be
      undeleted. When set, it is sent as 'deletionTime' in the request body,
      RFC 3339 formatted with any '+00:00' offset suffix removed.

  Returns:
    The result of executing the undelete request (presumably the restored
    dataset resource; confirm against the API reference).

  Raises:
    BigqueryDuplicateError: when the dataset to be undeleted already exists.
  """
  args = dict(dataset_reference)
  if timestamp:
    args['body'] = {
        'deletionTime': frontend_utils.FormatRfc3339(timestamp).replace(
            '+00:00', ''
        )
    }
  return apiclient.datasets().undelete(**args).execute()