#!/usr/bin/env python """The BigQuery CLI make command.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import time from typing import Optional from absl import app from absl import flags import bq_flags import bq_utils from clients import client_connection from clients import client_data_transfer from clients import client_dataset from clients import client_reservation from clients import client_row_access_policy from clients import client_table from clients import utils as bq_client_utils from frontend import bigquery_command from frontend import bq_cached_client from frontend import flags as frontend_flags from frontend import utils as frontend_utils from frontend import utils_data_transfer from frontend import utils_flags from frontend import utils_formatting from frontend import utils_id as frontend_id_utils from utils import bq_error from utils import bq_id_utils from utils import bq_processor_utils # These aren't relevant for user-facing docstrings: # pylint: disable=g-doc-return-or-yield # pylint: disable=g-doc-args class Make(bigquery_command.BigqueryCmd): """Creates a dataset or a table.""" usage = """mk [-d] OR mk [-t] []""" def __init__(self, name: str, fv: flags.FlagValues): super(Make, self).__init__(name, fv) flags.DEFINE_boolean( 'force', False, 'Bypass existence checks and ignore errors that the object already ' 'exists.', short_name='f', flag_values=fv, ) flags.DEFINE_boolean( 'dataset', False, 'Create dataset with this name.', short_name='d', flag_values=fv, ) flags.DEFINE_boolean( 'table', False, 'Create table with this name.', short_name='t', flag_values=fv, ) flags.DEFINE_boolean( 'transfer_config', None, 'Create transfer config.', flag_values=fv ) flags.DEFINE_string( 'target_dataset', '', 'Target dataset for the created transfer configuration.', flag_values=fv, ) flags.DEFINE_string( 'display_name', '', 'Display name for the created transfer configuration or connection.', 
flag_values=fv, ) flags.DEFINE_string( 'data_source', '', 'Data source for the created transfer configuration.', flag_values=fv, ) flags.DEFINE_integer( 'refresh_window_days', 0, 'Refresh window days for the created transfer configuration.', flag_values=fv, ) flags.DEFINE_string( 'params', None, 'Parameters for the created transfer configuration in JSON format. ' 'For example: --params=\'{"param":"param_value"}\'', short_name='p', flag_values=fv, ) flags.DEFINE_string( 'service_account_name', '', 'Service account used as the credential on the transfer config.', flag_values=fv, ) flags.DEFINE_string( 'notification_pubsub_topic', '', 'Pub/Sub topic used for notification after transfer run completed or ' 'failed.', flag_values=fv, ) flags.DEFINE_bool( 'transfer_run', False, 'Creates transfer runs for a time range.', flag_values=fv, ) flags.DEFINE_string( 'start_time', None, 'Start time of the range of transfer runs. ' 'The format for the time stamp is RFC3339 UTC "Zulu". ' 'Example: 2019-01-20T06:50:0Z. Read more: ' 'https://developers.google.com/protocol-buffers/docs/' 'reference/google.protobuf#google.protobuf.Timestamp ', flag_values=fv, ) flags.DEFINE_string( 'end_time', None, 'Exclusive end time of the range of transfer runs. ' 'The format for the time stamp is RFC3339 UTC "Zulu". ' 'Example: 2019-01-20T06:50:0Z. Read more: ' 'https://developers.google.com/protocol-buffers/docs/' 'reference/google.protobuf#google.protobuf.Timestamp ', flag_values=fv, ) flags.DEFINE_string( 'run_time', None, 'Specific time for a transfer run. ' 'The format for the time stamp is RFC3339 UTC "Zulu". ' 'Example: 2019-01-20T06:50:0Z. Read more: ' 'https://developers.google.com/protocol-buffers/docs/' 'reference/google.protobuf#google.protobuf.Timestamp ', flag_values=fv, ) flags.DEFINE_string( 'schedule_start_time', None, 'Time to start scheduling transfer runs for the given ' 'transfer configuration. 
If empty, the default value for ' 'the start time will be used to start runs immediately.' 'The format for the time stamp is RFC3339 UTC "Zulu". ' 'Read more: ' 'https://developers.google.com/protocol-buffers/docs/' 'reference/google.protobuf#google.protobuf.Timestamp', flag_values=fv, ) flags.DEFINE_string( 'schedule_end_time', None, 'Time to stop scheduling transfer runs for the given ' 'transfer configuration. If empty, the default value for ' 'the end time will be used to schedule runs indefinitely.' 'The format for the time stamp is RFC3339 UTC "Zulu". ' 'Read more: ' 'https://developers.google.com/protocol-buffers/docs/' 'reference/google.protobuf#google.protobuf.Timestamp', flag_values=fv, ) flags.DEFINE_string( 'schedule', None, 'Data transfer schedule. If the data source does not support a custom schedule, this should be empty. If empty, the default value for the data source will be used. The specified times are in UTC. Examples of valid format: 1st,3rd monday of month 15:30, every wed,fri of jan,jun 13:15, and first sunday of quarter 00:00. See more explanation about the format here: https://cloud.google.com/appengine/docs/flexible/python/scheduling-jobs-with-cron-yaml#the_schedule_format', # pylint: disable=line-too-long flag_values=fv, ) flags.DEFINE_bool( 'no_auto_scheduling', False, 'Disables automatic scheduling of data transfer runs for this ' 'configuration.', flag_values=fv, ) self.event_driven_schedule_flag = ( frontend_flags.define_event_driven_schedule(flag_values=fv) ) flags.DEFINE_string( 'schema', '', 'Either a filename or a comma-separated list of fields in the form ' 'name[:type].', flag_values=fv, ) flags.DEFINE_string( 'description', None, 'Description of the dataset, table or connection.', flag_values=fv, ) flags.DEFINE_string( 'data_location', None, 'Geographic location of the data. 
See details at ' 'https://cloud.google.com/bigquery/docs/locations.', flag_values=fv, ) flags.DEFINE_integer( 'expiration', None, 'Expiration time, in seconds from now, of a table.', flag_values=fv, ) flags.DEFINE_integer( 'default_table_expiration', None, 'Default lifetime, in seconds, for newly-created tables in a ' 'dataset. Newly-created tables will have an expiration time of ' 'the current time plus this value.', flag_values=fv, ) flags.DEFINE_integer( 'default_partition_expiration', None, 'Default partition expiration for all partitioned tables in the dataset' ', in seconds. The storage in a partition will have an expiration time ' 'of its partition time plus this value. If this property is set, ' 'partitioned tables created in the dataset will use this instead of ' 'default_table_expiration.', flag_values=fv, ) flags.DEFINE_string( 'external_table_definition', None, 'Specifies a table definition to use to create an external table. ' 'The value can be either an inline table definition or a path to a ' 'file containing a JSON table definition. ' 'The format of inline definition is "schema@format=uri@connection", ' 'where "schema@", "format=", and "connection" are optional and "format"' 'has the default value of "CSV" if not specified. ', flag_values=fv, ) flags.DEFINE_string( 'connection_id', None, 'The connection specifying the credentials to be used to read external storage. The connection_id can have the form ".." or "projects//locations//connections/". ', # pylint: disable=line-too-long flag_values=fv, ) flags.DEFINE_string( 'storage_uri', None, ( 'The fully qualified location prefix of the external folder where' ' the table data of a BigLake table is stored. The "*" wildcard' ' character is not allowed. The URI should be in the format' ' "gs://bucket/path_to_table/". ' ), flag_values=fv, ) flags.DEFINE_enum( 'file_format', None, ['PARQUET'], 'The file format the table data of a BigLake table is stored in. 
', flag_values=fv, ) flags.DEFINE_enum( 'table_format', None, ['ICEBERG'], ( 'The table format the metadata only snapshots of a BigLake table' ' are stored in. ' ), flag_values=fv, ) flags.DEFINE_string( 'view', '', 'Create view with this SQL query.', flag_values=fv ) flags.DEFINE_multi_string( 'view_udf_resource', None, 'The URI or local filesystem path of a code file to load and ' 'evaluate immediately as a User-Defined Function resource used ' 'by the view.', flag_values=fv, ) flags.DEFINE_string( 'materialized_view', None, '[Experimental] Create materialized view with this Standard SQL query.', flag_values=fv, ) flags.DEFINE_boolean( 'enable_refresh', None, 'Whether to enable automatic refresh of the materialized views when ' 'the base table is updated. If not set, the default is true.', flag_values=fv, ) flags.DEFINE_integer( 'refresh_interval_ms', None, 'Milliseconds that must have elapsed since last refresh until the ' 'materialized view can be automatically refreshed again. If not set, ' 'the default value is "1800000" (30 minutes).', flag_values=fv, ) flags.DEFINE_string( 'max_staleness', None, 'INTERVAL value that determines the maximum staleness allowed when querying a materialized view or an external table. By default no staleness is allowed. Examples of valid max_staleness values: 1 day: "0-0 1 0:0:0"; 1 hour: "0-0 0 1:0:0".See more explanation about the INTERVAL values: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type', # pylint: disable=line-too-long flag_values=fv, ) flags.DEFINE_boolean( 'use_legacy_sql', None, ( 'The choice of using Legacy SQL for the query is optional. If not' ' specified, the server will automatically determine the dialect' ' based on query information, such as dialect prefixes. If no' ' prefixes are found, it will default to Legacy SQL.' ), flag_values=fv, ) flags.DEFINE_string( 'time_partitioning_type', None, 'Enables time based partitioning on the table and set the type. 
The ' 'default value is DAY, which will generate one partition per day. ' 'Other supported values are HOUR, MONTH, and YEAR.', flag_values=fv, ) flags.DEFINE_integer( 'time_partitioning_expiration', None, 'Enables time based partitioning on the table and sets the number of ' 'seconds for which to keep the storage for the partitions in the table.' ' The storage in a partition will have an expiration time of its ' 'partition time plus this value.', flag_values=fv, ) flags.DEFINE_string( 'time_partitioning_field', None, 'Enables time based partitioning on the table and the table will be ' 'partitioned based on the value of this field. If time based ' 'partitioning is enabled without this value, the table will be ' 'partitioned based on the loading time.', flag_values=fv, ) flags.DEFINE_string( 'destination_kms_key', None, 'Cloud KMS key for encryption of the destination table data.', flag_values=fv, ) flags.DEFINE_multi_string( 'label', None, 'A label to set on the table or dataset. The format is "key:value"', flag_values=fv, ) flags.DEFINE_boolean( 'require_partition_filter', None, 'Whether to require partition filter for queries over this table. ' 'Only apply to partitioned table.', flag_values=fv, ) flags.DEFINE_string( 'clustering_fields', None, 'Comma-separated list of field names that specifies the columns on ' 'which a table is clustered.', flag_values=fv, ) flags.DEFINE_string( 'range_partitioning', None, 'Enables range partitioning on the table. The format should be ' '"field,start,end,interval". The table will be partitioned based on the' ' value of the field. Field must be a top-level, non-repeated INT64 ' 'field. 
Start, end, and interval are INT64 values defining the ranges.', flag_values=fv, ) flags.DEFINE_boolean( 'row_access_policy', None, 'Creates a row access policy.', flag_values=fv, ) flags.DEFINE_string( 'policy_id', None, 'Policy ID used to create row access policy for.', flag_values=fv, ) flags.DEFINE_string( 'target_table', None, 'The table to create the row access policy for.', flag_values=fv, ) flags.DEFINE_string( 'grantees', None, 'Comma separated list of iam_member users or groups that specifies the' ' initial members that the row-level access policy should be created' ' with.', flag_values=fv, ) flags.DEFINE_string( 'filter_predicate', None, 'A SQL boolean expression that represents the rows defined by this row' ' access policy.', flag_values=fv, ) flags.DEFINE_boolean( 'reservation', None, 'Creates a reservation described by this identifier. ', flag_values=fv, ) flags.DEFINE_boolean( 'capacity_commitment', None, 'Creates a capacity commitment. You do not need to specify a capacity ' 'commitment id, this will be assigned automatically.', flag_values=fv, ) flags.DEFINE_enum( 'plan', None, [ 'FLEX', 'MONTHLY', 'ANNUAL', 'THREE_YEAR', ], 'Commitment plan for this capacity commitment. Plans cannot be deleted ' 'before their commitment period is over. Options include:' '\n FLEX\n MONTHLY\n ANNUAL\n THREE_YEAR', flag_values=fv, ) flags.DEFINE_enum( 'renewal_plan', None, [ 'FLEX', 'MONTHLY', 'ANNUAL', 'THREE_YEAR', 'NONE', ], 'The plan this capacity commitment is converted to after committed ' 'period ends. 
Options include:' '\n NONE' '\n FLEX' '\n MONTHLY' '\n ANNUAL' '\n THREE_YEAR' '\n NONE can only be used in conjunction with --edition, ' '\n while FLEX and MONTHLY cannot be used together with --edition.', flag_values=fv, ) flags.DEFINE_integer( 'slots', 0, 'The number of baseline slots associated with the reservation.', flag_values=fv, ) flags.DEFINE_boolean( 'multi_region_auxiliary', False, 'If true, capacity commitment or reservation is placed in the ' 'organization' 's auxiliary region which is designated for disaster ' 'recovery purposes. Applicable only for US and EU locations. Available ' 'only for allow-listed projects.', flag_values=fv, ) flags.DEFINE_boolean( 'use_idle_slots', True, 'If true, any query running in this reservation will be able to use ' 'idle slots from other reservations. Used if ignore_idle_slots is ' 'None.', flag_values=fv, ) flags.DEFINE_boolean( 'ignore_idle_slots', None, 'If false, any query running in this reservation will be able to use ' 'idle slots from other reservations.', flag_values=fv, ) flags.DEFINE_integer( 'max_concurrency', None, 'Deprecated, please use target_job_concurrency instead.', flag_values=fv, ) flags.DEFINE_integer( 'concurrency', None, 'Deprecated, please use target_job_concurrency instead.', flag_values=fv, ) flags.DEFINE_integer( 'target_job_concurrency', None, 'Sets a soft upper bound on the number of jobs that can run ' 'concurrently in the reservation. Default value is 0 which means that ' 'concurrency target will be automatically computed by the system.', flag_values=fv, ) flags.DEFINE_integer( 'autoscale_max_slots', None, 'Number of slots to be scaled when needed. Autoscale will be enabled ' 'when setting this.', flag_values=fv, ) flags.DEFINE_enum( 'job_type', None, [ 'QUERY', 'PIPELINE', 'ML_EXTERNAL', 'BACKGROUND', 'SPARK', 'CONTINUOUS', 'BACKGROUND_CHANGE_DATA_CAPTURE', 'BACKGROUND_COLUMN_METADATA_INDEX', 'BACKGROUND_SEARCH_INDEX_REFRESH', ], ( 'Type of jobs to create reservation assignment for.' 
' Options include:' '\n QUERY' '\n PIPELINE' '\n Note if PIPELINE reservations are' ' created, then load jobs will just use the slots from this' " reservation and slots from shared pool won't be used." '\n ML_EXTERNAL' '\n BigQuery ML jobs that use services external to BQ' ' for model training will use slots from this reservation. Slots' ' used by these jobs are not preemptible, i.e., they are not' ' available for other jobs running in the reservation. These jobs' ' will not utilize idle slots from other reservations.' '\n BACKGROUND' '\n BACKGROUND_CHANGE_DATA_CAPTURE' '\n BigQuery CDC background merge will use' ' BACKGROUND_CHANGE_DATA_CAPTURE' ' reservations to execute if created, but will fall back to using' ' BACKGROUND reservations if one does not exist.' '\n BACKGROUND_COLUMN_METADATA_INDEX' '\n BACKGROUND_SEARCH_INDEX_REFRESH' '\n SPARK' '\n BigQuery Spark jobs' ' that use services external to BQ for executing SPARK procedure' ' job. Slots used by these jobs are not preemptible, i.e., they are' ' not available for other jobs running in the reservation. These' ' jobs will not utilize idle slots from other reservations.' ), flag_values=fv, ) flags.DEFINE_enum( 'priority', None, [ 'HIGH', 'INTERACTIVE', 'BATCH', ], 'Reservation assignment default job priority. Only available for ' 'whitelisted reservations. Options include:' '\n HIGH' '\n INTERACTIVE' '\n BATCH', flag_values=fv, ) flags.DEFINE_string( 'reservation_id', None, 'Reservation ID used to create reservation assignment for. ' 'Used in conjunction with --reservation_assignment.', flag_values=fv, ) flags.DEFINE_boolean( 'reservation_assignment', None, 'Create a reservation assignment.', flag_values=fv, ) flags.DEFINE_enum( 'assignee_type', None, ['PROJECT', 'FOLDER', 'ORGANIZATION'], 'Type of assignees for the reservation assignment. 
Options include:' '\n PROJECT' '\n FOLDER' '\n ORGANIZATION' '\n Used in conjunction with --reservation_assignment.', flag_values=fv, ) flags.DEFINE_string( 'assignee_id', None, 'Project/folder/organization ID, to which the reservation is assigned. ' 'Used in conjunction with --reservation_assignment.', flag_values=fv, ) flags.DEFINE_enum( 'edition', None, ['STANDARD', 'ENTERPRISE', 'ENTERPRISE_PLUS'], 'Type of editions for the reservation or capacity commitment. ' 'Options include:' '\n STANDARD' '\n ENTERPRISE' '\n ENTERPRISE_PLUS' '\n Used in conjunction with --reservation or --capacity_commitment.' '\n STANDARD cannot be used together with --capacity_commitment.', flag_values=fv, ) flags.DEFINE_integer( 'max_slots', None, 'The overall max slots for the reservation. It needs to be specified ' 'together with --scaling_mode. It cannot be used together ' 'with --autoscale_max_slots. It is a private preview feature.', flag_values=fv, ) flags.DEFINE_enum( 'scaling_mode', None, [ 'AUTOSCALE_ONLY', 'IDLE_SLOTS_ONLY', 'ALL_SLOTS', ], 'The scaling mode for the reservation. Available only for reservations ' 'enrolled in the Max Slots Preview. It needs to be specified together ' 'with --max_slots. It cannot be used together with ' '--autoscale_max_slots. Options include:' '\n AUTOSCALE_ONLY' '\n IDLE_SLOTS_ONLY' '\n ALL_SLOTS', flag_values=fv, ) flags.DEFINE_boolean( 'reservation_group', None, 'Creates a reservation group described by this identifier. ', flag_values=fv, ) flags.DEFINE_string( 'reservation_group_name', None, 'Reservation group name used to create reservation for, it can be full' ' path or just the reservation group name. Used in conjunction with' ' --reservation.', flag_values=fv, ) flags.DEFINE_boolean( 'connection', None, 'Create a connection.', flag_values=fv ) flags.DEFINE_enum( 'connection_type', None, bq_processor_utils.CONNECTION_TYPES, 'Connection type. 
Valid values:\n ' + '\n '.join(bq_processor_utils.CONNECTION_TYPES), flag_values=fv, ) flags.DEFINE_string( 'properties', None, 'Connection properties in JSON format.', flag_values=fv, ) flags.DEFINE_string( 'connection_credential', None, 'Connection credential in JSON format.', flag_values=fv, ) flags.DEFINE_string( 'connector_configuration', None, 'Connection configuration for connector in JSON format.', flag_values=fv, ) flags.DEFINE_string( 'iam_role_id', None, '[Experimental] IAM role id.', flag_values=fv ) # TODO(b/231712311): look into cleaning up this flag now that only federated # aws connections are supported. flags.DEFINE_boolean( 'federated_aws', True, '[Experimental] Federated identity.', flag_values=fv, ) flags.DEFINE_boolean( 'federated_azure', None, '[Experimental] Federated identity for Azure.', flag_values=fv, ) flags.DEFINE_string( 'tenant_id', None, '[Experimental] Tenant id.', flag_values=fv ) flags.DEFINE_string( 'federated_app_client_id', None, '[Experimental] The application (client) id of the Active Directory ' 'application to use with Azure federated identity.', flag_values=fv, ) flags.DEFINE_string( 'default_kms_key', None, 'Defines default KMS key name for all newly objects created in the ' 'dataset. Table/Model creation request can override this default.', flag_values=fv, ) flags.DEFINE_boolean( 'use_avro_logical_types', True, 'If sourceFormat is set to "AVRO", indicates whether to enable ' 'interpreting logical types into their corresponding types ' '(ie. TIMESTAMP), instead of only using their raw types (ie. INTEGER).', flag_values=fv, ) flags.DEFINE_string( 'source_dataset', None, 'When set to a dataset reference, creates a Linked Dataset pointing to ' 'the source dataset.', flag_values=fv, ) flags.DEFINE_string( 'external_source', None, ( 'External source that backs this dataset. 
Currently only AWS Glue' ' databases are supported (format' ' aws-glue://)' ), flag_values=fv, ) flags.DEFINE_string( 'external_catalog_dataset_options', None, 'Options defining open source compatible datasets living in the' ' BigQuery catalog. Contains metadata of open source database or' ' default storage location represented by the current dataset. The' ' value can be either an inline JSON or a path to a file containing a' ' JSON definition.', flag_values=fv, ) flags.DEFINE_string( 'external_catalog_table_options', None, 'Options defining the metadata of an open source compatible table' ' living in the BigQuery catalog. Contains metadata of open source' ' table including serializer/deserializer information, table schema,' ' etc. The value can be either an inline JSON or a path to a file' ' containing a JSON definition.', flag_values=fv, ) flags.DEFINE_boolean( 'parquet_enum_as_string', False, 'Infer Parquet ENUM logical type as STRING ' '(instead of BYTES by default).', flag_values=fv, ) flags.DEFINE_boolean( 'parquet_enable_list_inference', False, frontend_utils.PARQUET_LIST_INFERENCE_DESCRIPTION, flag_values=fv, ) flags.DEFINE_integer( 'max_time_travel_hours', None, 'Optional. Define the max time travel in hours. The value can be from ' '48 to 168 hours (2 to 7 days). The default value is 168 hours if this ' 'is not set.', flag_values=fv, ) flags.DEFINE_enum( 'storage_billing_model', None, ['LOGICAL', 'PHYSICAL'], 'Optional. Sets the storage billing model for the dataset. \n' 'LOGICAL - switches to logical billing model \n' 'PHYSICAL - switches to physical billing model.', flag_values=fv, ) flags.DEFINE_enum( 'metadata_cache_mode', None, ['AUTOMATIC', 'MANUAL'], 'Enables metadata cache for an external table with a connection. ' 'Specify AUTOMATIC to automatically refresh the cached metadata. 
' 'Specify MANUAL to stop the automatic refresh.', flag_values=fv, ) flags.DEFINE_enum( 'object_metadata', None, ['DIRECTORY', 'SIMPLE'], 'Object Metadata Type used to create Object Tables. SIMPLE is the ' 'only supported value to create an Object Table containing a directory ' 'listing of objects found at the uri in external_data_definition.', flag_values=fv, ) flags.DEFINE_boolean( 'preserve_ascii_control_characters', False, 'Whether to preserve embedded Ascii Control characters in CSV External ' 'table ', flag_values=fv, ) flags.DEFINE_string( 'reference_file_schema_uri', None, 'provide a reference file with the table schema, currently ' 'enabled for the formats: AVRO, PARQUET, ORC.', flag_values=fv, ) flags.DEFINE_enum( 'file_set_spec_type', None, ['FILE_SYSTEM_MATCH', 'NEW_LINE_DELIMITED_MANIFEST'], 'Specifies how to discover files given source URIs. ' 'Options include: ' '\n FILE_SYSTEM_MATCH: expand source URIs by listing files from the ' 'underlying object store. This is the default behavior.' '\n NEW_LINE_DELIMITED_MANIFEST: indicate the source URIs provided are ' 'new line delimited manifest files, where each line contains a URI ' 'with no wild-card.', flag_values=fv, ) flags.DEFINE_string( 'kms_key_name', None, 'Cloud KMS key name used for encryption.', flag_values=fv, ) flags.DEFINE_string( 'add_tags', None, 'Tags to attach to the dataset or table. 
The format is namespaced key:value pair like "1234567/my_tag_key:my_tag_value,test-project123/environment:production" ', # pylint: disable=line-too-long flag_values=fv, ) flags.DEFINE_string( 'source', None, 'Path to file with JSON payload for an update', flag_values=fv, ) flags.DEFINE_integer( 'scheduling_policy_concurrency', None, 'Cap on target per-project concurrency of jobs running within the' ' reservation.', flag_values=fv, ) flags.DEFINE_integer( 'scheduling_policy_max_slots', None, 'Cap on target rate of slot consumption per-project within the' ' reservation.', flag_values=fv, ) self.null_marker_flag = frontend_flags.define_null_marker(flag_values=fv) self.null_markers_flag = frontend_flags.define_null_markers(flag_values=fv) self.time_zone_flag = frontend_flags.define_time_zone(flag_values=fv) self.date_format_flag = frontend_flags.define_date_format(flag_values=fv) self.datetime_format_flag = frontend_flags.define_datetime_format( flag_values=fv ) self.time_format_flag = frontend_flags.define_time_format(flag_values=fv) self.timestamp_format_flag = frontend_flags.define_timestamp_format( flag_values=fv ) self.timestamp_target_precision_flag = ( frontend_flags.define_timestamp_target_precision(flag_values=fv) ) self.source_column_match_flag = frontend_flags.define_source_column_match( flag_values=fv ) self.parquet_map_target_type_flag = ( frontend_flags.define_parquet_map_target_type(flag_values=fv) ) flags.DEFINE_boolean( 'migration_workflow', None, 'Create a migration workflow.', flag_values=fv, ) flags.DEFINE_string( 'config_file', None, 'The file containing the JSON of the migration workflow to create.', flag_values=fv, ) self._ProcessCommandRc(fv) def printSuccessMessage(self, object_name: str, reference: str): print( "%s '%s' successfully created." 
% ( object_name, reference, ) ) def RunWithArgs( self, identifier: str = '', schema: str = '' ) -> Optional[int]: # pylint: disable=g-doc-exception """Create a dataset, table, view, or transfer configuration with this name. See 'bq help mk' for more information. Examples: bq mk new_dataset bq mk new_dataset.new_table bq --dataset_id=new_dataset mk table bq mk -t new_dataset.newtable name:integer,value:string bq mk --view='select 1 as num' new_dataset.newview (--view_udf_resource=path/to/file.js) bq mk --materialized_view='select sum(x) as sum_x from dataset.table' new_dataset.newview bq mk -d --data_location=EU new_dataset bq mk -d --source_dataset=src_dataset new_dataset (requires allowlisting) bq mk -d --external_source=aws-glue:// --connection_id= new_dataset bq mk --transfer_config --target_dataset=dataset --display_name=name -p='{"param":"value"}' --data_source=source --schedule_start_time={schedule_start_time} --schedule_end_time={schedule_end_time} bq mk --transfer_run --start_time={start_time} --end_time={end_time} projects/p/locations/l/transferConfigs/c bq mk --transfer_run --run_time={run_time} projects/p/locations/l/transferConfigs/c bq mk --reservation --project_id=project --location=us reservation_name bq mk --reservation_assignment --reservation_id=project:us.dev --job_type=QUERY --assignee_type=PROJECT --assignee_id=myproject bq mk --reservation_assignment --reservation_id=project:us.dev --job_type=QUERY --assignee_type=FOLDER --assignee_id=123 bq mk --reservation_assignment --reservation_id=project:us.dev --job_type=QUERY --assignee_type=ORGANIZATION --assignee_id=456 bq mk --reservation_group --project_id=project --location=us reservation_group_name bq mk --connection --connection_type='CLOUD_SQL' --properties='{"instanceId" : "instance", "database" : "db", "type" : "MYSQL" }' --connection_credential='{"username":"u", "password":"p"}' --project_id=proj --location=us --display_name=name new_connection bq mk --row_access_policy 
      --policy_id=new_policy --target_table='existing_dataset.existing_table'
          --grantees='user:user1@google.com,group:group1@google.com'
          --filter_predicate='Region="US"'
      bq mk --source=file.json
      bq mk --migration_workflow --location=us --config_file=file.json
    """
    client = bq_cached_client.Client.Get()
    reference = None
    # -d (dataset) and -t (table) are mutually exclusive object kinds.
    if self.d and self.t:
      raise app.UsageError('Cannot specify both -d and -t.')
    # A table body may be described by at most one of schema, view, or
    # materialized view.
    if frontend_utils.ValidateAtMostOneSelected(
        self.schema, self.view, self.materialized_view
    ):
      raise app.UsageError(
          'Cannot specify more than one of'
          ' --schema or --view or --materialized_view.'
      )
    # Resolve the target reference for table-like objects; views and
    # materialized views are addressed as tables.
    if self.t:
      reference = bq_client_utils.GetTableReference(
          id_fallbacks=client, identifier=identifier
      )
    elif self.view:
      reference = bq_client_utils.GetTableReference(
          id_fallbacks=client, identifier=identifier
      )
    elif self.materialized_view:
      reference = bq_client_utils.GetTableReference(
          id_fallbacks=client, identifier=identifier
      )
    elif self.row_access_policy:
      # Row access policy: resolve the policy reference, create it, and report.
      reference = bq_client_utils.GetRowAccessPolicyReference(
          id_fallbacks=client,
          table_identifier=self.target_table,
          policy_id=self.policy_id,
      )
      try:
        client_row_access_policy.create_row_access_policy(
            bqclient=client,
            policy_reference=reference,
            grantees=self.grantees.split(','),
            filter_predicate=self.filter_predicate,
        )
      except BaseException as e:
        raise bq_error.BigqueryError(
            "Failed to create row access policy '%s' on '%s': %s"
            % (self.policy_id, self.target_table, e)
        )
      self.printSuccessMessage('Row access policy', self.policy_id)
    elif self.reservation:
      object_info = None
      reference = bq_client_utils.GetReservationReference(
          id_fallbacks=client,
          identifier=identifier,
          default_location=bq_flags.LOCATION.value,
      )
      try:
        # Reservation groups are gated behind an alpha feature flag.
        if self.reservation_group_name is not None:
          utils_flags.fail_if_not_using_alpha_feature(
              bq_flags.AlphaFeatures.RESERVATION_GROUPS
          )
        # --use_idle_slots is the legacy spelling; it only applies when
        # --ignore_idle_slots was not explicitly set.
        ignore_idle_arg = self.ignore_idle_slots
        if ignore_idle_arg is None:
          ignore_idle_arg = not self.use_idle_slots
        # Fall back through the deprecated flags in preference order:
        # target_job_concurrency > concurrency > max_concurrency.
        concurrency = self.target_job_concurrency
        if concurrency is None:
          concurrency = (
              self.concurrency
              if self.concurrency is not None
              else self.max_concurrency
          )
        object_info = client_reservation.CreateReservation(
            client=client.GetReservationApiClient(),
            api_version=bq_flags.API_VERSION.value,
            reference=reference,
            slots=self.slots,
            ignore_idle_slots=ignore_idle_arg,
            edition=self.edition,
            target_job_concurrency=concurrency,
            multi_region_auxiliary=self.multi_region_auxiliary,
            autoscale_max_slots=self.autoscale_max_slots,
            max_slots=self.max_slots,
            scaling_mode=self.scaling_mode,
            reservation_group_name=self.reservation_group_name,
            scheduling_policy_concurrency=self.scheduling_policy_concurrency,
            scheduling_policy_max_slots=self.scheduling_policy_max_slots,
        )
      except BaseException as e:
        raise bq_error.BigqueryError(
            "Failed to create reservation '%s': %s" % (identifier, e)
        )
      if object_info is not None:
        frontend_utils.PrintObjectInfo(
            object_info, reference, custom_format='show'
        )
    elif self.capacity_commitment:
      # The commitment id is assigned by the service; a placeholder id is used
      # to build the reference.
      reference = bq_client_utils.GetCapacityCommitmentReference(
          id_fallbacks=client,
          identifier=identifier,
          default_location=bq_flags.LOCATION.value,
          default_capacity_commitment_id=' ',
      )
      try:
        object_info = client_reservation.CreateCapacityCommitment(
            client=client.GetReservationApiClient(),
            reference=reference,
            edition=self.edition,
            slots=self.slots,
            plan=self.plan,
            renewal_plan=self.renewal_plan,
            multi_region_auxiliary=self.multi_region_auxiliary,
        )
      except BaseException as e:
        raise bq_error.BigqueryError(
            "Failed to create capacity commitment in '%s': %s"
            % (identifier, e)
        )
      if object_info is not None:
        frontend_utils.PrintObjectInfo(
            object_info, reference, custom_format='show'
        )
    elif self.reservation_assignment:
      try:
        reference = bq_client_utils.GetReservationReference(
            id_fallbacks=client,
            default_location=bq_flags.LOCATION.value,
            identifier=self.reservation_id,
        )
        object_info = client_reservation.CreateReservationAssignment(
            client=client.GetReservationApiClient(),
            reference=reference,
            job_type=self.job_type,
            priority=self.priority,
            assignee_type=self.assignee_type,
            assignee_id=self.assignee_id,
            scheduling_policy_max_slots=self.scheduling_policy_max_slots,
            scheduling_policy_concurrency=self.scheduling_policy_concurrency,
        )
        # Re-resolve the reference from the server-assigned assignment name.
        reference = bq_client_utils.GetReservationAssignmentReference(
            id_fallbacks=client, path=object_info['name']
        )
        frontend_utils.PrintObjectInfo(
            object_info, reference, custom_format='show'
        )
      except BaseException as e:
        raise bq_error.BigqueryError(
            "Failed to create reservation assignment '%s': %s"
            % (identifier, e)
        )
    elif self.reservation_group:
      try:
        # Reservation groups are gated behind an alpha feature flag.
        utils_flags.fail_if_not_using_alpha_feature(
            bq_flags.AlphaFeatures.RESERVATION_GROUPS
        )
        reference = bq_client_utils.GetReservationGroupReference(
            id_fallbacks=client,
            identifier=identifier,
            default_location=bq_flags.LOCATION.value,
        )
        object_info = client_reservation.CreateReservationGroup(
            reservation_group_client=client.GetReservationApiClient(),
            reference=reference,
        )
        frontend_utils.PrintObjectInfo(
            object_info, reference, custom_format='show'
        )
      except BaseException as e:
        raise bq_error.BigqueryError(
            "Failed to create reservation group '%s': %s" % (identifier, e)
        )
    elif self.transfer_config:
      transfer_client = client.GetTransferV1ApiClient()
      reference = 'projects/' + (
          bq_client_utils.GetProjectReference(id_fallbacks=client).projectId
      )
      credentials = False
      if self.data_source:
        credentials = utils_data_transfer.CheckValidCreds(
            reference, self.data_source, transfer_client
        )
      else:
        raise bq_error.BigqueryError('A data source must be provided.')
      auth_info = {}
      # Only prompt for authorization when no usable credentials exist and no
      # service account was supplied ('loadtesting' never needs auth).
      if (
          not credentials
          and self.data_source != 'loadtesting'
          and not self.service_account_name
      ):
        auth_info = utils_data_transfer.RetrieveAuthorizationInfo(
            reference, self.data_source, transfer_client
        )
      location = self.data_location or bq_flags.LOCATION.value
      self.event_driven_schedule = (
          self.event_driven_schedule_flag.value
          if self.event_driven_schedule_flag.present
          else None
      )
      schedule_args = client_data_transfer.TransferScheduleArgs(
          schedule=self.schedule,
          start_time=self.schedule_start_time,
          end_time=self.schedule_end_time,
          disable_auto_scheduling=self.no_auto_scheduling,
          event_driven_schedule=self.event_driven_schedule,
      )
      transfer_name = client_data_transfer.create_transfer_config(
          transfer_client=client.GetTransferV1ApiClient(),
          reference=reference,
          data_source=self.data_source,
          target_dataset=self.target_dataset,
          display_name=self.display_name,
          refresh_window_days=self.refresh_window_days,
          params=self.params,
          auth_info=auth_info,
          service_account_name=self.service_account_name,
          destination_kms_key=self.destination_kms_key,
          notification_pubsub_topic=self.notification_pubsub_topic,
          schedule_args=schedule_args,
          location=location,
      )
      self.printSuccessMessage('Transfer configuration', transfer_name)
    elif self.transfer_run:
      formatter = utils_flags.get_formatter_from_flags()
      formatted_identifier = frontend_id_utils.FormatDataTransferIdentifiers(
          client, identifier
      )
      reference = bq_id_utils.ApiClientHelper.TransferConfigReference(
          transferConfigName=formatted_identifier
      )
      # Callers must supply either the full [start_time, end_time) range or a
      # single run_time — never both, and never a partial range.
      incompatible_options = (
          self.start_time or self.end_time
      ) and self.run_time
      incomplete_options = (
          not self.start_time or not self.end_time
      ) and not self.run_time
      if incompatible_options or incomplete_options:
        raise app.UsageError(
            'Need to specify either both --start_time and --end_time '
            'or only --run_time.'
        )
      results = list(
          map(
              utils_formatting.format_transfer_run_info,
              client_data_transfer.start_manual_transfer_runs(
                  transfer_client=client.GetTransferV1ApiClient(),
                  reference=reference,
                  start_time=self.start_time,
                  end_time=self.end_time,
                  run_time=self.run_time,
              ),
          )
      )
      utils_formatting.configure_formatter(
          formatter,
          bq_id_utils.ApiClientHelper.TransferRunReference,
          print_format='make',
          object_info=results[0],
      )
      for result in results:
        formatter.AddDict(result)
      formatter.Print()
    elif self.connection:
      # Create a new connection.
      connection_type_defined = self.connection_type
      connection_type_defined = (
          connection_type_defined or self.connector_configuration
      )
      if not connection_type_defined:
        error = (
            'Need to specify --connection_type or --connector_configuration.'
        )
        raise app.UsageError(error)
      # AWS connections: derive properties from the IAM role id; only the
      # federated flavor is still supported.
      if self.connection_type == 'AWS' and self.iam_role_id:
        self.properties = bq_processor_utils.MakeAccessRolePropertiesJson(
            self.iam_role_id
        )
        if not self.federated_aws:
          raise app.UsageError('Non-federated AWS connections are deprecated.')
      # Azure connections: federated identity additionally needs the Active
      # Directory application (client) id.
      if self.connection_type == 'Azure' and self.tenant_id:
        if self.federated_azure:
          if not self.federated_app_client_id:
            raise app.UsageError(
                'Must specify --federated_app_client_id for federated Azure '
                'connections.'
            )
          self.properties = bq_processor_utils.MakeAzureFederatedAppClientAndTenantIdPropertiesJson(
              self.tenant_id, self.federated_app_client_id
          )
        else:
          self.properties = bq_processor_utils.MakeTenantIdPropertiesJson(
              self.tenant_id
          )
      param_properties = self.properties
      # All connection types require properties, except CLOUD_RESOURCE as
      # it serves as a container for a service account that is generated
      # by the connection service.
      if not param_properties and self.connection_type == 'CLOUD_RESOURCE':
        param_properties = '{}'
      # SPARK connections does not require properties since all the fields in
      # the properties are optional.
if not param_properties and self.connection_type == 'SPARK': param_properties = '{}' properties_defined = param_properties properties_defined = properties_defined or self.connector_configuration if not properties_defined: error = 'Need to specify --properties or --connector_configuration' raise app.UsageError(error) created_connection = client_connection.CreateConnection( client=client.GetConnectionV1ApiClient(), project_id=bq_flags.PROJECT_ID.value, location=bq_flags.LOCATION.value, connection_type=self.connection_type, properties=param_properties, connection_credential=self.connection_credential, display_name=self.display_name, description=self.description, connection_id=identifier, kms_key_name=self.kms_key_name, connector_configuration=self.connector_configuration, ) if created_connection: reference = bq_client_utils.GetConnectionReference( id_fallbacks=client, path=created_connection['name'] ) print('Connection %s successfully created' % reference) utils_formatting.maybe_print_manual_instructions_for_connection( created_connection, flag_format=bq_flags.FORMAT.value ) elif self.migration_workflow: if not bq_flags.LOCATION.value: raise app.UsageError( 'Need to specify location for creating migration workflows.' ) if not self.config_file: raise app.UsageError( 'Need to specify config file for creating migration workflows.' 
) reference = None self.DelegateToGcloudAndExit( 'migration_workflows', 'mk', identifier, command_flags_for_this_resource={ 'location': bq_flags.LOCATION.value, 'config_file': self.config_file, 'sync': bq_flags.SYNCHRONOUS_MODE.value, 'synchronous_mode': bq_flags.SYNCHRONOUS_MODE.value, }, ) elif self.d or not identifier: reference = bq_client_utils.GetDatasetReference( id_fallbacks=client, identifier=identifier ) if reference.datasetId and identifier: frontend_utils.ValidateDatasetName(reference.datasetId) else: reference = bq_client_utils.GetReference( id_fallbacks=client, identifier=identifier ) bq_id_utils.typecheck( reference, ( bq_id_utils.ApiClientHelper.DatasetReference, bq_id_utils.ApiClientHelper.TableReference, ), "Invalid identifier '%s' for mk." % (identifier,), is_usage_error=True, ) if isinstance(reference, bq_id_utils.ApiClientHelper.DatasetReference): if self.schema: raise app.UsageError('Cannot specify schema with a dataset.') if self.source and self.description: raise app.UsageError('Cannot specify description with a source.') if self.expiration: raise app.UsageError('Cannot specify an expiration for a dataset.') if self.external_table_definition is not None: raise app.UsageError( 'Cannot specify an external_table_definition for a dataset.' ) if (not self.force) and client_dataset.DatasetExists( apiclient=client.apiclient, reference=reference, ): message = "Dataset '%s' already exists." 
% (reference,) if not self.f: raise bq_error.BigqueryError(message) else: print(message) return default_table_exp_ms = None if self.default_table_expiration is not None: default_table_exp_ms = self.default_table_expiration * 1000 default_partition_exp_ms = None if self.default_partition_expiration is not None: default_partition_exp_ms = self.default_partition_expiration * 1000 location = self.data_location or bq_flags.LOCATION.value labels = None if self.label is not None: labels = frontend_utils.ParseLabels(self.label) if self.source_dataset: source_dataset_reference = bq_client_utils.GetDatasetReference( id_fallbacks=client, identifier=self.source_dataset ) else: source_dataset_reference = None if self.external_source and self.source_dataset: raise app.UsageError( 'Cannot specify both external_source and linked dataset.' ) if self.external_catalog_table_options is not None: raise app.UsageError( 'Cannot specify external_catalog_table_options for a dataset.' ) if self.external_source: if not self.connection_id: if not self.external_source.startswith('google-cloudspanner:/'): raise app.UsageError( 'connection_id is required when external_source is specified.' ) resource_tags = None if self.add_tags is not None: resource_tags = bq_utils.ParseTags(self.add_tags) self.description, acl = frontend_utils.ProcessSource( self.description, self.source ) command_flags_for_this_resource = { 'description': self.description, 'force': self.force, } # TODO(b/355324165): Add genralised code to handle defaults. if location and (location != 'us' or location != 'US'): command_flags_for_this_resource['location'] = location # The DatasetExists check has already happened. 
self.PossiblyDelegateToGcloudAndExit( resource='datasets', bq_command='mk', identifier=identifier, command_flags_for_this_resource=command_flags_for_this_resource, ) client_dataset.CreateDataset( apiclient=client.apiclient, reference=reference, ignore_existing=self.force, description=self.description, acl=acl, default_table_expiration_ms=default_table_exp_ms, default_partition_expiration_ms=default_partition_exp_ms, data_location=location, default_kms_key=self.default_kms_key, labels=labels, source_dataset_reference=source_dataset_reference, external_source=self.external_source, connection_id=self.connection_id, external_catalog_dataset_options=self.external_catalog_dataset_options, max_time_travel_hours=self.max_time_travel_hours, storage_billing_model=self.storage_billing_model, resource_tags=resource_tags, ) self.printSuccessMessage('Dataset', reference) elif isinstance(reference, bq_id_utils.ApiClientHelper.TableReference): if self.source_dataset: raise app.UsageError('Cannot specify --source_dataset for a table.') object_name = 'Table' if self.view: object_name = 'View' if self.materialized_view: object_name = 'Materialized View' if (not self.force) and client_table.table_exists( apiclient=client.apiclient, reference=reference ): message = ( "%s '%s' could not be created; a table with this name " 'already exists.' ) % ( object_name, reference, ) if not self.f: raise bq_error.BigqueryError(message) else: print(message) return if schema: schema = bq_client_utils.ReadSchema(schema) else: schema = None expiration = None labels = None if self.label is not None: labels = frontend_utils.ParseLabels(self.label) if self.data_location: raise app.UsageError('Cannot specify data location for a table.') if self.default_table_expiration: raise app.UsageError('Cannot specify default expiration for a table.') if self.external_catalog_dataset_options is not None: raise app.UsageError( 'Cannot specify external_catalog_dataset_options for a table.' 
) if self.expiration: expiration = int(self.expiration + time.time()) * 1000 view_query_arg = self.view or None materialized_view_query_arg = self.materialized_view or None external_data_config = None if self.external_table_definition is not None: external_data_config = frontend_utils.GetExternalDataConfig( self.external_table_definition, self.use_avro_logical_types, self.parquet_enum_as_string, self.parquet_enable_list_inference, self.metadata_cache_mode, self.object_metadata, self.preserve_ascii_control_characters, self.reference_file_schema_uri, self.file_set_spec_type, self.null_marker_flag.value, self.null_markers_flag.value, self.time_zone_flag.value, self.date_format_flag.value, self.datetime_format_flag.value, self.time_format_flag.value, self.timestamp_format_flag.value, self.source_column_match_flag.value, parquet_map_target_type=self.parquet_map_target_type_flag.value, timestamp_target_precision=self.timestamp_target_precision_flag.value, ) if (self.require_partition_filter is not None) and ( 'hivePartitioningOptions' in external_data_config ): raise app.UsageError( 'Cannot specify require_partition_filter for hive partition' ' tables.' 
) if 'fileSetSpecType' in external_data_config: external_data_config['fileSetSpecType'] = ( frontend_utils.ParseFileSetSpecType( external_data_config['fileSetSpecType'] ) ) biglake_config = None has_all_required_biglake_config = ( self.connection_id and self.storage_uri and self.table_format ) has_some_required_biglake_config = ( self.connection_id or self.storage_uri or self.file_format or self.table_format ) if has_all_required_biglake_config: biglake_config = { 'connection_id': self.connection_id, 'storage_uri': self.storage_uri, 'file_format': self.file_format, 'table_format': self.table_format, } elif has_some_required_biglake_config: missing_fields = [] if not self.connection_id: missing_fields.append('connection_id') if not self.storage_uri: missing_fields.append('storage_uri') if not self.table_format: missing_fields.append('table_format') missing_fields = ', '.join(missing_fields) raise app.UsageError( f'BigLake tables require {missing_fields} to be specified' ) view_udf_resources = None if self.view_udf_resource: view_udf_resources = frontend_utils.ParseUdfResources( self.view_udf_resource ) time_partitioning = frontend_utils.ParseTimePartitioning( self.time_partitioning_type, self.time_partitioning_expiration, self.time_partitioning_field, None, self.require_partition_filter, ) clustering = frontend_utils.ParseClustering(self.clustering_fields) range_partitioning = frontend_utils.ParseRangePartitioning( self.range_partitioning ) table_constraints = None resource_tags = None if self.add_tags is not None: resource_tags = bq_utils.ParseTags(self.add_tags) client_table.create_table( apiclient=client.apiclient, reference=reference, ignore_existing=True, schema=schema, description=self.description, expiration=expiration, view_query=view_query_arg, materialized_view_query=materialized_view_query_arg, enable_refresh=self.enable_refresh, refresh_interval_ms=self.refresh_interval_ms, max_staleness=self.max_staleness, view_udf_resources=view_udf_resources, 
use_legacy_sql=self.use_legacy_sql, external_data_config=external_data_config, biglake_config=biglake_config, external_catalog_table_options=self.external_catalog_table_options, labels=labels, time_partitioning=time_partitioning, clustering=clustering, range_partitioning=range_partitioning, require_partition_filter=self.require_partition_filter, destination_kms_key=(self.destination_kms_key), table_constraints=table_constraints, resource_tags=resource_tags, ) self.printSuccessMessage(object_name, reference)