#!/usr/bin/env python
|
|
"""All the BigQuery CLI commands."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import datetime
|
|
import time
|
|
from typing import Optional
|
|
|
|
from absl import flags
|
|
|
|
import bq_flags
|
|
from clients import client_job
|
|
from clients import utils as bq_client_utils
|
|
from frontend import bigquery_command
|
|
from frontend import bq_cached_client
|
|
from frontend import flags as frontend_flags
|
|
from frontend import utils as frontend_utils
|
|
from frontend import utils_flags
|
|
from frontend import utils_formatting
|
|
|
|
# These aren't relevant for user-facing docstrings:
|
|
# pylint: disable=g-doc-return-or-yield
|
|
# pylint: disable=g-doc-args
|
|
|
|
|
|
class Load(bigquery_command.BigqueryCmd):
  usage = """load <destination_table> <source> <schema>"""

  def __init__(self, name: str, fv: flags.FlagValues):
    """Registers every flag accepted by the `bq load` command on `fv`."""
    super(Load, self).__init__(name, fv)
    # CSV/text parsing options.
    flags.DEFINE_string(
        'field_delimiter',
        None,
        'The character that indicates the boundary between columns in the '
        'input file. "\\t" and "tab" are accepted names for tab.',
        short_name='F',
        flag_values=fv,
    )
    flags.DEFINE_enum(
        'encoding',
        None,
        ['UTF-8', 'ISO-8859-1', 'UTF-16BE', 'UTF-16LE', 'UTF-32BE', 'UTF-32LE'],
        (
            'The character encoding used by the input file. Options include:'
            '\n ISO-8859-1 (also known as Latin-1)'
            '\n UTF-8'
            '\n UTF-16BE (UTF-16 BigEndian)'
            '\n UTF-16LE (UTF-16 LittleEndian)'
            '\n UTF-32BE (UTF-32 BigEndian)'
            '\n UTF-32LE (UTF-32 LittleEndian)'
        ),
        short_name='E',
        flag_values=fv,
    )
    flags.DEFINE_integer(
        'skip_leading_rows',
        None,
        'The number of rows at the beginning of the source file to skip.',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'schema',
        None,
        'Either a filename or a comma-separated list of fields in the form '
        'name[:type].',
        flag_values=fv,
    )
    flags.DEFINE_boolean(
        'replace',
        False,
        'If true existing data is erased when new data is loaded.',
        flag_values=fv,
    )
    flags.DEFINE_boolean(
        'replace_data',
        False,
        'If true, erase existing contents but not other table metadata like'
        ' schema before loading new data.',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'quote',
        None,
        'Quote character to use to enclose records. Default is ". '
        'To indicate no quote character at all, use an empty string.',
        flag_values=fv,
    )
    flags.DEFINE_integer(
        'max_bad_records',
        None,
        'Maximum number of bad records allowed before the entire job fails. '
        'Only supported for CSV and NEWLINE_DELIMITED_JSON file formats.',
        flag_values=fv,
    )
    flags.DEFINE_boolean(
        'allow_quoted_newlines',
        None,
        'Whether to allow quoted newlines in CSV import data.',
        flag_values=fv,
    )
    flags.DEFINE_boolean(
        'allow_jagged_rows',
        None,
        'Whether to allow missing trailing optional columns '
        'in CSV import data.',
        flag_values=fv,
    )
    flags.DEFINE_boolean(
        'preserve_ascii_control_characters',
        None,
        'Whether to preserve embedded Ascii Control characters in CSV import '
        'data.',
        flag_values=fv,
    )
    flags.DEFINE_boolean(
        'ignore_unknown_values',
        None,
        'Whether to allow and ignore extra, unrecognized values in CSV or JSON '
        'import data.',
        flag_values=fv,
    )
    flags.DEFINE_enum(
        'source_format',
        None,
        [
            'CSV',
            'NEWLINE_DELIMITED_JSON',
            'DATASTORE_BACKUP',
            'AVRO',
            'PARQUET',
            'ORC',
            'THRIFT',
        ],
        'Format of source data. Options include:'
        '\n CSV'
        '\n NEWLINE_DELIMITED_JSON'
        '\n DATASTORE_BACKUP'
        '\n AVRO'
        '\n PARQUET'
        '\n ORC'
        '\n THRIFT',
        flag_values=fv,
    )
    flags.DEFINE_list(
        'projection_fields',
        [],
        'If sourceFormat is set to "DATASTORE_BACKUP", indicates which entity '
        'properties to load into BigQuery from a Cloud Datastore backup. '
        'Property names are case sensitive and must refer to top-level '
        'properties.',
        flag_values=fv,
    )
    flags.DEFINE_boolean(
        'autodetect',
        None,
        'Enable auto detection of schema and options for formats that are not '
        'self describing like CSV and JSON.',
        flag_values=fv,
    )
    flags.DEFINE_multi_string(
        'schema_update_option',
        None,
        'Can be specified when append to a table, or replace a table partition.'
        ' When specified, the schema of the destination table will be updated '
        'with the schema of the new data. One or more of the following options '
        'can be specified:'
        '\n ALLOW_FIELD_ADDITION: allow new fields to be added'
        '\n ALLOW_FIELD_RELAXATION: allow relaxing required fields to nullable',
        flag_values=fv,
    )
    # Help-text fix: these two strings previously concatenated to
    # "...a NULL valuein CSV import data." (missing separating space).
    flags.DEFINE_string(
        'null_marker',
        None,
        'An optional custom string that will represent a NULL value '
        'in CSV import data.',
        flag_values=fv,
    )
    flags.DEFINE_list(
        'null_markers',
        [],
        'An optional list of custom strings that will represent a NULL value '
        'in CSV import data.',
        flag_values=fv,
    )
    # Partitioning and clustering options.
    flags.DEFINE_string(
        'time_partitioning_type',
        None,
        'Enables time based partitioning on the table and set the type. The '
        'default value is DAY, which will generate one partition per day. '
        'Other supported values are HOUR, MONTH, and YEAR.',
        flag_values=fv,
    )
    flags.DEFINE_integer(
        'time_partitioning_expiration',
        None,
        'Enables time based partitioning on the table and sets the number of '
        'seconds for which to keep the storage for the partitions in the table.'
        ' The storage in a partition will have an expiration time of its '
        'partition time plus this value.',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'range_partitioning',
        None,
        'Enables range partitioning on the table. The format should be '
        '"field,start,end,interval". The table will be partitioned based on the'
        ' value of the field. Field must be a top-level, non-repeated INT64 '
        'field. Start, end, and interval are INT64 values defining the ranges.',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'time_partitioning_field',
        None,
        'Enables time based partitioning on the table and the table will be '
        'partitioned based on the value of this field. If time based '
        'partitioning is enabled without this value, the table will be '
        'partitioned based on the loading time.',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'destination_kms_key',
        None,
        'Cloud KMS key for encryption of the destination table data.',
        flag_values=fv,
    )
    flags.DEFINE_boolean(
        'require_partition_filter',
        None,
        'Whether to require partition filter for queries over this table. '
        'Only apply to partitioned table.',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'clustering_fields',
        None,
        'Comma-separated list of field names that specifies the columns on '
        'which a table is clustered. To remove the clustering, set an empty '
        'value.',
        flag_values=fv,
    )
    # Format-specific options (Avro, Parquet, hive layouts, decimals, ...).
    flags.DEFINE_boolean(
        'use_avro_logical_types',
        None,
        'If sourceFormat is set to "AVRO", indicates whether to enable '
        'interpreting logical types into their corresponding types '
        '(ie. TIMESTAMP), instead of only using their raw types (ie. INTEGER).',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'reference_file_schema_uri',
        None,
        'provide a reference file with the reader schema, currently '
        'enabled for the format: AVRO, PARQUET, ORC.',
        flag_values=fv,
    )
    flags.DEFINE_boolean(
        'parquet_enum_as_string',
        None,
        'Infer Parquet ENUM logical type as STRING '
        '(instead of BYTES by default).',
        flag_values=fv,
    )
    flags.DEFINE_boolean(
        'parquet_enable_list_inference',
        None,
        frontend_utils.PARQUET_LIST_INFERENCE_DESCRIPTION,
        flag_values=fv,
    )
    flags.DEFINE_string(
        'hive_partitioning_mode',
        None,
        'Enables hive partitioning. AUTO indicates to perform '
        'automatic type inference. STRINGS indicates to treat all hive '
        'partition keys as STRING typed. No other values are accepted',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'hive_partitioning_source_uri_prefix',
        None,
        'Prefix after which hive partition '
        'encoding begins. For URIs like gs://bucket/path/key1=value/file, '
        'the value should be gs://bucket/path.',
        flag_values=fv,
    )
    flags.DEFINE_multi_enum(
        'decimal_target_types',
        None,
        ['NUMERIC', 'BIGNUMERIC', 'STRING'],
        'Specifies the list of possible BigQuery data types to '
        'which the source decimal values are converted. This list and the '
        'precision and the scale parameters of the decimal field determine the '
        'target type in the following preference order, and '
        'one or more of the following options could be specified: '
        '\n NUMERIC: decimal values could be converted to NUMERIC type, '
        'depending on the precision and scale of the decimal schema.'
        '\n BIGNUMERIC: decimal values could be converted to BIGNUMERIC type, '
        'depending on the precision and scale of the decimal schema.'
        '\n STRING: decimal values could be converted to STRING type, '
        'depending on the precision and scale of the decimal schema.',
        flag_values=fv,
    )
    flags.DEFINE_enum(
        'file_set_spec_type',
        None,
        ['FILE_SYSTEM_MATCH', 'NEW_LINE_DELIMITED_MANIFEST'],
        'Specifies how to discover files given source URIs. '
        'Options include: '
        '\n FILE_SYSTEM_MATCH: expand source URIs by listing files from the '
        'underlying object store. This is the default behavior.'
        '\n NEW_LINE_DELIMITED_MANIFEST: indicate the source URIs provided are '
        'new line delimited manifest files, where each line contains a URI '
        'with no wild-card.',
        flag_values=fv,
    )
    # Thrift-specific options (only consulted when --source_format=THRIFT).
    flags.DEFINE_string(
        'thrift_schema_idl_root_dir',
        None,
        'If "source_format" is set to "THRIFT", indicates the root directory '
        'of the Thrift IDL bundle containing all Thrift files that should be '
        'used to parse the schema of the serialized Thrift records.',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'thrift_schema_idl_uri',
        None,
        'If "source_format" is set to "THRIFT", indicates the file uri that '
        'contains the Thrift IDL struct to be parsed as schema. This file will '
        'be used as the entry point to parse the schema and all its included '
        'Thrift IDL files should be in "thrift_schema_idl_root_dir".',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'thrift_schema_struct',
        None,
        'If "source_format" is set to "THRIFT", indicates the root Thrift '
        'struct that should be used as the schema. This struct should be '
        'defined in the "thrift_schema_idl_uri" file.',
        flag_values=fv,
    )
    flags.DEFINE_enum(
        'thrift_deserialization',
        None,
        ['T_BINARY_PROTOCOL'],
        'If "source_format" is set to "THRIFT", configures how serialized '
        'Thrift record should be deserialized (using TProtocol). '
        'Options include: '
        '\n T_BINARY_PROTOCOL',
        flag_values=fv,
    )
    flags.DEFINE_enum(
        'thrift_framing',
        None,
        ['NOT_FRAMED', 'FRAMED_WITH_BIG_ENDIAN', 'FRAMED_WITH_LITTLE_ENDIAN'],
        'If "source_format" is set to "THRIFT", configures how Thrift records '
        'or data blocks are framed (e.g. using TFramedTransport). '
        'Options includes: '
        '\n NOT_FRAMED, '
        '\n FRAMED_WITH_BIG_ENDIAN, '
        '\n FRAMED_WITH_LITTLE_ENDIAN',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'boundary_bytes_base64',
        None,
        'If "source_format" is set to "THRIFT", indicates the sequence of '
        'boundary bytes (encoded in base64) that are added in front of the '
        'serialized Thrift records, or data blocks, or the frame when used '
        'with `thrift_framing`.',
        flag_values=fv,
    )
    flags.DEFINE_enum(
        'json_extension',
        None,
        ['GEOJSON'],
        '(experimental) Allowed values: '
        'GEOJSON: only allowed when source_format is specified as '
        'NEWLINE_DELIMITED_JSON. When specified, the input is loaded as '
        'newline-delimited GeoJSON.',
        flag_values=fv,
    )
    flags.DEFINE_enum(
        'column_name_character_map',
        None,
        ['STRICT', 'V1', 'V2'],
        'Indicates the character map used for column names: '
        '\n STRICT: The latest character map and reject invalid column names.'
        '\n V1: Supports alphanumeric + underscore and name must start with a '
        'letter or underscore. Invalid column names will be normalized.'
        '\n V2: Supports flexible column name. Invalid column names will be '
        'normalized.',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'session_id',
        None,
        'If loading to a temporary table, specifies the session ID of the '
        'temporary table',
        flag_values=fv,
    )
    flags.DEFINE_boolean(
        'copy_files_only',
        None,
        '[Experimental] Configures the load job to only copy files to the '
        'destination BigLake managed table, without reading file content and '
        'writing them to new files.',
        flag_values=fv,
    )
    # Date/time parsing options.
    flags.DEFINE_string(
        'time_zone',
        None,
        'Default time zone that will apply when parsing timestamp values that'
        ' have no specific time zone. For example, "America/Los_Angeles".',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'date_format',
        None,
        'Format elements that define how the DATE values are formatted in the'
        ' input files. For example, "MM/DD/YYYY".',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'datetime_format',
        None,
        'Format elements that define how the DATETIME values are formatted in'
        ' the input files. For example, "MM/DD/YYYY HH24:MI:SS.FF3".',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'time_format',
        None,
        'Format elements that define how the TIME values are formatted in the'
        ' input files. For example, "HH24:MI:SS.FF3".',
        flag_values=fv,
    )
    flags.DEFINE_string(
        'timestamp_format',
        None,
        'Format elements that define how the TIMESTAMP values are formatted in'
        ' the input files. For example, "MM/DD/YYYY HH24:MI:SS.FF3".',
        flag_values=fv,
    )
    flags.DEFINE_enum(
        'source_column_match',
        None,
        ['POSITION', 'NAME'],
        'Controls the strategy used to match loaded columns to the schema.'
        ' Options include:'
        '\n POSITION: matches by position. This option assumes that the columns'
        ' are ordered the same way as the schema.'
        '\n NAME: matches by name. This option reads the header row as column'
        ' names and reorders columns to match the field names in the schema.',
        flag_values=fv,
    )
    # Flags shared with other commands are defined via frontend helpers; the
    # returned flag holders are kept so RunWithArgs can read .value/.present.
    self.timestamp_target_precision_flag = (
        frontend_flags.define_timestamp_target_precision(flag_values=fv)
    )
    self.parquet_map_target_type_flag = (
        frontend_flags.define_parquet_map_target_type(flag_values=fv)
    )
    self.reservation_id_for_a_job_flag = (
        frontend_flags.define_reservation_id_for_a_job(flag_values=fv)
    )
    self._ProcessCommandRc(fv)
|
|
|
  def RunWithArgs(
      self, destination_table: str, source: str, schema: Optional[str] = None
  ) -> Optional[int]:
    """Perform a load operation of source into destination_table.

    Usage:
      load <destination_table> <source> [<schema>] [--session_id=[session]]

    The <destination_table> is the fully-qualified table name of table to
    create, or append to if the table already exists.

    To load to a temporary table, specify the table name in <destination_table>
    without a dataset and specify the session id with --session_id.

    The <source> argument can be a path to a single local file, or a
    comma-separated list of URIs.

    The <schema> argument should be either the name of a JSON file or a text
    schema. This schema should be omitted if the table already has one.

    In the case that the schema is provided in text form, it should be a
    comma-separated list of entries of the form name[:type], where type will
    default to string if not specified.

    In the case that <schema> is a filename, it should be a JSON file
    containing a single array, each entry of which should be an object with
    properties 'name', 'type', and (optionally) 'mode'. For more detail:
    https://cloud.google.com/bigquery/docs/schemas#specifying_a_json_schema_file

    Note: the case of a single-entry schema with no type specified is
    ambiguous; one can use name:string to force interpretation as a
    text schema.

    Examples:
      bq load ds.new_tbl ./info.csv ./info_schema.json
      bq load ds.new_tbl gs://mybucket/info.csv ./info_schema.json
      bq load ds.small gs://mybucket/small.csv name:integer,value:string
      bq load ds.small gs://mybucket/small.csv field1,field2,field3
      bq load temp_tbl --session_id=my_session ./info.csv ./info_schema.json

    Arguments:
      destination_table: Destination table name.
      source: Name of local file to import, or a comma-separated list of URI
        paths to data to import.
      schema: Either a text schema or JSON file, as above.
    """
    client = bq_cached_client.Client.Get()
    # Session loads resolve an unqualified table name against the reserved
    # '_SESSION' dataset instead of the client's default dataset.
    default_dataset_id = ''
    if self.session_id:
      default_dataset_id = '_SESSION'
    table_reference = bq_client_utils.GetTableReference(
        id_fallbacks=client,
        identifier=destination_table,
        default_dataset_id=default_dataset_id,
    )
    # Assemble the keyword options forwarded to client_job.Load. Most flags
    # are only added when explicitly set; the keys below are always present.
    opts = {
        'encoding': self.encoding,
        'skip_leading_rows': self.skip_leading_rows,
        'allow_quoted_newlines': self.allow_quoted_newlines,
        'job_id': utils_flags.get_job_id_from_flags(),
        'source_format': self.source_format,
        'projection_fields': self.projection_fields,
    }
    # Truthiness check: an explicit --max_bad_records=0 behaves like unset.
    if self.max_bad_records:
      opts['max_bad_records'] = self.max_bad_records
    if bq_flags.LOCATION.value:
      opts['location'] = bq_flags.LOCATION.value
    # A session id is passed to the backend as a connection property.
    if self.session_id:
      opts['connection_properties'] = [
          {'key': 'session_id', 'value': self.session_id}
      ]
    if self.copy_files_only:
      opts['copy_files_only'] = self.copy_files_only
    # --replace takes precedence over --replace_data when both are given.
    if self.replace:
      opts['write_disposition'] = 'WRITE_TRUNCATE'
    elif self.replace_data:
      opts['write_disposition'] = 'WRITE_TRUNCATE_DATA'
    # Delimiter and quote accept escape names like "\\t"/"tab"; normalize them.
    if self.field_delimiter is not None:
      opts['field_delimiter'] = frontend_utils.NormalizeFieldDelimiter(
          self.field_delimiter
      )
    if self.quote is not None:
      opts['quote'] = frontend_utils.NormalizeFieldDelimiter(self.quote)
    if self.allow_jagged_rows is not None:
      opts['allow_jagged_rows'] = self.allow_jagged_rows
    if self.preserve_ascii_control_characters is not None:
      opts['preserve_ascii_control_characters'] = (
          self.preserve_ascii_control_characters
      )
    if self.ignore_unknown_values is not None:
      opts['ignore_unknown_values'] = self.ignore_unknown_values
    if self.autodetect is not None:
      opts['autodetect'] = self.autodetect
    if self.schema_update_option:
      opts['schema_update_options'] = self.schema_update_option
    if self.null_marker:
      opts['null_marker'] = self.null_marker
    # NOTE(review): --null_markers is a list flag defaulting to [], which is
    # never None, so this branch always fires and forwards the (possibly
    # empty) list — confirm that is intended.
    if self.null_markers is not None:
      opts['null_markers'] = self.null_markers
    time_partitioning = frontend_utils.ParseTimePartitioning(
        self.time_partitioning_type,
        self.time_partitioning_expiration,
        self.time_partitioning_field,
        None,
        self.require_partition_filter,
    )
    if time_partitioning is not None:
      opts['time_partitioning'] = time_partitioning
    range_partitioning = frontend_utils.ParseRangePartitioning(
        self.range_partitioning
    )
    if range_partitioning:
      opts['range_partitioning'] = range_partitioning
    clustering = frontend_utils.ParseClustering(self.clustering_fields)
    if clustering:
      opts['clustering'] = clustering
    if self.destination_kms_key is not None:
      opts['destination_encryption_configuration'] = {
          'kmsKeyName': self.destination_kms_key
      }
    if self.use_avro_logical_types is not None:
      opts['use_avro_logical_types'] = self.use_avro_logical_types
    if self.reference_file_schema_uri is not None:
      opts['reference_file_schema_uri'] = self.reference_file_schema_uri
    # Hive partitioning: the mode is validated, and the optional source URI
    # prefix is only attached when a mode was given.
    if self.hive_partitioning_mode is not None:
      frontend_utils.ValidateHivePartitioningOptions(
          self.hive_partitioning_mode
      )
      hive_partitioning_options = {}
      hive_partitioning_options['mode'] = self.hive_partitioning_mode
      if self.hive_partitioning_source_uri_prefix is not None:
        hive_partitioning_options['sourceUriPrefix'] = (
            self.hive_partitioning_source_uri_prefix
        )
      opts['hive_partitioning_options'] = hive_partitioning_options
    if self.json_extension is not None:
      opts['json_extension'] = self.json_extension
    if self.column_name_character_map is not None:
      opts['column_name_character_map'] = self.column_name_character_map
    # Always forwarded; None presumably means the server-side default applies.
    opts['decimal_target_types'] = self.decimal_target_types
    if self.file_set_spec_type is not None:
      opts['file_set_spec_type'] = frontend_utils.ParseFileSetSpecType(
          self.file_set_spec_type
      )
    if self.time_zone is not None:
      opts['time_zone'] = self.time_zone
    if self.date_format is not None:
      opts['date_format'] = self.date_format
    if self.datetime_format is not None:
      opts['datetime_format'] = self.datetime_format
    if self.time_format is not None:
      opts['time_format'] = self.time_format
    if self.timestamp_format is not None:
      opts['timestamp_format'] = self.timestamp_format
    if self.source_column_match is not None:
      opts['source_column_match'] = self.source_column_match
    if self.timestamp_target_precision_flag.value is not None:
      opts['timestamp_target_precision'] = (
          self.timestamp_target_precision_flag.value
      )
    # THRIFT-only sub-options, with explicit defaults for the deserialization
    # protocol and framing.
    if opts['source_format'] == 'THRIFT':
      thrift_options = {}
      if self.thrift_schema_idl_root_dir is not None:
        thrift_options['schema_idl_root_dir'] = self.thrift_schema_idl_root_dir
      if self.thrift_schema_idl_uri is not None:
        thrift_options['schema_idl_uri'] = self.thrift_schema_idl_uri
      if self.thrift_schema_struct is not None:
        thrift_options['schema_struct'] = self.thrift_schema_struct
      # Default to 'THRIFT_BINARY_PROTOCOL_OPTION'.
      thrift_options['deserialization_option'] = 'THRIFT_BINARY_PROTOCOL_OPTION'
      if self.thrift_deserialization is not None:
        if self.thrift_deserialization == 'T_BINARY_PROTOCOL':
          thrift_options['deserialization_option'] = (
              'THRIFT_BINARY_PROTOCOL_OPTION'
          )
      # Default to 'NOT_FRAMED'.
      thrift_options['framing_option'] = 'NOT_FRAMED'
      if self.thrift_framing is not None:
        thrift_options['framing_option'] = self.thrift_framing
      if self.boundary_bytes_base64 is not None:
        thrift_options['boundary_bytes'] = self.boundary_bytes_base64
      opts['thrift_options'] = thrift_options
    # PARQUET-only sub-options; only attached when at least one was set.
    if opts['source_format'] == 'PARQUET':
      parquet_options = {}
      if self.parquet_enum_as_string is not None:
        parquet_options['enum_as_string'] = self.parquet_enum_as_string
      if self.parquet_enable_list_inference is not None:
        parquet_options['enable_list_inference'] = (
            self.parquet_enable_list_inference
        )
      if self.parquet_map_target_type_flag.value is not None:
        parquet_options['mapTargetType'] = (
            self.parquet_map_target_type_flag.value
        )
      if parquet_options:
        opts['parquet_options'] = parquet_options
    if self.reservation_id_for_a_job_flag.present:
      opts['reservation_id'] = self.reservation_id_for_a_job_flag.value
    job = client_job.Load(
        client, table_reference, source, schema=schema, **opts
    )
    # In synchronous mode the job has already completed here, so print its
    # final messages; otherwise just report that the job was started.
    if bq_flags.SYNCHRONOUS_MODE.value:
      frontend_utils.PrintJobMessages(utils_formatting.format_job_info(job))
    else:
      self.PrintJobStartInfo(job)