#!/usr/bin/env python
|
|
"""Python script for interacting with BigQuery."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import collections
|
|
import datetime
|
|
import functools
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import sys
|
|
from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Type
|
|
|
|
from absl import app
|
|
from absl import flags
|
|
import yaml
|
|
|
|
import table_formatter
|
|
import bq_utils
|
|
from clients import utils as bq_client_utils
|
|
from frontend import utils_flags
|
|
from frontend import utils_formatting
|
|
from utils import bq_consts
|
|
from utils import bq_error
|
|
from utils import bq_id_utils
|
|
from pyglib import stringutil
|
|
|
|
# pylint: disable=g-multiple-import
|
|
if sys.version_info < (3, 11):
|
|
from typing_extensions import TypedDict, NotRequired # pylint: disable=g-import-not-at-top
|
|
else:
|
|
from typing import TypedDict, NotRequired # pylint: disable=g-import-not-at-top
|
|
# pylint: enable=g-multiple-import
|
|
|
|
FLAGS = flags.FLAGS
|
|
|
|
PARQUET_LIST_INFERENCE_DESCRIPTION = (
|
|
'Use schema inference specifically for Parquet LIST logical type.\n It'
|
|
' checks whether the LIST node is in the standard form as documented in:\n'
|
|
' https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists\n'
|
|
' <optional | required> group <name> (LIST) {\n repeated group list {\n'
|
|
' <optional | required> <element-type> element;\n }\n }\n Returns'
|
|
' the "element" node in list_element_node. The corresponding field for the'
|
|
' LIST node in the converted schema is treated as if the node has the'
|
|
' following schema:\n repeated <element-type> <name>\n This means nodes'
|
|
' "list" and "element" are omitted.\n\n Otherwise, the LIST node must be in'
|
|
' one of the forms described by the backward-compatibility rules as'
|
|
' documented in:\n'
|
|
' https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#backward-compatibility-rules\n'
|
|
' <optional | required> group <name> (LIST) {\n repeated <element-type>'
|
|
' <element-name>\n }\n Returns the <element-name> node in'
|
|
' list_element_node. The corresponding field for the LIST node in the'
|
|
' converted schema is treated as if the node has the following schema:\n'
|
|
' repeated <element-type> <name>\n This means the element node is omitted.'
|
|
)
|
|
|
|
CONNECTION_ID_PATTERN = re.compile(r'[\w-]+')
|
|
_RANGE_PATTERN = re.compile(r'^\[(\S+.+\S+), (\S+.+\S+)\)$')
|
|
|
|
_PARAMETERS_KEY = 'parameters'
|
|
_DEFAULT_STORAGE_LOCATION_URI_KEY = 'defaultStorageLocationUri'
|
|
_STORAGE_DESCRIPTOR_KEY = 'storageDescriptor'
|
|
_CONNECTION_ID_KEY = 'connectionId'
|
|
|
|
_DELIMITER_MAP = {
|
|
'tab': '\t',
|
|
'\\t': '\t',
|
|
}
|
|
_DDL_OPERATION_MAP = {
|
|
'SKIP': 'Skipped',
|
|
'CREATE': 'Created',
|
|
'REPLACE': 'Replaced',
|
|
'ALTER': 'Altered',
|
|
'DROP': 'Dropped',
|
|
}
|
|
|
|
|
|
def ValidateGlobalFlags():
|
|
"""Validate combinations of global flag values."""
|
|
if FLAGS.service_account and FLAGS.use_gce_service_account:
|
|
raise app.UsageError(
|
|
'Cannot specify both --service_account and --use_gce_service_account.'
|
|
)
|
|
|
|
|
|
def ValidateAtMostOneSelected(*args: Any) -> bool:
|
|
"""Validates that at most one of the argument flags is selected.
|
|
|
|
Args:
|
|
*args: Each flag to be tested parsed in as a separate arg.
|
|
|
|
Returns:
|
|
True if more than 1 flag was selected, False if 1 or 0 were selected.
|
|
"""
|
|
count = 0
|
|
for arg in args:
|
|
if arg:
|
|
count += 1
|
|
return count > 1
|
|
|
|
|
|
def ValidateAtMostOneSelectedAllowsDefault(*args: Any) -> bool:
|
|
"""Validates that at most one of the argument flags is selected.
|
|
|
|
If the arg exists but the value is the default value,
|
|
then it won't be counted. This is useful when users want to clear the
|
|
value while setting another value. For example, 'update --arg1=0 --arg2=100'
|
|
when arg1 and arg2 shouldn't coexist.
|
|
|
|
Args:
|
|
*args: Each flag to be tested parsed in as a separate arg.
|
|
|
|
Returns:
|
|
True if more than 1 flag was selected, False if 1 or 0 were selected.
|
|
"""
|
|
count = 0
|
|
for arg in args:
|
|
if arg and type(arg)() != arg:
|
|
count += 1
|
|
return count > 1
|
|
|
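# Illustrative behaviour of the two validators above (not part of the original
# module). Both return True when more than one flag is effectively set; the
# "AllowsDefault" variant does not count default-valued flags, which lets a
# user clear one value while setting another:
#   >>> ValidateAtMostOneSelected(1, 2)
#   True
#   >>> ValidateAtMostOneSelectedAllowsDefault(0, 100)
#   False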
|
|
|
def ProcessSource(description: str, source: str) -> Tuple[Any, Any]:
|
|
"""Process "source" parameter used for bq update and bq mk command.
|
|
|
|
Args:
|
|
description: Description of the dataset.
|
|
source: source file path attached by "--source" parameter.
|
|
|
|
Returns:
|
|
A tuple (description, acl): the description from the source file if present,
|
|
otherwise the original description; and the acl from the source file if
|
|
present, otherwise None.
|
|
"""
|
|
acl = None
|
|
if source is None:
|
|
return (description, acl)
|
|
if not os.path.exists(source):
|
|
raise app.UsageError('Source file not found: %s' % (source,))
|
|
if not os.path.isfile(source):
|
|
raise app.UsageError('Source path is not a file: %s' % (source,))
|
|
with open(source) as f:
|
|
try:
|
|
payload = json.load(f)
|
|
if 'description' in payload:
|
|
description = payload['description']
|
|
logging.debug(
|
|
'Both source file and description flag exist, using the value in'
|
|
' the source file.'
|
|
)
|
|
if 'access' in payload:
|
|
acl = payload['access']
|
|
except ValueError as e:
|
|
raise app.UsageError(
|
|
'Error decoding JSON schema from file %s: %s' % (source, e)
|
|
)
|
|
|
|
return (description, acl)
|
|
|
|
|
|
def PrintDryRunInfo(job):
|
|
"""Prints the dry run info."""
|
|
if FLAGS.format in ['prettyjson', 'json']:
|
|
bq_utils.PrintFormattedJsonObject(job)
|
|
return
|
|
# TODO: b/422281423 - Revert this check once server returns this field for
|
|
# all dry run queries again.
|
|
if 'totalBytesProcessed' not in job['statistics']['query']:
|
|
print(
|
|
'Query successfully validated. No information about number of bytes'
|
|
' processed. To see the full details of the job, run this query with'
|
|
' `--format=json` or `--format=prettyjson`.'
|
|
)
|
|
return
|
|
num_bytes = job['statistics']['query']['totalBytesProcessed']
|
|
num_bytes_accuracy = job['statistics']['query'].get(
|
|
'totalBytesProcessedAccuracy', 'PRECISE'
|
|
)
|
|
if FLAGS.format == 'csv':
|
|
print(num_bytes)
|
|
else:
|
|
if job['statistics']['query'].get('statementType', '') == 'LOAD_DATA':
|
|
print(
|
|
'Query successfully validated. Assuming the files are not modified, '
|
|
'running this query will process %s files loading %s bytes of data.'
|
|
% (
|
|
job['statistics']['query']['loadQueryStatistics']['inputFiles'],
|
|
job['statistics']['query']['loadQueryStatistics'][
|
|
'inputFileBytes'
|
|
],
|
|
)
|
|
)
|
|
elif num_bytes_accuracy == 'PRECISE':
|
|
print(
|
|
'Query successfully validated. Assuming the tables are not modified, '
|
|
'running this query will process %s bytes of data.' % (num_bytes,)
|
|
)
|
|
elif num_bytes_accuracy == 'LOWER_BOUND':
|
|
print(
|
|
'Query successfully validated. Assuming the tables are not modified, '
|
|
'running this query will process lower bound of %s bytes of data.'
|
|
% (num_bytes,)
|
|
)
|
|
elif num_bytes_accuracy == 'UPPER_BOUND':
|
|
print(
|
|
'Query successfully validated. Assuming the tables are not modified, '
|
|
'running this query will process upper bound of %s bytes of data.'
|
|
% (num_bytes,)
|
|
)
|
|
else:
|
|
if job['statistics']['query']['statementType'] == 'CREATE_MODEL':
|
|
print(
|
|
'Query successfully validated. The number of bytes that will '
|
|
'be processed by this query cannot be calculated automatically. '
|
|
'More information about this can be seen in '
|
|
'https://cloud.google.com/bigquery-ml/pricing#dry_run'
|
|
)
|
|
else:
|
|
print(
|
|
'Query successfully validated. Assuming the tables are not '
|
|
'modified, running this query will process %s of data and the '
|
|
'accuracy is unknown because of federated tables or clustered '
|
|
'tables.' % (num_bytes,)
|
|
)
|
|
|
|
|
|
def RawInput(message: str) -> str:
|
|
try:
|
|
return input(message)
|
|
except EOFError:
|
|
if sys.stdin.isatty():
|
|
print('\nGot EOF; exiting.')
|
|
else:
|
|
print('\nGot EOF; exiting. Is your input from a terminal?')
|
|
sys.exit(1)
|
|
|
|
|
|
def PromptWithDefault(message: str) -> str:
|
|
"""Prompts user with message, return key pressed or '' on enter."""
|
|
if FLAGS.headless:
|
|
print('Running --headless, accepting default for prompt: %s' % (message,))
|
|
return ''
|
|
return RawInput(message).lower()
|
|
|
|
|
|
def PromptYN(message: str) -> Optional[str]:
|
|
"""Prompts user with message, returning the key 'y', 'n', or '' on enter."""
|
|
response = None
|
|
while response not in ['y', 'n', '']:
|
|
response = PromptWithDefault(message)
|
|
return response
|
|
|
|
|
|
def NormalizeFieldDelimiter(field_delimiter: str) -> str:
|
|
"""Validates and returns the correct field_delimiter."""
|
|
# The only non-string delimiter we allow is None, which represents
|
|
# no field delimiter specified by the user.
|
|
if field_delimiter is None:
|
|
return field_delimiter
|
|
|
|
# Allow TAB and \\t substitution.
|
|
key = field_delimiter.lower()
|
|
return _DELIMITER_MAP.get(key, field_delimiter)
|
|
|
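# Illustrative usage (not part of the original source): the literal strings
# "tab" and "\t" (backslash-t) are normalized to a real tab character, while
# any other delimiter passes through unchanged:
#   >>> NormalizeFieldDelimiter('tab') == '\t'
#   True
#   >>> NormalizeFieldDelimiter('|')
#   '|'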
|
|
|
def ValidateHivePartitioningOptions(hive_partitioning_mode):
|
|
"""Validates the string provided is one the API accepts.
|
|
|
|
Should not receive None as an input, since that will fail the comparison.
|
|
Args:
|
|
hive_partitioning_mode: String representing which hive partitioning mode is
|
|
requested. Only 'AUTO', 'STRINGS' and 'CUSTOM' are supported.
|
|
"""
|
|
if hive_partitioning_mode not in ['AUTO', 'STRINGS', 'CUSTOM']:
|
|
raise app.UsageError(
|
|
'Only the following hive partitioning modes are supported: "AUTO", '
|
|
'"STRINGS" and "CUSTOM"'
|
|
)
|
|
|
|
|
|
def ParseLabels(labels: List[str]) -> Dict[str, str]:
|
|
"""Parses a list of user-supplied strings representing labels.
|
|
|
|
Args:
|
|
labels: A list of user-supplied strings representing labels. It is expected
|
|
to be in the format "key:value".
|
|
|
|
Returns:
|
|
A dict mapping label keys to label values.
|
|
|
|
Raises:
|
|
UsageError: Incorrect label arguments were supplied.
|
|
"""
|
|
labels_dict = {}
|
|
for key_value in labels:
|
|
k, _, v = key_value.partition(':')
|
|
k = k.strip()
|
|
if k in labels_dict:
|
|
raise app.UsageError('Cannot specify label key "%s" multiple times' % k)
|
|
if k.strip():
|
|
labels_dict[k.strip()] = v.strip()
|
|
return labels_dict
|
|
|
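# Illustrative usage (not part of the original module): labels are supplied as
# "key:value" strings and whitespace around keys and values is stripped:
#   >>> ParseLabels(['env:prod', 'team : data'])
#   {'env': 'prod', 'team': 'data'}
# Repeating a key, e.g. ParseLabels(['env:a', 'env:b']), raises a UsageError.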
|
|
|
def IsRangeBoundaryUnbounded(value: str) -> bool:
|
|
return value.upper() == 'UNBOUNDED' or value.upper() == 'NULL'
|
|
|
|
|
|
def ParseRangeString(value: str) -> Optional[Tuple[str, str]]:
|
|
match = _RANGE_PATTERN.match(value)
|
|
if not match:
|
|
return None
|
|
start, end = match.groups()
|
|
return start, end
|
|
|
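# Illustrative usage (not part of the original module): range literals are
# expected in the form "[<start>, <end>)":
#   >>> ParseRangeString('[2020-01-01, 2021-01-01)')
#   ('2020-01-01', '2021-01-01')
# Strings that do not match the pattern return None.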
|
|
|
class TablePrinter(object):
|
|
"""Base class for printing a table, with a default implementation."""
|
|
|
|
def __init__(self, **kwds):
|
|
super(TablePrinter, self).__init__()
|
|
# Most extended classes will require state.
|
|
for key, value in kwds.items():
|
|
setattr(self, key, value)
|
|
|
|
@staticmethod
|
|
def _ValidateFields(fields, formatter):
|
|
if isinstance(formatter, table_formatter.CsvFormatter):
|
|
for field in fields:
|
|
if field['type'].upper() == 'RECORD':
|
|
raise app.UsageError(
|
|
(
|
|
'Error printing table: Cannot print record '
|
|
'field "%s" in CSV format.'
|
|
)
|
|
% field['name']
|
|
)
|
|
if field.get('mode', 'NULLABLE').upper() == 'REPEATED':
|
|
raise app.UsageError(
|
|
(
|
|
'Error printing table: Cannot print repeated '
|
|
'field "%s" in CSV format.'
|
|
)
|
|
% (field['name'])
|
|
)
|
|
|
|
@staticmethod
|
|
def _NormalizeRecord(field, value, use_full_timestamp):
|
|
"""Returns bq-specific formatting of a RECORD type."""
|
|
result = collections.OrderedDict()
|
|
for subfield, subvalue in zip(field.get('fields', []), value):
|
|
result[subfield.get('name', '')] = TablePrinter.NormalizeField(
|
|
subfield, subvalue, use_full_timestamp
|
|
)
|
|
return result
|
|
|
|
@staticmethod
|
|
def _NormalizeTimestamp(unused_field, value, use_full_timestamp):
|
|
"""Returns bq-specific formatting of a TIMESTAMP type."""
|
|
try:
|
|
if use_full_timestamp:
|
|
return value
|
|
else:
|
|
date = datetime.datetime.fromtimestamp(
|
|
0, tz=datetime.timezone.utc
|
|
) + datetime.timedelta(seconds=float(value))
|
|
# Remove the extra timezone info "+00:00" at the end of the date.
|
|
date = date.replace(tzinfo=None)
|
|
# Our goal is the equivalent of '%Y-%m-%d %H:%M:%S' via strftime but that
|
|
# doesn't work for dates with years prior to 1900. Instead we zero out
|
|
# fractional seconds then call isoformat with a space separator.
|
|
date = date.replace(microsecond=0)
|
|
return date.isoformat(' ')
|
|
except (ValueError, OverflowError):
|
|
return '<date out of range for display>'
|
|
|
|
@staticmethod
|
|
def _NormalizeRange(field, value, use_full_timestamp):
|
|
"""Returns bq-specific formatting of a RANGE type."""
|
|
parsed = ParseRangeString(value)
|
|
if parsed is None:
|
|
return '<invalid range>'
|
|
start, end = parsed
|
|
|
|
if field.get('rangeElementType').get('type').upper() != 'TIMESTAMP':
|
|
start = start.upper() if IsRangeBoundaryUnbounded(start) else start
|
|
end = end.upper() if IsRangeBoundaryUnbounded(end) else end
|
|
return '[%s, %s)' % (start, end)
|
|
|
|
if IsRangeBoundaryUnbounded(start):
|
|
normalized_start = start.upper()
|
|
else:
|
|
normalized_start = TablePrinter._NormalizeTimestamp(
|
|
field, start, use_full_timestamp
|
|
)
|
|
if IsRangeBoundaryUnbounded(end):
|
|
normalized_end = end.upper()
|
|
else:
|
|
normalized_end = TablePrinter._NormalizeTimestamp(
|
|
field, end, use_full_timestamp
|
|
)
|
|
return '[%s, %s)' % (normalized_start, normalized_end)
|
|
|
|
@staticmethod
|
|
def NormalizeField(field, value, use_full_timestamp: bool):
|
|
"""Returns bq-specific formatting of a field."""
|
|
if value is None:
|
|
return None
|
|
if field.get('mode', '').upper() == 'REPEATED':
|
|
return [
|
|
TablePrinter._NormalizeSingleValue(field, value, use_full_timestamp)
|
|
for value in value
|
|
]
|
|
return TablePrinter._NormalizeSingleValue(field, value, use_full_timestamp)
|
|
|
|
@staticmethod
|
|
def _NormalizeSingleValue(field, value, use_full_timestamp: bool):
|
|
"""Returns formatting of a single field value."""
|
|
if field.get('type', '').upper() == 'RECORD':
|
|
return TablePrinter._NormalizeRecord(field, value, use_full_timestamp)
|
|
elif field.get('type', '').upper() == 'TIMESTAMP':
|
|
return TablePrinter._NormalizeTimestamp(field, value, use_full_timestamp)
|
|
elif field.get('type', '').upper() == 'RANGE':
|
|
return TablePrinter._NormalizeRange(field, value, use_full_timestamp)
|
|
return value
|
|
|
|
@staticmethod
|
|
def MaybeConvertToJson(value):
|
|
"""Converts dicts and lists to JSON; returns everything else as-is."""
|
|
if isinstance(value, dict) or isinstance(value, list):
|
|
return json.dumps(value, separators=(',', ':'), ensure_ascii=False)
|
|
return value
|
|
|
|
@staticmethod
|
|
def FormatRow(fields, row, formatter, use_full_timestamp: bool):
|
|
"""Convert fields in a single row to bq-specific formatting."""
|
|
values = [
|
|
TablePrinter.NormalizeField(field, value, use_full_timestamp)
|
|
for field, value in zip(fields, row)
|
|
]
|
|
# Convert complex values to JSON if we're not already outputting as such.
|
|
if not isinstance(formatter, table_formatter.JsonFormatter):
|
|
values = map(TablePrinter.MaybeConvertToJson, values)
|
|
# Convert NULL values to strings for CSV and non-JSON formats.
|
|
if isinstance(formatter, table_formatter.CsvFormatter):
|
|
values = ['' if value is None else value for value in values]
|
|
elif not isinstance(formatter, table_formatter.JsonFormatter):
|
|
values = ['NULL' if value is None else value for value in values]
|
|
return values
|
|
|
|
def PrintTable(self, fields, rows, use_full_timestamp: bool):
|
|
formatter = utils_flags.get_formatter_from_flags(secondary_format='pretty')
|
|
self._ValidateFields(fields, formatter)
|
|
formatter.AddFields(fields)
|
|
formatter.AddRows(
|
|
TablePrinter.FormatRow(fields, row, formatter, use_full_timestamp)
|
|
for row in rows
|
|
)
|
|
formatter.Print()
|
|
|
|
|
|
def CreateExternalTableDefinition(
|
|
source_format,
|
|
source_uris,
|
|
schema,
|
|
autodetect,
|
|
connection_id=None,
|
|
ignore_unknown_values=False,
|
|
hive_partitioning_mode=None,
|
|
hive_partitioning_source_uri_prefix=None,
|
|
require_hive_partition_filter=None,
|
|
use_avro_logical_types=False,
|
|
parquet_enum_as_string=False,
|
|
parquet_enable_list_inference=False,
|
|
metadata_cache_mode=None,
|
|
object_metadata=None,
|
|
preserve_ascii_control_characters=False,
|
|
reference_file_schema_uri=None,
|
|
encoding=None,
|
|
file_set_spec_type=None,
|
|
null_marker=None,
|
|
null_markers=None,
|
|
time_zone=None,
|
|
date_format=None,
|
|
datetime_format=None,
|
|
time_format=None,
|
|
timestamp_format=None,
|
|
source_column_match=None,
|
|
parquet_map_target_type=None,
|
|
timestamp_target_precision=None,
|
|
):
|
|
"""Creates an external table definition with the given URIs and the schema.
|
|
|
|
Arguments:
|
|
source_format: Format of source data. For CSV files, specify 'CSV'. For
|
|
Google spreadsheet files, specify 'GOOGLE_SHEETS'. For newline-delimited
|
|
JSON, specify 'NEWLINE_DELIMITED_JSON'. For Cloud Datastore backup,
|
|
specify 'DATASTORE_BACKUP'. For Avro files, specify 'AVRO'. For Orc files,
|
|
specify 'ORC'. For Parquet files, specify 'PARQUET'. For Iceberg tables,
|
|
specify 'ICEBERG'.
|
|
source_uris: Comma separated list of URIs that contain data for this table.
|
|
schema: Either an inline schema or path to a schema file.
|
|
autodetect: Indicates if format options, compression mode and schema should be auto
|
|
detected from the source data. True - means that autodetect is on, False
|
|
means that it is off. None means format specific default: - For CSV it
|
|
means autodetect is OFF - For JSON it means that autodetect is ON. For
|
|
JSON, defaulting to autodetection is safer because the only option
|
|
autodetected is compression. If a schema is passed, then the user-supplied
|
|
schema is used.
|
|
connection_id: The user flag with the same name defined for the _Load
|
|
BigqueryCmd
|
|
ignore_unknown_values: Indicates if BigQuery should allow extra values that
|
|
are not represented in the table schema. If true, the extra values are
|
|
ignored. If false, records with extra columns are treated as bad records,
|
|
and if there are too many bad records, an invalid error is returned in the
|
|
job result. The default value is false. The sourceFormat property
|
|
determines what BigQuery treats as an extra value: - CSV: Trailing columns
|
|
- JSON: Named values that don't match any column names.
|
|
hive_partitioning_mode: Enables hive partitioning. AUTO indicates to
|
|
perform automatic type inference. STRINGS indicates to treat all hive
|
|
partition keys as STRING typed. No other values are accepted.
|
|
hive_partitioning_source_uri_prefix: Shared prefix for all files until hive
|
|
partitioning encoding begins.
|
|
require_hive_partition_filter: The user flag with the same name defined for
|
|
the _Load BigqueryCmd
|
|
use_avro_logical_types: The user flag with the same name defined for the
|
|
_Load BigqueryCmd
|
|
parquet_enum_as_string: The user flag with the same name defined for the
|
|
_Load BigqueryCmd
|
|
parquet_enable_list_inference: The user flag with the same name defined for
|
|
the _Load BigqueryCmd
|
|
metadata_cache_mode: Enables metadata cache for an external table with a
|
|
connection. Specify 'AUTOMATIC' to automatically refresh the cached
|
|
metadata. Specify 'MANUAL' to stop the automatic refresh.
|
|
object_metadata: Object Metadata Type.
|
|
preserve_ascii_control_characters: The user flag with the same name defined
|
|
for the _Load BigqueryCmd
|
|
reference_file_schema_uri: The user flag with the same name defined for the
|
|
_Load BigqueryCmd
|
|
encoding: Encoding types for CSV files. Available options are: 'UTF-8',
|
|
'ISO-8859-1', 'UTF-16BE', 'UTF-16LE', 'UTF-32BE', and 'UTF-32LE'. The
|
|
default value is 'UTF-8'.
|
|
file_set_spec_type: Set how to discover files given source URIs. Specify
|
|
'FILE_SYSTEM_MATCH' (default behavior) to expand source URIs by listing
|
|
files from the underlying object store. Specify
|
|
'NEW_LINE_DELIMITED_MANIFEST' to parse the URIs as new line delimited
|
|
manifest files, where each line contains a URI (No wild-card URIs are
|
|
supported).
|
|
null_marker: Specifies a string that represents a null value in a CSV file.
|
|
null_markers: Specifies a list of strings that represent null values in a
|
|
CSV file.
|
|
time_zone: Specifies the time zone for a CSV or JSON file.
|
|
date_format: Specifies the date format for a CSV or JSON file.
|
|
datetime_format: Specifies the datetime format for a CSV or JSON file.
|
|
time_format: Specifies the time format for a CSV or JSON file.
|
|
timestamp_format: Specifies the timestamp format for a CSV or JSON file.
|
|
source_column_match: Controls the strategy used to match loaded columns to
|
|
the schema.
|
|
parquet_map_target_type: Indicate the target type for parquet maps. If
|
|
unspecified, we represent parquet maps as map {repeated key_value {key,
|
|
value}}. This option can simplify this by omitting the key_value record if
|
|
it's equal to ARRAY_OF_STRUCT.
|
|
timestamp_target_precision: Precision (maximum number of total digits in
|
|
base 10) for seconds of TIMESTAMP type.
|
|
|
|
Returns:
|
|
A Python dictionary that contains an external table definition for the given
|
|
format with the most common options set.
|
|
"""
|
|
try:
|
|
supported_formats = [
|
|
'CSV',
|
|
'NEWLINE_DELIMITED_JSON',
|
|
'DATASTORE_BACKUP',
|
|
'DELTA_LAKE',
|
|
'AVRO',
|
|
'ORC',
|
|
'PARQUET',
|
|
'GOOGLE_SHEETS',
|
|
'ICEBERG',
|
|
]
|
|
|
|
if source_format not in supported_formats:
|
|
raise app.UsageError('%s is not a supported format.' % source_format)
|
|
|
|
external_table_def = {'sourceFormat': source_format}
|
|
if file_set_spec_type is not None:
|
|
external_table_def['fileSetSpecType'] = file_set_spec_type
|
|
if metadata_cache_mode is not None:
|
|
external_table_def['metadataCacheMode'] = metadata_cache_mode
|
|
if time_zone is not None:
|
|
external_table_def['timeZone'] = time_zone
|
|
if date_format is not None:
|
|
external_table_def['dateFormat'] = date_format
|
|
if datetime_format is not None:
|
|
external_table_def['datetimeFormat'] = datetime_format
|
|
if time_format is not None:
|
|
external_table_def['timeFormat'] = time_format
|
|
if timestamp_format is not None:
|
|
external_table_def['timestampFormat'] = timestamp_format
|
|
if object_metadata is not None:
|
|
supported_obj_metadata_types = ['DIRECTORY', 'SIMPLE']
|
|
|
|
if object_metadata not in supported_obj_metadata_types:
|
|
raise app.UsageError(
|
|
'%s is not a supported Object Metadata Type.' % object_metadata
|
|
)
|
|
|
|
external_table_def['sourceFormat'] = None
|
|
external_table_def['objectMetadata'] = object_metadata
|
|
if timestamp_target_precision is not None:
|
|
external_table_def['timestampTargetPrecision'] = timestamp_target_precision
|
|
|
|
if external_table_def['sourceFormat'] == 'CSV':
|
|
if autodetect:
|
|
external_table_def['autodetect'] = True
|
|
external_table_def['csvOptions'] = yaml.safe_load("""
|
|
{
|
|
"quote": '"',
|
|
"encoding": "UTF-8"
|
|
}
|
|
""")
|
|
else:
|
|
external_table_def['csvOptions'] = yaml.safe_load("""
|
|
{
|
|
"allowJaggedRows": false,
|
|
"fieldDelimiter": ",",
|
|
"allowQuotedNewlines": false,
|
|
"quote": '"',
|
|
"skipLeadingRows": 0,
|
|
"encoding": "UTF-8"
|
|
}
|
|
""")
|
|
external_table_def['csvOptions'][
|
|
'preserveAsciiControlCharacters'
|
|
] = preserve_ascii_control_characters
|
|
external_table_def['csvOptions']['encoding'] = encoding or 'UTF-8'
|
|
if null_marker is not None:
|
|
external_table_def['csvOptions']['nullMarker'] = null_marker
|
|
if null_markers is not None:
|
|
external_table_def['csvOptions']['nullMarkers'] = null_markers
|
|
if source_column_match is not None:
|
|
external_table_def['csvOptions'][
|
|
'sourceColumnMatch'
|
|
] = source_column_match
|
|
elif external_table_def['sourceFormat'] == 'NEWLINE_DELIMITED_JSON':
|
|
if autodetect is None or autodetect:
|
|
external_table_def['autodetect'] = True
|
|
external_table_def['jsonOptions'] = {'encoding': encoding or 'UTF-8'}
|
|
elif external_table_def['sourceFormat'] == 'GOOGLE_SHEETS':
|
|
if autodetect is None or autodetect:
|
|
external_table_def['autodetect'] = True
|
|
else:
|
|
external_table_def['googleSheetsOptions'] = yaml.safe_load("""
|
|
{
|
|
"skipLeadingRows": 0
|
|
}
|
|
""")
|
|
elif external_table_def['sourceFormat'] == 'AVRO':
|
|
external_table_def['avroOptions'] = {
|
|
'useAvroLogicalTypes': use_avro_logical_types
|
|
}
|
|
if reference_file_schema_uri is not None:
|
|
external_table_def['referenceFileSchemaUri'] = reference_file_schema_uri
|
|
elif external_table_def['sourceFormat'] == 'PARQUET':
|
|
external_table_def['parquetOptions'] = {
|
|
'enumAsString': parquet_enum_as_string,
|
|
'enableListInference': parquet_enable_list_inference,
|
|
'mapTargetType': parquet_map_target_type,
|
|
}
|
|
if reference_file_schema_uri is not None:
|
|
external_table_def['referenceFileSchemaUri'] = reference_file_schema_uri
|
|
elif external_table_def['sourceFormat'] == 'ORC':
|
|
if reference_file_schema_uri is not None:
|
|
external_table_def['referenceFileSchemaUri'] = reference_file_schema_uri
|
|
elif (
|
|
external_table_def['sourceFormat'] == 'ICEBERG'
|
|
or external_table_def['sourceFormat'] == 'DELTA_LAKE'
|
|
):
|
|
source_format = (
|
|
'Iceberg'
|
|
if external_table_def['sourceFormat'] == 'ICEBERG'
|
|
else 'Delta Lake'
|
|
)
|
|
if (autodetect is not None and not autodetect) or schema:
|
|
raise app.UsageError(
|
|
'Cannot create %s table from user-specified schema.'
|
|
% (source_format,)
|
|
)
|
|
# Always autodetect the schema for ICEBERG and DELTA_LAKE sources.
|
|
external_table_def['autodetect'] = True
|
|
if len(source_uris.split(',')) != 1:
|
|
raise app.UsageError(
|
|
'Must provide only one source_uri for %s table.' % (source_format,)
|
|
)
|
|
|
|
|
|
if ignore_unknown_values:
|
|
external_table_def['ignoreUnknownValues'] = True
|
|
|
|
|
|
if hive_partitioning_mode is not None:
|
|
ValidateHivePartitioningOptions(hive_partitioning_mode)
|
|
hive_partitioning_options = {}
|
|
hive_partitioning_options['mode'] = hive_partitioning_mode
|
|
if hive_partitioning_source_uri_prefix is not None:
|
|
hive_partitioning_options['sourceUriPrefix'] = (
|
|
hive_partitioning_source_uri_prefix
|
|
)
|
|
external_table_def['hivePartitioningOptions'] = hive_partitioning_options
|
|
if require_hive_partition_filter:
|
|
hive_partitioning_options['requirePartitionFilter'] = True
|
|
|
|
if schema:
|
|
fields = bq_client_utils.ReadSchema(schema)
|
|
external_table_def['schema'] = {'fields': fields}
|
|
|
|
if connection_id:
|
|
external_table_def['connectionId'] = connection_id
|
|
|
|
external_table_def['sourceUris'] = source_uris.split(',')
|
|
|
|
return external_table_def
|
|
|
|
except ValueError as e:
|
|
raise app.UsageError(
|
|
'Error occurred while creating table definition: %s' % e
|
|
)
|
|
|
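# Illustrative call (not part of the original module); builds a minimal CSV
# external table definition with schema autodetection enabled:
#   CreateExternalTableDefinition(
#       source_format='CSV',
#       source_uris='gs://bucket/data.csv',
#       schema=None,
#       autodetect=True)
# The result is a dict with 'sourceFormat', 'csvOptions', 'autodetect' and
# 'sourceUris' keys, usable as an externalDataConfiguration. The bucket and
# file names here are placeholders.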
|
|
|
def GetExternalDataConfig(
|
|
file_path_or_simple_spec,
|
|
use_avro_logical_types=False,
|
|
parquet_enum_as_string=False,
|
|
parquet_enable_list_inference=False,
|
|
metadata_cache_mode=None,
|
|
object_metadata=None,
|
|
preserve_ascii_control_characters=None,
|
|
reference_file_schema_uri=None,
|
|
file_set_spec_type=None,
|
|
null_marker=None,
|
|
null_markers=None,
|
|
time_zone=None,
|
|
date_format=None,
|
|
datetime_format=None,
|
|
time_format=None,
|
|
timestamp_format=None,
|
|
source_column_match=None,
|
|
parquet_map_target_type=None,
|
|
timestamp_target_precision=None,
|
|
):
|
|
"""Returns a ExternalDataConfiguration from the file or specification string.
|
|
|
|
Determines if the input string is a file path or a string,
|
|
then returns either the parsed file contents, or the parsed configuration from
|
|
string. The file content is expected to be JSON representation of
|
|
ExternalDataConfiguration. The specification is expected to be of the form
|
|
schema@format=uri i.e. schema is separated from format and uri by '@'. If the
|
|
uri itself contains '@' or '=' then the JSON file option should be used.
|
|
"format=" can be omitted for CSV files.
|
|
|
|
Raises:
|
|
UsageError: when incorrect usage or invalid args are used.
|
|
"""
|
|
maybe_filepath = os.path.expanduser(file_path_or_simple_spec)
|
|
if os.path.isfile(maybe_filepath):
|
|
try:
|
|
with open(maybe_filepath) as external_config_file:
|
|
return yaml.safe_load(external_config_file)
|
|
except yaml.error.YAMLError as e:
|
|
raise app.UsageError(
|
|
'Error decoding YAML external table definition from file %s: %s'
|
|
% (maybe_filepath, e)
|
|
)
|
|
else:
|
|
source_format = 'CSV'
|
|
schema = None
|
|
connection_id = None
|
|
error_msg = (
|
|
'Error decoding external_table_definition. '
|
|
'external_table_definition should either be the name of a '
|
|
'JSON file or the text representation of an external table '
|
|
'definition. Given:%s'
|
|
) % (file_path_or_simple_spec)
|
|
|
|
parts = file_path_or_simple_spec.split('@')
|
|
if len(parts) == 1:
|
|
# Schema and connection are not specified.
|
|
format_and_uri = parts[0]
|
|
elif len(parts) == 2:
|
|
# when there are 2 components, it can be:
|
|
# 1. format=uri@connection_id, e.g. csv=gs://bucket/file@us.conn1
|
|
# 2. schema@format=uri, e.g. col1::INTEGER@csv=gs://bucket/file
|
|
# If the first element is format=uri, then the second element is connection.
|
|
# Else, the first is schema, second is format=uri.
|
|
if parts[0].find('://') >= 0:
|
|
# format=uri and connection specified.
|
|
format_and_uri = parts[0]
|
|
connection_id = parts[1]
|
|
else:
|
|
# Schema and format=uri are specified.
|
|
schema = parts[0]
|
|
format_and_uri = parts[1]
|
|
elif len(parts) == 3:
|
|
# Schema and connection both are specified
|
|
schema = parts[0]
|
|
format_and_uri = parts[1]
|
|
connection_id = parts[2]
|
|
else:
|
|
raise app.UsageError(error_msg)
|
|
|
|
separator_pos = format_and_uri.find('=')
|
|
if separator_pos < 0:
|
|
# Format is not specified
|
|
uri = format_and_uri
|
|
else:
|
|
source_format = format_and_uri[0:separator_pos]
|
|
uri = format_and_uri[separator_pos + 1 :]
|
|
|
|
if not uri:
|
|
raise app.UsageError(error_msg)
|
|
# When using short notation for external table definition
|
|
# autodetect is always performed.
|
|
|
|
return CreateExternalTableDefinition(
|
|
source_format,
|
|
uri,
|
|
schema,
|
|
True,
|
|
connection_id,
|
|
use_avro_logical_types=use_avro_logical_types,
|
|
parquet_enum_as_string=parquet_enum_as_string,
|
|
parquet_enable_list_inference=parquet_enable_list_inference,
|
|
metadata_cache_mode=metadata_cache_mode,
|
|
object_metadata=object_metadata,
|
|
preserve_ascii_control_characters=preserve_ascii_control_characters,
|
|
reference_file_schema_uri=reference_file_schema_uri,
|
|
file_set_spec_type=file_set_spec_type,
|
|
null_marker=null_marker,
|
|
null_markers=null_markers,
|
|
time_zone=time_zone,
|
|
date_format=date_format,
|
|
datetime_format=datetime_format,
|
|
time_format=time_format,
|
|
timestamp_format=timestamp_format,
|
|
source_column_match=source_column_match,
|
|
parquet_map_target_type=parquet_map_target_type,
|
|
timestamp_target_precision=timestamp_target_precision,
|
|
)
|
|
|
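# Illustrative short-form specifications accepted above (the URIs and schema
# are placeholders, not from the original source):
#   GetExternalDataConfig('gs://bucket/data.csv')                # CSV, autodetect
#   GetExternalDataConfig('PARQUET=gs://bucket/data.parquet')    # explicit format
#   GetExternalDataConfig('f1:INTEGER,f2:STRING@CSV=gs://bucket/data.csv')
# A path to a file containing a JSON ExternalDataConfiguration is also
# accepted.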
|
|
|
def GetJson(
|
|
file_path_or_json_string: str,
|
|
) -> Optional[Dict[str, Any]]:
|
|
"""Returns a JSON object from the file or a JSON string.
|
|
|
|
Determines if the input string is a file path or a string,
|
|
then returns either the parsed file contents, or the parsed JSON from
|
|
string. The file content is expected to be a JSON string.
|
|
|
|
Args:
|
|
file_path_or_json_string: Path to the JSON file or a JSON string.
|
|
|
|
Raises:
|
|
UsageError: when incorrect usage or invalid args are used.
|
|
"""
|
|
maybe_filepath = os.path.expanduser(file_path_or_json_string)
|
|
if os.path.isfile(maybe_filepath):
|
|
try:
|
|
with open(maybe_filepath) as json_file:
|
|
return json.load(json_file)
|
|
except json.decoder.JSONDecodeError as e:
|
|
raise app.UsageError(
|
|
'Error decoding JSON from file %s: %s' % (maybe_filepath, e)
|
|
)
|
|
else:
|
|
try:
|
|
return json.loads(file_path_or_json_string)
|
|
except json.decoder.JSONDecodeError as e:
|
|
raise app.UsageError(
|
|
'Error decoding JSON from string %s: %s'
|
|
% (file_path_or_json_string, e)
|
|
)
|
|
|
|
|
|
def UpdateExternalCatalogTableOptions(
|
|
current_options: Dict[str, Any],
|
|
external_options_str: str,
|
|
) -> Dict[str, Any]:
|
|
"""Updates the external catalog table options.
|
|
|
|
Args:
|
|
current_options: The current external catalog table options.
|
|
external_options_str: The new external catalog table options as a JSON
|
|
string or a file path.
|
|
|
|
Returns:
|
|
The updated external catalog table options.
|
|
"""
|
|
# Clear the parameters if they are present in the existing options but not
|
|
# in the new external catalog table options.
|
|
if current_options.get(_PARAMETERS_KEY) is not None:
|
|
current_options[_PARAMETERS_KEY] = {
|
|
k: None for k in current_options[_PARAMETERS_KEY]
|
|
}
|
|
# Clear the storage descriptor if they are present in the existing options
|
|
# but not in the new external catalog table options.
|
|
if current_options.get(_STORAGE_DESCRIPTOR_KEY) is not None:
|
|
current_options[_STORAGE_DESCRIPTOR_KEY] = {
|
|
k: None for k in current_options[_STORAGE_DESCRIPTOR_KEY]
|
|
}
|
|
|
|
external_catalog_table_options_dict = GetJson(external_options_str)
|
|
if _PARAMETERS_KEY in external_catalog_table_options_dict:
|
|
current_options.setdefault(_PARAMETERS_KEY, {})
|
|
current_options[_PARAMETERS_KEY].update(
|
|
external_catalog_table_options_dict[_PARAMETERS_KEY]
|
|
)
|
|
else:
|
|
current_options[_PARAMETERS_KEY] = None
|
|
if _STORAGE_DESCRIPTOR_KEY in external_catalog_table_options_dict:
|
|
current_options[_STORAGE_DESCRIPTOR_KEY] = (
|
|
external_catalog_table_options_dict[_STORAGE_DESCRIPTOR_KEY]
|
|
)
|
|
else:
|
|
current_options[_STORAGE_DESCRIPTOR_KEY] = None
|
|
if _CONNECTION_ID_KEY in external_catalog_table_options_dict:
|
|
current_options[_CONNECTION_ID_KEY] = external_catalog_table_options_dict[
|
|
_CONNECTION_ID_KEY
|
|
]
|
|
else:
|
|
current_options[_CONNECTION_ID_KEY] = None
|
|
return current_options
|
|
|
|
|
|
def UpdateExternalCatalogDatasetOptions(
|
|
current_options: Dict[str, Any],
|
|
external_options_str: str,
|
|
) -> Dict[str, Any]:
|
|
"""Updates the external catalog dataset options.
|
|
|
|
Args:
|
|
current_options: The current external catalog dataset options.
|
|
external_options_str: The new external catalog dataset options as a JSON
|
|
string or a file path.
|
|
|
|
Returns:
|
|
The updated external catalog dataset options.
|
|
"""
|
|
# Clear the parameters if they are present in the existing dataset but not
|
|
# in the new external catalog dataset options.
|
|
if current_options.get(_PARAMETERS_KEY) is not None:
|
|
current_options[_PARAMETERS_KEY] = {
|
|
k: None for k in current_options[_PARAMETERS_KEY]
|
|
}
|
|
external_catalog_dataset_options_dict = GetJson(external_options_str)
|
|
if _PARAMETERS_KEY in external_catalog_dataset_options_dict:
|
|
current_options.setdefault(_PARAMETERS_KEY, {})
|
|
for key, value in external_catalog_dataset_options_dict[
|
|
_PARAMETERS_KEY
|
|
].items():
|
|
current_options[_PARAMETERS_KEY][key] = value
|
|
else:
|
|
current_options[_PARAMETERS_KEY] = None
|
|
if _DEFAULT_STORAGE_LOCATION_URI_KEY in external_catalog_dataset_options_dict:
|
|
current_options[_DEFAULT_STORAGE_LOCATION_URI_KEY] = (
|
|
external_catalog_dataset_options_dict[_DEFAULT_STORAGE_LOCATION_URI_KEY]
|
|
)
|
|
else:
|
|
current_options[_DEFAULT_STORAGE_LOCATION_URI_KEY] = None
|
|
return current_options
|
|
|
|
|
|
def PrintPageToken(page_token):
|
|
"""Prints the page token in the pretty format.
|
|
|
|
Args:
|
|
page_token: The dictionary mapping of pageToken with string 'nextPageToken'.
|
|
"""
|
|
formatter = utils_flags.get_formatter_from_flags(secondary_format='pretty')
|
|
utils_formatting.configure_formatter(
|
|
formatter, bq_id_utils.ApiClientHelper.NextPageTokenReference
|
|
)
|
|
formatter.AddDict(page_token)
|
|
formatter.Print()
|
|
|
|
|
|
def ParseTimePartitioning(
|
|
partitioning_type=None,
|
|
partitioning_expiration=None,
|
|
partitioning_field=None,
|
|
partitioning_minimum_partition_date=None,
|
|
partitioning_require_partition_filter=None,
|
|
):
|
|
"""Parses time partitioning from the arguments.
|
|
|
|
Args:
|
|
partitioning_type: type for the time partitioning. Supported types are HOUR,
|
|
DAY, MONTH, and YEAR. The default value is DAY when other arguments are
|
|
specified, which generates one partition per day.
|
|
partitioning_expiration: number of seconds to keep the storage for a
|
|
partition. A negative value clears this setting.
|
|
partitioning_field: if not set, the table is partitioned based on the
|
|
loading time; if set, the table is partitioned based on the value of this
|
|
field.
|
|
partitioning_minimum_partition_date: lower boundary of partition date for
|
|
field based partitioning table.
|
|
partitioning_require_partition_filter: if true, queries on the table must
|
|
have a partition filter so not all partitions are scanned.
|
|
|
|
Returns:
|
|
Time partitioning if any of the arguments is not None, otherwise None.
|
|
|
|
Raises:
|
|
UsageError: when failed to parse.
|
|
"""
|
|
|
|
time_partitioning = {}
|
|
key_type = 'type'
|
|
key_expiration = 'expirationMs'
|
|
key_field = 'field'
|
|
key_minimum_partition_date = 'minimumPartitionDate'
|
|
key_require_partition_filter = 'requirePartitionFilter'
|
|
if partitioning_type is not None:
|
|
time_partitioning[key_type] = partitioning_type
|
|
if partitioning_expiration is not None:
|
|
time_partitioning[key_expiration] = partitioning_expiration * 1000
|
|
if partitioning_field is not None:
|
|
time_partitioning[key_field] = partitioning_field
|
|
if partitioning_minimum_partition_date is not None:
|
|
if partitioning_field is not None:
|
|
time_partitioning[key_minimum_partition_date] = (
|
|
partitioning_minimum_partition_date
|
|
)
|
|
else:
|
|
raise app.UsageError(
|
|
'Need to specify --time_partitioning_field for '
|
|
'--time_partitioning_minimum_partition_date.'
|
|
)
|
|
if partitioning_require_partition_filter is not None:
|
|
if time_partitioning:
|
|
time_partitioning[key_require_partition_filter] = (
|
|
partitioning_require_partition_filter
|
|
)
|
|
|
|
if time_partitioning:
|
|
if key_type not in time_partitioning:
|
|
time_partitioning[key_type] = 'DAY'
|
|
if (
|
|
key_expiration in time_partitioning
|
|
and time_partitioning[key_expiration] <= 0
|
|
):
|
|
time_partitioning[key_expiration] = None
|
|
return time_partitioning
|
|
else:
|
|
return None
|
|
|
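# Illustrative behaviour (not part of the original module): when any argument
# is given, the partitioning type defaults to DAY and expirations are
# converted to milliseconds; non-positive expirations clear the setting:
#   >>> ParseTimePartitioning(partitioning_expiration=86400)
#   {'expirationMs': 86400000, 'type': 'DAY'}
#   >>> ParseTimePartitioning() is None
#   True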
|
|
|
def ParseFileSetSpecType(file_set_spec_type=None):
|
|
"""Parses the file set specification type from the arguments.
|
|
|
|
Args:
|
|
file_set_spec_type: specifies how to discover files given source URIs.
|
|
|
|
Returns:
|
|
file set specification type.
|
|
Raises:
|
|
UsageError: when an illegal value is passed.
|
|
"""
|
|
if file_set_spec_type is None:
|
|
return None
|
|
valid_spec_types = ['FILE_SYSTEM_MATCH', 'NEW_LINE_DELIMITED_MANIFEST']
|
|
if file_set_spec_type not in valid_spec_types:
|
|
raise app.UsageError(
|
|
'Error parsing file_set_spec_type, only FILE_SYSTEM_MATCH, '
|
|
'NEW_LINE_DELIMITED_MANIFEST or no value are accepted'
|
|
)
|
|
return 'FILE_SET_SPEC_TYPE_' + file_set_spec_type
|
|
|
|
|
|
def ParseClustering(
|
|
clustering_fields: Optional[str] = None,
|
|
) -> Optional[Dict[str, List[str]]]:
|
|
"""Parses clustering from the arguments.
|
|
|
|
Args:
|
|
clustering_fields: Comma-separated field names.
|
|
|
|
Returns:
|
|
Clustering if any of the arguments is not None, otherwise None. Special
|
|
case if clustering_fields is passed in as an empty string instead of None,
|
|
in which case we'll return {}, to support the scenario where user wants to
|
|
update a table and remove the clustering spec.
|
|
"""
|
|
|
|
if clustering_fields == '': # pylint: disable=g-explicit-bool-comparison
|
|
return {}
|
|
elif clustering_fields is not None:
|
|
return {'fields': clustering_fields.split(',')}
|
|
else:
|
|
return None
|
|
|
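# Illustrative usage (not part of the original module):
#   >>> ParseClustering('customer_id,order_date')
#   {'fields': ['customer_id', 'order_date']}
#   >>> ParseClustering('')   # empty string clears an existing clustering spec
#   {}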
|
|
|
def ParseNumericTypeConversionMode(
|
|
numeric_type_conversion_mode: Optional[str] = None,
|
|
) -> Optional[str]:
|
|
"""Parses the numeric type conversion mode from the arguments.
|
|
|
|
Args:
|
|
numeric_type_conversion_mode: specifies how the numeric values are handled
|
|
when the value is out of scale.
|
|
|
|
Returns:
|
|
The conversion mode.
|
|
|
|
Raises:
|
|
UsageError: when an illegal value is passed.
|
|
"""
|
|
|
|
if numeric_type_conversion_mode is None:
|
|
return None
|
|
elif numeric_type_conversion_mode == 'ROUND':
|
|
return 'NUMERIC_TYPE_VALUE_ROUND'
|
|
else:
|
|
raise app.UsageError(
|
|
'Error parsing numeric_type_conversion_mode, only ROUND or no value '
|
|
'are accepted'
|
|
)
|
|
|
|
|
|
def ParseRangePartitioning(range_partitioning_spec=None):
|
|
"""Parses range partitioning from the arguments.
|
|
|
|
Args:
|
|
range_partitioning_spec: specification for range partitioning in the format
|
|
of field,start,end,interval.
|
|
|
|
Returns:
|
|
Range partitioning if range_partitioning_spec is not None, otherwise None.
|
|
Raises:
|
|
UsageError: when the spec fails to parse.
|
|
"""
|
|
|
|
range_partitioning = {}
|
|
key_field = 'field'
|
|
key_range = 'range'
|
|
key_range_start = 'start'
|
|
key_range_end = 'end'
|
|
key_range_interval = 'interval'
|
|
|
|
if range_partitioning_spec is not None:
|
|
parts = range_partitioning_spec.split(',')
|
|
if len(parts) != 4:
|
|
raise app.UsageError(
|
|
'Error parsing range_partitioning. range_partitioning should be in '
|
|
'the format of "field,start,end,interval"'
|
|
)
|
|
range_partitioning[key_field] = parts[0]
|
|
range_spec = {}
|
|
range_spec[key_range_start] = parts[1]
|
|
range_spec[key_range_end] = parts[2]
|
|
range_spec[key_range_interval] = parts[3]
|
|
range_partitioning[key_range] = range_spec
|
|
|
|
if range_partitioning:
|
|
return range_partitioning
|
|
else:
|
|
return None
|
|
|
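# Illustrative usage (not part of the original module); the field name and
# bounds are placeholders:
#   >>> ParseRangePartitioning('customer_id,0,100,10')
#   {'field': 'customer_id', 'range': {'start': '0', 'end': '100', 'interval': '10'}}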
|
|
|
def IsSuccessfulDmlOrDdlJob(printable_job_info: str) -> bool:
|
|
"""Returns True iff the job is successful and is a DML/DDL query job."""
|
|
return (
|
|
'Affected Rows' in printable_job_info
|
|
or 'DDL Operation Performed' in printable_job_info
|
|
)
|
|
|
|
|
|
def MaybeGetSessionTempObjectName(
|
|
dataset_id: str, object_id: str
|
|
) -> Optional[str]:
|
|
"""If we have a session temporary object, returns the user name of the object.
|
|
|
|
Args:
|
|
dataset_id: Dataset of object
|
|
object_id: Id of object
|
|
|
|
Returns:
|
|
If the object is a session temp object, the name of the object after
|
|
stripping out internal stuff such as session prefix and signature encodings.
|
|
|
|
If the object is not a session temp object, the return value is None.
|
|
"""
|
|
if not re.fullmatch('_[0-9a-f]{40}', dataset_id):
|
|
return None # Not an anonymous dataset
|
|
|
|
session_prefix_regexp = (
|
|
'_[0-9a-f]{8}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{4}_[0-9a-f]{12}_'
|
|
)
|
|
opt_signature_encoding_regexp = '(?:_b0a98f6_.*)?'
|
|
match = re.fullmatch(
|
|
session_prefix_regexp + '(.*?)' + opt_signature_encoding_regexp, object_id
|
|
)
|
|
if not match:
|
|
return None # No session prefix
|
|
return match.group(1)
|
|
|
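# Illustrative behaviour (hypothetical identifiers, not from the original
# source): session temp objects live in an anonymous dataset named
# "_<40 hex chars>" and carry an underscore-delimited UUID prefix:
#   dataset_id = '_' + 'ab' * 20
#   object_id = '_12345678_1234_1234_1234_123456789abc_my_temp_table'
#   MaybeGetSessionTempObjectName(dataset_id, object_id)  # -> 'my_temp_table'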
|
|
|
def PrintJobMessages(printable_job_info):
|
|
"""Prints additional info from a job formatted for printing.
|
|
|
|
If the job had a fatal error, non-fatal warnings are not shown.
|
|
|
|
If any error/warning does not have a 'message' key, printable_job_info must
|
|
have 'jobReference' identifying the job.
|
|
|
|
For DML queries prints number of affected rows.
|
|
For DDL queries prints the performed operation and the target.
|
|
"""
|
|
messages = GetJobMessagesForPrinting(printable_job_info)
|
|
if messages:
|
|
print(messages)
|
|
|
|
|
|
def GetJobMessagesForPrinting(printable_job_info):
|
|
"""Similar to _PrintJobMessages(), but returns a string, rather than printing."""
|
|
result_lines = []
|
|
|
|
job_ref = '(unknown)' # Should never be seen, but beats a weird crash.
|
|
if 'jobReference' in printable_job_info:
|
|
job_ref = printable_job_info['jobReference']
|
|
|
|
# For failing jobs, display the error but not any warnings, because those
|
|
# may be more distracting than helpful.
|
|
if printable_job_info['State'] == 'FAILURE':
|
|
error_result = printable_job_info['status']['errorResult']
|
|
error_ls = printable_job_info['status'].get('errors', [])
|
|
error = bq_error.CreateBigqueryError(error_result, error_result, error_ls)
|
|
result_lines.append(
|
|
'Error encountered during job execution:\n%s\n' % (error,)
|
|
)
|
|
elif 'errors' in printable_job_info['status']:
|
|
warnings = printable_job_info['status']['errors']
|
|
result_lines.append((
|
|
'Warning%s encountered during job execution:\n'
|
|
% ('' if len(warnings) == 1 else 's')
|
|
))
|
|
recommend_show = False
|
|
for w in warnings:
|
|
# Some warnings include detailed error messages, and some just
|
|
# include programmatic error codes. Some have a 'location'
|
|
# separately, and some put it in the 'message' text.
|
|
if 'message' not in w:
|
|
recommend_show = True
|
|
else:
|
|
if 'location' in w:
|
|
message = '[%s] %s' % (w['location'], w['message'])
|
|
else:
|
|
message = w['message']
|
|
if message is not None:
|
|
message = stringutil.ensure_str(message)
|
|
result_lines.append('%s\n' % message)
|
|
if recommend_show:
|
|
result_lines.append('Use "bq show -j %s" to view job warnings.' % job_ref)
|
|
elif 'Affected Rows' in printable_job_info:
|
|
result_lines.append(
|
|
'Number of affected rows: %s\n' % printable_job_info['Affected Rows']
|
|
)
|
|
elif 'DDL Target Table' in printable_job_info:
|
|
ddl_target_table = printable_job_info['DDL Target Table']
|
|
project_id = ddl_target_table.get('projectId')
|
|
dataset_id = ddl_target_table.get('datasetId')
|
|
table_id = ddl_target_table.get('tableId')
|
|
op = _DDL_OPERATION_MAP.get(
|
|
printable_job_info.get('DDL Operation Performed')
|
|
)
|
|
# DDL Target Table is returned for both TABLE DDL and DROP ALL ROW ACCESS
|
|
# POLICIES DDL statements.
|
|
if project_id and dataset_id and table_id and op:
|
|
if 'DDL Affected Row Access Policy Count' in printable_job_info:
|
|
ddl_affected_row_access_policy_count = printable_job_info[
|
|
'DDL Affected Row Access Policy Count'
|
|
]
|
|
result_lines.append(
|
|
'{op} {count} row access policies on table '
|
|
'{project}.{dataset}.{table}\n'.format(
|
|
op=op,
|
|
count=ddl_affected_row_access_policy_count,
|
|
project=project_id,
|
|
dataset=dataset_id,
|
|
table=table_id,
|
|
)
|
|
)
|
|
elif (
|
|
'Statement Type' in printable_job_info
|
|
and 'INDEX' in printable_job_info['Statement Type']
|
|
):
|
|
if 'SEARCH_INDEX' in printable_job_info['Statement Type']:
|
|
result_lines.append(
|
|
'%s search index on table %s.%s.%s\n'
|
|
% (
|
|
stringutil.ensure_str(op),
|
|
stringutil.ensure_str(project_id),
|
|
stringutil.ensure_str(dataset_id),
|
|
stringutil.ensure_str(table_id),
|
|
)
|
|
)
|
|
elif 'VECTOR_INDEX' in printable_job_info['Statement Type']:
|
|
index_progress_instruction = ''
|
|
if printable_job_info.get('DDL Operation Performed') in (
|
|
'CREATE',
|
|
'REPLACE',
|
|
):
|
|
index_progress_instruction = (
|
|
'Please query %s.%s.INFORMATION_SCHEMA to check the progress '
|
|
' of the index.\n'
|
|
) % (project_id, dataset_id)
|
|
result_lines.append(
|
|
'%s vector index on table %s.%s.%s\n%s'
|
|
% (
|
|
stringutil.ensure_str(op),
|
|
stringutil.ensure_str(project_id),
|
|
stringutil.ensure_str(dataset_id),
|
|
stringutil.ensure_str(table_id),
|
|
stringutil.ensure_str(index_progress_instruction),
|
|
)
|
|
)
|
|
else:
|
|
result_lines.append(
|
|
'%s %s.%s.%s\n'
|
|
% (
|
|
stringutil.ensure_str(op),
|
|
stringutil.ensure_str(project_id),
|
|
stringutil.ensure_str(dataset_id),
|
|
stringutil.ensure_str(table_id),
|
|
)
|
|
)
|
|
if 'Default Connection Stats' in printable_job_info:
|
|
default_connection_stats = printable_job_info[
|
|
'Default Connection Stats'
|
|
]
|
|
location_id = job_ref['location']
|
|
if 'provisioned' in default_connection_stats:
|
|
if printable_job_info['Statement Type'] == 'CREATE_MODEL':
|
|
target_type = 'model'
|
|
else:
|
|
target_type = 'table'
|
|
result_lines.append(
|
|
'Default connection created for %s [%s] in project [%s] in'
|
|
' region [%s]\n'
|
|
% (
|
|
stringutil.ensure_str(target_type),
|
|
stringutil.ensure_str(table_id),
|
|
stringutil.ensure_str(project_id),
|
|
stringutil.ensure_str(location_id),
|
|
)
|
|
)
|
|
if 'permissionUpdated' in default_connection_stats:
|
|
result_lines.append(
|
|
'Your IAM policy has been updated for the default connection\n'
|
|
)
|
|
elif 'DDL Target Routine' in printable_job_info:
|
|
ddl_target_routine = printable_job_info['DDL Target Routine']
|
|
project_id = ddl_target_routine.get('projectId')
|
|
dataset_id = ddl_target_routine.get('datasetId')
|
|
routine_id = ddl_target_routine.get('routineId')
|
|
op = _DDL_OPERATION_MAP.get(
|
|
printable_job_info.get('DDL Operation Performed')
|
|
)
|
|
temp_object_name = MaybeGetSessionTempObjectName(dataset_id, routine_id)
|
|
if temp_object_name is not None:
|
|
result_lines.append('%s temporary routine %s' % (op, temp_object_name))
|
|
else:
|
|
result_lines.append(
|
|
'%s %s.%s.%s' % (op, project_id, dataset_id, routine_id)
|
|
)
|
|
elif 'DDL Target Row Access Policy' in printable_job_info:
|
|
ddl_target_row_access_policy = printable_job_info[
|
|
'DDL Target Row Access Policy'
|
|
]
|
|
project_id = ddl_target_row_access_policy.get('projectId')
|
|
dataset_id = ddl_target_row_access_policy.get('datasetId')
|
|
table_id = ddl_target_row_access_policy.get('tableId')
|
|
row_access_policy_id = ddl_target_row_access_policy.get('policyId')
|
|
op = _DDL_OPERATION_MAP.get(
|
|
printable_job_info.get('DDL Operation Performed')
|
|
)
|
|
if project_id and dataset_id and table_id and row_access_policy_id and op:
|
|
result_lines.append(
|
|
'{op} row access policy {policy} on table {project}.{dataset}.{table}'
|
|
.format(
|
|
op=op,
|
|
policy=row_access_policy_id,
|
|
project=project_id,
|
|
dataset=dataset_id,
|
|
table=table_id,
|
|
)
|
|
)
|
|
elif 'Assertion' in printable_job_info:
|
|
result_lines.append('Assertion successful')
|
|
|
|
if 'Session Id' in printable_job_info:
|
|
result_lines.append('In session: %s' % printable_job_info['Session Id'])
|
|
|
|
return '\n'.join(result_lines)
|
|
|
|
|
|
def PrintObjectInfo(
|
|
object_info,
|
|
reference: bq_id_utils.ApiClientHelper.Reference,
|
|
custom_format: bq_consts.CustomPrintFormat,
|
|
print_reference: bool = True,
|
|
) -> None:
|
|
"""Prints the object with various formats."""
|
|
# The JSON formats are handled separately so that they don't print
|
|
# the record as a list of one record.
|
|
if custom_format == 'schema':
|
|
if 'schema' not in object_info or 'fields' not in object_info['schema']:
|
|
raise app.UsageError('Unable to retrieve schema from specified table.')
|
|
bq_utils.PrintFormattedJsonObject(object_info['schema']['fields'])
|
|
elif FLAGS.format in ['prettyjson', 'json']:
|
|
bq_utils.PrintFormattedJsonObject(object_info)
|
|
elif FLAGS.format in [None, 'sparse', 'pretty']:
|
|
formatter = utils_flags.get_formatter_from_flags()
|
|
utils_formatting.configure_formatter(
|
|
formatter,
|
|
type(reference),
|
|
print_format=custom_format,
|
|
object_info=object_info,
|
|
)
|
|
object_info = utils_formatting.format_info_by_type(
|
|
object_info, type(reference)
|
|
)
|
|
if object_info:
|
|
formatter.AddDict(object_info)
|
|
if reference.typename and print_reference:
|
|
print('%s %s\n' % (reference.typename.capitalize(), reference))
|
|
formatter.Print()
|
|
print()
|
|
if isinstance(reference, bq_id_utils.ApiClientHelper.JobReference):
|
|
PrintJobMessages(object_info)
|
|
else:
|
|
formatter = utils_flags.get_formatter_from_flags()
|
|
formatter.AddColumns(list(object_info.keys()))
|
|
formatter.AddDict(object_info)
|
|
formatter.Print()
|
|
|
|
|
|
def PrintObjectsArray(object_infos, objects_type):
|
|
if FLAGS.format in ['prettyjson', 'json']:
|
|
bq_utils.PrintFormattedJsonObject(object_infos)
|
|
elif FLAGS.format in [None, 'sparse', 'pretty']:
|
|
if not object_infos:
|
|
return
|
|
formatter = utils_flags.get_formatter_from_flags()
|
|
utils_formatting.configure_formatter(
|
|
formatter, objects_type, print_format='list'
|
|
)
|
|
formatted_infos = list(
|
|
map(
|
|
functools.partial(
|
|
utils_formatting.format_info_by_type,
|
|
object_type=objects_type,
|
|
),
|
|
object_infos,
|
|
)
|
|
)
|
|
for info in formatted_infos:
|
|
formatter.AddDict(info)
|
|
formatter.Print()
|
|
elif object_infos:
|
|
formatter = utils_flags.get_formatter_from_flags()
|
|
formatter.AddColumns(list(object_infos[0].keys()))
|
|
for info in object_infos:
|
|
formatter.AddDict(info)
|
|
formatter.Print()
|
|
|
|
|
|
class ResourceMetadata(TypedDict):
|
|
token: NotRequired[str]
|
|
unreachable: NotRequired[List[str]]
|
|
|
|
|
|
def PrintObjectsArrayWithMetadata(
|
|
objects_list: List[Any],
|
|
objects_type: Type[bq_id_utils.ApiClientHelper.Reference],
|
|
passed_flags: NamedTuple(
|
|
'PassedFlags',
|
|
[
|
|
('print_last_token', bool),
|
|
('print_unreachable', bool),
|
|
],
|
|
),
|
|
objects_metadata: Optional[ResourceMetadata],
|
|
) -> None:
|
|
"""Prints the objects array with metadata configured to print using flags.
|
|
|
|
If there is no `objects_metadata` passed in, then this function has the same
|
|
behaviour as `PrintObjectsArray`.
|
|
|
|
Different metadata can be printed by setting flags in `passed_flags`. With
|
|
a `format` of 'sparse' or 'pretty', no metadata will be printed if no flags
|
|
are set. With a format of 'prettyjson' or 'json', the `objects_list`
|
|
will be printed as a `results` value even if no flags are set to print
|
|
metadata but if some are set, these will also be printed.
|
|
|
|
Arguments:
|
|
objects_list: The list of resources to print.
|
|
objects_type: The type of the resources to be printed.
|
|
passed_flags: Flags used to configure the printing behaviour.
|
|
objects_metadata: Optional metadata to be printed.
|
|
"""
|
|
if FLAGS.format in ['prettyjson', 'json']:
|
|
if passed_flags.print_last_token or passed_flags.print_unreachable:
|
|
json_object = {'results': objects_list}
|
|
if passed_flags.print_last_token and 'token' in objects_metadata:
|
|
json_object['token'] = objects_metadata['token']
|
|
if passed_flags.print_unreachable and 'unreachable' in objects_metadata:
|
|
json_object['unreachable'] = objects_metadata['unreachable']
|
|
else:
|
|
json_object = objects_list
|
|
bq_utils.PrintFormattedJsonObject(json_object)
|
|
elif FLAGS.format in [None, 'sparse', 'pretty']:
|
|
PrintObjectsArray(objects_list, objects_type)
|
|
if objects_metadata is None:
|
|
return
|
|
if passed_flags.print_last_token and 'token' in objects_metadata:
|
|
print('\nNext token: ' + objects_metadata['token'])
|
|
if passed_flags.print_unreachable and 'unreachable' in objects_metadata:
|
|
print('\nUnreachable: ' + ', '.join(objects_metadata['unreachable']))
|
|
|
|
|
|
def ParseUdfResources(udf_resources):
|
|
"""Parses UDF resources from an array of resource URIs.
|
|
|
|
Arguments:
|
|
udf_resources: Array of udf resource URIs.
|
|
|
|
Returns:
|
|
Array of UDF resources parsed into the format expected by the BigQuery API
|
|
client.
|
|
"""
|
|
|
|
if udf_resources is None:
|
|
return None
|
|
inline_udf_resources = []
|
|
external_udf_resources = []
|
|
for uris in udf_resources:
|
|
for uri in uris.split(','):
|
|
if os.path.isfile(uri):
|
|
with open(uri) as udf_file:
|
|
inline_udf_resources.append(udf_file.read())
|
|
else:
|
|
if not uri.startswith('gs://'):
|
|
raise app.UsageError(
|
|
'Non-inline resources must be Google Cloud Storage (gs://) URIs'
|
|
)
|
|
external_udf_resources.append(uri)
|
|
udfs = []
|
|
if inline_udf_resources:
|
|
for udf_code in inline_udf_resources:
|
|
udfs.append({'inlineCode': udf_code})
|
|
if external_udf_resources:
|
|
for uri in external_udf_resources:
|
|
udfs.append({'resourceUri': uri})
|
|
return udfs
|
|
|
|
|
|
def ValidateDatasetName(dataset_name: str) -> None:
|
|
"""A regex to ensure the dataset name is valid.
|
|
|
|
|
|
Arguments:
|
|
dataset_name: string name of the dataset to be validated.
|
|
|
|
Raises:
|
|
UsageError: An error occurred due to invalid dataset string.
|
|
"""
|
|
is_valid = re.fullmatch(r'[a-zA-Z0-9\_]{1,1024}', dataset_name)
|
|
if not is_valid:
|
|
raise app.UsageError(
|
|
'Dataset name: %s is invalid, must be letters '
|
|
'(uppercase or lowercase), numbers, and underscores up to '
|
|
'1024 characters.' % dataset_name
|
|
)
|
|
|
|
|
|
def ParseParameters(parameters):
|
|
"""Parses query parameters from an array of name:type:value.
|
|
|
|
Arguments:
|
|
parameters: An iterable of string-form query parameters: name:type:value.
|
|
Name may be omitted to indicate a positional parameter: :type:value. Type
|
|
may be omitted to indicate a string: name::value, or ::value.
|
|
|
|
Returns:
|
|
A list of query parameters in the form for the BigQuery API client.
|
|
"""
|
|
if not parameters:
|
|
return None
|
|
results = []
|
|
for param_string in parameters:
|
|
if os.path.isfile(param_string):
|
|
with open(param_string) as f:
|
|
results += json.load(f)
|
|
else:
|
|
results.append(ParseParameter(param_string))
|
|
return results
|
|
|
|
|
|
def SplitParam(param_string):
|
|
split = param_string.split(':', 1)
|
|
if len(split) != 2:
|
|
raise app.UsageError(
|
|
'Query parameters must be of the form: '
|
|
'"name:type:value", ":type:value", or "name::value". '
|
|
'An empty name produces a positional parameter. '
|
|
'An empty type produces a STRING parameter.'
|
|
)
|
|
return split
|
|
|
|
|
|
def ParseParameter(param_string):
|
|
"""Parse a string of the form <name><type>:<value> into each part."""
|
|
name, param_string = SplitParam(param_string)
|
|
try:
|
|
type_dict, value_dict = ParseParameterTypeAndValue(param_string)
|
|
except app.UsageError as e:
|
|
print('Error parsing parameter %s: %s' % (name, e))
|
|
sys.exit(1)
|
|
result = {'parameterType': type_dict, 'parameterValue': value_dict}
|
|
if name:
|
|
result['name'] = name
|
|
return result
|
|
|
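# Illustrative usage (not part of the original module):
#   >>> ParseParameter('min_count:INT64:3')
#   {'parameterType': {'type': 'INT64'}, 'parameterValue': {'value': '3'}, 'name': 'min_count'}
# Omitting the name (':INT64:3') yields a positional parameter; omitting the
# type ('min_count::3') yields a STRING parameter.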
|
|
|
def ParseParameterTypeAndValue(param_string):
|
|
"""Parse a string of the form <recursive_type>:<value> into each part."""
|
|
type_string, value_string = SplitParam(param_string)
|
|
if not type_string:
|
|
type_string = 'STRING'
|
|
type_dict = ParseParameterType(type_string)
|
|
return type_dict, ParseParameterValue(type_dict, value_string)
|
|
|
|
|
|
def ParseParameterType(type_string):
|
|
"""Parse a parameter type string into a JSON dict for the BigQuery API."""
|
|
type_dict = {'type': type_string.upper()}
|
|
if type_string.upper().startswith('ARRAY<') and type_string.endswith('>'):
|
|
type_dict = {
|
|
'type': 'ARRAY',
|
|
'arrayType': ParseParameterType(type_string[6:-1]),
|
|
}
|
|
if type_string.startswith('STRUCT<') and type_string.endswith('>'):
|
|
type_dict = {
|
|
'type': 'STRUCT',
|
|
'structTypes': ParseStructType(type_string[7:-1]),
|
|
}
|
|
if type_string.startswith('RANGE<') and type_string.endswith('>'):
|
|
type_dict = {
|
|
'type': 'RANGE',
|
|
'rangeElementType': ParseParameterType(type_string[6:-1]),
|
|
}
|
|
if not type_string:
|
|
raise app.UsageError('Query parameter missing type')
|
|
return type_dict
|
|
|
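# Illustrative usage (not part of the original module):
#   >>> ParseParameterType('ARRAY<INT64>')
#   {'type': 'ARRAY', 'arrayType': {'type': 'INT64'}}
#   >>> ParseParameterType('STRUCT<x INT64, y STRING>')['structTypes']
#   [{'type': {'type': 'INT64'}, 'name': 'x'}, {'type': {'type': 'STRING'}, 'name': 'y'}]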
|
|
|
def ParseStructType(type_string):
|
|
"""Parse a Struct QueryParameter type into a JSON dict form."""
|
|
subtypes = []
|
|
for name, sub_type in StructTypeSplit(type_string):
|
|
subtypes.append({'type': ParseParameterType(sub_type), 'name': name})
|
|
return subtypes
|
|
|
|
|
|
def StructTypeSplit(type_string):
|
|
"""Yields single field-name, sub-types tuple from a StructType string.
|
|
|
|
Raises:
|
|
UsageError: When a field name is missing.
|
|
"""
|
|
while type_string:
|
|
next_span = type_string.split(',', 1)[0]
|
|
if '<' in next_span:
|
|
angle_count = 0
|
|
i = 0
|
|
for i in range(next_span.find('<'), len(type_string)):
|
|
if type_string[i] == '<':
|
|
angle_count += 1
|
|
if type_string[i] == '>':
|
|
angle_count -= 1
|
|
if angle_count == 0:
|
|
break
|
|
if angle_count != 0:
|
|
raise app.UsageError('Malformatted struct type')
|
|
next_span = type_string[: i + 1]
|
|
type_string = type_string[len(next_span) + 1 :]
|
|
splits = next_span.split(None, 1)
|
|
if len(splits) != 2:
|
|
raise app.UsageError('Struct parameter missing name for field')
|
|
yield splits
|
|
|
|
|
|
def FormatRfc3339(datetime_obj: datetime.datetime) -> str:
|
|
"""Formats a datetime.datetime object (UTC) in RFC3339.
|
|
|
|
https://developers.google.com/protocol-buffers/docs/reference/google.protobuf#timestamp
|
|
|
|
Args:
|
|
datetime_obj: A datetime.datetime object representing a datetime in UTC.
|
|
|
|
Returns:
|
|
The string representation of the date in RFC3339.
|
|
"""
|
|
return datetime_obj.isoformat('T') + 'Z'
|
|
|
|
|
|
def ParseRangeParameterValue(range_value: str) -> Tuple[str, str]:
|
|
"""Parse a range parameter value string into its components.
|
|
|
|
Args:
|
|
range_value: A range value string of the form "[<start>, <end>)".
|
|
|
|
Returns:
|
|
A tuple (<start>, <end>).
|
|
|
|
Raises:
|
|
app.UsageError: if the input range value string was not formatted correctly.
|
|
"""
|
|
parsed = ParseRangeString(range_value)
|
|
if parsed is None:
|
|
raise app.UsageError(
|
|
f'Invalid range parameter value: {range_value}. Expected format:'
|
|
' "[<start>, <end>)"'
|
|
)
|
|
return parsed
|
|
|
|
|
|
def ParseParameterValue(type_dict, value_input):
|
|
"""Parse a parameter value of type `type_dict` from value_input.
|
|
|
|
Arguments:
|
|
type_dict: The JSON-dict type as which to parse `value_input`.
|
|
value_input: Either a string representing the value, or a JSON dict for
|
|
array and value types.
|
|
"""
|
|
if 'structTypes' in type_dict:
|
|
if isinstance(value_input, str):
|
|
if value_input == 'NULL':
|
|
return {'structValues': None}
|
|
value_input = json.loads(value_input)
|
|
type_map = dict([(x['name'], x['type']) for x in type_dict['structTypes']])
|
|
values = {}
|
|
for field_name, value in value_input.items():
|
|
values[field_name] = ParseParameterValue(type_map[field_name], value)
|
|
return {'structValues': values}
|
|
if 'arrayType' in type_dict:
|
|
if isinstance(value_input, str):
|
|
if value_input == 'NULL':
|
|
return {'arrayValues': None}
|
|
try:
|
|
value_input = json.loads(value_input)
|
|
except json.decoder.JSONDecodeError:
|
|
tb = sys.exc_info()[2]
|
|
# pylint: disable=raise-missing-from
|
|
raise app.UsageError(
|
|
'Error parsing string as JSON: %s' % value_input
|
|
).with_traceback(tb)
|
|
values = [
|
|
ParseParameterValue(type_dict['arrayType'], x) for x in value_input
|
|
]
|
|
if not values: # Workaround to pass empty array parameter.
|
|
return {'value': {}} # An empty arrayValues list is the same as NULL.
|
|
return {'arrayValues': values}
|
|
if 'rangeElementType' in type_dict:
|
|
if value_input == 'NULL':
|
|
return {'rangeValue': None}
|
|
start, end = ParseRangeParameterValue(value_input)
|
|
return {
|
|
'rangeValue': {
|
|
'start': ParseParameterValue(type_dict['rangeElementType'], start),
|
|
'end': ParseParameterValue(type_dict['rangeElementType'], end),
|
|
}
|
|
}
|
|
return {'value': value_input if value_input != 'NULL' else None}
|