feat: Add new gcloud commands, API clients, and third-party libraries across various services.

This commit is contained in:
2026-01-01 20:26:35 +01:00
parent 5e23cbece0
commit a19e592eb7
25221 changed files with 8324611 additions and 0 deletions

View File

@@ -0,0 +1,313 @@
#!/usr/bin/env python
"""API utils for the BQ CLI."""
import json
import logging
import re
from typing import Dict, NamedTuple, Optional, Union
import urllib
from absl import flags
from utils import bq_consts
from utils import bq_error
Service = bq_consts.Service
# TODO(b/151445175) Remove Apiary formats.
STATIC_HTTP_ENDPOINT_RE_PREFIX = (
'https?://(www|(staging-www|test-bigquery).sandbox|bigquery-sq|bigquery)'
'.(google|googleapis).com'
)
_GDU_DOMAIN = 'googleapis.com'
def _get_service_name_from_enum(service: Service) -> str:
  """Returns the service name for the given service enum."""
  # Note: BQ_IAM intentionally maps onto the core BigQuery service name.
  service_names = {
      Service.BIGQUERY: 'bigquery',
      Service.CONNECTIONS: 'bigqueryconnection',
      Service.RESERVATIONS: 'bigqueryreservation',
      Service.DTS: 'bigquerydatatransfer',
      Service.ANALYTICS_HUB: 'analyticshub',
      Service.BIGLAKE: 'biglake',
      Service.BQ_IAM: 'bigquery',
  }
  name = service_names.get(service)
  if name is None:
    raise ValueError(f'Unsupported service: {service}')
  return name
def _get_tpc_service_endpoint_hostname(
    service_enum: Service = Service.BIGQUERY,
    universe_domain: str = _GDU_DOMAIN,
    region: Optional[str] = None,
    is_mtls: bool = False,
    is_rep: bool = False,
    is_lep: bool = False,
) -> str:
  """Returns the TPC service endpoint hostname."""
  logging.info(
      'Building a root URL for the %s service in the "%s" universe for region'
      ' "%s" %s mTLS, %s REP, and %s LEP',
      service_enum,
      universe_domain,
      region,
      'with' if is_mtls else 'without',
      'with' if is_rep else 'without',
      'with' if is_lep else 'without',
  )
  service = _get_service_name_from_enum(service_enum)
  # These are taken from here:
  # https://docs.google.com/document/d/1c0l65oyQ_iUvhOSHXKF9SWPS7WIu4VZYuXiPFn3zDx8
  # Fully qualified REP hostname, with or without MTLS:
  if is_rep and region:
    if is_mtls:
      return f'{service}.{region}.rep.mtls.{universe_domain}'
    return f'{service}.{region}.rep.{universe_domain}'
  # MTLS, non-regional:
  if is_mtls and not region:
    return f'{service}.mtls.{universe_domain}'
  # MTLS omitted, LEP:
  if not is_mtls and is_lep and region:
    return f'{region}-{service}.{universe_domain}'
  # Purpose, region, and MTLS omitted (default):
  return f'{service}.{universe_domain}'
def add_trailing_slash_if_missing(url: str) -> str:
  """Returns `url`, appending a '/' unless it already ends with one."""
  return url if url.endswith('/') else url + '/'
def get_tpc_root_url_from_flags(
    service: Service,
    inputted_flags: NamedTuple(
        'InputtedFlags',
        [
            ('API', flags.FlagHolder[Optional[str]]),
            ('UNIVERSE_DOMAIN', flags.FlagHolder[Optional[str]]),
            ('LOCATION', flags.FlagHolder[Optional[str]]),
            ('USE_LEP', flags.FlagHolder[bool]),
            ('USE_REP', flags.FlagHolder[bool]),
            ('USE_REGIONAL_ENDPOINTS', flags.FlagHolder[bool]),
            ('MTLS', flags.FlagHolder[bool]),
        ],
    ),
) -> str:
  """Takes BQ CLI flags to build a root URL to make requests to.

  If the `api` flag is set, and is a http/https URL then it will be used
  otherwise the result is built up from the different options for a TPC service
  endpoint.

  Args:
    service: The service that this request will be made to. Usually the API
      that is being hit.
    inputted_flags: The flags set, usually straight from bq_flags.

  Returns:
    The root URL to be used for BQ requests. This is built from the service
    being targeted and a number of flags as arguments. It's intended to be used
    both for building the URL to request the discovery doc from, and to
    override the rootUrl and servicePath values of the discovery doc when
    they're incorrect. It always ends with a trailing slash.

  Raises:
    BigqueryClientError: If the flags are used incorrectly.
  """
  # use_lep, use_rep and use_regional_endpoints are mutually exclusive, and
  # all three imply a region must be supplied.
  number_of_flags_requesting_a_regional_api = [
      inputted_flags.USE_LEP.value,
      inputted_flags.USE_REP.value,
      inputted_flags.USE_REGIONAL_ENDPOINTS.value,
  ].count(True)
  if number_of_flags_requesting_a_regional_api > 1:
    raise bq_error.BigqueryClientError(
        'Only one of use_lep, use_rep or use_regional_endpoints can be used at'
        ' a time'
    )
  if (
      number_of_flags_requesting_a_regional_api == 1
      and not inputted_flags.LOCATION.value
  ):
    raise bq_error.BigqueryClientError(
        'A region is needed when the use_lep, use_rep or use_regional_endpoints'
        ' flags are used.'
    )
  # An explicitly provided `api` flag short-circuits endpoint construction.
  if (
      inputted_flags.API.present
  ):
    logging.info(
        'Looking for a root URL and an `api` value was found, using that: %s',
        inputted_flags.API.value,
    )
    return add_trailing_slash_if_missing(inputted_flags.API.value)
  # The BQ CLI tool has historically interpreted the location flag to mean
  # a resource from a specific region will be requested from a global API.
  # For our initial implementation, this code maintains this behaviour.
  if (
      number_of_flags_requesting_a_regional_api == 0
      and inputted_flags.LOCATION.value
  ):
    region = None
  else:
    region = inputted_flags.LOCATION.value
  # Re-evaluate the usage of LEP requests for `use_regional_endpoints`. It was
  # originally implemented this way as part of b/211695055 but now that LEP is
  # being replace by REP, and there is varied support across the different BQ
  # service APIs, then this should be revisited.
  if inputted_flags.USE_REGIONAL_ENDPOINTS.value:
    logging.info(
        'Building a root URL and `use_regional_endpoints` is present,'
        ' forcing LEP'
    )
    is_lep = True
  else:
    is_lep = inputted_flags.USE_LEP.value
  # Use the default `universe_domain` value if it's not set, so long as there
  # was no `api` flag specified (handled above). The initial implementation is
  # done this way since historically the `api` flag defined a default value, and
  # that had to be handled or migrated. Since there is already enough risk with
  # this change the behaviour was kept. Precedence for their values does mean it
  # makes sense for the `api` default to eventually become the `None` value.
  if inputted_flags.UNIVERSE_DOMAIN.value:
    universe_domain = inputted_flags.UNIVERSE_DOMAIN.value
  else:
    universe_domain = _GDU_DOMAIN
  hostname = _get_tpc_service_endpoint_hostname(
      service_enum=service,
      universe_domain=universe_domain,
      region=region,
      is_mtls=inputted_flags.MTLS.value,
      is_rep=inputted_flags.USE_REP.value,
      is_lep=is_lep,
  )
  # Normalize the hostname into a full https URL with a trailing slash.
  root_url = add_trailing_slash_if_missing(
      urllib.parse.urlunsplit(
          urllib.parse.SplitResult(
              scheme='https', netloc=hostname, path='', query='', fragment=''
          )
      )
  )
  logging.info('Final root URL built as: %s', root_url)
  return root_url
def add_api_key_to_discovery_url(
    discovery_url: str,
    universe_domain: Optional[str],
    inputted_flags: NamedTuple(
        'InputtedFlags',
        [
            (
                'BIGQUERY_DISCOVERY_API_KEY_FLAG',
                flags.FlagHolder[Optional[str]],
            ),
        ],
    ),
    key: Optional[str] = None,
    labels: Optional[str] = None,
) -> str:
  """Adds an API key to the URL."""
  # TODO: b/361181701 - Clean up the duplicate key check logic when cleaning up
  # discovery_url_extra handling.
  if key and f'key={key}' in discovery_url:
    logging.info(
        'API key %s has already been added, presumably from'
        ' --discovery_url_extra',
        key,
    )
    return discovery_url
  if 'key=' in discovery_url:
    logging.info(
        'An API key already exists in the URL, presumably from'
        ' --discovery_url_extra, so not adding any new key'
    )
    return discovery_url
  if not key:
    # Fall back to the flag-provided discovery API key.
    key = inputted_flags.BIGQUERY_DISCOVERY_API_KEY_FLAG.value
    logging.info(
        'No API key has been set, using value from the'
        ' `bigquery_discovery_api_key` flag targeting the universe_domain'
        ' (%s)',
        universe_domain,
    )
  if key:
    delimiter = '&' if '?' in discovery_url else '?'
    discovery_url = f'{discovery_url}{delimiter}key={key}'
    if labels:
      discovery_url = f'{discovery_url}&labels={labels}'
    logging.info('Discovery URL has been updated (%s)', discovery_url)
  return discovery_url
def get_discovery_url_from_root_url(
    root_url: str, api_version: str = 'v2'
) -> str:
  """Returns the discovery doc URL from a root URL."""
  split_url = urllib.parse.urlsplit(root_url)
  # Swap in the discovery path and a `version` query parameter, keeping the
  # scheme and host from the root URL.
  return urllib.parse.urlunsplit(
      split_url._replace(
          path='/$discovery/rest',
          query=urllib.parse.urlencode({'version': api_version}),
      )
  )
# This typing here is minimal needed for our current use cases but doesn't
# express how complicated the returned object can be.
def parse_discovery_doc(
    discovery_document: Union[str, bytes],
) -> Dict[str, str]:
  """Takes a downloaded discovery document and parses it.

  Args:
    discovery_document: The discovery doc to parse, as JSON text or UTF-8
      encoded bytes.

  Returns:
    The parsed api doc.
  """
  doc_text = discovery_document
  if isinstance(doc_text, bytes):
    doc_text = doc_text.decode('utf-8')
  if not isinstance(doc_text, str):
    raise ValueError(
        f'Unsupported discovery document type: {type(discovery_document)}'
    )
  return json.loads(doc_text)
def is_gdu_universe(universe_domain: Optional[str]) -> bool:
  """Returns whether the universe domain is GDU."""
  # A missing/empty domain is never GDU.
  return bool(universe_domain) and universe_domain == _GDU_DOMAIN
def is_gdu_url(url: Optional[str]) -> bool:
  """Returns whether the url is GDU."""
  if not url:
    return False
  if _GDU_DOMAIN in url:
    return True
  # Fall back to the legacy (Apiary-era) endpoint patterns.
  return re.match(STATIC_HTTP_ENDPOINT_RE_PREFIX, url) is not None

View File

@@ -0,0 +1,36 @@
#!/usr/bin/env python
"""Constants for the BQ CLI."""
import enum
from typing import Literal
class Service(enum.Enum):
  """Enum for the different BigQuery APIs supported."""

  ANALYTICS_HUB = 1
  BIGLAKE = 2
  BIGQUERY = 3
  CONNECTIONS = 4
  RESERVATIONS = 5
  DTS = 6
  # This is the BQ core discovery doc with some IAM additions. See cl/600781292
  # for exactly what is added. There is some context in b/296612193 but IAM
  # needs to be a separate enum option until Dataset IAM is launched
  # (b/284146366).
  BQ_IAM = 7
# Names of the output serialization formats the CLI can print results in.
FormatType = Literal['json', 'prettyjson', 'csv', 'sparse', 'pretty']
# Custom print formats, keyed either by the command producing the output or by
# the resource type being printed.
CustomPrintFormat = Literal[
    # Commands
    'list',
    'make',
    'show',
    # Resources
    'materialized_view',
    'schema',
    'table_replica',
    'view',
]

View File

@@ -0,0 +1,195 @@
#!/usr/bin/env python
"""Utility functions and classes for BQ CLI errors."""
import textwrap
from typing import Dict, List, Optional
import bq_flags
from utils import bq_logging
# User-facing message explaining the P12 service-account key deprecation;
# surfaced by callers elsewhere in the CLI.
P12_DEPRECATION_MESSAGE = (
    'BQ CLI no longer supports the deprecated P12 format by default. To migrate'
    ' to the new JSON service account key format, follow the steps in'
    ' https://cloud.google.com/iam/docs/keys-create-delete#creating. To force'
    ' BQ CLI to recognize P12 keys, re-run the command with'
    ' --nouse_google_auth.'
)
class BigqueryError(Exception):
  """Class to represent a BigQuery error.

  Root of the BQ CLI exception hierarchy; subclasses distinguish transport,
  interface, server-side and client-side failures.
  """


class BigqueryTypeError(BigqueryError):
  """A BQ CLI type error that should not show a stack trace."""


class BigqueryCommunicationError(BigqueryError):
  """Error communicating with the server."""


class BigqueryInterfaceError(BigqueryError):
  """Response from server missing required fields."""
class BigqueryServiceError(BigqueryError):
  """Base class of Bigquery-specific error responses.

  The BigQuery server received request and returned an error.
  """

  def __init__(
      self,
      message: str,
      error: Dict[str, str],
      error_list: List[Dict[str, str]],
      job_ref: Optional[str] = None,
      *args,
      **kwds,
  ):
    # pylint: disable=g-doc-args
    # pylint: disable=keyword-arg-before-vararg
    """Initializes a BigqueryServiceError.

    Args:
      message: A user-facing error message.
      error: The error dictionary, code may inspect the 'reason' key.
      error_list: A list of additional entries, for example a load job may
        contain multiple errors here for each error encountered during
        processing.
      job_ref: Optional JobReference string, if this error was encountered
        while processing a job.
    """
    super().__init__(message, *args, **kwds)
    self.error = error
    self.error_list = error_list
    self.job_ref = job_ref

  def __repr__(self):
    return (
        f'{self.__class__.__name__}: error={self.error},'
        f' error_list={self.error_list}, job_ref={self.job_ref}'
    )
# Server-reported errors; typically constructed from a server response via
# CreateBigqueryError below.
class BigqueryNotFoundError(BigqueryServiceError):
  """The requested resource or identifier was not found."""


class BigqueryDuplicateError(BigqueryServiceError):
  """The requested resource or identifier already exists."""


class BigqueryAccessDeniedError(BigqueryServiceError):
  """The user does not have access to the requested resource."""


class BigqueryInvalidQueryError(BigqueryServiceError):
  """The SQL statement is invalid."""


class BigqueryTermsOfServiceError(BigqueryAccessDeniedError):
  """User has not ACK'd ToS."""


class BigqueryBackendError(BigqueryServiceError):
  """A backend error typically corresponding to retriable HTTP 5xx failures."""


# Client-side (local) misuse and parsing errors.
class BigqueryClientError(BigqueryError):
  """Invalid use of BigqueryClient."""


class BigqueryClientConfigurationError(BigqueryClientError):
  """Invalid configuration of BigqueryClient."""


class BigquerySchemaError(BigqueryClientError):
  """Error in locating or parsing the schema."""


class BigqueryTableConstraintsError(BigqueryClientError):
  """Error in locating or parsing the table constraints."""
def CreateBigqueryError(
    error: Dict[str, str],
    server_error: Dict[str, str],
    error_ls: List[Dict[str, str]],
    job_ref: Optional[str] = None,
    session_id: Optional[str] = None,
) -> BigqueryError:
  """Returns a BigqueryError for json error embedded in server_error.

  If error_ls contains any errors other than the given one, those
  are also included in the returned message.

  Args:
    error: The primary error to convert.
    server_error: The error returned by the server. (This is only used in the
      case that error is malformed.)
    error_ls: Additional errors to include in the error message.
    job_ref: String representation a JobReference, if this is an error
      associated with a job.
    session_id: Id of the session if the job is part of one.

  Returns:
    BigqueryError representing error.
  """
  reason = error.get('reason')
  if job_ref:
    message = f"Error processing job '{job_ref}': {error.get('message')}"
  else:
    message = error.get('message', '')
  # We don't want to repeat the "main" error message.
  new_errors = [err for err in error_ls if err != error]
  if new_errors:
    message += '\nFailure details:\n'
    new_error_messages = [
        ': '.join(filter(None, [err.get('location'), err.get('message')]))
        for err in new_errors
    ]
    # Fix: the original guarded this with a `wrap_error_message` local that was
    # hard-coded to True, leaving an unreachable un-wrapped else branch; the
    # dead code has been removed.
    message += '\n'.join(
        textwrap.fill(msg, initial_indent=' - ', subsequent_indent=' ')
        for msg in new_error_messages
    )
  if session_id:
    message += '\nIn session: %s' % session_id
  # Sometimes we will have type(message) being <type 'unicode'>, for example
  # from an invalid query containing a non-English string. Reduce this
  # to <type 'string'> now -- otherwise it's a trap for any code that
  # tries to %s-format the exception later: str() uses 'ascii' codec.
  # And the message is for display only, so this shouldn't confuse other code.
  message = bq_logging.EncodeForPrinting(message)
  if not reason or not message:
    return BigqueryInterfaceError(
        'Error reported by server with missing error fields. '
        'Server returned: %s' % (str(server_error),)
    )
  # Map the server-supplied reason onto a specific exception type; anything
  # unrecognized becomes a generic BigqueryServiceError.
  error_class_for_reason = {
      'notFound': BigqueryNotFoundError,
      'duplicate': BigqueryDuplicateError,
      'accessDenied': BigqueryAccessDeniedError,
      'invalidQuery': BigqueryInvalidQueryError,
      'termsOfServiceNotAccepted': BigqueryTermsOfServiceError,
      'backendError': BigqueryBackendError,
  }
  error_class = error_class_for_reason.get(reason, BigqueryServiceError)
  return error_class(message, error, error_ls, job_ref=job_ref)

View File

@@ -0,0 +1,224 @@
#!/usr/bin/env python
"""BQ CLI helper functions for error handling."""
import codecs
import http.client
import logging
import sys
import textwrap
import time
import traceback
from absl import app
from absl import flags
from google.auth import exceptions as google_auth_exceptions
import googleapiclient
import httplib2
import oauth2client_4_0.client
import bq_utils
from gcloud_wrapper import bq_to_gcloud_config_classes
from utils import bq_error
from utils import bq_gcloud_utils
from utils import bq_logging
from pyglib import stringutil
FLAGS = flags.FLAGS
_BIGQUERY_TOS_MESSAGE = (
'In order to get started, please visit the Google APIs Console to '
'create a project and agree to our Terms of Service:\n'
'\thttps://console.cloud.google.com/\n\n'
'For detailed sign-up instructions, please see our Getting Started '
'Guide:\n'
'\thttps://cloud.google.com/bigquery/docs/quickstarts/'
'quickstart-command-line\n\n'
'Once you have completed the sign-up process, please try your command '
'again.'
)
def process_error(
    err: BaseException,
    name: str = 'unknown',
    message_prefix: str = 'You have encountered a bug in the BigQuery CLI.',
) -> int:
  """Translate an error message into some printing and a return code.

  Args:
    err: The exception to report to the user.
    name: Name of the operation that failed, used in the printed messages.
    message_prefix: Lead-in text used for unexpected (non-BigQuery) errors.

  Returns:
    The process return code: the SystemExit's own code if `err` is a
    SystemExit, 2 for not-found/duplicate BigQuery errors, otherwise 1.
  """
  bq_logging.ConfigurePythonLogger(FLAGS.apilog)
  logger = logging.getLogger(__name__)
  if isinstance(err, SystemExit):
    logger.exception('An error has caused the tool to exit', exc_info=err)
    return err.code  # sys.exit called somewhere, hopefully intentionally.
  response = []
  retcode = 1
  # Capture the in-flight exception's traceback for the diagnostics block.
  (etype, value, tb) = sys.exc_info()
  trace = ''.join(traceback.format_exception(etype, value, tb))
  contact_us_msg = _generate_contact_us_message()
  platform_str = bq_utils.GetPlatformString()
  error_details = (
      textwrap.dedent("""\
      ========================================
      == Platform ==
      %s
      == bq version ==
      %s
      == Command line ==
      %s
      == UTC timestamp ==
      %s
      == Error trace ==
      %s
      ========================================
      """)
      % (
          platform_str,
          stringutil.ensure_str(bq_utils.VERSION_NUMBER),
          [stringutil.ensure_str(item) for item in sys.argv],
          time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()),
          stringutil.ensure_str(trace),
      )
  )
  # NOTE(review): this globally replaces the 'strict' codec error handler with
  # the replacing handler, so subsequent encodes in this process cannot raise.
  codecs.register_error('strict', codecs.replace_errors)
  message = bq_logging.EncodeForPrinting(err)
  # Known error types get a short, user-facing message without a traceback.
  if isinstance(
      err, (bq_error.BigqueryNotFoundError, bq_error.BigqueryDuplicateError)
  ):
    response.append('BigQuery error in %s operation: %s' % (name, message))
    retcode = 2
  elif isinstance(err, bq_error.BigqueryTermsOfServiceError):
    response.append(str(err) + '\n')
    response.append(_BIGQUERY_TOS_MESSAGE)
  elif isinstance(err, bq_error.BigqueryInvalidQueryError):
    response.append('Error in query string: %s' % (message,))
  elif (
      isinstance(err, bq_error.BigqueryServiceError)
      and 'API requires a quota project, which is not set by default' in message
  ):
    response.append(
        'Bigquery service returned an error in %s operation: %s.'
        '\n\n'
        'Please make sure you have the correct quota project ID set through '
        '--quota_project_id or gcloud config set billing/quota_project. '
        % (name, message)
    )
  elif isinstance(err, bq_error.BigqueryError) and not isinstance(
      err, bq_error.BigqueryInterfaceError
  ):
    response.append('BigQuery error in %s operation: %s' % (name, message))
  elif isinstance(err, (app.UsageError, bq_error.BigqueryTypeError)):
    response.append(message)
  elif isinstance(
      err, (bq_to_gcloud_config_classes.BigqueryGcloudDelegationUserError)
  ):
    response.append(message)
  elif isinstance(err, SyntaxError) or isinstance(
      err, bq_error.BigquerySchemaError
  ):
    response.append('Invalid input: %s' % (message,))
  elif isinstance(err, flags.Error):
    response.append('Error parsing command: %s' % (message,))
  elif isinstance(err, KeyboardInterrupt):
    response.append('')
  else:  # pylint: disable=broad-except
    # Errors with traceback information are printed here.
    # The traceback module has nicely formatted the error trace
    # for us, so we don't want to undo that via TextWrap.
    if isinstance(err, bq_error.BigqueryInterfaceError):
      message_prefix = (
          'Bigquery service returned an invalid reply in %s operation: %s.'
          '\n\n'
          'Please make sure you are using the latest version '
          'of the bq tool and try again. '
          'If this problem persists, you may have encountered a bug in the '
          'bigquery client.' % (name, message)
      )
    elif isinstance(err, oauth2client_4_0.client.Error):
      message_prefix = (
          'Authorization error. This may be a network connection problem, '
          'so please try again. If this problem persists, the credentials '
          'may be corrupt. Try deleting and re-creating your credentials. '
          'You can delete your credentials using '
          '"bq init --delete_credentials".'
          '\n\n'
          'If this problem still occurs, you may have encountered a bug '
          'in the bigquery client.'
      )
    elif isinstance(err, google_auth_exceptions.RefreshError):
      credential_type = 'service account'
      message_prefix = (
          'Authorization error. If you used %s credentials, the server likely '
          'returned an Unauthorized response. Verify that you are using the '
          'correct account with the correct permissions to access the service '
          'endpoint.'
          '\n\n'
          'If this problem still occurs, you may have encountered a bug '
          'in the bigquery client.' % (credential_type)
      )
    elif (
        isinstance(err, http.client.HTTPException)
        or isinstance(err, googleapiclient.errors.Error)
        or isinstance(err, httplib2.HttpLib2Error)
    ):
      message_prefix = (
          'Network connection problem encountered, please try again.'
          '\n\n'
          'If this problem persists, you may have encountered a bug in the '
          'bigquery client.'
      )
    # Unexpected errors additionally print the full diagnostics block.
    message = message_prefix + ' ' + contact_us_msg
    wrap_error_message = True
    if wrap_error_message:
      message = flags.text_wrap(message)
    print(message)
    print(error_details)
    response.append(
        'Unexpected exception in %s operation: %s' % (name, message)
    )
  response_message = '\n'.join(response)
  wrap_error_message = True
  if wrap_error_message:
    response_message = flags.text_wrap(response_message)
  logger.exception(response_message, exc_info=err)
  print(response_message)
  return retcode
def _generate_contact_us_message() -> str:
"""Generates the Contact Us message."""
# pragma pylint: disable=line-too-long
contact_us_msg = (
'Please file a bug report in our '
'public '
'issue tracker:\n'
' https://issuetracker.google.com/issues/new?component=187149&template=0\n'
'Please include a brief description of '
'the steps that led to this issue, as well as '
'any rows that can be made public from '
'the following information: \n\n'
)
# If an internal user runs the public BQ CLI, show the internal issue tracker.
try:
gcloud_configs = bq_gcloud_utils.load_config()
gcloud_core_properties = gcloud_configs.get('core')
if (
'account' in gcloud_core_properties
and '@google.com' in gcloud_core_properties['account']
):
contact_us_msg = contact_us_msg.replace('public', 'internal').replace(
'https://issuetracker.google.com/issues/new?component=187149&template=0',
'http://b/issues/new?component=60322&template=178900',
)
except Exception: # pylint: disable=broad-exception-caught
# No-op if unable to determine the active account using gcloud.
pass
return contact_us_msg

View File

@@ -0,0 +1,188 @@
#!/usr/bin/env python
"""BQ CLI helper functions for gcloud interactions."""
import json
import logging
import subprocess
from typing import Dict
from absl import flags
import bq_utils
from gcloud_wrapper import gcloud_runner
# Cache of `gcloud config list` to be used in load_config(). None until the
# first call; afterwards a (possibly empty) dict of config sections.
_config_cache = None
def _use_gcloud_value_if_exists_and_flag_is_default_value(
    flag_values: flags._flagvalues.FlagValues,
    flag_name: str,
    gcloud_config_section: Dict[str, str],
    gcloud_property_name: str,
):
  """Updates flag if it's using the default and the gcloud value exists.

  Args:
    flag_values: The FlagValues registry containing `flag_name`.
    flag_name: Name of the BQ CLI flag that may be updated.
    gcloud_config_section: One section of the gcloud config (e.g. 'core').
    gcloud_property_name: The gcloud property to read from that section.
  """
  if not gcloud_config_section:
    return
  if gcloud_property_name not in gcloud_config_section:
    return
  flag = flag_values[flag_name]
  gcloud_value = gcloud_config_section[gcloud_property_name]
  logging.debug('Gcloud config exists for %s', gcloud_property_name)
  if flag.using_default_value:
    logging.info(
        'The `%s` flag is using a default value and a value is set in gcloud,'
        ' using that: %s',
        flag_name,
        gcloud_value,
    )
    bq_utils.UpdateFlag(flag_values, flag_name, gcloud_value)
  elif flag.value != gcloud_value:
    # Fix: the original concatenated these sentences without separating
    # whitespace, producing "...gcloud.The flag..." in the logged warning.
    logging.warning(
        'Executing with different configuration than in gcloud.'
        ' The flag "%s" has become set to "%s" but gcloud sets "%s" as "%s".'
        ' To update the gcloud value, start from `gcloud config list`.',
        flag_name,
        flag.value,
        gcloud_property_name,
        gcloud_value,
    )
def process_config(flag_values: flags._flagvalues.FlagValues) -> None:
  """Processes the user configs from gcloud and sets flag values accordingly.

  Args:
    flag_values: The FlagValues registry (usually FLAGS) to update in place.
  """
  if not flag_values.use_gcloud_config:
    logging.info(
        "'use_gcloud_config' is false, skipping gcloud config processing."
    )
    return
  configs = load_config()
  core_config = configs.get('core', {})
  billing_config = configs.get('billing', {})
  context_aware = configs.get('context_aware', {})
  auth_config = configs.get('auth', {})
  api_endpoint_overrides = configs.get('api_endpoint_overrides', {})
  # Mirror individual gcloud properties onto their BQ CLI flags; each helper
  # call only updates the flag when it is still at its default value.
  _use_gcloud_value_if_exists_and_flag_is_default_value(
      flag_values=flag_values,
      flag_name='project_id',
      gcloud_config_section=core_config,
      gcloud_property_name='project',
  )
  _use_gcloud_value_if_exists_and_flag_is_default_value(
      flag_values=flag_values,
      flag_name='quota_project_id',
      gcloud_config_section=billing_config,
      gcloud_property_name='quota_project',
  )
  _use_gcloud_value_if_exists_and_flag_is_default_value(
      flag_values=flag_values,
      flag_name='universe_domain',
      gcloud_config_section=core_config,
      gcloud_property_name='universe_domain',
  )
  _use_gcloud_value_if_exists_and_flag_is_default_value(
      flag_values=flag_values,
      flag_name='request_reason',
      gcloud_config_section=core_config,
      gcloud_property_name='request_reason',
  )
  _use_gcloud_value_if_exists_and_flag_is_default_value(
      flag_values=flag_values,
      flag_name='api',
      gcloud_config_section=api_endpoint_overrides,
      gcloud_property_name='bigquery',
  )
  _use_gcloud_value_if_exists_and_flag_is_default_value(
      flag_values=flag_values,
      flag_name='bigquery_discovery_api_key',
      gcloud_config_section=core_config,
      gcloud_property_name='api_key',
  )
  _use_gcloud_value_if_exists_and_flag_is_default_value(
      flag_values=flag_values,
      flag_name='mtls',
      gcloud_config_section=context_aware,
      gcloud_property_name='use_client_certificate',
  )
  # The access-token handling below needs both the auth and core sections.
  if not auth_config or not core_config:
    return
  try:
    access_token_file = auth_config['access_token_file']
    universe_domain = core_config['universe_domain']
  except KeyError:
    # This is expected if these attributes aren't in the config file.
    return
  if access_token_file and universe_domain:
    # Explicit bigqueryrc/CLI auth settings take precedence over gcloud's.
    if (
        not flag_values['oauth_access_token'].using_default_value
        or not flag_values['use_google_auth'].using_default_value
    ):
      logging.warning(
          'Users gcloud config file and bigqueryrc file have incompatible'
          ' configurations. Defaulting to the bigqueryrc file'
      )
      return
    logging.info(
        'Using the gcloud configuration to get TPC authorisation from'
        ' access_token_file'
    )
    try:
      with open(access_token_file) as token_file:
        token = token_file.read().strip()
    except IOError:
      logging.warning(
          'Could not open `access_token_file` file, ignoring gcloud settings'
      )
    else:
      bq_utils.UpdateFlag(flag_values, 'oauth_access_token', token)
      bq_utils.UpdateFlag(flag_values, 'use_google_auth', True)
def load_config() -> Dict[str, Dict[str, str]]:
  """Loads the user configs from gcloud, cache the result, and returns them as a dictionary."""
  global _config_cache
  if _config_cache is not None:
    logging.info('Using cached gcloud config')
    return _config_cache
  # Start from an empty cache so every failure path below still caches (and
  # returns) a usable empty mapping.
  _config_cache = {}
  try:
    proc = gcloud_runner.run_gcloud_command(
        ['config', 'list', '--format=json'], stderr=subprocess.STDOUT
    )
    stdout_data, stderr_data = proc.communicate()
  except FileNotFoundError as e:
    # TODO: b/365836272 - Catch gcloud-not-found error in gcloud_runner.
    logging.warning(
        'Continuing with empty gcloud config data due to error: %s', str(e)
    )
    return _config_cache
  if stderr_data:
    logging.warning(
        'Continuing with empty gcloud config data due to error: %s', stderr_data
    )
    return _config_cache
  try:
    _config_cache = json.loads(stdout_data)
  except json.JSONDecodeError as e:
    logging.warning(
        'Continuing with empty gcloud config data due to invalid config'
        ' format: %s',
        e,
    )
  return _config_cache

View File

@@ -0,0 +1,386 @@
#!/usr/bin/env python
"""BQ CLI helper functions for IDs."""
import collections
import sys
from typing import Any, Optional, Tuple, Type, Union
from absl import app
from utils import bq_error
from pyglib import stringutil
collections_abc = collections
# Prefer collections.abc explicitly on newer interpreters: the ABC aliases on
# the top-level collections module were deprecated and removed in Python 3.10.
if sys.version_info > (3, 8):
  collections_abc = collections.abc
class ApiClientHelper:
"""Static helper methods and classes not provided by the discovery client."""
  def __init__(self, *unused_args, **unused_kwds):
    """Always raises: ApiClientHelper is a static namespace, not a class to build."""
    raise NotImplementedError('Cannot instantiate static class ApiClientHelper')
  class Reference(collections_abc.Mapping):
    """Base class for Reference objects returned by apiclient.

    Subclasses declare `_required_fields`, `_optional_fields` and
    `_format_str`; instances then behave as read-only mappings over those
    field names.
    """

    # Field names every instance must supply (truthy) to __init__.
    _required_fields = frozenset()
    # Field names stored only when supplied and truthy.
    _optional_fields = frozenset()
    # %-format template used by __str__, keyed by field name.
    _format_str = ''

    def __init__(self, **kwds):
      # pylint: disable=unidiomatic-typecheck Check if this isn't a subclass.
      if type(self) == ApiClientHelper.Reference:
        self.typename: str = 'unimplemented'
        raise NotImplementedError(
            'Cannot instantiate abstract class ApiClientHelper.Reference'
        )
      # Required fields must be present and truthy; optional fields are only
      # stored when truthy.
      for name in self._required_fields:
        if not kwds.get(name, ''):
          raise ValueError(
              'Missing required argument %s to %s'
              % (name, self.__class__.__name__)
          )
        setattr(self, name, kwds[name])
      for name in self._optional_fields:
        if kwds.get(name, ''):
          setattr(self, name, kwds[name])

    @classmethod
    def Create(cls, **kwds: Any) -> 'ApiClientHelper.Reference':
      """Factory method for this class; silently drops unrecognized kwargs."""
      args = dict(
          (k, v)
          for k, v in kwds.items()
          if k in cls._required_fields.union(cls._optional_fields)
      )
      return cls(**args)

    def __iter__(self):
      return iter(self._required_fields.union(self._optional_fields))

    def __getitem__(self, key):
      # Optional fields fall back to None when they were never stored.
      if key in self._optional_fields:
        if key in self.__dict__:
          return self.__dict__[key]
        else:
          return None
      if key in self._required_fields:
        return self.__dict__[key]
      raise KeyError(key)

    def __hash__(self):
      # Hash tracks the formatted string form, consistent with __str__.
      return hash(str(self))

    def __len__(self):
      return len(self._required_fields.union(self._optional_fields))

    def __str__(self):
      return stringutil.ensure_str(self._format_str % dict(self))

    def __repr__(self):
      return "%s '%s'" % (self.typename, self)

    def __eq__(self, other):
      # Field-by-field comparison; `other` may be any dict()-able object.
      d = dict(other)
      return all(
          getattr(self, name, None) == d.get(name, None)
          for name in self._required_fields.union(self._optional_fields)
      )
  class JobReference(Reference):
    """A JobReference."""

    _required_fields = frozenset(('projectId', 'jobId'))
    _optional_fields = frozenset(('location',))
    _format_str = '%(projectId)s:%(jobId)s'
    typename = 'job'

    def __init__(self, **kwds):
      # pylint: disable=invalid-name Aligns with API
      # Eager assignment keeps attribute types visible; Reference.__init__
      # validates and re-assigns the same fields.
      self.projectId: str = kwds['projectId']
      self.jobId: str = kwds['jobId']
      # pylint: enable=invalid-name
      super().__init__(**kwds)

    def GetProjectReference(self) -> 'ApiClientHelper.ProjectReference':
      """Returns a reference to the project this job belongs to."""
      return ApiClientHelper.ProjectReference.Create(projectId=self.projectId)
  class ProjectReference(Reference):
    """A ProjectReference."""

    _required_fields = frozenset(('projectId',))
    _format_str = '%(projectId)s'
    typename = 'project'

    def __init__(self, **kwds):
      # pylint: disable=invalid-name Aligns with API
      self.projectId: str = kwds['projectId']
      # pylint: enable=invalid-name
      super().__init__(**kwds)

    def GetDatasetReference(
        self, dataset_id: str
    ) -> 'ApiClientHelper.DatasetReference':
      """Returns a reference to the dataset `dataset_id` in this project."""
      return ApiClientHelper.DatasetReference.Create(
          projectId=self.projectId, datasetId=dataset_id
      )

    def GetTableReference(
        self, dataset_id: str, table_id: str
    ) -> 'ApiClientHelper.TableReference':
      """Returns a reference to the given table in this project."""
      return ApiClientHelper.TableReference.Create(
          projectId=self.projectId, datasetId=dataset_id, tableId=table_id
      )
  class DatasetReference(Reference):
    """A DatasetReference."""

    _required_fields = frozenset(('projectId', 'datasetId'))
    _format_str = '%(projectId)s:%(datasetId)s'
    typename = 'dataset'

    def __init__(self, **kwds):
      # pylint: disable=invalid-name Aligns with API
      self.projectId: str = kwds['projectId']
      self.datasetId: str = kwds['datasetId']
      # pylint: enable=invalid-name
      super().__init__(**kwds)

    def GetProjectReference(self) -> 'ApiClientHelper.ProjectReference':
      """Returns a reference to the project containing this dataset."""
      return ApiClientHelper.ProjectReference.Create(projectId=self.projectId)

    def GetTableReference(
        self, table_id: str
    ) -> 'ApiClientHelper.TableReference':
      """Returns a reference to the table `table_id` in this dataset."""
      return ApiClientHelper.TableReference.Create(
          projectId=self.projectId, datasetId=self.datasetId, tableId=table_id
      )
class TableReference(Reference):
  """A TableReference, identifying a table by project, dataset and table id.

  Renders as 'projectId:datasetId.tableId'.
  """

  _required_fields = frozenset(('projectId', 'datasetId', 'tableId'))
  _format_str = '%(projectId)s:%(datasetId)s.%(tableId)s'
  typename = 'table'

  def __init__(self, **kwds):
    # pylint: disable=invalid-name Aligns with API
    self.projectId: str = kwds['projectId']
    self.datasetId: str = kwds['datasetId']
    self.tableId: str = kwds['tableId']
    # pylint: enable=invalid-name
    super().__init__(**kwds)

  def GetDatasetReference(self) -> 'ApiClientHelper.DatasetReference':
    """Returns a reference to the dataset containing this table."""
    return ApiClientHelper.DatasetReference.Create(
        projectId=self.projectId, datasetId=self.datasetId
    )

  def GetProjectReference(self) -> 'ApiClientHelper.ProjectReference':
    """Returns a reference to the project that owns this table."""
    return ApiClientHelper.ProjectReference.Create(projectId=self.projectId)
class ModelReference(Reference):
  """A ModelReference, identifying a BQML model by project, dataset, model id."""

  _required_fields = frozenset(('projectId', 'datasetId', 'modelId'))
  _format_str = '%(projectId)s:%(datasetId)s.%(modelId)s'
  typename = 'model'

  def __init__(self, **kwds):
    # pylint: disable=invalid-name Aligns with API
    self.projectId: str = kwds['projectId']
    self.datasetId: str = kwds['datasetId']
    self.modelId: str = kwds['modelId']
    # pylint: enable=invalid-name
    super().__init__(**kwds)
class RoutineReference(Reference):
  """A RoutineReference, identifying a routine (UDF/procedure) in a dataset.

  Renders as 'projectId:datasetId.routineId' and also provides the REST
  resource path form.
  """

  _required_fields = frozenset(('projectId', 'datasetId', 'routineId'))
  _format_str = '%(projectId)s:%(datasetId)s.%(routineId)s'
  _path_str = (
      'projects/%(projectId)s/datasets/%(datasetId)s/routines/%(routineId)s'
  )
  typename = 'routine'

  def __init__(self, **kwds):
    # pylint: disable=invalid-name Aligns with API
    self.projectId: str = kwds['projectId']
    self.datasetId: str = kwds['datasetId']
    self.routineId: str = kwds['routineId']
    # pylint: enable=invalid-name
    super().__init__(**kwds)

  def path(self) -> str:
    """Returns the full REST resource path for this routine."""
    return self._path_str % dict(self)
class RowAccessPolicyReference(Reference):
  """A reference to a row access policy on a table.

  Renders as 'projectId:datasetId.tableId.policyId'.
  """

  _required_fields = frozenset(
      ('projectId', 'datasetId', 'tableId', 'policyId')
  )
  _format_str = '%(projectId)s:%(datasetId)s.%(tableId)s.%(policyId)s'
  typename = 'row access policy'

  def __init__(self, **kwds):
    # pylint: disable=invalid-name Aligns with API
    self.projectId: str = kwds['projectId']
    self.datasetId: str = kwds['datasetId']
    self.tableId: str = kwds['tableId']
    self.policyId: str = kwds['policyId']
    # pylint: enable=invalid-name
    super().__init__(**kwds)
class TransferConfigReference(Reference):
  """A reference to a data transfer configuration, by full resource name."""

  _required_fields = frozenset(('transferConfigName',))
  _format_str = '%(transferConfigName)s'
  typename = 'transfer config'

  def __init__(self, **kwds):
    # pylint: disable=invalid-name Aligns with API
    self.transferConfigName: str = kwds['transferConfigName']
    # pylint: enable=invalid-name
    super().__init__(**kwds)
class TransferRunReference(Reference):
  """A reference to a data transfer run, by full resource name."""

  _required_fields = frozenset(('transferRunName',))
  _format_str = '%(transferRunName)s'
  typename = 'transfer run'

  def __init__(self, **kwds):
    # pylint: disable=invalid-name Aligns with API
    self.transferRunName: str = kwds['transferRunName']
    # pylint: enable=invalid-name
    super().__init__(**kwds)
class NextPageTokenReference(Reference):
  """A reference wrapping a pagination token."""

  _required_fields = frozenset(('pageTokenId',))
  _format_str = '%(pageTokenId)s'
  typename = 'page token'


class TransferLogReference(TransferRunReference):
  """A reference to a transfer log; same fields as TransferRunReference."""

  pass
class EncryptionServiceAccount(Reference):
  """A reference holding the KMS encryption service account email."""

  _required_fields = frozenset(('serviceAccount',))
  _format_str = '%(serviceAccount)s'
  # typename is set to none because the EncryptionServiceAccount does not
  # store a 'reference', so when the object info is printed, it will omit
  # an unnecessary line that would have tried to print a reference in other
  # cases, i.e. datasets, tables, etc.
  typename = None
class ReservationReference(Reference):
  """A reference to a reservation, by project, location and reservation id."""

  _required_fields = frozenset(('projectId', 'location', 'reservationId'))
  _format_str = '%(projectId)s:%(location)s.%(reservationId)s'
  _path_str = 'projects/%(projectId)s/locations/%(location)s/reservations/%(reservationId)s'
  typename = 'reservation'

  def path(self) -> str:  # pylint: disable=invalid-name Legacy
    """Returns the full REST resource path for this reservation."""
    return self._path_str % dict(self)
class CapacityCommitmentReference(Reference):
  """Helper class to provide a reference to capacity commitment."""

  _required_fields = frozenset(
      ('projectId', 'location', 'capacityCommitmentId')
  )
  _format_str = '%(projectId)s:%(location)s.%(capacityCommitmentId)s'
  _path_str = 'projects/%(projectId)s/locations/%(location)s/capacityCommitments/%(capacityCommitmentId)s'
  typename = 'capacity commitment'

  def __init__(self, **kwds):
    # pylint: disable=invalid-name Aligns with API
    self.projectId: str = kwds['projectId']
    self.location: str = kwds['location']
    self.capacityCommitmentId: str = kwds['capacityCommitmentId']
    # pylint: enable=invalid-name
    super().__init__(**kwds)

  def path(self) -> str:  # pylint: disable=invalid-name Legacy
    """Returns the full REST resource path for this commitment."""
    return self._path_str % dict(self)
class ReservationAssignmentReference(Reference):
  """Helper class to provide a reference to reservation assignment."""

  _required_fields = frozenset(
      ('projectId', 'location', 'reservationId', 'reservationAssignmentId')
  )
  _format_str = '%(projectId)s:%(location)s.%(reservationId)s.%(reservationAssignmentId)s'
  _path_str = 'projects/%(projectId)s/locations/%(location)s/reservations/%(reservationId)s/assignments/%(reservationAssignmentId)s'
  _reservation_format_str = '%(projectId)s:%(location)s.%(reservationId)s'
  typename = 'reservation assignment'

  def path(self) -> str:  # pylint: disable=invalid-name Legacy
    """Returns the full REST resource path for this assignment."""
    return self._path_str % dict(self)

  def reservation_path(self) -> str:  # pylint: disable=invalid-name Legacy
    """Returns the 'projectId:location.reservationId' form of the parent."""
    return self._reservation_format_str % dict(self)
class BiReservationReference(Reference):
  """Helper class to provide a reference to bi reservation."""

  _required_fields = frozenset(('projectId', 'location'))
  _format_str = '%(projectId)s:%(location)s'
  # There is a single BI reservation per project/location, so the resource
  # path ends in the fixed segment 'biReservation'.
  _path_str = 'projects/%(projectId)s/locations/%(location)s/biReservation'
  _create_path_str = 'projects/%(projectId)s/locations/%(location)s'
  typename = 'bi reservation'

  def path(self) -> str:  # pylint: disable=invalid-name Legacy
    """Returns the full REST resource path for the BI reservation."""
    return self._path_str % dict(self)

  def create_path(self) -> str:  # pylint: disable=invalid-name Legacy
    """Returns the parent path used when creating the BI reservation."""
    return self._create_path_str % dict(self)
class ReservationGroupReference(Reference):
  """Helper class to provide a reference to reservation group."""

  _required_fields = frozenset(
      ('projectId', 'location', 'reservationGroupId')
  )
  _format_str = '%(projectId)s:%(location)s.%(reservationGroupId)s'
  _path_str = 'projects/%(projectId)s/locations/%(location)s/reservationGroups/%(reservationGroupId)s'
  typename = 'reservation group'

  def __init__(self, **kwds):
    # pylint: disable=invalid-name Aligns with API
    self.projectId: str = kwds['projectId']
    self.location: str = kwds['location']
    self.reservationGroupId: str = kwds['reservationGroupId']
    # pylint: enable=invalid-name
    super().__init__(**kwds)

  def path(self) -> str:  # pylint: disable=invalid-name Legacy
    """Returns the full REST resource path for this reservation group."""
    return self._path_str % dict(self)
class ConnectionReference(Reference):
  """A reference to a connection, by project, location and connection id.

  Note: unlike most references, this one formats with dots throughout
  ('projectId.location.connectionId').
  """

  _required_fields = frozenset(('projectId', 'location', 'connectionId'))
  _format_str = '%(projectId)s.%(location)s.%(connectionId)s'
  _path_str = 'projects/%(projectId)s/locations/%(location)s/connections/%(connectionId)s'
  typename = 'connection'

  def path(self) -> str:  # pylint: disable=invalid-name Legacy
    """Returns the full REST resource path for this connection."""
    return self._path_str % dict(self)
def typecheck(  # pylint: disable=invalid-name
    obj: ApiClientHelper.Reference,
    types: Union[
        Type[Optional[ApiClientHelper.Reference]],
        Tuple[Type[Optional[ApiClientHelper.Reference]], ...],
    ],
    message: Optional[str] = None,
    method: Optional[str] = None,
    # In code on the surface, taking user input, we throw a usage error.
    is_usage_error: bool = False,
) -> None:
  """Ensure the obj is the correct type, or throw a BigqueryTypeError.

  Args:
    obj: The reference object to validate.
    types: A reference type, or tuple of types, that obj must match.
    message: Optional error message; one is synthesized when falsy.
    method: Optional method name used in the synthesized message.
    is_usage_error: When True, raise app.UsageError instead of
      bq_error.BigqueryTypeError.
  """
  if isinstance(obj, types):
    return
  if not message:
    message = (
        'Invalid reference for %s: %r' % (method, obj)
        if method
        else 'Type of %r is not one of %s' % (obj, types)
    )
  if is_usage_error:
    raise app.UsageError(message)
  raise bq_error.BigqueryTypeError(message)

View File

@@ -0,0 +1,142 @@
#!/usr/bin/env python
"""Utility functions for BQ CLI logging."""
import datetime
import logging
import os
import sys
from typing import Optional, TextIO, Union
from absl import flags
from absl import logging as absl_logging
from googleapiclient import model
_UNIQUE_SUFFIX: str = ''


def GetUniqueSuffix() -> str:
  """Returns a per-process timestamp suffix, computed once and then cached."""
  global _UNIQUE_SUFFIX
  if _UNIQUE_SUFFIX:
    return _UNIQUE_SUFFIX
  # NOTE(review): '%z' renders as '' for the naive datetime.now(), so the
  # suffix normally begins with '_' — presumably intentional; confirm.
  _UNIQUE_SUFFIX = datetime.datetime.now().strftime('%z_%Y%m%d_%H%M%S.%f')
  return _UNIQUE_SUFFIX
def GetLogDirectory(apilog: Optional[str] = None) -> Optional[str]:
  """Picks (and creates) a directory for BQ CLI logs, or returns None.

  Preference order: an explicitly passed existing directory, then the blaze
  test output directory, then the Kokoro artifacts directory — the latter two
  so the logs surface in the sponge UI artifact tab.

  Args:
    apilog: Optional directory path supplied by the user.

  Returns:
    The directory path (created if needed), or None when no destination
    applies.
  """
  if apilog and os.path.isdir(apilog):
    target = apilog
  elif 'TEST_UNDECLARED_OUTPUTS_DIR' in os.environ:
    target = os.path.join(os.environ['TEST_UNDECLARED_OUTPUTS_DIR'], 'bq_logs')
  elif 'KOKORO_ARTIFACTS_DIR' in os.environ:
    target = os.path.join(os.environ['KOKORO_ARTIFACTS_DIR'], 'bq_logs')
  else:
    return None
  os.makedirs(target, exist_ok=True)
  return target
def SaveStringToLogDirectoryIfAvailable(
    file_prefix: str,
    content: Union[str, bytes],
    apilog: Optional[str] = None,
) -> None:
  """Saves content to '<file_prefix>_<suffix>.log' in the log directory.

  Silently does nothing when no log directory is available.

  Args:
    file_prefix: Prefix for the log file name.
    content: Text to save; bytes are decoded as UTF-8 first.
    apilog: Optional directory override, forwarded to GetLogDirectory.
  """
  directory = GetLogDirectory(apilog)
  if not directory:
    return
  text = content.decode('utf-8') if isinstance(content, bytes) else content
  target = os.path.join(directory, f'{file_prefix}_{GetUniqueSuffix()}.log')
  with open(target, 'w') as handle:
    handle.write(text)
def _SetLogFile(logfile: TextIO):
  # Route absl logging through the standard Python logging handler, then
  # point that handler's stream at the given file object.
  absl_logging.use_python_logging(quiet=True)
  absl_logging.get_absl_handler().python_handler.stream = logfile
def ConfigurePythonLogger(apilog: Optional[str] = None):
  """Sets up Python logger.

  Applications can configure logging however they want, but this
  captures one pattern of logging which seems useful when dealing with
  a single command line option for determining logging.

  Args:
    apilog: To log to sys.stdout, specify '', '-', '1', 'true', or 'stdout'. To
      log to sys.stderr, specify 'stderr'. To log to a file, specify the file
      path. Specify None to disable logging.
  """
  # Messages are buffered and emitted at the end, after logging is set up.
  log_messages = []
  if apilog is None:
    # No explicit destination: fall back to a test-environment directory
    # (blaze/Kokoro artifacts) when one exists.
    apilog = GetLogDirectory()
    log_messages.append(
        'No logging set and we are in a test environment, logs will be in a'
        ' directory based on the test environment.'
    )
  if apilog is None:
    # Effectively turn off logging.
    logging.debug(
        'There is no apilog flag so non-critical logging is disabled.'
    )
    logging.disable(logging.CRITICAL)
  else:
    if apilog in ('', '-', '1', 'true', 'stdout'):
      _SetLogFile(sys.stdout)
    elif apilog == 'stderr':
      _SetLogFile(sys.stderr)
    elif apilog:
      if os.path.isdir(apilog):
        # Directory target: create a unique per-invocation file inside it.
        log_messages.append(f'Logging to directory: {apilog}')
        apilog = os.path.join(
            apilog,
            f'bq_cli_{GetUniqueSuffix()}.log',
        )
      # NOTE(review): the handle stays open for the life of the process —
      # the logging stream owns it.
      _SetLogFile(open(apilog, 'a'))
    else:
      logging.basicConfig(level=logging.INFO)
    # Turn on apiclient logging of http requests and responses. (Here
    # we handle both the flags interface from apiclient < 1.2 and the
    # module global in apiclient >= 1.2.)
    if hasattr(flags.FLAGS, 'dump_request_response'):
      flags.FLAGS.dump_request_response = True
    else:
      model.dump_request_response = True
  for log in log_messages:
    logging.info(log)
def EncodeForPrinting(o: object) -> str:
  """Safely encode an object as a string for printing.

  Historically this re-encoded Python 2 `unicode` values with sys.stdout's
  encoding. On Python 3 the guard `isinstance(o, type('')) and not
  isinstance(o, str)` was always False (type('') *is* str), so the encoding
  branch was unreachable dead code and the function always reduced to
  str(o). The dead branch has been removed; behavior is unchanged.

  Args:
    o: Any object.

  Returns:
    str(o).
  """
  return str(o)
def ConfigureLogging(apilog: Optional[str] = None):
  """Configures BQ CLI logging, exiting with a message on a bad log path.

  Args:
    apilog: Logging destination; see ConfigurePythonLogger for semantics.

  Raises:
    IOError: re-raised for any I/O failure other than a missing file.
  """
  try:
    ConfigurePythonLogger(apilog)
  except IOError as e:
    if e.errno == 2:  # ENOENT: the requested log file path cannot be opened.
      print('Could not configure logging. %s: %s' % (e.strerror, e.filename))
      sys.exit(1)
    # Bare raise preserves the original traceback (unlike `raise e`).
    raise

View File

@@ -0,0 +1,504 @@
#!/usr/bin/env python
# pylint: disable=g-unknown-interpreter
# Copyright 2012 Google Inc. All Rights Reserved.
"""Bigquery Client library for Python."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import os
import re
from typing import Any, Dict, List, NamedTuple, Optional, TypedDict
from utils import bq_error
from utils import bq_id_utils
# Maximum number of jobs that can be retrieved by ListJobs (sanity limit).
MAX_RESULTS = 100000

# URI scheme that marks a source as a Cloud Storage object.
GCS_SCHEME_PREFIX = 'gs://'

# Maps supported connection type names to the corresponding property in the
# connection proto.
CONNECTION_TYPE_TO_PROPERTY_MAP = {
    'CLOUD_SQL': 'cloudSql',
    'AWS': 'aws',
    'Azure': 'azure',
    'SQL_DATA_SOURCE': 'sqlDataSource',
    'CLOUD_SPANNER': 'cloudSpanner',
    'CLOUD_RESOURCE': 'cloudResource',
    'SPARK': 'spark',
}
# Inverse mapping: proto property name -> connection type name.
CONNECTION_PROPERTY_TO_TYPE_MAP = {
    p: t for t, p in CONNECTION_TYPE_TO_PROPERTY_MAP.items()
}
# The set of supported connection type names (a dict keys view).
CONNECTION_TYPES = CONNECTION_TYPE_TO_PROPERTY_MAP.keys()
def MakeAccessRolePropertiesJson(iam_role_id: str) -> str:
  """Returns properties for a connection with IAM role id.

  Args:
    iam_role_id: IAM role id.

  Returns:
    JSON string with properties to create a connection with IAM role id.
  """
  # json.dumps escapes quotes/backslashes in the id; the previous raw '%s'
  # interpolation could emit invalid JSON. Output is byte-identical for
  # ordinary ids.
  return json.dumps({'accessRole': {'iamRoleId': iam_role_id}})
def MakeTenantIdPropertiesJson(tenant_id: str) -> str:
  """Returns properties for a connection with tenant id.

  Args:
    tenant_id: tenant id.

  Returns:
    JSON string with properties to create a connection with customer's tenant
    id.
  """
  # json.dumps escapes special characters; raw '%s' interpolation did not.
  return json.dumps({'customerTenantId': tenant_id})
def MakeAzureFederatedAppClientIdPropertiesJson(
    federated_app_client_id: str,
) -> str:
  """Returns properties for a connection with a federated app (client) id.

  Args:
    federated_app_client_id: federated application (client) id.

  Returns:
    JSON string with properties to create a connection with customer's
    federated application (client) id.
  """
  # json.dumps escapes special characters; raw '%s' interpolation did not.
  return json.dumps({'federatedApplicationClientId': federated_app_client_id})
def MakeAzureFederatedAppClientAndTenantIdPropertiesJson(
    tenant_id: str, federated_app_client_id: str
) -> str:
  """Returns properties for a connection with tenant and federated app ids.

  Args:
    tenant_id: tenant id
    federated_app_client_id: federated application (client) id.

  Returns:
    JSON string with properties to create a connection with customer's tenant
    and federated application (client) ids.
  """
  # json.dumps escapes special characters and also normalizes the stray
  # '" : "' spacing the old template emitted; the payload is JSON-equivalent.
  return json.dumps({
      'customerTenantId': tenant_id,
      'federatedApplicationClientId': federated_app_client_id,
  })
def ToLowerCamel(name: str) -> str:
  """Converts a snake_case name to lowerCamelCase (e.g. max_results -> maxResults)."""

  def _capitalize(match) -> str:
    # match is '_x' for a lowercase x; replace with 'X'.
    return match.group(0)[1].upper()

  return re.sub('_[a-z]', _capitalize, name)
def ApplyParameters(config, **kwds) -> None:
  """Adds all kwds to config dict in place, converting keys to camelcase.

  Entries whose value is None are skipped; pre-existing None entries already
  in config are left untouched.

  Args:
    config: A configuration dict, mutated in place.
    **kwds: A dict of keys and values to set in the config.
  """
  for key, value in kwds.items():
    if value is not None:
      config[ToLowerCamel(key)] = value
def FormatProjectIdentifierForTransfers(
    project_reference: 'bq_id_utils.ApiClientHelper.ProjectReference',
    location: str,
) -> str:
  """Formats a project identifier for data transfers.

  Data transfer API calls take parents of the form
  'projects/<projectId>/locations/<location>', so this expands a bare
  project reference into that form.

  Args:
    project_reference: The project to format for data transfer commands.
    location: The location id, e.g. 'us' or 'eu'.

  Returns:
    The formatted parent resource name for transfers.
  """
  return f'projects/{project_reference.projectId}/locations/{location}'
def ParseJson(
    json_string: Optional[str],
) -> Dict[str, Dict[str, Dict[str, Any]]]:
  """Parses json_string, wrapping decode failures in BigqueryClientError."""
  try:
    return json.loads(json_string)
  except ValueError as e:
    raise bq_error.BigqueryClientError(
        'Error decoding JSON from string %s: %s' % (json_string, e)
    )
class InsertEntry(NamedTuple):
  # insert_id is Optional to support legacy tests.
  insert_id: Optional[str]
  record: object


def JsonToInsertEntry(
    insert_id: Optional[str],  # Optional here is to support legacy tests.
    json_string: str,
) -> InsertEntry:
  """Parses a JSON encoded record and returns an InsertEntry.

  Arguments:
    insert_id: Id for the insert, can be None.
    json_string: The JSON encoded data to be converted.

  Returns:
    InsertEntry object for adding to a table.

  Raises:
    bq_error.BigqueryClientError: if json_string is malformed or does not
      encode a JSON object.
  """
  try:
    parsed = json.loads(json_string)
  except ValueError as e:
    raise bq_error.BigqueryClientError('Could not parse object: %s' % (str(e),))
  if not isinstance(parsed, dict):
    raise bq_error.BigqueryClientError('Value is not a JSON object')
  return InsertEntry(insert_id, parsed)
def GetSessionId(job):
  """Helper to return the session id if the job is part of one.

  Args:
    job: a job resource to get statistics and sessionInfo from.

  Returns:
    sessionId if the job is part of a session, else None.
  """
  session_info = job.get('statistics', {}).get('sessionInfo', {})
  return session_info.get('sessionId')
def GetJobTypeName(job_info):
  """Returns the job's type ('extract', 'load', 'query' or 'copy'), or None.

  The type is whichever known key appears in the job's configuration; None
  is returned when none of the known keys is present.
  """
  known_types = {'extract', 'load', 'query', 'copy'}
  present = known_types.intersection(job_info.get('configuration', {}))
  return next(iter(present), None)
def ProcessSources(source_string: str) -> List[str]:
  """Take a source string and return a list of URIs.

  The list will consist of either a single local filename, which we check
  exists and is a file, or a list of gs:// uris (all sources must be gs://
  if any is).

  Args:
    source_string: A comma-separated list of URIs.

  Returns:
    List of one or more valid URIs, as strings.

  Raises:
    bq_error.BigqueryClientError: if no valid list of sources can be
      determined.
  """
  sources = [part.strip() for part in source_string.split(',')]
  if not sources:
    raise bq_error.BigqueryClientError('No sources specified')
  gcs_count = sum(
      1 for source in sources if source.startswith(GCS_SCHEME_PREFIX)
  )
  if gcs_count:
    # Cloud Storage mode: every source must be a gs:// URI.
    if gcs_count != len(sources):
      raise bq_error.BigqueryClientError(
          'All URIs must begin with "{}" if any do.'.format(GCS_SCHEME_PREFIX)
      )
    return sources
  # Local mode: exactly one existing regular file is allowed.
  if len(sources) > 1:
    raise bq_error.BigqueryClientError(
        'Local upload currently supports only one file, found %d'
        % (len(sources),)
    )
  local_path = sources[0]
  if not os.path.exists(local_path):
    raise bq_error.BigqueryClientError(
        'Source file not found: %s' % (local_path,)
    )
  if not os.path.isfile(local_path):
    raise bq_error.BigqueryClientError(
        'Source path is not a file: %s' % (local_path,)
    )
  return sources
def KindToName(kind):
  """Converts a kind like 'bigquery#table' to the bare type name 'table'."""
  _, _, type_name = kind.partition('#')
  return type_name
def GetConnectionType(connection):
  """Returns the connection's type name, inferred from which property is set.

  Returns None when no known connection property is present.
  """
  return next(
      (
          type_name
          for type_name, prop in CONNECTION_TYPE_TO_PROPERTY_MAP.items()
          if prop in connection
      ),
      None,
  )
def ConstructObjectReference(object_info):
  """Construct a Reference from a server response.

  Accepts either a response carrying a 'kind' (e.g. 'bigquery#table', in
  which case a matching '<type>Reference' key must also be present), or a
  response containing exactly one key ending in 'Reference'.

  Args:
    object_info: Dict describing an object, as returned by the API.

  Returns:
    An instance of the matching bq_id_utils.ApiClientHelper.*Reference class.

  Raises:
    ValueError: if the reference key is missing, ambiguous, or names an
      unknown reference type.
  """
  if 'kind' in object_info:
    typename = KindToName(object_info['kind'])
    lower_camel = typename + 'Reference'
    if lower_camel not in object_info:
      raise ValueError(
          'Cannot find %s in object of type %s: %s'
          % (lower_camel, typename, object_info)
      )
  else:
    typename = ''
    keys = [k for k in object_info if k.endswith('Reference')]
    if len(keys) != 1:
      raise ValueError(
          'Expected one Reference, found %s: %s' % (len(keys), keys)
      )
    lower_camel = keys[0]
  # e.g. 'tableReference' -> 'TableReference', looked up on ApiClientHelper.
  upper_camel = lower_camel[0].upper() + lower_camel[1:]
  reference_type = getattr(bq_id_utils.ApiClientHelper, upper_camel, None)
  if reference_type is None:
    raise ValueError('Unknown reference type: %s' % (typename,))
  return reference_type.Create(**object_info[lower_camel])
def ConstructObjectInfo(reference):
  """Construct an Object from an ObjectReference.

  E.g. a TableReference instance becomes {'tableReference': {...fields...}}.
  """
  typename = type(reference).__name__
  key = typename[:1].lower() + typename[1:]
  return {key: dict(reference)}
def PrepareListRequest(
    reference,
    max_results: Optional[int] = None,
    page_token: Optional[str] = None,
    filter_expression: Optional[str] = None,
):
  """Builds a list request dict from a reference plus paging/filter options.

  Args:
    reference: A reference convertible to a dict of identifying fields.
    max_results: Optional page size.
    page_token: Optional continuation token.
    filter_expression: Optional server-side filter.

  Returns:
    The request dict.
  """
  request = dict(reference)
  for key, value in (
      ('maxResults', max_results),
      ('filter', filter_expression),
      ('pageToken', page_token),
  ):
    if value is not None:
      request[key] = value
  return request
## Data transfer request types
# pylint: disable=invalid-name
class TransferListRequest(TypedDict):
  """Request body for a data transfer config list call.

  NOTE(review): Optional[...] here means "the value may be None", not "the
  key may be absent" — as declared, every key is required by the TypedDict
  contract. `total=False` may have been intended; confirm against callers.
  """

  parent: str
  pageSize: Optional[int]
  pageToken: Optional[str]
  dataSourceIds: Optional[List[str]]
# pylint: enable=invalid-name
def PrepareTransferListRequest(
    reference: bq_id_utils.ApiClientHelper.ProjectReference,
    location: str,
    page_size: Optional[int] = None,
    page_token: Optional[str] = None,
    data_source_ids: Optional[str] = None,
) -> TransferListRequest:
  """Create and populate a transfer config list request.

  Args:
    reference: Project whose transfer configs to list.
    location: Location id, e.g. 'us'.
    page_size: Optional page size.
    page_token: Optional continuation token.
    data_source_ids: Optional filter of the form 'dataSourceIds:id1,id2'.

  Raises:
    bq_error.BigqueryError: if data_source_ids is not in the expected form.
  """
  request = dict(
      parent=FormatProjectIdentifierForTransfers(reference, location)
  )
  if page_size is not None:
    request['pageSize'] = page_size
  if page_token is not None:
    request['pageToken'] = page_token
  if data_source_ids is None:
    return request
  parts = data_source_ids.split(':')
  if parts[0] != 'dataSourceIds':
    raise bq_error.BigqueryError(
        "Invalid filter flag values: '%s'. "
        "Expected format: '--filter=dataSourceIds:id1,id2'" % parts[0]
    )
  request['dataSourceIds'] = parts[1].split(',')
  return request
def ParseStateFilterExpression(
    filter_expression: Optional[str] = None,
) -> Optional[List[str]]:
  """Parses the state filter for list jobs.

  Args:
    filter_expression: A string of the form 'states:STATE1,STATE2,...',
      e.g. 'states:SUCCEEDED,FAILED'. (The old docstring example 'state:done'
      did not match the required 'states:' prefix.)

  Returns:
    The list of state filters to apply, or None if no filter is provided.

  Raises:
    bq_error.BigqueryError: if the filter expression is invalid.
  """
  if filter_expression is None:
    return None
  if not filter_expression.startswith('states:'):
    raise bq_error.BigqueryError(
        'Invalid flag argument "'
        + filter_expression
        + '", the expression must start with "states:"'
    )
  # The prefix check guarantees a ':' exists, so split always yields a second
  # element — the previous `except IndexError` handler was unreachable.
  return filter_expression.split(':')[1].split(',')
def PrepareTransferRunListRequest(
    reference: str,
    run_attempt: Optional[str],
    max_results: Optional[int] = None,
    page_token: Optional[str] = None,
    states: Optional[str] = None,
):
  """Create and populate a transfer run list request.

  Args:
    reference: Parent transfer config resource name.
    run_attempt: Run attempt selector (always set on the request).
    max_results: Optional page size, clamped to MAX_RESULTS.
    page_token: Optional continuation token.
    states: Optional 'states:...' filter expression.
  """
  request = {'parent': reference, 'runAttempt': run_attempt}
  if max_results is not None:
    request['pageSize'] = min(max_results, MAX_RESULTS)
  if states is not None:
    request['states'] = ParseStateFilterExpression(states)
  if page_token is not None:
    request['pageToken'] = page_token
  return request
def PrepareListTransferLogRequest(
    reference: str,
    max_results: Optional[int] = None,
    page_token: Optional[str] = None,
    message_type: Optional[str] = None,
):
  """Create and populate a transfer log list request.

  Args:
    reference: Parent transfer run resource name.
    max_results: Optional page size, clamped to MAX_RESULTS.
    page_token: Optional continuation token.
    message_type: Optional filter of the form 'messageTypes:TYPE1,TYPE2'.

  Raises:
    bq_error.BigqueryError: if message_type is not in the expected form.
  """
  request = {'parent': reference}
  if max_results is not None:
    request['pageSize'] = min(max_results, MAX_RESULTS)
  if page_token is not None:
    request['pageToken'] = page_token
  if message_type is not None:
    if 'messageTypes:' not in message_type:
      raise bq_error.BigqueryError(
          'Invalid flag argument "' + message_type + '"'
      )
    try:
      request['messageTypes'] = message_type.split(':')[1].split(',')
    except IndexError as e:
      raise bq_error.BigqueryError(
          'Invalid flag argument "' + message_type + '"'
      ) from e
  return request
def ProcessParamsFlag(params: str, items: Dict[str, Any]):
  """Processes the params flag.

  Args:
    params: The user specified parameters, as a JSON string.
      Ex: --params="{'param':'value'}".
    items: The body that contains information of all the flags set.

  Returns:
    items, updated with the parsed parameters under 'params'.

  Raises:
    bq_error.BigqueryError: If params is not valid JSON.
  """
  try:
    items['params'] = json.loads(params)
  except Exception as e:  # pylint: disable=broad-except
    raise bq_error.BigqueryError(
        'Parameters should be specified in JSON format when creating the'
        ' transfer configuration.'
    ) from e
  return items
def ProcessRefreshWindowDaysFlag(
    refresh_window_days: str,
    data_source_info: Dict[str, Any],
    items: Dict[str, Any],
    data_source: str,
):
  """Processes the Refresh Window Days flag.

  Args:
    refresh_window_days: The user specified refresh window days.
    data_source_info: The data source of the transfer config.
    items: The body that contains information of all the flags set.
    data_source: The data source of the transfer config.

  Returns:
    items, updated with the refresh window days flag.

  Raises:
    bq_error.BigqueryError: If the data source does not support (custom)
      window days.
  """
  if 'dataRefreshType' not in data_source_info:
    raise bq_error.BigqueryError(
        "Data source '%s' does not support refresh window days." % data_source
    )
  if data_source_info['dataRefreshType'] != 'CUSTOM_SLIDING_WINDOW':
    raise bq_error.BigqueryError(
        "Data source '%s' does not support custom refresh window days."
        % data_source
    )
  items['data_refresh_window_days'] = refresh_window_days
  return items