feat: Add new gcloud commands, API clients, and third-party libraries across various services.

This commit is contained in:
2026-01-01 20:26:35 +01:00
parent 5e23cbece0
commit a19e592eb7
25221 changed files with 8324611 additions and 0 deletions

View File

@@ -0,0 +1,226 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for building the dataflow CLI."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import json
import re
from apitools.base.py import exceptions
from apitools.base.py import list_pager
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.api_lib.dataflow import exceptions as dataflow_exceptions
from googlecloudsdk.core import log
from googlecloudsdk.core import properties
from googlecloudsdk.core import resources
# Regular expression to match only metrics from Dataflow. Currently, this should
# match at least "dataflow" and "dataflow/v1b3". User metrics have an origin set
# as /^user/.
DATAFLOW_METRICS_RE = re.compile('^dataflow')

# Default region for Dataflow API calls, re-exported from the api_lib layer so
# command modules only need to import this utility module.
DATAFLOW_API_DEFAULT_REGION = apis.DATAFLOW_API_DEFAULT_REGION

# Regular expression to only match watermark metrics.
WINDMILL_WATERMARK_RE = re.compile('^(.*)-windmill-(.*)-watermark')

# Resource collection name used when parsing job references with
# resources.REGISTRY.
JOBS_COLLECTION = 'dataflow.projects.locations.jobs'

# Help-text fragment appended to --region flag descriptions.
DEFAULT_REGION_MESSAGE = 'Defaults to \'{0}\'.'.format(
    DATAFLOW_API_DEFAULT_REGION)
def GetErrorMessage(error):
  """Pull a human-readable message out of an HTTP error response.

  Args:
    error: The error exceptions.HttpError thrown by the API client.

  Returns:
    A string describing the error.
  """
  try:
    payload = json.loads(error.content)
  except ValueError:
    # The response body was not JSON; surface the raw response for debugging.
    log.err.Print(error.response)
    return 'Unknown error'
  return payload.get('error', {}).get('message', '')
def MakeErrorMessage(error, job_id='', project_id='', region_id=''):
  """Build the standard error message used across dataflow commands.

  Args:
    error: The error exceptions.HttpError thrown by the API client.
    job_id: The job ID that was used in the command.
    project_id: The project ID that was used in the command.
    region_id: The region ID that was used in the command.

  Returns:
    str, a standard error message.
  """
  # Each context fragment is present only when the caller supplied the ID.
  job_part = ' with job ID [{0}]'.format(job_id) if job_id else ''
  project_part = ' in project [{0}]'.format(project_id) if project_id else ''
  region_part = (
      ' in regional endpoint [{0}]'.format(region_id) if region_id else '')
  return 'Failed operation{0}{1}{2}: {3}'.format(
      job_part, project_part, region_part, GetErrorMessage(error))
def YieldExceptionWrapper(generator, job_id='', project_id='', region_id=''):
  """Re-yield items from a generator, translating HTTP errors.

  Args:
    generator: The generator whose items should be passed through.
    job_id: The job ID that was used in the command.
    project_id: The project ID that was used in the command.
    region_id: The region ID that was used in the command.

  Yields:
    The generated object.

  Raises:
    dataflow_exceptions.ServiceException: An exception for errors raised by
      the service.
  """
  try:
    for item in generator:
      yield item
  except exceptions.HttpError as e:
    # Convert the raw apitools error into the command-friendly exception.
    raise dataflow_exceptions.ServiceException(
        MakeErrorMessage(e, job_id, project_id, region_id))
def YieldFromList(service,
                  request,
                  limit=None,
                  batch_size=100,
                  field='items',
                  batch_size_attribute='maxResults',
                  predicate=None,
                  job_id='',
                  project_id='',
                  region_id=''):
  """Returns a wrapped list_pager.YieldFromList to catch any exceptions.

  Args:
    service: apitools_base.BaseApiService, A service with a .List() method.
    request: protorpc.messages.Message, The request message corresponding to
      the service's .List() method, with all the attributes populated except
      the .maxResults and .pageToken attributes.
    limit: int, The maximum number of records to yield. None if all available
      records should be yielded.
    batch_size: int, The number of items to retrieve per request.
    field: str, The field in the response that will be a list of items.
    batch_size_attribute: str, The name of the attribute in a response message
      holding the maximum number of results to be returned. None if
      caller-specified batch size is unsupported.
    predicate: lambda, A function that returns true for items to be yielded.
    job_id: The job ID that was used in the command.
    project_id: The project ID that was used in the command.
    region_id: The region ID that was used in the command.

  Returns:
    The wrapped generator.

  Raises:
    dataflow_exceptions.ServiceException: if list request failed.
  """
  # Without a regional endpoint, list across all regions via Aggregated.
  method = 'List' if region_id else 'Aggregated'
  pager = list_pager.YieldFromList(
      service=service,
      request=request,
      limit=limit,
      batch_size=batch_size,
      field=field,
      batch_size_attribute=batch_size_attribute,
      predicate=predicate,
      method=method)
  return YieldExceptionWrapper(pager, job_id, project_id, region_id)
def JobsUriFunc(resource):
  """Transform a job resource into a URL string.

  Args:
    resource: The DisplayInfo job object.

  Returns:
    URL to the job.
  """
  parse_params = {
      'projectId': properties.VALUES.core.project.GetOrFail,
      'location': resource.location,
  }
  job_ref = resources.REGISTRY.Parse(
      resource.id, params=parse_params, collection=JOBS_COLLECTION)
  return job_ref.SelfLink()
def JobsUriFromId(job_id, region_id):
  """Transform a job ID into a URL string.

  Args:
    job_id: The job ID.
    region_id: Region ID of the job's regional endpoint.

  Returns:
    URL to the job.
  """
  parse_params = {
      'projectId': properties.VALUES.core.project.GetOrFail,
      'location': region_id,
  }
  job_ref = resources.REGISTRY.Parse(
      job_id, params=parse_params, collection=JOBS_COLLECTION)
  return job_ref.SelfLink()
# TODO(b/139889563): Remove this method when args region is changed to required
def GetRegion(args):
  """Get region to be used in Dataflow services.

  Args:
    args: Argument passed in when running gcloud dataflow command.

  Returns:
    Region specified by user from --region flag in args, falling back to
    'us-central1' (with a deprecation warning) when the flag is unset.
  """
  if args.region:
    return args.region
  # No --region supplied: warn and fall back to the historical default.
  msg = ('`--region` not set; defaulting to \'{0}\'. In an upcoming '
         'release, users must specify a region explicitly. See https://'
         'cloud.google.com/dataflow/docs/concepts/regional-endpoints '
         'for additional details.').format(DATAFLOW_API_DEFAULT_REGION)
  log.warning(msg)
  return DATAFLOW_API_DEFAULT_REGION

View File

@@ -0,0 +1,167 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helpers for writing commands interacting with jobs and their IDs.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import actions
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataflow import dataflow_util
from googlecloudsdk.core import properties
from googlecloudsdk.core import resources
def ArgsForJobRef(parser):
  """Register flags for specifying a single Job ID.

  Args:
    parser: The argparse.ArgParser to configure with job-filtering arguments.
  """
  parser.add_argument('job', metavar='JOB_ID', help='Job ID to operate on.')
  # TODO(b/139889563): Mark as required when default region is removed
  region_help = ('Region ID of the job\'s regional endpoint. ' +
                 dataflow_util.DEFAULT_REGION_MESSAGE)
  parser.add_argument('--region', metavar='REGION_ID', help=region_help)
def ArgsForJobRefs(parser, **kwargs):
  """Register flags for specifying jobs using positional job IDs.

  Args:
    parser: The argparse.ArgParser to configure with job ID arguments.
    **kwargs: Extra arguments to pass to the add_argument call.
  """
  parser.add_argument(
      'jobs', metavar='JOB_ID', help='Job IDs to operate on.', **kwargs)
  # TODO(b/139889563): Mark as required when default region is removed
  region_help = ('Region ID of the jobs\' regional endpoint. ' +
                 dataflow_util.DEFAULT_REGION_MESSAGE)
  parser.add_argument('--region', metavar='REGION_ID', help=region_help)
def ExtractJobRef(args):
  """Extract the Job Ref for a command. Used with ArgsForJobRef.

  Args:
    args: The command line arguments.

  Returns:
    A Job resource.
  """
  parse_params = {
      'projectId': properties.VALUES.core.project.GetOrFail,
      'location': dataflow_util.GetRegion(args),
  }
  return resources.REGISTRY.Parse(
      args.job,
      params=parse_params,
      collection='dataflow.projects.locations.jobs')
def ExtractJobRefs(args):
  """Extract the Job Refs for a command. Used with ArgsForJobRefs.

  Args:
    args: The command line arguments that were provided to this invocation.

  Returns:
    A list of job resources.
  """
  # Resolve the region once; it is shared by every job reference.
  region = dataflow_util.GetRegion(args)
  job_refs = []
  for job_id in args.jobs:
    job_refs.append(
        resources.REGISTRY.Parse(
            job_id,
            params={
                'projectId': properties.VALUES.core.project.GetOrFail,
                'location': region,
            },
            collection='dataflow.projects.locations.jobs'))
  return job_refs
def CommonArgs(parser):
  """Register flags applicable to all template launches.

  Registers worker-environment flags (KMS key, IP configuration, worker
  counts, machine type, network/subnetwork, service account) plus a mutually
  exclusive group of worker-location flags.

  Args:
    parser: argparse.ArgumentParser to register arguments with.
  """
  parser.add_argument(
      '--dataflow-kms-key',
      help='The Cloud KMS key to protect the job resources.')
  parser.add_argument(
      '--disable-public-ips',
      action=actions.StoreBooleanProperty(
          properties.VALUES.dataflow.disable_public_ips),
      help='The Cloud Dataflow workers must not use public IP addresses.')
  parser.add_argument(
      '--max-workers', type=int, help='The maximum number of workers to run.')
  parser.add_argument(
      '--network',
      help='The Compute Engine network for launching instances to '
      'run your pipeline.')
  parser.add_argument(
      '--num-workers', type=int, help='The initial number of workers to use.')
  parser.add_argument(
      '--service-account-email',
      # Loose shape check only (something@something.something); full
      # validation is left to the service.
      type=arg_parsers.RegexpValidator(r'.*@.*\..*',
                                       'must provide a valid email address'),
      help='The service account to run the workers as.')
  parser.add_argument(
      '--subnetwork',
      help='The Compute Engine subnetwork for launching instances '
      'to run your pipeline.')
  parser.add_argument(
      '--worker-machine-type',
      help='The type of machine to use for workers. Defaults to '
      'server-specified.')
  # Only one of --worker-region / --worker-zone / --zone may be supplied;
  # --zone is the deprecated spelling of --worker-zone.
  group = parser.add_group(mutex=True, help='Worker location options.')
  group.add_argument(
      '--worker-region',
      type=arg_parsers.RegexpValidator(r'\w+-\w+\d',
                                       'must provide a valid region'),
      help='The region to run the workers in.')
  group.add_argument(
      '--worker-zone',
      type=arg_parsers.RegexpValidator(r'\w+-\w+\d-\w',
                                       'must provide a valid zone'),
      help='The zone to run the workers in.')
  group.add_argument(
      '--zone',
      type=arg_parsers.RegexpValidator(r'\w+-\w+\d-\w',
                                       'must provide a valid zone'),
      help='The zone to run the workers in.',
      action=actions.DeprecationAction(
          '--zone',
          warn=('The {flag_name} option is deprecated; '
                'use --worker-region or --worker-zone instead.'),
          removed=False))

View File

@@ -0,0 +1,18 @@
# Resource argument definitions for Dataflow gcloud commands.

# A Cloud project; filled from the core/project property by default.
project:
  name: project
  collection: dataflow.projects
  attributes:
  - &project  # Anchored so the job resource below can reuse this attribute.
    parameter_name: projectId
    attribute_name: project
    help: The project name.
    property: core/project

# A Dataflow job, scoped under a project.
job:
  name: job
  collection: dataflow.projects.jobs
  attributes:
  - *project
  - parameter_name: jobId
    attribute_name: job
    help: The job name.

View File

@@ -0,0 +1,144 @@
# -*- coding: utf-8 -*- #
# Copyright 2019 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helpers for writing commands interacting with Cloud Dataflow snapshots.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataflow import dataflow_util
from googlecloudsdk.core import properties
from googlecloudsdk.core import resources
import six
def ArgsForSnapshotRef(parser):
  """Register flags for specifying a single Snapshot ID.

  Args:
    parser: The argparse.ArgParser to configure with snapshot arguments.
  """
  parser.add_argument(
      'snapshot',
      metavar='SNAPSHOT_ID',
      help='ID of the Cloud Dataflow snapshot.')
  # Unlike job commands, snapshot commands always require an explicit region.
  parser.add_argument(
      '--region',
      metavar='REGION_ID',
      required=True,
      help='Region ID of the snapshot regional endpoint.')
def ArgsForSnapshotJobRef(parser):
  """Register flags for specifying a single Job ID.

  Args:
    parser: The argparse.ArgParser to configure with job-filtering arguments.
  """
  # Both the job and its regional endpoint must be supplied explicitly.
  parser.add_argument(
      '--job-id',
      metavar='JOB_ID',
      required=True,
      help='The job ID to snapshot.')
  parser.add_argument(
      '--region',
      metavar='REGION_ID',
      required=True,
      help='The region ID of the snapshot and job\'s regional endpoint.')
def ArgsForListSnapshot(parser):
  """Register flags for listing Cloud Dataflow snapshots.

  Args:
    parser: The argparse.ArgParser to configure with job-filtering arguments.
  """
  # --job-id is an optional filter; --region is always required.
  parser.add_argument(
      '--job-id',
      metavar='JOB_ID',
      required=False,
      help='The job ID to use to filter the snapshots list.')
  parser.add_argument(
      '--region',
      metavar='REGION_ID',
      required=True,
      help='The region ID of the snapshot and job\'s regional endpoint.')
def ArgsForSnapshotTtl(parser):
  """Register flags for specifying a snapshot ttl.

  Args:
    parser: the argparse.ArgParser to configure with a ttl argument.
  """
  # The service accepts ttls between one hour and thirty days.
  ttl_type = arg_parsers.Duration(lower_bound='1h', upper_bound='30d')
  parser.add_argument(
      '--snapshot-ttl',
      metavar='DURATION',
      type=ttl_type,
      default='7d',
      help='Time to live for the snapshot.')
def ExtractSnapshotRef(args):
  """Extract the Snapshot Ref for a command. Used with ArgsForSnapshotRef.

  Args:
    args: The command line arguments.

  Returns:
    A Snapshot resource.
  """
  parse_params = {
      'projectId': properties.VALUES.core.project.GetOrFail,
      'location': dataflow_util.GetRegion(args),
  }
  return resources.REGISTRY.Parse(
      args.snapshot,
      params=parse_params,
      collection='dataflow.projects.locations.snapshots')
def ExtractSnapshotJobRef(args):
  """Extract the Job Ref for a command. Used with ArgsForSnapshotJobRef.

  Args:
    args: The command line arguments.

  Returns:
    A Job resource.
  """
  parse_params = {
      'projectId': properties.VALUES.core.project.GetOrFail,
      'location': dataflow_util.GetRegion(args),
  }
  return resources.REGISTRY.Parse(
      args.job_id,
      params=parse_params,
      collection='dataflow.projects.locations.jobs')
def ExtractSnapshotTtlDuration(args):
  """Extract the Duration string for the Snapshot ttl.

  Args:
    args: The command line arguments.

  Returns:
    A duration string for the snapshot ttl, e.g. '604800s'.
  """
  # The parsed ttl is a number of seconds; the API wants a 'Ns' string.
  return '{0}s'.format(six.text_type(args.snapshot_ttl))

View File

@@ -0,0 +1,209 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helpers for writing commands interacting with Cloud Dataflow SQL.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import collections
import json
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import exceptions
from googlecloudsdk.calliope.concepts import concepts
from googlecloudsdk.command_lib.dataflow import dataflow_util
from googlecloudsdk.command_lib.dataflow import job_utils
from googlecloudsdk.command_lib.util.concepts import concept_parsers
from googlecloudsdk.command_lib.util.concepts import presentation_specs
from googlecloudsdk.core import properties
def ArgsForSqlQuery(parser):
  """Register flags for running a SQL query.

  Registers the shared template-launch flags, the positional query, job
  naming/region flags, output-destination resource args (BigQuery table
  and/or Pub/Sub topic), disposition flags, query parameters, and hidden
  launcher-template overrides.

  Args:
    parser: The argparse.ArgParser to configure with query arguments.
  """
  job_utils.CommonArgs(parser)
  parser.add_argument(
      'query', metavar='QUERY', help='The SQL query to execute.')
  parser.add_argument(
      '--job-name',
      help='The unique name to assign to the Cloud Dataflow job.',
      required=True)
  # NOTE(review): the help text appends DEFAULT_REGION_MESSAGE ("Defaults to
  # ...") but the flag is required, so no default applies — confirm whether
  # the message should be dropped here.
  parser.add_argument(
      '--region',
      type=arg_parsers.RegexpValidator(r'\w+-\w+\d',
                                       'must provide a valid region'),
      help=('Region ID of the job\'s regional endpoint. '
            + dataflow_util.DEFAULT_REGION_MESSAGE),
      required=True)
  # At least one of the two output destinations below must be supplied.
  output_group = parser.add_group(
      required=True, help='The destination(s) for the output of the query.')
  concept_parsers.ConceptParser([
      presentation_specs.ResourcePresentationSpec(
          '--bigquery-table',
          concepts.ResourceSpec(
              'bigquery.tables',
              resource_name='BigQuery table',
              tableId=concepts.ResourceParameterAttributeConfig(
                  name='bigquery-table', help_text='The BigQuery table ID.'),
              projectId=concepts.ResourceParameterAttributeConfig(
                  name='bigquery-project',
                  help_text='The BigQuery project ID.'),
              datasetId=concepts.ResourceParameterAttributeConfig(
                  name='bigquery-dataset',
                  help_text='The BigQuery dataset ID.')),
          'The BigQuery table to write query output to.',
          prefixes=False,
          group=output_group),
      presentation_specs.ResourcePresentationSpec(
          '--pubsub-topic',
          concepts.ResourceSpec(
              'pubsub.projects.topics',
              resource_name='Pub/Sub topic',
              topicsId=concepts.ResourceParameterAttributeConfig(
                  name='pubsub-topic', help_text='The Pub/Sub topic ID.'),
              projectsId=concepts.ResourceParameterAttributeConfig(
                  name='pubsub-project',
                  help_text='The Pub/Sub project ID.')),
          'The Cloud Pub/Sub topic to write query output to.',
          prefixes=False,
          group=output_group),
  ]).AddToParser(parser)
  parser.add_argument(
      '--bigquery-write-disposition',
      help='The behavior of the BigQuery write operation.',
      choices=['write-empty', 'write-truncate', 'write-append'],
      default='write-empty')
  parser.add_argument(
      '--pubsub-create-disposition',
      help='The behavior of the Pub/Sub create operation.',
      choices=['create-if-not-found', 'fail-if-not-found'],
      default='create-if-not-found')
  # Query parameters may come inline (--parameter, repeatable) or from a
  # JSON file, but not both.
  parameter_group = parser.add_mutually_exclusive_group()
  parameter_group.add_argument(
      '--parameter',
      action='append',
      help='Parameters to pass to a query. Parameters must use the format '
      'name:type:value, for example min_word_count:INT64:250.')
  parameter_group.add_argument(
      '--parameters-file',
      help='Path to a file containing query parameters in JSON format.'
      ' e.g. [{"parameterType": {"type": "STRING"}, "parameterValue":'
      ' {"value": "foo"}, "name": "x"}, {"parameterType": {"type":'
      ' "FLOAT64"}, "parameterValue": {"value": "1.0"}, "name": "y"}]')
  parser.add_argument(
      '--dry-run',
      action='store_true',
      help='Construct but do not run the SQL pipeline, for smoke testing.')
  # Hidden flags for overriding which SQL launcher template is used.
  parser.add_argument(
      '--sql-launcher-template-engine',
      hidden=True,
      help='The template engine to use for the SQL launcher template.',
      choices=['flex', 'dynamic'],
      default='flex')
  parser.add_argument(
      '--sql-launcher-template',
      hidden=True,
      help='The full GCS path to a SQL launcher template spec, e.g. '
      'gs://dataflow-sql-templates-us-west1/cloud_dataflow_sql_launcher_template_20201208_RC00/sql_launcher_flex_template. '
      'If None is specified, default to the latest release in the region. '
      'Note that older releases are not guaranteed to be compatible.')
def ExtractOutputs(args):
  """Parses outputs from args, returning a JSON string with the results.

  Args:
    args: The command line arguments (as configured by ArgsForSqlQuery).

  Returns:
    A JSON string describing the configured BigQuery and/or Pub/Sub outputs.

  Raises:
    exceptions.InvalidArgumentException: if a qualified --bigquery-table
      conflicts with --bigquery-project / --bigquery-dataset, or the table
      identifier is malformed.
    exceptions.RequiredArgumentException: if --bigquery-dataset is missing
      for an unqualified --bigquery-table.
  """
  outputs = []
  if args.bigquery_table:
    parts = args.bigquery_table.split('.')
    if len(parts) > 3:
      raise exceptions.InvalidArgumentException(
          '--bigquery-table',
          'Malformed table identifier. Use format "project.dataset.table".')
    # Left-pad to [project, dataset, table]; unqualified names leave the
    # leading components as None so the flags below can fill them in.
    bq_project, dataset, table = [None] * (3 - len(parts)) + parts
    if bq_project is None:
      # Fall back to the flag, then the core/project property.
      if args.bigquery_project:
        bq_project = args.bigquery_project
      else:
        bq_project = properties.VALUES.core.project.GetOrFail()
    elif args.bigquery_project and args.bigquery_project != bq_project:
      raise exceptions.InvalidArgumentException(
          '--bigquery-project',
          '"{}" does not match project "{}" set in qualified `--bigquery-table`.'
          .format(args.bigquery_project, bq_project))
    if dataset is None:
      if not args.bigquery_dataset:
        raise exceptions.RequiredArgumentException(
            '--bigquery-dataset',
            'Must be specified when `--bigquery-table` is unqualified.')
      dataset = args.bigquery_dataset
    elif args.bigquery_dataset and args.bigquery_dataset != dataset:
      raise exceptions.InvalidArgumentException(
          '--bigquery-dataset',
          '"{}" does not match dataset "{}" set in qualified `--bigquery-table`.'
          .format(args.bigquery_dataset, dataset))
    dispositions = {
        'write-empty': 'WRITE_EMPTY',
        'write-truncate': 'WRITE_TRUNCATE',
        'write-append': 'WRITE_APPEND',
    }
    # OrderedDicts keep the serialized key order stable across runs.
    outputs.append(
        collections.OrderedDict([
            ('type', 'bigquery'),
            ('table',
             collections.OrderedDict([('projectId', bq_project),
                                      ('datasetId', dataset),
                                      ('tableId', table)])),
            ('writeDisposition',
             dispositions[args.bigquery_write_disposition]),
        ]))
  if args.pubsub_topic:
    pubsub_project = (
        args.pubsub_project
        if args.pubsub_project else properties.VALUES.core.project.GetOrFail())
    outputs.append(
        collections.OrderedDict([
            ('type', 'pubsub'),
            ('projectId', pubsub_project),
            ('topic', args.pubsub_topic),
            ('createDisposition', {
                'create-if-not-found': 'CREATE_IF_NOT_FOUND',
                'fail-if-not-found': 'FAIL_IF_NOT_FOUND',
            }[args.pubsub_create_disposition]),
        ]))
  return json.dumps(outputs)