feat: Add new gcloud commands, API clients, and third-party libraries across various services.

This commit is contained in:
2026-01-01 20:26:35 +01:00
parent 5e23cbece0
commit a19e592eb7
25221 changed files with 8324611 additions and 0 deletions

View File

@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*- #
# Copyright 2014 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The main command group for Cloud Dataflow.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
from googlecloudsdk.core import log
# API service name for Cloud Dataflow.
SERVICE_NAME = 'dataflow'

# Keys for stashing the generated messages module, apitools client, and
# resource registry. NOTE(review): names suggest these are calliope context
# keys consumed by api_lib helpers; usage is not visible in this file.
DATAFLOW_MESSAGES_MODULE_KEY = 'dataflow_messages'
DATAFLOW_APITOOLS_CLIENT_KEY = 'dataflow_client'
DATAFLOW_REGISTRY_KEY = 'dataflow_registry'
@base.ReleaseTracks(base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA,
                    base.ReleaseTrack.GA)
class Dataflow(base.Group):
  """Manage Google Cloud Dataflow resources.

  The gcloud dataflow command group lets you manage Google Cloud Dataflow
  resources.

  Cloud Dataflow is a unified programming model and a managed service for
  developing and executing a wide range of data processing patterns
  including ETL, batch computation, and continuous computation.

  More information on Cloud Dataflow can be found here:
  https://cloud.google.com/dataflow and detailed documentation can be found
  here: https://cloud.google.com/dataflow/docs/
  """

  category = base.DATA_ANALYTICS_CATEGORY

  def Filter(self, context, args):
    """Runs before any command in this group.

    Args:
      context: {str: object}, the context passed to this group's commands;
        unused here (deleted below).
      args: argparse.Namespace, the parsed arguments; only used for the
        project-ID check, then deleted.
    """
    # TODO(b/190530367): Determine if command group works with project number
    base.RequireProjectID(args)
    del context, args
    base.DisableUserProjectQuota()
    # Self-signed JWTs are only turned on for the pre-GA tracks.
    self.EnableSelfSignedJwtForTracks(
        [base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA]
    )

View File

@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The command group for gcloud dataflow flex_template.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
@base.ReleaseTracks(base.ReleaseTrack.GA, base.ReleaseTrack.BETA)
class FlexTemplate(base.Group):
  # Pure container group: subcommands are registered by the surface tree.
  """A group of subcommands for working with Dataflow flex template."""

View File

@@ -0,0 +1,438 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow flex_template build command.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import json
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.calliope import actions
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.core import properties
def _CommonArgs(parser):
  """Registers flags for this command.

  Fixes several user-visible help/error strings that were missing a space at
  implicit string-concatenation seams (e.g. "file.Overrides",
  "--staging-location.(Must"), plus "the any image" grammar and "pipleline"
  typos. Flag names, types, and semantics are unchanged.

  Args:
    parser: argparse.ArgumentParser to register arguments with.
  """
  # Exactly one image source may be used: a prebuilt --image, the
  # image-building flag set, or the YAML flag set.
  image_args = parser.add_mutually_exclusive_group(required=True)
  image_building_args = image_args.add_argument_group()
  yaml_args = image_args.add_argument_group()
  parser.add_argument(
      'template_file_gcs_path',
      metavar='TEMPLATE_FILE_GCS_PATH',
      help=('The Google Cloud Storage location of the flex template file. '
            'Overrides if file already exists.'),
      type=arg_parsers.RegexpValidator(r'^gs://.*',
                                       'Must begin with \'gs://\''))
  image_args.add_argument(
      '--image',
      help=('Path to any image registry location of the prebuilt flex '
            'template image.'))
  parser.add_argument(
      '--image-repository-username-secret-id',
      help=('Secret Manager secret id for the username to authenticate to '
            'private registry. Should be in the format '
            'projects/{project}/secrets/{secret}/versions/{secret_version} or '
            'projects/{project}/secrets/{secret}. If the version is not '
            'provided, latest version will be used.'),
      type=arg_parsers.RegexpValidator(
          r'^projects\/[^\n\r\/]+\/secrets\/[^\n\r\/]+(\/versions\/[^\n\r\/]+)?$',
          'Must be in the format '
          '\'projects/{project}/secrets/{secret}\' or '
          '\'projects/{project}/secrets/{secret}/versions/{secret_version}\'.'))
  parser.add_argument(
      '--image-repository-password-secret-id',
      help=('Secret Manager secret id for the password to authenticate to '
            'private registry. Should be in the format '
            'projects/{project}/secrets/{secret}/versions/{secret_version} or '
            'projects/{project}/secrets/{secret}. If the version is not '
            'provided, latest version will be used.'),
      type=arg_parsers.RegexpValidator(
          r'^projects\/[^\n\r\/]+\/secrets\/[^\n\r\/]+(\/versions\/[^\n\r\/]+)?$',
          'Must be in the format '
          '\'projects/{project}/secrets/{secret}\' or '
          '\'projects/{project}/secrets/{secret}/versions/{secret_version}\'.'))
  parser.add_argument(
      '--image-repository-cert-path',
      help=('The full URL to self-signed certificate of private registry in '
            'Cloud Storage. For example, gs://mybucket/mycerts/selfsigned.crt. '
            'The certificate provided in Cloud Storage must be DER-encoded and '
            'may be supplied in binary or printable (Base64) encoding. If the '
            'certificate is provided in Base64 encoding, it must be bounded at '
            'the beginning by -----BEGIN CERTIFICATE-----, and must be bounded '
            'at the end by -----END CERTIFICATE-----. If this parameter is '
            'provided, the docker daemon in the template launcher will be '
            'instructed to trust that certificate. '),
      type=arg_parsers.RegexpValidator(r'^gs://.*',
                                       'Must begin with \'gs://\''))
  parser.add_argument(
      '--sdk-language',
      help='SDK language of the flex template job.',
      choices=['JAVA', 'PYTHON', 'GO', 'YAML'],
      required=True,
  )
  parser.add_argument(
      '--metadata-file',
      help='Local path to the metadata json file for the flex template.',
      type=arg_parsers.FileContents())
  parser.add_argument(
      '--print-only',
      help=('Prints the container spec to stdout. Does not save in '
            'Google Cloud Storage.'),
      default=False,
      action=actions.StoreBooleanProperty(
          properties.VALUES.dataflow.print_only))
  parser.add_argument(
      '--staging-location',
      help=('Default Google Cloud Storage location to stage local files. '
            "(Must be a URL beginning with 'gs://'.)"),
      type=arg_parsers.RegexpValidator(r'^gs://.*',
                                       'Must begin with \'gs://\''))
  parser.add_argument(
      '--temp-location',
      help=('Default Google Cloud Storage location to stage temporary files. '
            'If not set, defaults to the value for --staging-location. '
            "(Must be a URL beginning with 'gs://'.)"),
      type=arg_parsers.RegexpValidator(r'^gs://.*',
                                       'Must begin with \'gs://\''))
  parser.add_argument(
      '--service-account-email',
      type=arg_parsers.RegexpValidator(r'.*@.*\..*',
                                       'must provide a valid email address'),
      help='Default service account to run the workers as.')
  parser.add_argument(
      '--cloud-build-service-account',
      type=arg_parsers.RegexpValidator(
          r'.*@.*\..*', 'must provide a valid email address'
      ),
      help=(
          'Service account to run the Cloud Build in the format'
          ' projects/{project}/serviceAccounts/{service_account}. Ensure that'
          " the account you are using to run 'gcloud dataflow flex-template"
          " build' has 'ServiceAccountUser' role on the specified Cloud"
          ' Build service account you provide with the'
          ' --cloud-build-service-account flag. The specified service account'
          ' must have required permissions to build the image. If the specified'
          ' service account is in a project that is different from the project'
          ' where you are starting builds, see'
          ' https://cloud.google.com/build/docs/securing-builds/configure-user-specified-service-accounts#cross-project_set_up'
          ' to grant the necessary access.'
      ),
  )
  parser.add_argument(
      '--max-workers',
      type=int,
      help='Default maximum number of workers to run.',
  )
  parser.add_argument(
      '--disable-public-ips',
      action=actions.StoreBooleanProperty(
          properties.VALUES.dataflow.disable_public_ips),
      help='Cloud Dataflow workers must not use public IP addresses.')
  parser.add_argument(
      '--num-workers',
      type=int,
      help='Initial number of workers to use by default.')
  parser.add_argument(
      '--worker-machine-type',
      help='Default type of machine to use for workers. Defaults to '
           'server-specified.')
  parser.add_argument(
      '--subnetwork',
      help='Default Compute Engine subnetwork for launching instances '
           'to run your pipeline.')
  parser.add_argument(
      '--network',
      help='Default Compute Engine network for launching instances to '
           'run your pipeline.')
  parser.add_argument(
      '--dataflow-kms-key',
      help='Default Cloud KMS key to protect the job resources.')
  # A worker region and an explicit worker zone are mutually exclusive.
  region_group = parser.add_mutually_exclusive_group()
  region_group.add_argument(
      '--worker-region',
      help='Default region to run the workers in.')
  region_group.add_argument(
      '--worker-zone',
      help='Default zone to run the workers in.')
  parser.add_argument(
      '--enable-streaming-engine',
      action=actions.StoreBooleanProperty(
          properties.VALUES.dataflow.enable_streaming_engine),
      help='Enable Streaming Engine for the streaming job by default.')
  parser.add_argument(
      '--gcs-log-dir',
      help=('Google Cloud Storage directory to save build logs. '
            "(Must be a URL beginning with 'gs://'.)"),
      type=arg_parsers.RegexpValidator(r'^gs://.*',
                                       'Must begin with \'gs://\''),
      default=None)
  parser.add_argument(
      '--additional-experiments',
      metavar='ADDITIONAL_EXPERIMENTS',
      type=arg_parsers.ArgList(),
      action=arg_parsers.UpdateAction,
      help=('Default experiments to pass to the job.'))
  parser.add_argument(
      '--additional-user-labels',
      metavar='ADDITIONAL_USER_LABELS',
      type=arg_parsers.ArgDict(),
      action=arg_parsers.UpdateAction,
      help=(
          'Default user labels to pass to the job. Example: '
          '--additional-user-labels=\'{"key1":"value1"}\''
      ),
  )
  image_building_args.add_argument(
      '--image-gcr-path',
      help=('The Google Container Registry or Google Artifact Registry '
            'location to store the flex template image to be built.'),
      type=arg_parsers.RegexpValidator(
          r'^(.*\.){0,1}gcr.io/.*|^(.){2,}-docker.pkg.dev/.*',
          ('Must begin with \'[multi-region.]gcr.io/\' or '
           '\'[region.]-docker.pkg.dev/\'. Please check '
           'https://cloud.google.com/container-registry/docs/overview '
           'for available multi-regions in GCR or '
           'https://cloud.google.com/artifact-registry/docs/repo-organize#'
           'locations for available location in GAR')),
      required=True)
  # When building an image, exactly one pipeline artifact kind is required.
  pipeline_args = image_building_args.add_mutually_exclusive_group(
      required=True)
  pipeline_args.add_argument(
      '--jar',
      metavar='JAR',
      type=arg_parsers.ArgList(),
      action=arg_parsers.UpdateAction,
      help=('Local path to your dataflow pipeline jar file and all their '
            'dependent jar files required for the flex template classpath. '
            'You can pass them as a comma separated list or repeat '
            'individually with --jar flag. Ex: --jar="code.jar,dep.jar" or '
            '--jar code.jar, --jar dep.jar.'))
  pipeline_args.add_argument(
      '--py-path',
      metavar='PY_PATH',
      type=arg_parsers.ArgList(),
      action=arg_parsers.UpdateAction,
      help=('Local path to your dataflow pipeline python files and all their '
            'dependent files required for the flex template classpath. '
            'You can pass them as a comma separated list or repeat '
            'individually with --py-path flag. '
            'Ex: --py-path="path/pipeline/,path/dependency/" or '
            '--py-path path/pipeline/, --py-path path/dependency/.'))
  pipeline_args.add_argument(
      '--go-binary-path',
      metavar='GO_BINARY_PATH',
      help=('Local path to your compiled dataflow pipeline Go binary. '
            'The binary should be compiled to run on the target worker '
            'architecture (usually linux-amd64). See '
            'https://beam.apache.org/documentation/sdks/go-cross-compilation/ '
            'for more information.'))
  # required=True is set on the flag (not the group) because the enclosing
  # image_args group is already required.
  yaml_args.add_argument(
      '--yaml-pipeline-path',
      required=True,
      metavar='YAML_PIPELINE_PATH',
      type=arg_parsers.FileContents(),
      help='Local path to your YAML pipeline file.',
  )
  yaml_args.add_argument(
      '--yaml-image',
      metavar='YAML_IMAGE',
      help=(
          'Path to any image registry location of the prebuilt yaml '
          'template image.'
      ),
  )
  image_building_args.add_argument(
      '--flex-template-base-image',
      help=(
          'Flex template base image to be used while building the container'
          ' image. Allowed choices are allowed labels (JAVA11/17/21/25,'
          ' PYTHON3, GO), supported distroless images'
          ' (JAVA11/17/21/25_DISTROLESS, GO_DISTROLESS), or full gcr.io path of'
          ' the specific version of the base image. For labels, we use the'
          ' latest base image version to build the container. You can also'
          ' provide a specific version from this link '
          ' https://gcr.io/dataflow-templates-base/'
      ),
      # JAVA8 is deprecated and removed from help text. Allow it until Beam 3
      # release.
      type=arg_parsers.RegexpValidator(
          r'^(JAVA11|JAVA17|JAVA21|JAVA25|GO)(_DISTROLESS)?$|^JAVA8$|^PYTHON3$|^gcr.io/.*',
          'Must be JAVA11, JAVA17, JAVA21, JAVA25, PYTHON3, GO, (or with'
          ' `_DISTROLESS` suffix for supported distroless variants), or begin'
          " with 'gcr.io/'",
      ),
      required=True,
  )
  image_building_args.add_argument(
      '--env',
      metavar='ENV',
      type=arg_parsers.ArgDict(),
      action=arg_parsers.UpdateAction,
      help=('Environment variables to create for the Dockerfile. '
            'You can pass them as a comma separated list or repeat '
            'individually with --env flag. Ex: --env="A=B,C=D" or --env A=B, '
            '--env C=D. '
            'When you reference files/dir in env variables, please specify '
            'relative path to the paths passed via --py-path. Ex: if you pass '
            '--py-path="path/pipeline/" then set '
            'FLEX_TEMPLATE_PYTHON_PY_FILE="pipeline/pipeline.py" '
            'You can find the list of supported environment variables in this '
            'link. https://cloud.google.com/dataflow/docs/guides/templates/'
            'configuring-flex-templates'
            '#setting_required_dockerfile_environment_variables.'),
      required=True)
def _CommonRun(args):
  """Runs the command.

  Args:
    args: The arguments that were provided to this command invocation.

  Returns:
    A Job message.
  """
  # Collect the pipeline-option defaults baked into the template spec.
  pipeline_defaults = apis.TemplateArguments(
      max_workers=args.max_workers,
      num_workers=args.num_workers,
      network=args.network,
      subnetwork=args.subnetwork,
      worker_machine_type=args.worker_machine_type,
      kms_key_name=args.dataflow_kms_key,
      staging_location=args.staging_location,
      temp_location=args.temp_location,
      disable_public_ips=(
          properties.VALUES.dataflow.disable_public_ips.GetBool()),
      service_account_email=args.service_account_email,
      worker_region=args.worker_region,
      worker_zone=args.worker_zone,
      enable_streaming_engine=(
          properties.VALUES.dataflow.enable_streaming_engine.GetBool()),
      additional_experiments=args.additional_experiments,
      additional_user_labels=args.additional_user_labels)

  if args.sdk_language == 'YAML':
    if not args.yaml_pipeline_path:
      raise ValueError('yaml_pipeline_path is required.')
    # Fold the YAML pipeline definition into the metadata before resolving
    # the prebuilt YAML launcher image (which reads the mutated args).
    metadata = json.loads(args.metadata_file)
    metadata['yamlDefinition'] = args.yaml_pipeline_path
    args.metadata_file = json.dumps(metadata, indent=4)
    image_path = apis.Templates.GetYamlTemplateImage(args)
  elif args.image:
    # A prebuilt launcher image was supplied; nothing to build.
    image_path = args.image
  else:
    # No prebuilt image: build one and push it to --image-gcr-path.
    image_path = args.image_gcr_path
    apis.Templates.BuildAndStoreFlexTemplateImage(
        args.image_gcr_path,
        args.flex_template_base_image,
        args.jar,
        args.py_path,
        args.go_binary_path,
        args.env,
        args.sdk_language,
        args.gcs_log_dir,
        args.cloud_build_service_account,
    )

  return apis.Templates.BuildAndStoreFlexTemplateFile(
      args.template_file_gcs_path, image_path, args.metadata_file,
      args.sdk_language, args.print_only, pipeline_defaults,
      args.image_repository_username_secret_id,
      args.image_repository_password_secret_id, args.image_repository_cert_path)
@base.DefaultUniverseOnly
@base.ReleaseTracks(base.ReleaseTrack.GA, base.ReleaseTrack.BETA)
class Build(base.Command):
  """Builds a flex template file from the specified parameters."""

  # Rendered verbatim into the generated reference docs; the gs:// and
  # gcr:// paths in EXAMPLES are placeholders.
  detailed_help = {
      'DESCRIPTION':
          'Builds a flex template file from the specified parameters.',
      'EXAMPLES':
          """\
          To build and store a flex template JSON file, run:

            $ {command} gs://template-file-gcs-path --image=gcr://image-path \
                --metadata-file=/local/path/to/metadata.json --sdk-language=JAVA

          If using prebuilt template image from private registry, run:

            $ {command} gs://template-file-gcs-path \
                --image=private.registry.com:3000/image-path \
                --image-repository-username-secret-id="projects/test-project/secrets/username-secret" \
                --image-repository-password-secret-id="projects/test-project/secrets/password-secret/versions/latest" \
                --metadata-file=metadata.json \
                --sdk-language=JAVA

          To build the template image and flex template JSON file, run:

            $ {command} gs://template-file-gcs-path \
                --image-gcr-path=gcr://path-to-store-image \
                --jar=path/to/pipeline.jar --jar=path/to/dependency.jar \
                --env=FLEX_TEMPLATE_JAVA_MAIN_CLASS=classpath \
                --flex-template-base-image=JAVA11 \
                --metadata-file=/local/path/to/metadata.json --sdk-language=JAVA
          """,
  }

  @staticmethod
  def Args(parser):
    # Flags are shared with other tracks of this command via _CommonArgs.
    _CommonArgs(parser)

  def Run(self, args):
    # Delegates to _CommonRun, which returns a Job message.
    return _CommonRun(args)

View File

@@ -0,0 +1,241 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow flex_template run command.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.calliope import actions
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import dataflow_util
from googlecloudsdk.core import properties
@base.ReleaseTracks(base.ReleaseTrack.GA, base.ReleaseTrack.BETA)
@base.UniverseCompatible
class Run(base.Command):
  """Runs a job from the specified path."""

  # Rendered verbatim into the generated reference docs.
  detailed_help = {
      'DESCRIPTION':
          'Runs a job from the specified flex template gcs path.',
      'EXAMPLES':
          """\
          To run a job from the flex template, run:

            $ {command} my-job --template-file-gcs-location=gs://flex-template-path --region=europe-west1 --parameters=input="gs://input",output="gs://output-path" --max-workers=5
          """,
  }

  @staticmethod
  def Args(parser):
    """Register flags for this command.

    Fixes help strings that were missing a space at implicit
    string-concatenation seams ("files.(Must", "--staging-location.(Must",
    "The default isn1-standard-1."). Flag semantics are unchanged.

    Args:
      parser: argparse.ArgumentParser to register arguments with.
    """
    parser.add_argument(
        'job_name',
        metavar='JOB_NAME',
        help='Unique name to assign to the job.')
    parser.add_argument(
        '--template-file-gcs-location',
        help=('Google Cloud Storage location of the flex template to run. '
              "(Must be a URL beginning with 'gs://'.)"),
        type=arg_parsers.RegexpValidator(r'^gs://.*',
                                         'Must begin with \'gs://\''),
        required=True)
    parser.add_argument(
        '--region',
        metavar='REGION_ID',
        help=('Region ID of the job\'s regional endpoint. ' +
              dataflow_util.DEFAULT_REGION_MESSAGE))
    parser.add_argument(
        '--staging-location',
        help=('Default Google Cloud Storage location to stage local files. '
              "(Must be a URL beginning with 'gs://'.)"),
        type=arg_parsers.RegexpValidator(r'^gs://.*',
                                         'Must begin with \'gs://\''))
    parser.add_argument(
        '--temp-location',
        help=('Default Google Cloud Storage location to stage temporary files. '
              'If not set, defaults to the value for --staging-location. '
              "(Must be a URL beginning with 'gs://'.)"),
        type=arg_parsers.RegexpValidator(r'^gs://.*',
                                         'Must begin with \'gs://\''))
    parser.add_argument(
        '--service-account-email',
        type=arg_parsers.RegexpValidator(r'.*@.*\..*',
                                         'must provide a valid email address'),
        help='Service account to run the workers as.')
    parser.add_argument(
        '--max-workers', type=int, help='Maximum number of workers to run.')
    parser.add_argument(
        '--disable-public-ips',
        action=actions.StoreBooleanProperty(
            properties.VALUES.dataflow.disable_public_ips),
        help='Cloud Dataflow workers must not use public IP addresses.')
    parser.add_argument(
        '--num-workers', type=int, help='Initial number of workers to use.')
    parser.add_argument(
        '--worker-machine-type',
        help='Type of machine to use for workers. Defaults to '
             'server-specified.')
    parser.add_argument(
        '--launcher-machine-type',
        help='The machine type to use for launching the job. The default is '
             'n1-standard-1.')
    parser.add_argument(
        '--subnetwork',
        help='Compute Engine subnetwork for launching instances '
             'to run your pipeline.')
    parser.add_argument(
        '--network',
        help='Compute Engine network for launching instances to '
             'run your pipeline.')
    parser.add_argument(
        '--dataflow-kms-key',
        help='Cloud KMS key to protect the job resources.')
    # A worker region and an explicit worker zone are mutually exclusive.
    region_group = parser.add_mutually_exclusive_group()
    region_group.add_argument(
        '--worker-region',
        help='Region to run the workers in.')
    region_group.add_argument(
        '--worker-zone',
        help='Zone to run the workers in.')
    parser.add_argument(
        '--enable-streaming-engine',
        action=actions.StoreBooleanProperty(
            properties.VALUES.dataflow.enable_streaming_engine),
        help='Enabling Streaming Engine for the streaming job.')
    parser.add_argument(
        '--additional-experiments',
        metavar='ADDITIONAL_EXPERIMENTS',
        type=arg_parsers.ArgList(),
        action=arg_parsers.UpdateAction,
        help=(
            'Additional experiments to pass to the job. Example: '
            '--additional-experiments=experiment1,experiment2=value2'
        ),
    )
    parser.add_argument(
        '--additional-pipeline-options',
        metavar='ADDITIONAL_PIPELINE_OPTIONS',
        type=arg_parsers.ArgList(),
        action=arg_parsers.UpdateAction,
        help=(
            'Additional pipeline options to pass to the job. Example: '
            '--additional-pipeline-options=option1=value1,option2=value2'
        ),
    )
    parser.add_argument(
        '--additional-user-labels',
        metavar='ADDITIONAL_USER_LABELS',
        type=arg_parsers.ArgDict(),
        action=arg_parsers.UpdateAction,
        help=(
            'Additional user labels to pass to the job. Example: '
            '--additional-user-labels=\'key1=value1,key2=value2\''
        ),
    )
    parser.add_argument(
        '--parameters',
        metavar='PARAMETERS',
        type=arg_parsers.ArgDict(),
        action=arg_parsers.UpdateAction,
        help=('Parameters to pass to the job.'))
    # Flags that only apply when updating a running streaming job in place.
    streaming_update_args = parser.add_argument_group()
    streaming_update_args.add_argument(
        '--transform-name-mappings',
        metavar='TRANSFORM_NAME_MAPPINGS',
        type=arg_parsers.ArgDict(),
        action=arg_parsers.UpdateAction,
        help=('Transform name mappings for the streaming update job.'))
    streaming_update_args.add_argument(
        '--update',
        help=('Set this to true for streaming update jobs.'),
        action=arg_parsers.StoreTrueFalseAction,
        required=True)
    parser.add_argument(
        '--flexrs-goal',
        help=('FlexRS goal for the flex template job.'),
        choices=['COST_OPTIMIZED', 'SPEED_OPTIMIZED'])

  def Run(self, args):
    """Runs the command.

    Args:
      args: The arguments that were provided to this command invocation.

    Returns:
      A Job message.
    """
    arguments = apis.TemplateArguments(
        project_id=properties.VALUES.core.project.Get(required=True),
        region_id=dataflow_util.GetRegion(args),
        job_name=args.job_name,
        gcs_location=args.template_file_gcs_location,
        max_workers=args.max_workers,
        num_workers=args.num_workers,
        network=args.network,
        subnetwork=args.subnetwork,
        worker_machine_type=args.worker_machine_type,
        launcher_machine_type=args.launcher_machine_type,
        kms_key_name=args.dataflow_kms_key,
        staging_location=args.staging_location,
        temp_location=args.temp_location,
        disable_public_ips=(
            properties.VALUES.dataflow.disable_public_ips.GetBool()),
        service_account_email=args.service_account_email,
        worker_region=args.worker_region,
        worker_zone=args.worker_zone,
        enable_streaming_engine=(
            properties.VALUES.dataflow.enable_streaming_engine.GetBool()),
        additional_experiments=args.additional_experiments,
        additional_pipeline_options=args.additional_pipeline_options,
        additional_user_labels=args.additional_user_labels,
        streaming_update=args.update,
        transform_name_mappings=args.transform_name_mappings,
        flexrs_goal=args.flexrs_goal,
        parameters=args.parameters)
    return apis.Templates.CreateJobFromFlexTemplate(arguments)

View File

@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The command group for gcloud dataflow jobs.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
class Jobs(base.Group):
  # Pure container group: job subcommands live in their own modules.
  """A group of subcommands for working with Dataflow jobs."""

View File

@@ -0,0 +1,88 @@
# -*- coding: utf-8 -*- #
# Copyright 2025 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow jobs archive command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import job_utils
from googlecloudsdk.core import log
from googlecloudsdk.core.console import console_io
@base.ReleaseTracks(base.ReleaseTrack.GA, base.ReleaseTrack.BETA)
@base.DefaultUniverseOnly
class Archive(base.Command):
  """Archives a job.

  Archives a single job. The job must be in a terminal state, otherwise the
  request will be rejected.

  This command will require confirmation to run.

  ## EXAMPLES

  To archive job `2025-03-15_14_23_56-1234567890123456`, run:

    $ {command} 2025-03-15_14_23_56-1234567890123456
  """

  @staticmethod
  def Args(parser):
    job_utils.ArgsForJobRef(parser)

  def Run(self, args):
    """Runs the command.

    Args:
      args: The arguments that were provided to this command invocation.

    Returns:
      A Job message.
    """
    job_ref = job_utils.ExtractJobRef(args)

    # Ask before mutating the job; a "no" answer aborts the command.
    console_io.PromptContinue(
        message='Job [{}] will be archived.'.format(job_ref.jobId),
        cancel_on_no=True,
    )

    # Archiving is an Update that flips only the 'archived' display property,
    # scoped by the update mask below.
    msgs = apis.GetMessagesModule()
    archived_property = (
        msgs.JobMetadata.UserDisplayPropertiesValue.AdditionalProperty(
            key='archived', value='true'))
    display_properties = msgs.JobMetadata.UserDisplayPropertiesValue(
        additionalProperties=[archived_property])
    update_request = msgs.DataflowProjectsLocationsJobsUpdateRequest(
        jobId=job_ref.jobId,
        location=job_ref.location,
        projectId=job_ref.projectId,
        job=msgs.Job(
            jobMetadata=msgs.JobMetadata(
                userDisplayProperties=display_properties)),
        updateMask='job_metadata.user_display_properties.archived',
    )

    response = apis.Jobs.GetService().Update(update_request)
    log.status.Print('Archived job [{}].'.format(job_ref.jobId))
    return response

View File

@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow jobs cancel command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.api_lib.util import exceptions
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import job_utils
from googlecloudsdk.core import log
from googlecloudsdk.core.console import console_io
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.GA)
@base.DefaultUniverseOnly
class Cancel(base.Command):
  """Cancels all jobs that match the command line arguments."""

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    job_utils.ArgsForJobRefs(parser, nargs='+')
    parser.add_argument(
        '--force',
        action='store_true',
        help=(
            'Forcibly cancels a Dataflow job. Regular cancel must have been'
            ' attempted at least 30 minutes prior for a job to be force'
            ' cancelled.'
        ),
    )

  def Run(self, args):
    """This is what gets called when the user runs this command.

    Args:
      args: all the arguments that were provided to this command invocation.
    """
    for ref in job_utils.ExtractJobRefs(args):
      # Force-cancel leaks worker VMs, so each job gets its own confirmation;
      # answering "no" aborts the whole command.
      if args.force:
        console_io.PromptContinue(
            message='Force cancellation will leak VMs the cancelled Dataflow job created that must be manually cleaned up.',
            cancel_on_no=True,
        )
      try:
        apis.Jobs.Cancel(
            ref.jobId,
            args.force,
            project_id=ref.projectId,
            region_id=ref.location)
      except exceptions.HttpException as http_error:
        log.status.Print(
            'Failed to cancel job [{0}]: {1} Ensure that you have permission '
            'to access the job and that the `--region` flag, {2}, matches the'
            " job's region.".format(
                ref.jobId, http_error.payload.status_message, ref.location
            )
        )
      else:
        log.status.Print('Cancelled job [{0}]'.format(ref.jobId))

View File

@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Command group for managing Dataflow job configurations."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
# Container group only; subcommands (e.g. the declarative `export` spec)
# are registered elsewhere in the surface tree.
class Config(base.Group):
  """Manage Dataflow job configurations."""

View File

@@ -0,0 +1,38 @@
release_tracks: [ALPHA]
command_type: CONFIG_EXPORT
help_text:
brief: Export the configuration for a Dataflow job.
description: |
*{command}* exports the configuration for a Dataflow job.
Job configurations can be exported in
Kubernetes Resource Model (krm) or Terraform HCL formats. The
default format is `krm`.
Specifying `--all` allows you to export the configurations for all
jobs within the project.
Specifying `--path` allows you to export the configuration(s) to
a local directory.
examples: |
To export the configuration for a job, run:
$ {command} my-job
To export the configuration for a job to a file, run:
$ {command} my-job --path=/path/to/dir/
To export the configuration for a job in Terraform
HCL format, run:
$ {command} my-job --resource-format=terraform
To export the configurations for all jobs within a
project, run:
$ {command} --all
arguments:
resource:
help_text: Job to export the configuration for.
spec: !REF googlecloudsdk.command_lib.dataflow.resources:job

View File

@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow jobs describe command.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import job_utils
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.GA)
class Describe(base.DescribeCommand):
  """Outputs the Job object resulting from the Get API.

  By default this will display the Summary view which includes:

  - Project ID
  - Regional Endpoint
  - Job ID
  - Job Name
  - Job Type (Batch vs. Streaming)
  - Job Create Time
  - Job Status (Running, Done, Cancelled, Failed)
  - Job Status Time

  Notable values that are only in the full view:

  - Environment (staging Jars, information about workers, etc.)
  - Steps from the workflow graph
  """

  @staticmethod
  def Args(parser):
    """Registers the flags for this command.

    Args:
      parser: argparse.ArgumentParser to register arguments with.
    """
    job_utils.ArgsForJobRef(parser)
    # --full flips between the two job views; bind the enum once for clarity.
    view_enum = apis.Jobs.GET_REQUEST.ViewValueValuesEnum
    parser.add_argument(
        '--full',
        action='store_const',
        const=view_enum.JOB_VIEW_ALL,
        default=view_enum.JOB_VIEW_SUMMARY,
        help='Retrieve the full Job rather than the summary view')

  def Run(self, args):
    """Fetches the job in the requested view.

    Args:
      args: The arguments that were provided to this command invocation.

    Returns:
      A Job message.
    """
    ref = job_utils.ExtractJobRef(args)
    return apis.Jobs.Get(
        ref.jobId,
        project_id=ref.projectId,
        region_id=ref.location,
        view=args.full)

View File

@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow jobs drain command.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.api_lib.util import exceptions
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import job_utils
from googlecloudsdk.core import log
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.GA)
@base.DefaultUniverseOnly
class Drain(base.Command):
  """Drains all jobs that match the command line arguments.

  Once Drain is triggered, the pipeline will stop accepting new inputs.
  The input watermark will be advanced to infinity. Elements already in the
  pipeline will continue to be processed. Drained jobs can safely be
  cancelled.
  """

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    job_utils.ArgsForJobRefs(parser, nargs='+')

  def Run(self, args):
    """Drains each job named on the command line.

    Args:
      args: all the arguments that were provided to this command invocation.
    """
    for ref in job_utils.ExtractJobRefs(args):
      try:
        apis.Jobs.Drain(
            ref.jobId,
            project_id=ref.projectId,
            region_id=ref.location)
      except exceptions.HttpException as error:
        failure = (
            "Failed to drain job [{0}]: {1} Ensure that you have permission"
            " to access the job and that the `--region` flag, {2}, matches"
            " the job's region."
        ).format(ref.jobId, error.payload.status_message, ref.location)
        log.status.Print(failure)
      else:
        log.status.Print('Started draining job [{0}]'.format(ref.jobId))

View File

@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow jobs export-steps command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.api_lib.dataflow import step_graph
from googlecloudsdk.api_lib.dataflow import step_json
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import job_utils
from googlecloudsdk.core import log
@base.ReleaseTracks(base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA)
class ExportSteps(base.Command):
  """Exports information about the steps for the given job.

  The only currently supported format is to a GraphViz dot file.
  """

  @staticmethod
  def Args(parser):
    """Register flags for this command.

    Args:
      parser: argparse.ArgumentParser to register arguments with.
    """
    job_utils.ArgsForJobRef(parser)

  def Run(self, args):
    """Runs the command.

    Args:
      args: All the arguments that were provided to this command invocation.

    Returns:
      An iterator over the steps in the given job.
    """
    job_ref = job_utils.ExtractJobRef(args)
    # Request the full (JOB_VIEW_ALL) job view before extracting steps.
    return step_json.ExtractSteps(
        apis.Jobs.Get(
            job_ref.jobId,
            project_id=job_ref.projectId,
            region_id=job_ref.location,
            view=apis.Jobs.GET_REQUEST.ViewValueValuesEnum.JOB_VIEW_ALL))

  def Display(self, args, steps):
    """This method is called to print the result of the Run() method.

    Args:
      args: all the arguments that were provided to this command invocation.
      steps: The step information returned from Run().
    """
    if steps:
      for line in step_graph.YieldGraphviz(steps, 'StepGraph'):
        log.out.write(line)
        # NOTE(review): assumes YieldGraphviz yields lines without trailing
        # newlines, so each one is terminated here -- confirm against
        # step_graph.
        log.out.write('\n')

View File

@@ -0,0 +1,244 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow jobs list command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.api_lib.dataflow import job_display
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import dataflow_util
from googlecloudsdk.core import log
from googlecloudsdk.core import properties
from googlecloudsdk.core.resource import resource_filter
from googlecloudsdk.core.util import times
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.GA)
class List(base.ListCommand):
  """Lists all jobs in a particular project, optionally filtered by region.

  By default, 100 jobs in the current project are listed; this can be
  overridden with the gcloud --project flag, and the --limit flag.

  Using the --region flag will only list jobs from the given regional endpoint.

  ## EXAMPLES

  Filter jobs with the given name:

    $ {command} --filter="name=my-wordcount"

  List jobs from a given region:

    $ {command} --region="europe-west1"

  List jobs created this year:

    $ {command} --created-after=2018-01-01

  List jobs created more than a week ago:

    $ {command} --created-before=-P1W
  """

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    base.ASYNC_FLAG.RemoveFromParser(parser)
    # Set manageable limits on number of jobs that are listed.
    base.LIMIT_FLAG.SetDefault(parser, 100)
    base.PAGE_SIZE_FLAG.SetDefault(parser, 20)
    # Flags for filtering jobs.
    parser.add_argument(
        '--status',
        choices={
            'all':
                ('Returns running jobs first, ordered on creation timestamp, '
                 'then, returns all terminated jobs ordered on the termination '
                 'timestamp.'),
            'terminated':
                ('Filters the jobs that have a terminated state, ordered on '
                 'the termination timestamp. Example terminated states: Done, '
                 'Updated, Cancelled, etc.'),
            'active':
                ('Filters the jobs that are running ordered on the creation '
                 'timestamp.'),
        },
        help='Filter the jobs to those with the selected status.')
    parser.add_argument(
        '--created-after',
        type=arg_parsers.Datetime.Parse,
        help=('Filter the jobs to those created after the given time. '
              'See $ gcloud topic datetimes for information on time formats. '
              'For example, `2018-01-01` is the first day of the year, and '
              '`-P2W` is 2 weeks ago.'))
    parser.add_argument(
        '--created-before',
        type=arg_parsers.Datetime.Parse,
        help=('Filter the jobs to those created before the given time. '
              'See $ gcloud topic datetimes for information on time formats.'))
    parser.add_argument(
        '--region',
        metavar='REGION',
        help=(
            'Only resources from the given region are queried. '
            'If not provided, an attempt will be made to query from all '
            'available regions. In the event of an outage, jobs from certain '
            'regions may not be available.'))
    # Default table layout for the listed jobs.
    parser.display_info.AddFormat("""
          table(
            id:label=JOB_ID,
            name:label=NAME,
            type:label=TYPE,
            creationTime.yesno(no="-"),
            state,
            location:label=REGION
          )
     """)
    parser.display_info.AddUriFunc(dataflow_util.JobsUriFunc)

  def Run(self, args):
    """Runs the command.

    Args:
      args: All the arguments that were provided to this command invocation.

    Returns:
      An iterator over Job messages.
    """
    if args.filter:
      # --filter is evaluated client-side against the display representation,
      # combined with the created-after/created-before predicate.
      filter_expr = resource_filter.Compile(args.filter)

      def EvalFilter(x):
        return (filter_expr.Evaluate(job_display.DisplayInfo(x)) and
                _JobFilter(args)(x))

      filter_pred = EvalFilter
    else:
      filter_pred = _JobFilter(args)
    project_id = properties.VALUES.core.project.Get(required=True)
    jobs = self._JobSummariesForProject(project_id, args, filter_pred)
    return [job_display.DisplayInfo(job) for job in jobs]

  def _JobSummariesForProject(self, project_id, args, filter_predicate):
    """Get the list of job summaries that match the predicate.

    Args:
      project_id: The project ID to retrieve
      args: parsed command line arguments
      filter_predicate: The filter predicate to apply

    Returns:
      An iterator over all the matching jobs.
    """
    request = None
    service = None
    status_filter = self._StatusArgToFilter(args.status, args.region)
    if args.region:
      # A region was given: query only that regional endpoint.
      request = apis.Jobs.LIST_REQUEST(
          projectId=project_id, location=args.region, filter=status_filter)
      service = apis.Jobs.GetService()
    else:
      # No region: fall back to the aggregated (all-regions) listing and
      # warn that results may be incomplete during a regional outage.
      log.status.Print(
          '`--region` not set; getting jobs from all available regions. ' +
          'Some jobs may be missing in the event of an outage. ' +
          'https://cloud.google.com/dataflow/docs/concepts/regional-endpoints')
      request = apis.Jobs.AGGREGATED_LIST_REQUEST(
          projectId=project_id, filter=status_filter)
      service = apis.GetClientInstance().projects_jobs
    return dataflow_util.YieldFromList(
        project_id=project_id,
        region_id=args.region,
        service=service,
        request=request,
        limit=args.limit,
        batch_size=args.page_size,
        field='jobs',
        batch_size_attribute='pageSize',
        predicate=filter_predicate)

  def _StatusArgToFilter(self, status, region=None):
    """Return a string describing the job status.

    Args:
      status: The job status enum
      region: The region argument, to select the correct wrapper message.

    Returns:
      string describing the job status
    """
    # The list and aggregated-list requests declare distinct (but parallel)
    # filter enums, so pick the one matching the request being built.
    filter_value_enum = None
    if region:
      filter_value_enum = (
          apis.GetMessagesModule().DataflowProjectsLocationsJobsListRequest
          .FilterValueValuesEnum)
    else:
      filter_value_enum = (
          apis.GetMessagesModule().DataflowProjectsJobsAggregatedRequest
          .FilterValueValuesEnum)
    value_map = {
        'all': filter_value_enum.ALL,
        'terminated': filter_value_enum.TERMINATED,
        'active': filter_value_enum.ACTIVE,
    }
    # Unrecognized / unset --status defaults to ALL.
    return value_map.get(status, filter_value_enum.ALL)
class _JobFilter(object):
  """Client-side predicate for filtering jobs by creation time.

  Instances are callable: ``flt(job)`` returns True when the job passes
  every registered predicate (vacuously True when no time bounds were
  given on the command line).
  """

  def __init__(self, args):
    """Create a _JobFilter from the given args.

    Args:
      args: The argparse.Namespace containing the parsed arguments.
    """
    self.preds = []
    if args.created_after or args.created_before:
      self._ParseTimePredicate(args.created_after, args.created_before)

  def __call__(self, job):
    # Use a generator so all() short-circuits on the first failing
    # predicate instead of materializing every result in a list.
    return all(pred(job) for pred in self.preds)

  def _ParseTimePredicate(self, after, before):
    """Append a predicate filtering jobs by their creation time.

    The lower bound is exclusive and the upper bound is inclusive: a job
    passes when ``after < createTime <= before``.

    Args:
      after: Only return true if the job was created after this time.
      before: Only return true if the job was created before this time.
    """
    if after and not before:
      self.preds.append(lambda x: times.ParseDateTime(x.createTime) > after)
    elif before and not after:
      self.preds.append(lambda x: times.ParseDateTime(x.createTime) <= before)
    elif after and before:

      def _Predicate(x):
        create_time = times.ParseDateTime(x.createTime)
        return after < create_time <= before

      self.preds.append(_Predicate)

View File

@@ -0,0 +1,78 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow jobs resume-unsupported-sdk command.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.api_lib.util import exceptions
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import job_utils
from googlecloudsdk.core import log
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
@base.DefaultUniverseOnly
class Resume(base.Command):
  """Resumes job running with the specified job id.

  Resumes a pipeline job which is running on an unsupported SDK version.
  """

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    job_utils.ArgsForJobRef(parser)
    parser.add_argument(
        "--token",
        help=("The resume token unique to the job."),
        required=True)

  def Run(self, args):
    """Resumes the job identified on the command line.

    Args:
      args: all the arguments that were provided to this command invocation.
    """
    ref = job_utils.ExtractJobRef(args)
    override = "unsupported_sdk_temporary_override_token=" + args.token
    try:
      apis.Jobs.ResumeUnsupportedSDK(
          ref.jobId,
          override,
          project_id=ref.projectId,
          region_id=ref.location)
    except exceptions.HttpException as error:
      failure = (
          "Failed to resume job [{0}]: {1} Ensure that you have permission "
          "to access the job, the `--region` flag, {2}, is correct for the "
          "job and the `--token` flag, {3}, corresponds to the job."
      ).format(
          ref.jobId, error.payload.status_message, ref.location, args.token)
      log.status.Print(failure)
    else:
      log.status.Print(
          "Resuming job running on unsupported SDK version [{0}]. "
          "This job may be cancelled in the future. For more "
          "details, see https://cloud.google.com/dataflow/docs/"
          "support/sdk-version-support-status.".format(ref.jobId))

View File

@@ -0,0 +1,175 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow jobs run command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.calliope import actions
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import dataflow_util
from googlecloudsdk.command_lib.dataflow import job_utils
from googlecloudsdk.core import properties
def _CommonArgs(parser):
  """Register flags for this command.

  Shared by the GA and BETA `jobs run` commands.

  Args:
    parser: argparse.ArgumentParser to register arguments with.
  """
  job_utils.CommonArgs(parser)
  parser.add_argument(
      'job_name',
      metavar='JOB_NAME',
      help='The unique name to assign to the job.')
  parser.add_argument(
      '--gcs-location',
      help=('The Google Cloud Storage location of the job template to run. '
            "(Must be a URL beginning with 'gs://'.)"),
      type=arg_parsers.RegexpValidator(r'^gs://.*',
                                       'Must begin with \'gs://\''),
      required=True)
  parser.add_argument(
      '--staging-location',
      help=('The Google Cloud Storage location to stage temporary files. '
            "(Must be a URL beginning with 'gs://'.)"),
      type=arg_parsers.RegexpValidator(r'^gs://.*',
                                       'Must begin with \'gs://\''))
  parser.add_argument(
      '--parameters',
      metavar='PARAMETERS',
      type=arg_parsers.ArgDict(),
      action=arg_parsers.UpdateAction,
      help='The parameters to pass to the job.')
  parser.add_argument(
      '--enable-streaming-engine',
      action=actions.StoreBooleanProperty(
          properties.VALUES.dataflow.enable_streaming_engine),
      help='Enabling Streaming Engine for the streaming job.')
  parser.add_argument(
      '--additional-experiments',
      metavar='ADDITIONAL_EXPERIMENTS',
      type=arg_parsers.ArgList(),
      action=arg_parsers.UpdateAction,
      help=('Additional experiments to pass to the job. These experiments are '
            'appended to any experiments already set by the template.'))
  parser.add_argument(
      '--additional-user-labels',
      metavar='ADDITIONAL_USER_LABELS',
      type=arg_parsers.ArgDict(),
      action=arg_parsers.UpdateAction,
      help=(
          'Additional user labels to pass to the job. Example: '
          '--additional-user-labels=\'key1=value1,key2=value2\''
      ),
  )
  # TODO(b/139889563): Mark as required when default region is removed
  parser.add_argument(
      '--region',
      metavar='REGION_ID',
      help=('Region ID of the job\'s regional endpoint. ' +
            dataflow_util.DEFAULT_REGION_MESSAGE))
  # Flags that only apply to streaming update jobs.
  streaming_update_args = parser.add_argument_group()
  streaming_update_args.add_argument(
      '--update',
      help='Set this to true for streaming update jobs.',
      action=arg_parsers.StoreTrueFalseAction,
      # NOTE(review): `required=True` inside this argument group -- confirm
      # the calliope group semantics keep `--update` optional when none of
      # the streaming-update flags are given.
      required=True,
  )
  streaming_update_args.add_argument(
      '--transform-name-mappings',
      metavar='TRANSFORM_NAME_MAPPINGS',
      type=arg_parsers.ArgDict(),
      action=arg_parsers.UpdateAction,
      help='Transform name mappings for the streaming update job.',
  )
def _CommonRun(args):
  """Runs the command.

  Builds the template arguments from the parsed flags and relevant gcloud
  properties, then launches the job.

  Args:
    args: The arguments that were provided to this command invocation.

  Returns:
    A Job message.
  """
  arguments = apis.TemplateArguments(
      project_id=properties.VALUES.core.project.Get(required=True),
      region_id=dataflow_util.GetRegion(args),
      job_name=args.job_name,
      gcs_location=args.gcs_location,
      zone=args.zone,
      max_workers=args.max_workers,
      num_workers=args.num_workers,
      network=args.network,
      subnetwork=args.subnetwork,
      worker_machine_type=args.worker_machine_type,
      staging_location=args.staging_location,
      kms_key_name=args.dataflow_kms_key,
      disable_public_ips=properties.VALUES.dataflow.disable_public_ips.GetBool(),
      parameters=args.parameters,
      service_account_email=args.service_account_email,
      worker_region=args.worker_region,
      worker_zone=args.worker_zone,
      enable_streaming_engine=properties.VALUES.dataflow.enable_streaming_engine.GetBool(),
      streaming_update=args.update,
      transform_name_mappings=args.transform_name_mappings,
      additional_experiments=args.additional_experiments,
      additional_user_labels=args.additional_user_labels,
  )
  # Streaming updates go through the dynamic-template launch path; all
  # other invocations create a new job from the template.
  if args.update:
    return apis.Templates.LaunchDynamicTemplate(arguments)
  else:
    return apis.Templates.Create(arguments)
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.GA)
class Run(base.Command):
  """Runs a job from the specified path."""

  @staticmethod
  def Args(parser):
    """Register the flags shared with the other release tracks."""
    _CommonArgs(parser)

  def Run(self, args):
    """Delegates to _CommonRun; returns the launched Job message."""
    return _CommonRun(args)
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.BETA)
class RunBeta(Run):
  """Runs a job from the specified path."""

  @staticmethod
  def Args(parser):
    """Register the flags shared with the other release tracks."""
    _CommonArgs(parser)

  def Run(self, args):
    """Delegates to _CommonRun; returns the launched Job message."""
    return _CommonRun(args)

View File

@@ -0,0 +1,90 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow jobs show command.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.api_lib.dataflow import job_display
from googlecloudsdk.api_lib.dataflow import step_json
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import job_utils
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.GA)
class Show(base.Command):
  """Shows a short description of the given job."""

  @staticmethod
  def Args(parser):
    """Register flags for this command.

    Args:
      parser: argparse.ArgumentParser to register arguments with.
    """
    job_utils.ArgsForJobRef(parser)
    parser.add_argument(
        '--environment', action='store_true',
        help='If present, the environment will be listed.')
    parser.add_argument(
        '--steps', action='store_true',
        help='If present, the steps will be listed.')

  def Run(self, args):
    """Fetches the full job and builds its display representation.

    Args:
      args: The arguments that were provided to this command invocation.

    Returns:
      A Job message.
    """
    ref = job_utils.ExtractJobRef(args)
    job = apis.Jobs.Get(
        job_id=ref.jobId,
        project_id=ref.projectId,
        region_id=ref.location,
        view=apis.Jobs.GET_REQUEST.ViewValueValuesEnum.JOB_VIEW_ALL)
    # Start from the basic display info, then attach optional sections.
    shown = job_display.DisplayInfo(job)
    if args.environment:
      shown.environment = job.environment
    if args.steps:
      shown.steps = [
          self._PrettyStep(s) for s in step_json.ExtractSteps(job)]
    return shown

  def _PrettyStep(self, step):
    """Prettify a given step, by only extracting certain pieces of info.

    Args:
      step: The step to prettify.

    Returns:
      A dictionary describing the step.
    """
    props = step['properties']
    return {'id': step['name'], 'user_name': props['user_name']}

View File

@@ -0,0 +1,154 @@
# -*- coding: utf-8 -*- #
# Copyright 2023 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow jobs update-options command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.calliope import base
from googlecloudsdk.calliope import exceptions
from googlecloudsdk.command_lib.dataflow import job_utils
@base.ReleaseTracks(base.ReleaseTrack.GA, base.ReleaseTrack.BETA)
class UpdateOptions(base.Command):
  """Update pipeline options on-the-fly for running Dataflow jobs.

  This command can modify properties of running Dataflow jobs. Currently, only
  updating autoscaling settings for Streaming Engine jobs is supported.

  Adjust the autoscaling settings for Streaming Engine Dataflow jobs by
  providing at-least one of --min-num-workers or --max-num-workers or
  --worker-utilization-hint (or all 3), or --unset-worker-utilization-hint
  (which cannot be run at the same time as --worker-utilization-hint but works
  with the others).

  Allow a few minutes for the changes to take effect.

  Note that autoscaling settings can only be modified on-the-fly for Streaming
  Engine jobs. Attempts to modify batch job or Streaming Appliance jobs will
  fail.

  ## EXAMPLES

  Modify autoscaling settings to scale between 5-10 workers:

    $ {command} --min-num-workers=5 --max-num-workers=10

  Require a job to use at least 2 workers:

    $ {command} --min-num-workers=2

  Require a job to use at most 20 workers:

    $ {command} --max-num-workers=20

  Adjust the hint of target worker utilization to 70% for horizontal
  autoscaling:

    $ {command} --worker-utilization-hint=0.7

  "Unset" worker utilization hint so that horizontal scaling will rely on its
  default CPU utilization target:

    $ {command} --unset-worker-utilization-hint
  """

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    job_utils.ArgsForJobRef(parser)
    parser.add_argument(
        '--min-num-workers',
        type=int,
        help=(
            'Lower-bound for autoscaling, between 1-1000. Only supported for'
            ' streaming-engine jobs.'
        ),
    )
    parser.add_argument(
        '--max-num-workers',
        type=int,
        help=(
            'Upper-bound for autoscaling, between 1-1000. Only supported for'
            ' streaming-engine jobs.'
        ),
    )
    parser.add_argument(
        '--worker-utilization-hint',
        type=float,
        help=(
            'Target CPU utilization for autoscaling, ranging from 0.1 to 0.9.'
            ' Only supported for streaming-engine jobs with autoscaling'
            ' enabled.'
        ),
    )
    parser.add_argument(
        '--unset-worker-utilization-hint',
        action='store_true',
        help=(
            'Unset --worker-utilization-hint. This causes the'
            ' job autoscaling to fall back to internal tunings'
            ' if they exist, or otherwise use the default hint value.'
        ),
    )

  def Run(self, args):
    """Called when the user runs gcloud dataflow jobs update-options ...

    Args:
      args: all the arguments that were provided to this command invocation.

    Returns:
      The updated Job

    Raises:
      exceptions.OneOfArgumentsRequiredException: if no updatable option
        was supplied.
      exceptions.ConflictingArgumentsException: if the utilization hint is
        both set and unset in the same invocation.
    """
    # Reject an invocation that supplies nothing to update.
    if (
        args.min_num_workers is None
        and args.max_num_workers is None
        and args.worker_utilization_hint is None
        and not args.unset_worker_utilization_hint
    ):
      raise exceptions.OneOfArgumentsRequiredException(
          [
              '--min-num-workers',
              '--max-num-workers',
              '--worker-utilization-hint',
              '--unset-worker-utilization-hint',
          ],
          'You must provide at-least one field to update',
      )
    # Setting and unsetting the utilization hint at once is contradictory.
    elif (
        args.worker_utilization_hint is not None
        and args.unset_worker_utilization_hint
    ):
      raise exceptions.ConflictingArgumentsException(
          'The arguments --worker-utilization-hint and'
          ' --unset-worker-utilization-hint are mutually exclusive (as the'
          ' unset command will unset the given hint), and must be called'
          ' separately.',
      )
    job_ref = job_utils.ExtractJobRef(args)
    return apis.Jobs.UpdateOptions(
        job_ref.jobId,
        project_id=job_ref.projectId,
        region_id=job_ref.location,
        min_num_workers=args.min_num_workers,
        max_num_workers=args.max_num_workers,
        worker_utilization_hint=args.worker_utilization_hint,
        unset_worker_utilization_hint=args.unset_worker_utilization_hint,
    )

View File

@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The command group for gcloud dataflow logs.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
@base.ReleaseTracks(base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA)
class Logs(base.Group):
  """A group of subcommands for working with Dataflow logs."""
  pass

View File

@@ -0,0 +1,142 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow logs list command.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import dataflow_util
from googlecloudsdk.command_lib.dataflow import job_utils
from googlecloudsdk.core.util import times
class List(base.ListCommand):
  """Retrieve the job logs for a specific job.

  Retrieves the job logs from a specified job using the Dataflow Messages API
  with at least the specified importance level. Can also be used to display
  logs between a given time period using the --before and --after flags. These
  logs are produced by the service and are distinct from worker logs. Worker
  logs can be found in Cloud Logging.

  ## EXAMPLES

  Retrieve only error logs:

    $ {command} --importance=error

  Retrieve all logs after some date:

    $ {command} --after="2016-08-12 00:00:00"

  Retrieve logs from this year:

    $ {command} --after=2018-01-01

  Retrieve logs more than a week old:

    $ {command} --before=-P1W
  """

  @staticmethod
  def Args(parser):
    """Registers the flags for the dataflow logs list command."""
    job_utils.ArgsForJobRef(parser)
    # Sorting, URI output, async, and page-size flags inherited from
    # ListCommand do not apply to log messages; remove them.
    base.SORT_BY_FLAG.RemoveFromParser(parser)
    base.URI_FLAG.RemoveFromParser(parser)
    base.ASYNC_FLAG.RemoveFromParser(parser)
    base.PAGE_SIZE_FLAG.RemoveFromParser(parser)
    parser.add_argument(
        '--after',
        type=arg_parsers.Datetime.Parse,
        help=('Only display messages logged after the given time. '
              'See $ gcloud topic datetimes for information on time formats. '
              'For example, `2018-01-01` is the first day of the year, and '
              '`-P2W` is 2 weeks ago.'))
    parser.add_argument(
        '--before',
        type=arg_parsers.Datetime.Parse,
        help=('Only display messages logged before the given time. '
              'See $ gcloud topic datetimes for information on time formats.'))
    parser.add_argument(
        '--importance',
        choices=['debug', 'detailed', 'warning', 'error'],
        default='warning',
        help='Minimum importance a message must have to be displayed.')
    # One row per message; importance is abbreviated to a single letter via
    # the dataflow.JobMessage enum transform registered just below.
    parser.display_info.AddFormat("""
          table[no-heading,pad=1](
            messageImportance.enum(dataflow.JobMessage),
            time.date(tz=LOCAL):label=TIME,
            id,
            messageText:label=TEXT
          )
    """)
    symbols = {'dataflow.JobMessage::enum': {
        'JOB_MESSAGE_DETAILED': 'd',
        'JOB_MESSAGE_DEBUG': 'D',
        'JOB_MESSAGE_WARNING': 'W',
        'JOB_MESSAGE_ERROR': 'E',
    }}
    parser.display_info.AddTransforms(symbols)

  def Run(self, args):
    """This is what gets called when the user runs this command.

    Args:
      args: all the arguments that were provided to this command invocation.

    Returns:
      None on success, or a string containing the error message.
    """
    job_ref = job_utils.ExtractJobRef(args)
    importance_enum = (
        apis.Messages.LIST_REQUEST.MinimumImportanceValueValuesEnum)
    # Maps the user-facing --importance choices onto the API enum values.
    importance_map = {
        'debug': importance_enum.JOB_MESSAGE_DEBUG,
        'detailed': importance_enum.JOB_MESSAGE_DETAILED,
        'error': importance_enum.JOB_MESSAGE_ERROR,
        'warning': importance_enum.JOB_MESSAGE_WARNING,
    }
    request = apis.Messages.LIST_REQUEST(
        projectId=job_ref.projectId,
        jobId=job_ref.jobId,
        location=job_ref.location,
        minimumImportance=(args.importance and importance_map[args.importance]),
        # Note: if both are present, startTime > endTime, because we will
        # return messages with actual time [endTime, startTime).
        startTime=args.after and times.FormatDateTime(args.after),
        endTime=args.before and times.FormatDateTime(args.before))
    return dataflow_util.YieldFromList(
        job_id=job_ref.jobId,
        project_id=job_ref.projectId,
        region_id=job_ref.location,
        service=apis.Messages.GetService(),
        request=request,
        batch_size=args.limit,
        batch_size_attribute='pageSize',
        field='jobMessages')

View File

@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The command group for gcloud dataflow metrics.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
@base.ReleaseTracks(base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA)
class Metrics(base.Group):
  """A group of subcommands for working with Dataflow metrics."""

View File

@@ -0,0 +1,221 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow metrics list command.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import re
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.api_lib.dataflow import exceptions
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import dataflow_util
from googlecloudsdk.command_lib.dataflow import job_utils
from googlecloudsdk.core.util import times
class List(base.ListCommand):
  """Retrieves the metrics from a specific job.

  This command can be used to explore the job's metrics at a fine-grained level.

  ## EXAMPLES

  Filter metrics with the given name:

    $ {command} JOB --filter="name=ElementCount"

  Filter child metrics with matching transforms:

    $ {command} JOB --transform=WordCount

  Filter child output metrics:

    $ {command} JOB --transform=WordCount/Write.*out

  Filter all output metrics:

    $ {command} JOB --transform=.*out

  Filter all custom-defined user metrics

    $ {command} JOB --source=user

  Filter metrics with a scalar value greater than a threshold.

    $ {command} JOB --filter="scalar > 50"

  List metrics that have changed in the last 2 weeks:

    $ {command} JOB --changed-after=-P2W
  """

  # Values of the --source flag, compared against metric.name.origin in
  # _FilterBySource below.
  USER_SOURCE = 'user'
  SERVICE_SOURCE = 'service'

  @staticmethod
  def Args(parser):
    """Register flags for this command."""
    job_utils.ArgsForJobRef(parser)
    # Page-size/sorting/URI list flags do not apply to this command.
    base.PAGE_SIZE_FLAG.RemoveFromParser(parser)
    base.SORT_BY_FLAG.RemoveFromParser(parser)
    base.URI_FLAG.RemoveFromParser(parser)
    parser.add_argument(
        '--changed-after',
        type=arg_parsers.Datetime.Parse,
        help=('Only display metrics that have changed after the given time. '
              'See $ gcloud topic datetimes for information on time formats. '
              'For example, `2018-01-01` is the first day of the year, and '
              '`-P2W` is 2 weeks ago.'))
    parser.add_argument(
        '--hide-committed',
        default=False,
        action='store_true',
        help='If true, hide committed values.')
    parser.add_argument(
        '--transform',
        help='Filters only the metrics that prefix match the given regex.')
    parser.add_argument(
        '--source',
        choices={
            'all': 'Retrieves all metrics.',
            'service': 'Retrieves only dataflow service metrics.',
            'user': 'Retrieves only custom user metrics.',
        },
        default='all',
        help='Set the metrics source.')
    parser.add_argument(
        '--tentative',
        default=False,
        action='store_true',
        help='If true, display tentative values.')

  def Run(self, args):
    """This is what gets called when the user runs this command.

    Args:
      args: all the arguments that were provided to this command invocation.

    Returns:
      List of metric values.

    Raises:
      exceptions.InvalidExclusionException: If the excluded metrics are not
        valid.
    """
    job_ref = job_utils.ExtractJobRef(args)
    start_time = args.changed_after and times.FormatDateTime(args.changed_after)
    # Each predicate filters the fetched metrics client-side; a metric is
    # kept only if every predicate accepts it.
    preds = []
    # A metric is "tentative" when its context contains tentative='true'.
    # Default: committed only; --tentative alone: both; both flags:
    # tentative only; --hide-committed alone would exclude everything.
    if not args.tentative and args.hide_committed:
      raise exceptions.InvalidExclusionException(
          'Cannot exclude both tentative and committed metrics.')
    elif not args.tentative and not args.hide_committed:
      preds.append(lambda m: self._GetContextValue(m, 'tentative') != 'true')
    elif args.tentative and args.hide_committed:
      preds.append(lambda m: self._GetContextValue(m, 'tentative') == 'true')
    preds.append(lambda m: self._FilterBySource(m, args.source))
    preds.append(lambda m: self._FilterByTransform(m, args.transform))
    if args.changed_after:
      preds.append(
          lambda m: times.ParseDateTime(m.updateTime) > args.changed_after)
    response = apis.Metrics.Get(
        job_ref.jobId,
        project_id=job_ref.projectId,
        region_id=job_ref.location,
        start_time=start_time)
    # Reformat watermark sentinel values and apply all predicates.
    return [self._Format(m) for m in response.metrics
            if all([pred(m) for pred in preds])]

  def _IsSentinelWatermark(self, metric):
    """This returns true if the metric is a watermark with a sentinel value.

    Args:
      metric: A single UpdateMetric returned from the API.

    Returns:
      True if the metric is a sentinel value, false otherwise.
    """
    # Currently, we only apply the change from kInt64(MAX|MIN) to sentinel
    # values from dataflow metrics.
    if not dataflow_util.DATAFLOW_METRICS_RE.match(metric.name.origin):
      return False
    if not dataflow_util.WINDMILL_WATERMARK_RE.match(metric.name.name):
      return False
    return (metric.scalar.integer_value == -1 or
            metric.scalar.integer_value == -2)

  def _GetWatermarkSentinelDescription(self, metric):
    """This method gets the description of the watermark sentinel value.

    There are only two watermark sentinel values we care about, -1 represents a
    watermark at kInt64Min. -2 represents a watermark at kInt64Max. This runs
    on the assumption that _IsSentinelWatermark was called first.

    Args:
      metric: A single UpdateMetric returned from the API.

    Returns:
      The sentinel description.
    """
    value = metric.scalar.integer_value
    if value == -1:
      return 'Unknown watermark'
    return 'Max watermark'

  def _Format(self, metric):
    """Performs extra formatting for sentinel values or otherwise.

    Args:
      metric: A single UpdateMetric returned from the API.

    Returns:
      The formatted metric.
    """
    if self._IsSentinelWatermark(metric):
      # Replace the raw sentinel integer with a human-readable string.
      metric.scalar.string_value = self._GetWatermarkSentinelDescription(metric)
      metric.scalar.reset('integer_value')
    return metric

  def _FilterByTransform(self, metric, transform):
    """Returns True if the metric's transform or step prefix-matches the regex."""
    output_user_name = self._GetContextValue(metric, 'output_user_name') or ''
    step = self._GetContextValue(metric, 'step') or ''
    # re.match anchors at the start, so this is a prefix match; with no
    # --transform flag the empty pattern matches everything.
    transform = re.compile(transform or '')
    if transform.match(output_user_name) or transform.match(step):
      return True
    return False

  def _FilterBySource(self, metric, source):
    """Returns True if the metric's origin matches the requested source."""
    if source == self.USER_SOURCE:
      return metric.name.origin == 'user'
    elif source == self.SERVICE_SOURCE:
      return metric.name.origin == 'dataflow/v1b3'
    # 'all' (or any other value) accepts every origin.
    return True

  def _GetContextValue(self, metric, key):
    """Returns the metric's context value for the given key, or None."""
    if metric.name.context:
      for prop in metric.name.context.additionalProperties:
        if prop.key == key:
          return prop.value
    return None

View File

@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*- #
# Copyright 2019 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The command group for gcloud dataflow snapshots.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
class Snapshots(base.Group):
  """A group of subcommands for working with Cloud Dataflow snapshots."""

View File

@@ -0,0 +1,71 @@
# -*- coding: utf-8 -*- #
# Copyright 2019 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Command to snapshot a Cloud Dataflow job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import snapshot_utils
class Create(base.Command):
  """Creates a snapshot for a Cloud Dataflow job."""

  detailed_help = {
      'DESCRIPTION':
          '{description}',
      'EXAMPLES':
          """\
          To create a Cloud Dataflow snapshot with sources for a running job, run:

            $ {command} --job-id=JOB_ID --region=JOB_REGION --snapshot-sources=true --snapshot-ttl=7d
          """,
  }

  @staticmethod
  def Args(parser):
    """Registers the flags for this command.

    Args:
      parser: argparse.ArgumentParser to register arguments with.
    """
    snapshot_utils.ArgsForSnapshotJobRef(parser)
    snapshot_utils.ArgsForSnapshotTtl(parser)
    # NOTE(review): type=bool relies on calliope's flag handling; with stock
    # argparse any non-empty string parses as True -- confirm parsing.
    parser.add_argument(
        '--snapshot-sources',
        type=bool,
        default=False,
        help=('If true, snapshots will also be created for the Cloud Pub/Sub '
              'sources of the Cloud Dataflow job.'))

  def Run(self, args):
    """Runs the command.

    Args:
      args: The arguments that were provided to this command invocation.

    Returns:
      A Snapshot message.
    """
    job = snapshot_utils.ExtractSnapshotJobRef(args)
    ttl = snapshot_utils.ExtractSnapshotTtlDuration(args)
    return apis.Jobs.Snapshot(
        job.jobId,
        project_id=job.projectId,
        region_id=job.location,
        ttl=ttl,
        snapshot_sources=args.snapshot_sources)

View File

@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*- #
# Copyright 2019 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Command to delete a Cloud Dataflow snapshot.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import snapshot_utils
class Delete(base.Command):
  """Delete a Cloud Dataflow snapshot."""

  detailed_help = {
      'DESCRIPTION':
          '{description}',
      'EXAMPLES':
          """\
          To delete an existing Cloud Dataflow snapshot, run:

            $ {command} SNAPSHOT_ID --region=SNAPSHOT_REGION
          """,
  }

  @staticmethod
  def Args(parser):
    """Registers the flags for this command.

    Args:
      parser: argparse.ArgumentParser to register arguments with.
    """
    snapshot_utils.ArgsForSnapshotRef(parser)

  def Run(self, args):
    """Deletes the snapshot referenced by the command-line arguments.

    Args:
      args: The arguments that were provided to this command invocation.

    Returns:
      A Snapshot message.
    """
    ref = snapshot_utils.ExtractSnapshotRef(args)
    return apis.Snapshots.Delete(
        snapshot_id=ref.snapshotId,
        project_id=ref.projectId,
        region_id=ref.location)

View File

@@ -0,0 +1,65 @@
# -*- coding: utf-8 -*- #
# Copyright 2019 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Command to describe a Cloud Dataflow snapshot.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import snapshot_utils
class Describe(base.Command):
  """Describe a Cloud Dataflow snapshot."""

  detailed_help = {
      'DESCRIPTION':
          '{description}',
      'EXAMPLES':
          """\
          To see details about a Cloud Dataflow snapshot, run:

            $ {command} SNAPSHOT_ID --region=SNAPSHOT_REGION
          """,
  }

  @staticmethod
  def Args(parser):
    """Registers the flags for this command.

    Args:
      parser: argparse.ArgumentParser to register arguments with.
    """
    snapshot_utils.ArgsForSnapshotRef(parser)

  def Run(self, args):
    """Fetches the snapshot referenced by the command-line arguments.

    Args:
      args: The arguments that were provided to this command invocation.

    Returns:
      A Snapshot message.
    """
    ref = snapshot_utils.ExtractSnapshotRef(args)
    return apis.Snapshots.Get(
        snapshot_id=ref.snapshotId,
        project_id=ref.projectId,
        region_id=ref.location)

View File

@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*- #
# Copyright 2019 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Command to list Cloud Dataflow snapshots."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import dataflow_util
from googlecloudsdk.command_lib.dataflow import snapshot_utils
from googlecloudsdk.core import properties
class List(base.Command):
  """List all Cloud Dataflow snapshots in a project in the specified region, optionally filtered by job ID."""

  detailed_help = {
      'DESCRIPTION':
          '{description}',
      'EXAMPLES':
          """\
          To list all Cloud Dataflow snapshots in the us-central1 region, run:

            $ {command} --region=us-central1

          To list all Cloud Dataflow snapshots for a job, run:

            $ {command} --job-id=JOB_ID --region=JOB_REGION
          """,
  }

  @staticmethod
  def Args(parser):
    """Registers the flags for this command.

    Args:
      parser: argparse.ArgumentParser to register arguments with.
    """
    snapshot_utils.ArgsForListSnapshot(parser)

  def Run(self, args):
    """Lists the snapshots matching the command-line arguments.

    Args:
      args: The arguments that were provided to this command invocation.

    Returns:
      The response from listing the snapshots.
    """
    project = properties.VALUES.core.project.GetOrFail()
    region = dataflow_util.GetRegion(args)
    return apis.Snapshots.List(
        job_id=args.job_id,
        project_id=project,
        region_id=region)

View File

@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The command group for gcloud dataflow sql."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
# With is_removed=True, invoking this group surfaces the `error` message
# below instead of running any subcommand; the group is kept only so users
# get a pointer to the Beam YAML / notebook alternatives.
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.GA)
@base.Deprecate(
    is_removed=True,
    warning=(
        'This command is deprecated and will be removed January 31, 2025. '
        'Please see [Beam YAML]'
        '(https://beam.apache.org/documentation/sdks/yaml/) '
        'and [Beam notebooks]'
        '(https://cloud.google.com/dataflow/docs/guides/notebook-advanced#beam-sql) '
        'for alternatives.'
    ),
    error=(
        'This command has been removed. '
        'Please see [Beam YAML]'
        '(https://beam.apache.org/documentation/sdks/yaml/) '
        'and [Beam notebooks]'
        '(https://cloud.google.com/dataflow/docs/guides/notebook-advanced#beam-sql) '
        'for alternatives.'
    ),
)
@base.DefaultUniverseOnly
class Sql(base.Group):
  """A group of subcommands for working with Dataflow SQL."""
  pass

View File

@@ -0,0 +1,121 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of `gcloud dataflow sql query` command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import collections
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.api_lib.dataflow import sql_query_parameters
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import dataflow_util
from googlecloudsdk.command_lib.dataflow import sql_util
from googlecloudsdk.core import properties
DETAILED_HELP = {
'DESCRIPTION':
'Execute the user-specified SQL query on Dataflow. Queries must '
'comply to the ZetaSQL dialect (https://github.com/google/zetasql). '
'Results may be written to either BigQuery or Cloud Pub/Sub.',
'EXAMPLES':
"""\
To execute a simple SQL query on Dataflow that reads from and writes to BigQuery, run:
$ {command} 'SELECT word FROM bigquery.table.`my-project`.input_dataset.input_table where count > 3' --job-name=my-job --region=us-west1 --bigquery-dataset=my_output_dataset --bigquery-table=my_output_table
To execute a simple SQL query on Dataflow that reads from and writes to Cloud
Pub/Sub, run:
$ {command} 'SELECT word FROM pubsub.topic.`my-project`.input_topic where count > 3' --job-name=my-job --region=us-west1 --pubsub-topic=my_output_topic
To join data from BigQuery and Cloud Pub/Sub and write the result to Cloud
Pub/Sub, run:
$ {command} 'SELECT bq.name AS name FROM pubsub.topic.`my-project`.input_topic p INNER JOIN bigquery.table.`my-project`.input_dataset.input_table bq ON p.id = bq.id' --job-name=my-job --region=us-west1 --pubsub-topic=my_output_topic
To execute a parameterized SQL query that reads from and writes to BigQuery, run:
$ {command} 'SELECT word FROM bigquery.table.`my-project`.input_dataset.input_table where count > @threshold' --parameter=threshold:INT64:5 --job-name=my-job --region=us-west1 --bigquery-dataset=my_output_dataset --bigquery-table=my_output_table
""",
}
@base.ReleaseTracks(base.ReleaseTrack.BETA, base.ReleaseTrack.GA)
class Query(base.Command):
  """Execute the user-specified SQL query on Dataflow."""

  detailed_help = DETAILED_HELP

  @staticmethod
  def Args(parser):
    """Register flags for this command.

    Args:
      parser: argparse.ArgumentParser to register arguments with.
    """
    sql_util.ArgsForSqlQuery(parser)

  def Run(self, args):
    """Launches the SQL query as a Dataflow template job.

    Args:
      args: The arguments that were provided to this command invocation.

    Returns:
      The response from launching the template job.
    """
    # 'dynamic' launches via the dynamic-template API; anything else goes
    # through a flex template (see the return at the bottom).
    use_dynamic_engine = (args.sql_launcher_template_engine == 'dynamic')
    region = dataflow_util.GetRegion(args)
    if args.sql_launcher_template:
      # The user supplied an explicit launcher template location.
      gcs_location = args.sql_launcher_template
    else:
      if use_dynamic_engine:
        suffix = 'sql_launcher_template'
      else:
        suffix = 'sql_launcher_flex_template'
      # Default to the Google-hosted launcher template for the region.
      gcs_location = 'gs://dataflow-sql-templates-{}/latest/{}'.format(
          region, suffix)
    # Query parameters may come from a file, from repeated --parameter
    # flags, or default to an empty JSON list.
    if args.parameters_file:
      query_parameters = sql_query_parameters.ParseParametersFile(
          args.parameters_file)
    elif args.parameter:
      query_parameters = sql_query_parameters.ParseParametersList(
          args.parameter)
    else:
      query_parameters = '[]'
    template_parameters = collections.OrderedDict([
        ('dryRun', 'true' if args.dry_run else 'false'),
        ('outputs', sql_util.ExtractOutputs(args)),
        ('queryParameters', query_parameters),
        ('queryString', args.query),
    ])
    arguments = apis.TemplateArguments(
        project_id=properties.VALUES.core.project.GetOrFail(),
        region_id=region,
        job_name=args.job_name,
        gcs_location=gcs_location,
        zone=args.zone,
        max_workers=args.max_workers,
        disable_public_ips=properties.VALUES.dataflow.disable_public_ips
        .GetBool(),
        parameters=template_parameters,
        service_account_email=args.service_account_email,
        kms_key_name=args.dataflow_kms_key,
        num_workers=args.num_workers,
        network=args.network,
        subnetwork=args.subnetwork,
        worker_machine_type=args.worker_machine_type,
        worker_region=args.worker_region,
        worker_zone=args.worker_zone)
    if use_dynamic_engine:
      return apis.Templates.LaunchDynamicTemplate(arguments)
    return apis.Templates.CreateJobFromFlexTemplate(arguments)

View File

@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*- #
# Copyright 2024 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The command group for gcloud dataflow yaml."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
@base.DefaultUniverseOnly
@base.ReleaseTracks(base.ReleaseTrack.GA, base.ReleaseTrack.BETA)
class Yaml(base.Group):
  """A group of subcommands for launching Beam YAML jobs on Dataflow."""

View File

@@ -0,0 +1,227 @@
# -*- coding: utf-8 -*- #
# Copyright 2024 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Implementation of gcloud dataflow yaml run command."""
from googlecloudsdk.api_lib.dataflow import apis
from googlecloudsdk.api_lib.storage import storage_api
from googlecloudsdk.api_lib.storage import storage_util
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataflow import dataflow_util
from googlecloudsdk.core import properties
from googlecloudsdk.core import yaml
from googlecloudsdk.core.util import files
@base.DefaultUniverseOnly
@base.ReleaseTracks(base.ReleaseTrack.GA, base.ReleaseTrack.BETA)
class Run(base.Command):
  """Runs a job from the specified path."""

  detailed_help = {
      'DESCRIPTION': (
          'Runs a job from the specified YAML description or '
          'Cloud Storage path.'
      ),
      'EXAMPLES': """\
          To run a job from YAML, run:

            $ {command} my-job --yaml-pipeline-file=gs://yaml-path --region=europe-west1
          """,
  }

  @staticmethod
  def Args(parser):
    """Register flags for this command.

    Args:
      parser: argparse.ArgumentParser to register arguments with.
    """
    parser.add_argument(
        'job_name', metavar='JOB_NAME', help='Unique name to assign to the job.'
    )
    # Exactly one of the two pipeline sources must be provided.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        '--yaml-pipeline-file',
        help=(
            'Path of a file defining the YAML pipeline to run. '
            "(Must be a local file or a URL beginning with 'gs://'.)"
        ),
    )
    group.add_argument(
        '--yaml-pipeline', help='Inline definition of the YAML pipeline to run.'
    )
    parser.add_argument(
        '--region',
        metavar='REGION_ID',
        help=(
            "Region ID of the job's regional endpoint. "
            + dataflow_util.DEFAULT_REGION_MESSAGE
        ),
    )
    parser.add_argument(
        '--pipeline-options',
        metavar='OPTIONS=VALUE;OPTION=VALUE',
        type=arg_parsers.ArgDict(),
        action=arg_parsers.UpdateAction,
        help='Pipeline options to pass to the job.',
    )
    parser.add_argument(
        '--jinja-variables',
        metavar='JSON_OBJECT',
        help='Jinja2 variables to be used in reifying the yaml.',
    )
    parser.add_argument(
        '--template-file-gcs-location',
        help=('Google Cloud Storage location of the YAML template to run. '
              "(Must be a URL beginning with 'gs://'.)"),
        type=arg_parsers.RegexpValidator(r'^gs://.*',
                                         'Must begin with \'gs://\''),
    )
    parser.add_argument(
        '--network',
        help=(
            'Compute Engine network for launching worker instances to run '
            'the pipeline. If not set, the default network is used.'
        ),
    )
    parser.add_argument(
        '--subnetwork',
        help=(
            'Compute Engine subnetwork for launching worker instances to '
            'run the pipeline. If not set, the default subnetwork is used.'
        ),
    )

  def Run(self, args):
    """Runs the command.

    Args:
      args: The arguments that were provided to this command invocation.

    Returns:
      A Job message.
    """
    parameters = dict(args.pipeline_options or {})
    # These are required and mutually exclusive due to the grouping above.
    if args.yaml_pipeline_file:
      yaml_contents = _try_get_yaml_contents(args.yaml_pipeline_file)
      if yaml_contents is None:
        # The file could not be read (e.g. missing GCS read permission);
        # pass the path through for the template service to resolve.
        parameters['yaml_pipeline_file'] = args.yaml_pipeline_file
      else:
        parameters['yaml_pipeline'] = yaml_contents
    else:
      parameters['yaml_pipeline'] = args.yaml_pipeline
    if args.jinja_variables:
      parameters['jinja_variables'] = args.jinja_variables
    # Bug fix: the parameter key is 'jinja_variables' (underscore). The
    # previous hyphenated lookup ('jinja-variables') never matched, so
    # pipelines using jinja placeholders were always validated as plain
    # YAML and rejected. Skip validation when jinja variables are supplied.
    if 'yaml_pipeline' in parameters and 'jinja_variables' not in parameters:
      _validate_yaml(parameters['yaml_pipeline'])
    region_id = _get_region_from_yaml_or_default(
        parameters.get('yaml_pipeline'), args
    )
    # Prefer a user-supplied template location; otherwise use the hosted
    # YAML template for the resolved region.
    gcs_location = (
        args.template_file_gcs_location
        or apis.Templates.YAML_TEMPLATE_GCS_LOCATION.format(region_id)
    )
    arguments = apis.TemplateArguments(
        project_id=properties.VALUES.core.project.Get(required=True),
        region_id=region_id,
        job_name=args.job_name,
        gcs_location=gcs_location,
        parameters=parameters,
        network=args.network,
        subnetwork=args.subnetwork,
    )
    return apis.Templates.CreateJobFromFlexTemplate(arguments)
def _validate_yaml(yaml_pipeline):
  """Raises ValueError if the given pipeline text is not parseable YAML."""
  # TODO(b/320740846): Do more complete validation without requiring importing
  # the entire beam library.
  try:
    yaml.load(yaml_pipeline)
  except Exception as exn:  # pylint: disable=broad-except
    raise ValueError('yaml_pipeline must be a valid yaml.') from exn
def _get_region_from_yaml_or_default(yaml_pipeline, args):
  """Gets the region from yaml pipeline or args, or falls back to default."""
  region = args.region
  options_region = None
  try:
    pipeline_data = yaml.load(yaml_pipeline)
    if not pipeline_data:
      # Nothing parseable (e.g. yaml_pipeline is None); resolve from the
      # --region flag / configured default instead.
      return dataflow_util.GetRegion(args)
    if 'options' in pipeline_data and 'region' in pipeline_data['options']:
      options_region = pipeline_data['options']['region']
      if '{' in options_region or '}' in options_region:
        # Curly braces suggest an unexpanded jinja placeholder; treat the
        # region as unparsable and fall into the handler below.
        raise yaml.YAMLParseError(
            'yaml pipeline contains unparsable region: {0}. Found curly braces '
            'in region. Falling back to default region.'.format(options_region)
        )
  except yaml.YAMLParseError as exn:
    if not region:
      # Best effort: warn but continue with default region resolution.
      print(
          'Failed to get region from yaml pipeline: {0}. If using jinja '
          'variables, parsing may fail. Falling back to default '
          'region.'.format(exn)
      )
  if options_region:
    if region and region != options_region:
      # Conflicting explicit regions are an error rather than a silent pick.
      raise ValueError(
          'Region specified in yaml pipeline options ({0}) does not match'
          ' region specified in command line ({1})'.format(
              options_region, region
          )
      )
    return options_region
  return dataflow_util.GetRegion(args)
def _try_get_yaml_contents(yaml_pipeline_file):
  """Reads yaml contents from the specified file if it is accessible.

  Args:
    yaml_pipeline_file: Local path or gs:// URL of the pipeline definition.

  Returns:
    The file contents as a string, or None if a GCS object could not be read.
  """
  if not yaml_pipeline_file.startswith('gs://'):
    return files.ReadFileContents(yaml_pipeline_file)
  storage_client = storage_api.StorageClient()
  obj_ref = storage_util.ObjectReference.FromUrl(yaml_pipeline_file)
  try:
    return storage_client.ReadObject(obj_ref).read().decode('utf-8')
  except Exception as e:  # pylint: disable=broad-exception-caught
    # Best effort: warn and let the caller fall back to passing the path
    # through to the service.
    print(
        'Unable to read file {0} due to incorrect file path or insufficient'
        ' read permissions. Will not be able to validate the yaml pipeline or'
        ' determine the region from the yaml pipeline'
        ' options. Error: {1}'.format(yaml_pipeline_file, e)
    )
    return None