# -*- coding: utf-8 -*- #
|
|
# Copyright 2020 Google LLC. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Implementation of gcloud dataflow flex_template run command.
|
|
"""
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import unicode_literals
|
|
|
|
from googlecloudsdk.api_lib.dataflow import apis
|
|
from googlecloudsdk.calliope import actions
|
|
from googlecloudsdk.calliope import arg_parsers
|
|
from googlecloudsdk.calliope import base
|
|
from googlecloudsdk.command_lib.dataflow import dataflow_util
|
|
from googlecloudsdk.core import properties
|
|
|
|
|
|
@base.ReleaseTracks(base.ReleaseTrack.GA, base.ReleaseTrack.BETA)
@base.UniverseCompatible
class Run(base.Command):
  """Runs a job from the specified path."""

  detailed_help = {
      'DESCRIPTION':
          'Runs a job from the specified flex template gcs path.',
      'EXAMPLES':
          """\
          To run a job from the flex template, run:

            $ {command} my-job --template-file-gcs-location=gs://flex-template-path --region=europe-west1 --parameters=input="gs://input",output="gs://output-path" --max-workers=5
          """,
  }

  @staticmethod
  def Args(parser):
    """Register flags for this command.

    Args:
      parser: argparse.ArgumentParser to register arguments with.
    """
    parser.add_argument(
        'job_name',
        metavar='JOB_NAME',
        help='Unique name to assign to the job.')

    parser.add_argument(
        '--template-file-gcs-location',
        help=('Google Cloud Storage location of the flex template to run. '
              "(Must be a URL beginning with 'gs://'.)"),
        type=arg_parsers.RegexpValidator(r'^gs://.*',
                                         'Must begin with \'gs://\''),
        required=True)

    parser.add_argument(
        '--region',
        metavar='REGION_ID',
        help=('Region ID of the job\'s regional endpoint. ' +
              dataflow_util.DEFAULT_REGION_MESSAGE))

    parser.add_argument(
        '--staging-location',
        # Trailing space after "files." keeps the two concatenated string
        # literals from rendering as "files.(Must" in the help output.
        help=('Default Google Cloud Storage location to stage local files. '
              "(Must be a URL beginning with 'gs://'.)"),
        type=arg_parsers.RegexpValidator(r'^gs://.*',
                                         'Must begin with \'gs://\''))

    parser.add_argument(
        '--temp-location',
        # Trailing space after "--staging-location." keeps the concatenated
        # help text readable ("...--staging-location. (Must...").
        help=('Default Google Cloud Storage location to stage temporary files. '
              'If not set, defaults to the value for --staging-location. '
              "(Must be a URL beginning with 'gs://'.)"),
        type=arg_parsers.RegexpValidator(r'^gs://.*',
                                         'Must begin with \'gs://\''))

    parser.add_argument(
        '--service-account-email',
        type=arg_parsers.RegexpValidator(r'.*@.*\..*',
                                         'must provide a valid email address'),
        help='Service account to run the workers as.')

    parser.add_argument(
        '--max-workers', type=int, help='Maximum number of workers to run.')

    parser.add_argument(
        '--disable-public-ips',
        action=actions.StoreBooleanProperty(
            properties.VALUES.dataflow.disable_public_ips),
        help='Cloud Dataflow workers must not use public IP addresses.')

    parser.add_argument(
        '--num-workers', type=int, help='Initial number of workers to use.')

    parser.add_argument(
        '--worker-machine-type',
        help='Type of machine to use for workers. Defaults to '
        'server-specified.')

    parser.add_argument(
        '--launcher-machine-type',
        # Trailing space after "is" keeps the concatenated help text from
        # rendering as "The default isn1-standard-1.".
        help='The machine type to use for launching the job. The default is '
        'n1-standard-1.')

    parser.add_argument(
        '--subnetwork',
        help='Compute Engine subnetwork for launching instances '
        'to run your pipeline.')

    parser.add_argument(
        '--network',
        help='Compute Engine network for launching instances to '
        'run your pipeline.')

    parser.add_argument(
        '--dataflow-kms-key',
        help='Cloud KMS key to protect the job resources.')

    # A worker placement may be pinned to either a region or a zone, never
    # both.
    region_group = parser.add_mutually_exclusive_group()
    region_group.add_argument(
        '--worker-region',
        help='Region to run the workers in.')

    region_group.add_argument(
        '--worker-zone',
        help='Zone to run the workers in.')

    parser.add_argument(
        '--enable-streaming-engine',
        action=actions.StoreBooleanProperty(
            properties.VALUES.dataflow.enable_streaming_engine),
        help='Enabling Streaming Engine for the streaming job.')

    parser.add_argument(
        '--additional-experiments',
        metavar='ADDITIONAL_EXPERIMENTS',
        type=arg_parsers.ArgList(),
        action=arg_parsers.UpdateAction,
        help=(
            'Additional experiments to pass to the job. Example: '
            '--additional-experiments=experiment1,experiment2=value2'
        ),
    )

    parser.add_argument(
        '--additional-pipeline-options',
        metavar='ADDITIONAL_PIPELINE_OPTIONS',
        type=arg_parsers.ArgList(),
        action=arg_parsers.UpdateAction,
        help=(
            'Additional pipeline options to pass to the job. Example: '
            '--additional-pipeline-options=option1=value1,option2=value2'
        ),
    )

    parser.add_argument(
        '--additional-user-labels',
        metavar='ADDITIONAL_USER_LABELS',
        type=arg_parsers.ArgDict(),
        action=arg_parsers.UpdateAction,
        help=(
            'Additional user labels to pass to the job. Example: '
            '--additional-user-labels=\'key1=value1,key2=value2\''
        ),
    )

    parser.add_argument(
        '--parameters',
        metavar='PARAMETERS',
        type=arg_parsers.ArgDict(),
        action=arg_parsers.UpdateAction,
        help=
        ('Parameters to pass to the job.'))

    # NOTE(review): --update is marked required=True inside this calliope
    # argument group — presumably so that supplying
    # --transform-name-mappings without --update is rejected; confirm the
    # group semantics before relying on it.
    streaming_update_args = parser.add_argument_group()
    streaming_update_args.add_argument(
        '--transform-name-mappings',
        metavar='TRANSFORM_NAME_MAPPINGS',
        type=arg_parsers.ArgDict(),
        action=arg_parsers.UpdateAction,
        help=
        ('Transform name mappings for the streaming update job.'))

    streaming_update_args.add_argument(
        '--update',
        help=('Set this to true for streaming update jobs.'),
        action=arg_parsers.StoreTrueFalseAction,
        required=True)

    parser.add_argument(
        '--flexrs-goal',
        help=('FlexRS goal for the flex template job.'),
        choices=['COST_OPTIMIZED', 'SPEED_OPTIMIZED'])

  def Run(self, args):
    """Runs the command.

    Args:
      args: The arguments that were provided to this command invocation.

    Returns:
      A Job message.
    """
    # Bundle every flag into the TemplateArguments message; boolean worker
    # options are read back from gcloud properties because their flags are
    # StoreBooleanProperty actions that write to those properties.
    arguments = apis.TemplateArguments(
        project_id=properties.VALUES.core.project.Get(required=True),
        region_id=dataflow_util.GetRegion(args),
        job_name=args.job_name,
        gcs_location=args.template_file_gcs_location,
        max_workers=args.max_workers,
        num_workers=args.num_workers,
        network=args.network,
        subnetwork=args.subnetwork,
        worker_machine_type=args.worker_machine_type,
        launcher_machine_type=args.launcher_machine_type,
        kms_key_name=args.dataflow_kms_key,
        staging_location=args.staging_location,
        temp_location=args.temp_location,
        disable_public_ips=
        properties.VALUES.dataflow.disable_public_ips.GetBool(),
        service_account_email=args.service_account_email,
        worker_region=args.worker_region,
        worker_zone=args.worker_zone,
        enable_streaming_engine=
        properties.VALUES.dataflow.enable_streaming_engine.GetBool(),
        additional_experiments=args.additional_experiments,
        additional_pipeline_options=args.additional_pipeline_options,
        additional_user_labels=args.additional_user_labels,
        streaming_update=args.update,
        transform_name_mappings=args.transform_name_mappings,
        flexrs_goal=args.flexrs_goal,
        parameters=args.parameters)
    return apis.Templates.CreateJobFromFlexTemplate(arguments)