439 lines
17 KiB
Python
439 lines
17 KiB
Python
# -*- coding: utf-8 -*- #
|
|
# Copyright 2020 Google LLC. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Implementation of gcloud dataflow flex_template build command.
|
|
"""
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import unicode_literals
|
|
|
|
import json
|
|
|
|
from googlecloudsdk.api_lib.dataflow import apis
|
|
from googlecloudsdk.calliope import actions
|
|
from googlecloudsdk.calliope import arg_parsers
|
|
from googlecloudsdk.calliope import base
|
|
from googlecloudsdk.core import properties
|
|
|
|
|
|
# Patterns and messages shared by several flags below. Keeping them in one
# place guarantees that every flag of the same kind validates identically.
_GCS_PATH_REGEX = r'^gs://.*'
_GCS_PATH_ERROR = 'Must begin with \'gs://\''
_SECRET_ID_REGEX = (
    r'^projects\/[^\n\r\/]+\/secrets\/[^\n\r\/]+(\/versions\/[^\n\r\/]+)?$'
)
_SECRET_ID_ERROR = (
    'Must be in the format '
    '\'projects/{project}/secrets/{secret}\' or '
    '\'projects/{project}/secrets/{secret}/versions/{secret_version}\'.'
)
_EMAIL_REGEX = r'.*@.*\..*'
_EMAIL_ERROR = 'must provide a valid email address'


def _GcsPathValidator():
  """Returns a validator that accepts values beginning with 'gs://'."""
  return arg_parsers.RegexpValidator(_GCS_PATH_REGEX, _GCS_PATH_ERROR)


def _SecretIdValidator():
  """Returns a validator for Secret Manager secret (or secret version) ids."""
  return arg_parsers.RegexpValidator(_SECRET_ID_REGEX, _SECRET_ID_ERROR)


def _EmailValidator():
  """Returns a validator that accepts values containing '@' and a later '.'."""
  return arg_parsers.RegexpValidator(_EMAIL_REGEX, _EMAIL_ERROR)


def _CommonArgs(parser):
  """Registers flags for this command.

  Flags are arranged as follows:
    * A required mutually exclusive group offering three ways to supply the
      template image: a prebuilt image (--image), the image-building flags
      (--image-gcr-path and friends), or the YAML pipeline flags.
    * Flags registered directly on the parser that apply in every mode
      (template file path, SDK language, metadata, default job environment).

  Args:
    parser: argparse.ArgumentParser to register arguments with.
  """
  image_args = parser.add_mutually_exclusive_group(required=True)
  image_building_args = image_args.add_argument_group()
  yaml_args = image_args.add_argument_group()

  parser.add_argument(
      'template_file_gcs_path',
      metavar='TEMPLATE_FILE_GCS_PATH',
      help=('The Google Cloud Storage location of the flex template file. '
            'Overrides if file already exists.'),
      type=_GcsPathValidator())

  image_args.add_argument(
      '--image',
      help=('Path to any image registry location of the prebuilt flex '
            'template image.'))

  parser.add_argument(
      '--image-repository-username-secret-id',
      help=('Secret Manager secret id for the username to authenticate to '
            'private registry. Should be in the format '
            'projects/{project}/secrets/{secret}/versions/{secret_version} or '
            'projects/{project}/secrets/{secret}. If the version is not '
            'provided latest version will be used.'),
      type=_SecretIdValidator())

  parser.add_argument(
      '--image-repository-password-secret-id',
      help=('Secret Manager secret id for the password to authenticate to '
            'private registry. Should be in the format '
            'projects/{project}/secrets/{secret}/versions/{secret_version} or '
            'projects/{project}/secrets/{secret}. If the version is not '
            'provided latest version will be used.'),
      type=_SecretIdValidator())

  parser.add_argument(
      '--image-repository-cert-path',
      help=('The full URL to self-signed certificate of private registry in '
            'Cloud Storage. For example, gs://mybucket/mycerts/selfsigned.crt. '
            'The certificate provided in Cloud Storage must be DER-encoded and '
            'may be supplied in binary or printable (Base64) encoding. If the '
            'certificate is provided in Base64 encoding, it must be bounded at '
            'the beginning by -----BEGIN CERTIFICATE-----, and must be bounded '
            'at the end by -----END CERTIFICATE-----. If this parameter is '
            'provided, the docker daemon in the template launcher will be '
            'instructed to trust that certificate. '),
      type=_GcsPathValidator())

  parser.add_argument(
      '--sdk-language',
      help='SDK language of the flex template job.',
      choices=['JAVA', 'PYTHON', 'GO', 'YAML'],
      required=True,
  )

  parser.add_argument(
      '--metadata-file',
      help='Local path to the metadata json file for the flex template.',
      type=arg_parsers.FileContents())

  parser.add_argument(
      '--print-only',
      help=('Prints the container spec to stdout. Does not save in '
            'Google Cloud Storage.'),
      default=False,
      action=actions.StoreBooleanProperty(
          properties.VALUES.dataflow.print_only))

  parser.add_argument(
      '--staging-location',
      help=('Default Google Cloud Storage location to stage local files. '
            "(Must be a URL beginning with 'gs://'.)"),
      type=_GcsPathValidator())

  parser.add_argument(
      '--temp-location',
      help=('Default Google Cloud Storage location to stage temporary files. '
            'If not set, defaults to the value for --staging-location. '
            "(Must be a URL beginning with 'gs://'.)"),
      type=_GcsPathValidator())

  parser.add_argument(
      '--service-account-email',
      type=_EmailValidator(),
      help='Default service account to run the workers as.')

  parser.add_argument(
      '--cloud-build-service-account',
      type=_EmailValidator(),
      help=(
          'Service account to run the Cloud Build in the format'
          ' projects/{project}/serviceAccounts/{service_account}. Ensure that'
          " the account you are using to run 'gcloud dataflow flex-template"
          " build' has 'ServiceAccountUser' role on the specified Cloud"
          ' Build service account you provide with the'
          ' --cloud-build-service-account flag. The specified service account'
          ' must have required permissions to build the image. If the specified'
          ' service account is in a project that is different from the project'
          ' where you are starting builds, see'
          ' https://cloud.google.com/build/docs/securing-builds/configure-user-specified-service-accounts#cross-project_set_up'
          ' to grant the necessary access.'
      ),
  )

  parser.add_argument(
      '--max-workers',
      type=int,
      help='Default maximum number of workers to run.',
  )

  parser.add_argument(
      '--disable-public-ips',
      action=actions.StoreBooleanProperty(
          properties.VALUES.dataflow.disable_public_ips),
      help='Cloud Dataflow workers must not use public IP addresses.')

  parser.add_argument(
      '--num-workers',
      type=int,
      help='Initial number of workers to use by default.')

  parser.add_argument(
      '--worker-machine-type',
      help='Default type of machine to use for workers. Defaults to '
      'server-specified.')

  parser.add_argument(
      '--subnetwork',
      help='Default Compute Engine subnetwork for launching instances '
      'to run your pipeline.')

  parser.add_argument(
      '--network',
      help='Default Compute Engine network for launching instances to '
      'run your pipeline.')

  parser.add_argument(
      '--dataflow-kms-key',
      help='Default Cloud KMS key to protect the job resources.')

  # A worker region and a worker zone cannot both be given.
  region_group = parser.add_mutually_exclusive_group()
  region_group.add_argument(
      '--worker-region',
      help='Default region to run the workers in.')

  region_group.add_argument(
      '--worker-zone',
      help='Default zone to run the workers in.')

  parser.add_argument(
      '--enable-streaming-engine',
      action=actions.StoreBooleanProperty(
          properties.VALUES.dataflow.enable_streaming_engine),
      help='Enable Streaming Engine for the streaming job by default.')

  parser.add_argument(
      '--gcs-log-dir',
      help=('Google Cloud Storage directory to save build logs. '
            "(Must be a URL beginning with 'gs://'.)"),
      type=_GcsPathValidator(),
      default=None)

  parser.add_argument(
      '--additional-experiments',
      metavar='ADDITIONAL_EXPERIMENTS',
      type=arg_parsers.ArgList(),
      action=arg_parsers.UpdateAction,
      help=('Default experiments to pass to the job.'))

  parser.add_argument(
      '--additional-user-labels',
      metavar='ADDITIONAL_USER_LABELS',
      type=arg_parsers.ArgDict(),
      action=arg_parsers.UpdateAction,
      help=(
          'Default user labels to pass to the job. Example: '
          '--additional-user-labels=\'{"key1":"value1"}\''
      ),
  )

  image_building_args.add_argument(
      '--image-gcr-path',
      help=('The Google Container Registry or Google Artifact Registry '
            'location to store the flex template image to be built.'),
      type=arg_parsers.RegexpValidator(
          r'^(.*\.){0,1}gcr.io/.*|^(.){2,}-docker.pkg.dev/.*',
          ('Must begin with \'[multi-region.]gcr.io/\' or '
           '\'[region.]-docker.pkg.dev/\'. Please check '
           'https://cloud.google.com/container-registry/docs/overview '
           'for available multi-regions in GCR or '
           'https://cloud.google.com/artifact-registry/docs/repo-organize#'
           'locations for available location in GAR')),
      required=True)

  # Exactly one pipeline artifact kind must accompany an image build.
  pipeline_args = image_building_args.add_mutually_exclusive_group(
      required=True)
  pipeline_args.add_argument(
      '--jar',
      metavar='JAR',
      type=arg_parsers.ArgList(),
      action=arg_parsers.UpdateAction,
      help=('Local path to your dataflow pipeline jar file and all their '
            'dependent jar files required for the flex template classpath. '
            'You can pass them as a comma separated list or repeat '
            'individually with --jar flag. Ex: --jar="code.jar,dep.jar" or '
            '--jar code.jar, --jar dep.jar.'))

  pipeline_args.add_argument(
      '--py-path',
      metavar='PY_PATH',
      type=arg_parsers.ArgList(),
      action=arg_parsers.UpdateAction,
      help=('Local path to your dataflow pipeline python files and all their '
            'dependent files required for the flex template classpath. '
            'You can pass them as a comma separated list or repeat '
            'individually with --py-path flag. '
            'Ex: --py-path="path/pipeline/,path/dependency/" or '
            '--py-path path/pipeline/, --py-path path/dependency/.'))

  pipeline_args.add_argument(
      '--go-binary-path',
      metavar='GO_BINARY_PATH',
      help=('Local path to your compiled dataflow pipeline Go binary. '
            'The binary should be compiled to run on the target worker '
            'architecture (usually linux-amd64). See '
            'https://beam.apache.org/documentation/sdks/go-cross-compilation/ '
            'for more information.'))

  # This is set here as image_args is required.
  yaml_args.add_argument(
      '--yaml-pipeline-path',
      required=True,
      metavar='YAML_PIPELINE_PATH',
      type=arg_parsers.FileContents(),
      help='Local path to your YAML pipeline file.',
  )

  yaml_args.add_argument(
      '--yaml-image',
      metavar='YAML_IMAGE',
      help=(
          'Path to any image registry location of the prebuilt yaml '
          'template image.'
      ),
  )

  image_building_args.add_argument(
      '--flex-template-base-image',
      help=(
          'Flex template base image to be used while building the container'
          ' image. Allowed choices are allowed labels (JAVA11/17/21/25,'
          ' PYTHON3, GO), supported distroless images'
          ' (JAVA11/17/21/25_DISTROLESS, GO_DISTROLESS), or full gcr.io path of'
          ' the specific version of the base image. For labels, we use the'
          ' latest base image version to build the container. You can also'
          ' provide a specific version from this link'
          ' https://gcr.io/dataflow-templates-base/'
      ),
      # JAVA8 is deprecated and removed from help text. Allow it until Beam 3
      # release.
      type=arg_parsers.RegexpValidator(
          r'^(JAVA11|JAVA17|JAVA21|JAVA25|GO)(_DISTROLESS)?$|^JAVA8$|^PYTHON3$|^gcr.io/.*',
          'Must be JAVA11, JAVA17, JAVA21, JAVA25, PYTHON3, GO, (or with'
          ' `_DISTROLESS` suffix for supported distroless variants), or begin'
          " with 'gcr.io/'",
      ),
      required=True,
  )

  image_building_args.add_argument(
      '--env',
      metavar='ENV',
      type=arg_parsers.ArgDict(),
      action=arg_parsers.UpdateAction,
      help=('Environment variables to create for the Dockerfile. '
            'You can pass them as a comma separated list or repeat '
            'individually with --env flag. Ex: --env="A=B,C=D" or --env A=B, '
            '--env C=D. When you reference files/dir in env variables, please '
            'specify relative path to the paths passed via --py-path. '
            'Ex: if you pass --py-path="path/pipeline/" then set '
            'FLEX_TEMPLATE_PYTHON_PY_FILE="pipeline/pipeline.py". '
            'You can find the list of supported environment variables in this '
            'link. https://cloud.google.com/dataflow/docs/guides/templates/'
            'configuring-flex-templates'
            '#setting_required_dockerfile_environment_variables.'),
      required=True)
|
|
|
|
|
|
def _CommonRun(args):
  """Runs the command.

  Collects the default job environment from the flags, resolves which
  container image the template should reference (building one when no
  prebuilt image is available), and then builds and stores the flex template
  file.

  Args:
    args: The arguments that were provided to this command invocation.

  Returns:
    Whatever apis.Templates.BuildAndStoreFlexTemplateFile returns for the
    assembled template.

  Raises:
    ValueError: If --sdk-language is YAML and either the YAML pipeline or the
      metadata file was not supplied.
  """
  template_args = apis.TemplateArguments(
      max_workers=args.max_workers,
      num_workers=args.num_workers,
      network=args.network,
      subnetwork=args.subnetwork,
      worker_machine_type=args.worker_machine_type,
      kms_key_name=args.dataflow_kms_key,
      staging_location=args.staging_location,
      temp_location=args.temp_location,
      # Boolean flags are stored as properties by StoreBooleanProperty, so
      # they are read back from the property rather than from args.
      disable_public_ips=(
          properties.VALUES.dataflow.disable_public_ips.GetBool()
      ),
      service_account_email=args.service_account_email,
      worker_region=args.worker_region,
      worker_zone=args.worker_zone,
      enable_streaming_engine=(
          properties.VALUES.dataflow.enable_streaming_engine.GetBool()
      ),
      additional_experiments=args.additional_experiments,
      additional_user_labels=args.additional_user_labels)

  if args.sdk_language == 'YAML':
    if not args.yaml_pipeline_path:
      raise ValueError('yaml_pipeline_path is required.')
    # --metadata-file is an optional flag, but the YAML definition is embedded
    # into the metadata below. Fail with a clear message instead of letting
    # json.loads(None) raise an opaque TypeError.
    if args.metadata_file is None:
      raise ValueError('metadata_file is required.')
    metadata = json.loads(args.metadata_file)
    metadata['yamlDefinition'] = args.yaml_pipeline_path
    args.metadata_file = json.dumps(metadata, indent=4)
    image_path = apis.Templates.GetYamlTemplateImage(args)
  else:
    image_path = args.image

  # No prebuilt image available: build one and push it to --image-gcr-path.
  if not image_path:
    image_path = args.image_gcr_path
    apis.Templates.BuildAndStoreFlexTemplateImage(
        args.image_gcr_path,
        args.flex_template_base_image,
        args.jar,
        args.py_path,
        args.go_binary_path,
        args.env,
        args.sdk_language,
        args.gcs_log_dir,
        args.cloud_build_service_account,
    )

  return apis.Templates.BuildAndStoreFlexTemplateFile(
      args.template_file_gcs_path, image_path, args.metadata_file,
      args.sdk_language, args.print_only, template_args,
      args.image_repository_username_secret_id,
      args.image_repository_password_secret_id, args.image_repository_cert_path)
|
|
|
|
|
|
@base.DefaultUniverseOnly
@base.ReleaseTracks(base.ReleaseTrack.GA, base.ReleaseTrack.BETA)
class Build(base.Command):
  """Builds a flex template file from the specified parameters."""

  # NOTE: the EXAMPLES text is a regular (non-raw) string, so every trailing
  # backslash joins the following line onto the same command. Each line of a
  # multi-line example except the last therefore needs one.
  detailed_help = {
      'DESCRIPTION':
          'Builds a flex template file from the specified parameters.',
      'EXAMPLES':
          """\
          To build and store a flex template JSON file, run:

            $ {command} gs://template-file-gcs-path --image=gcr://image-path \
              --metadata-file=/local/path/to/metadata.json --sdk-language=JAVA

          If using prebuilt template image from private registry, run:

            $ {command} gs://template-file-gcs-path \
              --image=private.registry.com:3000/image-path \
              --image-repository-username-secret-id="projects/test-project/secrets/username-secret" \
              --image-repository-password-secret-id="projects/test-project/secrets/password-secret/versions/latest" \
              --metadata-file=metadata.json \
              --sdk-language=JAVA

          To build the template image and flex template JSON file, run:

            $ {command} gs://template-file-gcs-path \
              --image-gcr-path=gcr://path-to-store-image \
              --jar=path/to/pipeline.jar --jar=path/to/dependency.jar \
              --env=FLEX_TEMPLATE_JAVA_MAIN_CLASS=classpath \
              --flex-template-base-image=JAVA11 \
              --metadata-file=/local/path/to/metadata.json --sdk-language=JAVA
          """,
  }

  @staticmethod
  def Args(parser):
    """Registers the command's flags on the given parser."""
    _CommonArgs(parser)

  def Run(self, args):
    """Runs the command with the parsed args and returns its result."""
    return _CommonRun(args)
|