feat: Add new gcloud commands, API clients, and third-party libraries across various services.

This commit is contained in:
2026-01-01 20:26:35 +01:00
parent 5e23cbece0
commit a19e592eb7
25221 changed files with 8324611 additions and 0 deletions

View File

@@ -0,0 +1,133 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for building the dataproc clusters CLI."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import abc
import collections
import os
from apitools.base.py import encoding
from googlecloudsdk.api_lib.dataproc import constants
from googlecloudsdk.api_lib.dataproc import exceptions
from googlecloudsdk.api_lib.dataproc import storage_helpers
from googlecloudsdk.core import log
from googlecloudsdk.core.util import files
import six
import six.moves.urllib.parse
class JobBase(six.with_metaclass(abc.ABCMeta, object)):
  """Base class for Jobs.

  Subclasses describe one Dataproc job type (Spark, Hadoop, Hive, ...).
  This base tracks the job's resource files by type and stages any local
  files to a Cloud Storage staging directory before submission.
  """

  def __init__(self, *args, **kwargs):
    super(JobBase, self).__init__(*args, **kwargs)
    # Maps a file-type key (e.g. 'jars') to a URI string or a list of URIs.
    self.files_by_type = {}
    # Local paths queued for upload by ValidateAndStageFiles().
    self.files_to_stage = []
    # GCS directory URI that local files are staged under; None until set.
    self._staging_dir = None

  def _GetStagedFile(self, file_str):
    """Validate file URI and register it for uploading if it is local.

    Args:
      file_str: The file path or URI to check.

    Returns:
      The unchanged URI for non-local files, or the GCS URI the file will
      occupy once staged for local files.

    Raises:
      files.Error: If the local file does not exist.
      exceptions.ArgumentError: If a local file must be staged but no
        staging directory could be determined.
    """
    # A Windows drive letter (e.g. 'C:') would otherwise parse as a URI
    # scheme, so detect it separately.
    drive, _ = os.path.splitdrive(file_str)
    uri = six.moves.urllib.parse.urlsplit(file_str, allow_fragments=False)
    # Determine the file is local to this machine if no scheme besides a drive
    # is passed. file:// URIs are interpreted as living on VMs.
    is_local = drive or not uri.scheme
    if not is_local:
      # Non-local files are already staged.
      # TODO(b/36057257): Validate scheme.
      return file_str
    if not os.path.exists(file_str):
      raise files.Error('File Not Found: [{0}].'.format(file_str))
    if self._staging_dir is None:
      # This exception is raised only when there are files to stage but the
      # staging location couldn't be determined. When all provided files are
      # already staged, this exception is not raised.
      raise exceptions.ArgumentError(
          'Could not determine where to stage local file {0}. When submitting '
          'a job to a cluster selected via --cluster-labels, either\n'
          '- a staging bucket must be provided via the --bucket argument, or\n'
          '- all provided files must be non-local.'.format(file_str))
    basename = os.path.basename(file_str)
    self.files_to_stage.append(file_str)
    # NOTE(review): assumes _staging_dir ends with '/' (as GetStagingDir
    # returns) so urljoin appends the basename rather than replacing the
    # last path component — confirm for other setters of _staging_dir.
    staged_file = six.moves.urllib.parse.urljoin(self._staging_dir, basename)
    return staged_file

  def ValidateAndStageFiles(self):
    """Validate file URIs and upload them if they are local."""
    for file_type, file_or_files in six.iteritems(self.files_by_type):
      # TODO(b/36049793): Validate file suffixes.
      if not file_or_files:
        continue
      elif isinstance(file_or_files, six.string_types):
        # Single URI: replace it with its staged equivalent.
        self.files_by_type[file_type] = self._GetStagedFile(file_or_files)
      else:
        # List of URIs: stage each one.
        staged_files = [self._GetStagedFile(f) for f in file_or_files]
        self.files_by_type[file_type] = staged_files
    if self.files_to_stage:
      log.info('Staging local files {0} to {1}.'.format(self.files_to_stage,
                                                        self._staging_dir))
      storage_helpers.Upload(self.files_to_stage, self._staging_dir)

  def GetStagingDir(self, cluster, cluster_pool, job_id, bucket=None):
    """Determine the GCS directory to stage job resources in.

    Args:
      cluster: The target cluster message, or None.
      cluster_pool: The target cluster pool identifier, or None.
      job_id: The id of the job being submitted.
      bucket: Optional staging bucket name; when provided it overrides the
        cluster's own staging bucket.

    Returns:
      The gs:// URI of the staging directory, or None if it cannot be
      determined.
    """
    if bucket is None and cluster is None:
      return None
    if bucket is None:
      # If bucket is not provided, fall back to cluster's staging bucket.
      if cluster.config:
        bucket = cluster.config.configBucket
      elif cluster.virtualClusterConfig:
        bucket = cluster.virtualClusterConfig.stagingBucket
      else:
        # This is only needed if the request needs to stage files. If it doesn't
        # everything will work. If it does need to stage files, then it will
        # fail with a message saying --bucket should be specified.
        return None
    # 'unresolved' is the placeholder when neither a cluster UUID nor a
    # cluster pool identifies the execution environment.
    environment = 'unresolved'
    if cluster is not None:
      environment = cluster.clusterUuid
    if cluster_pool is not None:
      environment = cluster_pool
    staging_dir = (
        'gs://{bucket}/{prefix}/{environment}/jobs/{job_id}/staging/'.format(
            bucket=bucket,
            prefix=constants.GCS_METADATA_PREFIX,
            environment=environment,
            job_id=job_id))
    return staging_dir

  def BuildLoggingConfig(self, messages, driver_logging):
    """Build LoggingConfig from parameters.

    Args:
      messages: The API messages module.
      driver_logging: Dict mapping package name to log4j log level, or a
        falsy value when no driver logging was requested.

    Returns:
      A LoggingConfig message, or None if driver_logging is falsy.
    """
    if not driver_logging:
      return None
    value_enum = (messages.LoggingConfig.DriverLogLevelsValue.
                  AdditionalProperty.ValueValueValuesEnum)
    # OrderedDict preserves the caller-supplied package ordering.
    config = collections.OrderedDict(
        [(key, value_enum(value)) for key, value in driver_logging.items()])
    return messages.LoggingConfig(
        driverLogLevels=encoding.DictToAdditionalPropertyMessage(
            config,
            messages.LoggingConfig.DriverLogLevelsValue))

  def PopulateFilesByType(self, args):
    """Merges the subclass's GetFilesByType(args) result into files_by_type."""
    self.files_by_type.update(self.GetFilesByType(args))

View File

@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*- #
# Copyright 2023 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for Flink Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class FlinkBase(job_base.JobBase):
  """Shared implementation for submitting a Java or Scala Flink job."""

  @staticmethod
  def Args(parser):
    """Registers the flags used when submitting Flink jobs."""
    savepoint_help = (
        'HCFS URI of the savepoint that is used to refer to the state of the '
        'previously stopped job. The new job will resume previous state from '
        'there.')
    parser.add_argument('--savepoint', help=savepoint_help)
    parser.add_argument(
        '--jars',
        metavar='JAR',
        default=[],
        type=arg_parsers.ArgList(),
        help=('Comma-separated list of jar files to provide to the task '
              'manager classpaths.'))
    parser.add_argument(
        'job_args',
        nargs=argparse.REMAINDER,
        help='The job arguments to pass.')
    parser.add_argument(
        '--properties',
        metavar='PROPERTY=VALUE',
        type=arg_parsers.ArgDict(),
        help=('List of key=value pairs to configure Flink. For a list of '
              'available properties, see: https://nightlies.apache.org/flink/'
              'flink-docs-master/docs/deployment/config/.'))
    parser.add_argument(
        '--properties-file', help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--driver-log-levels',
        metavar='PACKAGE=LEVEL',
        type=arg_parsers.ArgDict(),
        help=('List of package to log4j log level pairs to configure driver '
              'logging. For example: root=FATAL,com.example=INFO.'))

  @staticmethod
  def GetFilesByType(args):
    """Maps file-type keys (main_jar, jars) to the parsed argument values."""
    return dict(main_jar=args.main_jar, jars=args.jars)

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the flinkJob member of the given job."""
    flink = messages.FlinkJob(
        mainJarFileUri=files_by_type['main_jar'],
        mainClass=args.main_class,
        jarFileUris=files_by_type['jars'],
        args=args.job_args or [],
        savepointUri=args.savepoint,
        loggingConfig=logging_config)
    props = job_util.BuildJobProperties(args.properties, args.properties_file)
    if props:
      # Sorted so message comparisons in tests do not depend on ordering.
      flink.properties = encoding.DictToAdditionalPropertyMessage(
          props, messages.FlinkJob.PropertiesValue, sort_items=True)
    job.flinkJob = flink

View File

@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for Hadoop Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class HadoopBase(job_base.JobBase):
  """Hadoop job functionality shared by every release track."""

  @staticmethod
  def Args(parser):
    """Registers the flags used when submitting Hadoop jobs."""
    parser.add_argument(
        '--jars',
        metavar='JAR',
        default=[],
        type=arg_parsers.ArgList(),
        help=('Comma separated list of jar files to be provided to the MR '
              'and driver classpaths.'))
    parser.add_argument(
        '--files',
        metavar='FILE',
        default=[],
        type=arg_parsers.ArgList(),
        help=('Comma separated list of file paths to be provided to the job. '
              'A file path can either be a path to a local file or a path to '
              'a file already in a Cloud Storage bucket.'))
    parser.add_argument(
        '--archives',
        metavar='ARCHIVE',
        default=[],
        type=arg_parsers.ArgList(),
        help=('Comma separated list of archives to be provided to the job. '
              'must be one of the following file formats: .zip, .tar, '
              '.tar.gz, or .tgz.'))
    parser.add_argument(
        'job_args',
        nargs=argparse.REMAINDER,
        help='The arguments to pass to the driver.')
    parser.add_argument(
        '--properties',
        metavar='PROPERTY=VALUE',
        type=arg_parsers.ArgDict(),
        help='A list of key value pairs to configure Hadoop.')
    parser.add_argument(
        '--properties-file', help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--driver-log-levels',
        metavar='PACKAGE=LEVEL',
        type=arg_parsers.ArgDict(),
        help=('A list of package to log4j log level pairs to configure '
              'driver logging. For example: root=FATAL,com.example=INFO'))

  @staticmethod
  def GetFilesByType(args):
    """Maps each staged-file category to the value parsed from args."""
    return dict(
        main_jar=args.main_jar,
        jars=args.jars,
        archives=args.archives,
        files=args.files)

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the hadoopJob member of the given job."""
    hadoop_job = messages.HadoopJob(
        mainJarFileUri=files_by_type['main_jar'],
        mainClass=args.main_class,
        jarFileUris=files_by_type['jars'],
        fileUris=files_by_type['files'],
        archiveUris=files_by_type['archives'],
        args=args.job_args or [],
        loggingConfig=logging_config)
    props = job_util.BuildJobProperties(args.properties, args.properties_file)
    if props:
      # Sorted so message comparisons in tests do not depend on ordering.
      hadoop_job.properties = encoding.DictToAdditionalPropertyMessage(
          props, messages.HadoopJob.PropertiesValue, sort_items=True)
    job.hadoopJob = hadoop_job

View File

@@ -0,0 +1,99 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for Hive Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class HiveBase(job_base.JobBase):
  """Hive job functionality shared by every release track."""

  @staticmethod
  def Args(parser):
    """Registers the flags used when submitting Hive jobs."""
    query_group = parser.add_mutually_exclusive_group(required=True)
    query_group.add_argument(
        '--execute', '-e',
        metavar='QUERY',
        dest='queries',
        action='append',
        default=[],
        help='A Hive query to execute as part of the job.')
    query_group.add_argument(
        '--file', '-f',
        help='HCFS URI of file containing Hive script to execute as the job.')
    parser.add_argument(
        '--jars',
        metavar='JAR',
        default=[],
        type=arg_parsers.ArgList(),
        help=('Comma separated list of jar files to be provided to the Hive '
              'and MR. May contain UDFs.'))
    parser.add_argument(
        '--params',
        metavar='PARAM=VALUE',
        type=arg_parsers.ArgDict(),
        help='A list of key value pairs to set variables in the Hive queries.')
    parser.add_argument(
        '--properties',
        metavar='PROPERTY=VALUE',
        type=arg_parsers.ArgDict(),
        help='A list of key value pairs to configure Hive.')
    parser.add_argument(
        '--properties-file', help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--continue-on-failure',
        action='store_true',
        help='Whether to continue if a single query fails.')

  @staticmethod
  def GetFilesByType(args):
    """Maps file-type keys (jars, file) to the parsed argument values."""
    return dict(jars=args.jars, file=args.file)

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, args):
    """Populates the hiveJob member of the given job."""
    hive_job = messages.HiveJob(
        queryFileUri=files_by_type['file'],
        jarFileUris=files_by_type['jars'],
        continueOnFailure=args.continue_on_failure)
    if args.params:
      hive_job.scriptVariables = encoding.DictToAdditionalPropertyMessage(
          args.params, messages.HiveJob.ScriptVariablesValue)
    if args.queries:
      hive_job.queryList = messages.QueryList(queries=args.queries)
    props = job_util.BuildJobProperties(args.properties, args.properties_file)
    if props:
      # Sorted so message comparisons in tests do not depend on ordering.
      hive_job.properties = encoding.DictToAdditionalPropertyMessage(
          props, messages.HiveJob.PropertiesValue, sort_items=True)
    job.hiveJob = hive_job

View File

@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for Pig Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class PigBase(job_base.JobBase):
  """Shared logic for submitting a Pig job to a cluster."""

  @staticmethod
  def Args(parser):
    """Registers the flags used when submitting Pig jobs."""
    query_group = parser.add_mutually_exclusive_group(required=True)
    query_group.add_argument(
        '--execute', '-e',
        metavar='QUERY',
        dest='queries',
        action='append',
        default=[],
        help='A Pig query to execute as part of the job.')
    query_group.add_argument(
        '--file', '-f',
        help='HCFS URI of file containing Pig script to execute as the job.')
    parser.add_argument(
        '--jars',
        metavar='JAR',
        default=[],
        type=arg_parsers.ArgList(),
        help=('Comma separated list of jar files to be provided to Pig and '
              'MR. May contain UDFs.'))
    parser.add_argument(
        '--params',
        metavar='PARAM=VALUE',
        type=arg_parsers.ArgDict(),
        help='A list of key value pairs to set variables in the Pig queries.')
    parser.add_argument(
        '--properties',
        metavar='PROPERTY=VALUE',
        type=arg_parsers.ArgDict(),
        help='A list of key value pairs to configure Pig.')
    parser.add_argument(
        '--properties-file', help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--continue-on-failure',
        action='store_true',
        help='Whether to continue if a single query fails.')
    parser.add_argument(
        '--driver-log-levels',
        metavar='PACKAGE=LEVEL',
        type=arg_parsers.ArgDict(),
        help=('A list of package to log4j log level pairs to configure '
              'driver logging. For example: root=FATAL,com.example=INFO'))

  @staticmethod
  def GetFilesByType(args):
    """Maps file-type keys (jars, file) to the parsed argument values."""
    return dict(jars=args.jars, file=args.file)

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the pigJob member of the given job."""
    pig_job = messages.PigJob(
        queryFileUri=files_by_type['file'],
        jarFileUris=files_by_type['jars'],
        continueOnFailure=args.continue_on_failure,
        loggingConfig=logging_config)
    if args.params:
      pig_job.scriptVariables = encoding.DictToAdditionalPropertyMessage(
          args.params, messages.PigJob.ScriptVariablesValue)
    if args.queries:
      pig_job.queryList = messages.QueryList(queries=args.queries)
    props = job_util.BuildJobProperties(args.properties, args.properties_file)
    if props:
      # Sorted so message comparisons in tests do not depend on ordering.
      pig_job.properties = encoding.DictToAdditionalPropertyMessage(
          props, messages.PigJob.PropertiesValue, sort_items=True)
    job.pigJob = pig_job

View File

@@ -0,0 +1,102 @@
# -*- coding: utf-8 -*- #
# Copyright 2019 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for the Presto job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class PrestoBase(job_base.JobBase):
  """Submit a Presto job to a cluster."""

  @staticmethod
  def Args(parser):
    """Parses command line arguments specific to submitting Presto jobs.

    Args:
      parser: The argparse parser to register Presto flags on.
    """
    driver = parser.add_mutually_exclusive_group(required=True)
    driver.add_argument(
        '--execute',
        '-e',
        metavar='QUERY',
        dest='queries',
        action='append',
        default=[],
        help='A Presto query to execute.')
    driver.add_argument(
        '--file',
        '-f',
        help='HCFS URI of file containing the Presto script to execute.')
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        # PROPERTY=VALUE for consistency with the other dataproc job types'
        # --properties flags (was PARAM=VALUE, which named no parameter here).
        metavar='PROPERTY=VALUE',
        help='A list of key value pairs to set Presto session properties.')
    parser.add_argument(
        '--properties-file',
        help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=('A list of package-to-log4j log level pairs to configure driver '
              'logging. For example: root=FATAL,com.example=INFO'))
    parser.add_argument(
        '--continue-on-failure',
        action='store_true',
        help='Whether to continue if a query fails.')
    parser.add_argument(
        '--query-output-format',
        help=('The query output display format. See the Presto documentation '
              'for supported output formats.'))
    parser.add_argument(
        '--client-tags',
        type=arg_parsers.ArgList(),
        metavar='CLIENT_TAG',
        help='A list of Presto client tags to attach to this query.')

  @staticmethod
  def GetFilesByType(args):
    """Returns a dict of files by their type (the query file, if any)."""
    return {'file': args.file}

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the prestoJob member of the given job.

    Args:
      messages: The API messages module.
      job: The job message to mutate.
      files_by_type: Dict produced by GetFilesByType (after staging).
      logging_config: Optional LoggingConfig message for driver logging.
      args: The parsed command-line arguments.
    """
    presto_job = messages.PrestoJob(
        continueOnFailure=args.continue_on_failure,
        queryFileUri=files_by_type['file'],
        loggingConfig=logging_config)
    if args.queries:
      presto_job.queryList = messages.QueryList(queries=args.queries)
    if args.query_output_format:
      presto_job.outputFormat = args.query_output_format
    if args.client_tags:
      presto_job.clientTags = args.client_tags
    job_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file)
    if job_properties:
      # Sort properties to ensure tests comparing messages not fail on ordering.
      presto_job.properties = encoding.DictToAdditionalPropertyMessage(
          job_properties, messages.PrestoJob.PropertiesValue, sort_items=True)
    job.prestoJob = presto_job

View File

@@ -0,0 +1,158 @@
# -*- coding: utf-8 -*- #
# Copyright 2023 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for PySpark Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
@base.Hidden
class PyFlinkBase(job_base.JobBase):
  """Submit a PyFlink job to a cluster."""

  @staticmethod
  def Args(parser):
    """Performs command-line argument parsing specific to PyFlink.

    Args:
      parser: The argparse parser to register PyFlink flags on.
    """
    parser.add_argument(
        'py_file', help='HCFS URI of the main Python file.'
    )
    parser.add_argument(
        '--savepoint',
        help='HCFS URI of the savepoint that contains the saved job progress.',
    )
    parser.add_argument(
        '--py-files',
        type=arg_parsers.ArgList(),
        metavar='PY_FILE',
        default=[],
        help=(
            'Comma-separated list of custom Python files to provide to the'
            ' job. Supports standard resource file suffixes, such as'
            ' .py, .egg, .zip and .whl. This also supports passing a directory.'
        ),
    )
    parser.add_argument(
        '--py-requirements',
        help=(
            'A requirements.txt file that defines third-party dependencies.'
            ' These dependencies are installed and added to the PYTHONPATH of'
            ' the python UDF worker.'
        ),
    )
    parser.add_argument(
        '--py-module',
        # Fixed flag reference: the flag registered above is --py-files; the
        # help text previously pointed users at a nonexistent --pyFiles flag.
        help=(
            'Python module with program entry point. This option should be used'
            ' with --py-files.'
        ),
    )
    parser.add_argument(
        '--jars',
        type=arg_parsers.ArgList(),
        metavar='JAR',
        default=[],
        help=(
            'Comma-separated list of jar files to provide to the '
            'task manager classpaths.'
        ),
    )
    parser.add_argument(
        '--archives',
        type=arg_parsers.ArgList(),
        metavar='ARCHIVE',
        default=[],
        help=(
            'Comma-separated list of archives to be extracted into the working'
            ' directory of the python UDF worker. Must be one of the following '
            'file formats: .zip, .tar, .tar.gz, or .tgz.'
        ),
    )
    parser.add_argument(
        'job_args',
        nargs=argparse.REMAINDER,
        help='The job arguments to pass.',
    )
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PROPERTY=VALUE',
        help=(
            'List of key=value pairs to configure PyFlink. For a list of '
            'available properties, see: '
            'https://nightlies.apache.org/flink/flink-docs-master/docs/deployment/config/'
        ),
    )
    parser.add_argument(
        '--properties-file', help=job_util.PROPERTIES_FILE_HELP_TEXT
    )
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=(
            'List of key=value pairs to configure driver logging, where the key'
            ' is a package and the value is the log4j log level. For '
            'example: root=FATAL,com.example=INFO.'
        ),
    )

  @staticmethod
  def GetFilesByType(args):
    """Returns a dict mapping file-type keys to the parsed argument values."""
    return {
        'py_file': args.py_file,
        'py_files': args.py_files,
        'archives': args.archives,
        'py_requirements': args.py_requirements,
        'jars': args.jars,
    }

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the pyflinkJob member of the given job.

    Args:
      messages: The API messages module.
      job: The job message to mutate.
      files_by_type: Dict produced by GetFilesByType (after staging).
      logging_config: Optional LoggingConfig message for driver logging.
      args: The parsed command-line arguments.
    """
    pyflink_job = messages.PyFlinkJob(
        args=args.job_args or [],
        archiveUris=files_by_type['archives'],
        pythonFileUris=files_by_type['py_files'],
        jarFileUris=files_by_type['jars'],
        pythonRequirements=files_by_type['py_requirements'],
        pythonModule=args.py_module,
        mainPythonFileUri=files_by_type['py_file'],
        loggingConfig=logging_config,
        savepointUri=args.savepoint
    )
    job_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file
    )
    if job_properties:
      # Sort properties to ensure tests comparing messages not fail on ordering.
      pyflink_job.properties = encoding.DictToAdditionalPropertyMessage(
          job_properties, messages.PyFlinkJob.PropertiesValue, sort_items=True
      )
    job.pyflinkJob = pyflink_job

View File

@@ -0,0 +1,128 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for PySpark Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class PySparkBase(job_base.JobBase):
  """Shared logic for submitting a PySpark job to a cluster."""

  @staticmethod
  def Args(parser):
    """Registers the flags used when submitting PySpark jobs."""
    parser.add_argument(
        'py_file', help='Main .py file to run as the driver.')
    parser.add_argument(
        '--py-files',
        metavar='PY_FILE',
        default=[],
        type=arg_parsers.ArgList(),
        help=('Comma separated list of Python files to be provided to the '
              'job. Must be one of the following file formats ".py, .zip, '
              'or .egg".'))
    parser.add_argument(
        '--jars',
        metavar='JAR',
        default=[],
        type=arg_parsers.ArgList(),
        help=('Comma separated list of jar files to be provided to '
              'the executor and driver classpaths.'))
    parser.add_argument(
        '--files',
        metavar='FILE',
        default=[],
        type=arg_parsers.ArgList(),
        help=('Comma separated list of files to be placed in the '
              'working directory of both the app driver and executors.'))
    parser.add_argument(
        '--archives',
        metavar='ARCHIVE',
        default=[],
        type=arg_parsers.ArgList(),
        help=('Comma separated list of archives to be extracted into the '
              'working directory of each executor. Must be one of the '
              'following file formats: .zip, .tar, .tar.gz, or .tgz.'))
    parser.add_argument(
        'job_args',
        nargs=argparse.REMAINDER,
        help='Arguments to pass to the driver.')
    parser.add_argument(
        '--properties',
        metavar='PROPERTY=VALUE',
        type=arg_parsers.ArgDict(),
        help=('List of key value pairs to configure PySpark. For a list '
              'of available properties, see: '
              'https://spark.apache.org/docs/latest/configuration.html'
              '#available-properties.'))
    parser.add_argument(
        '--properties-file', help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--driver-log-levels',
        metavar='PACKAGE=LEVEL',
        type=arg_parsers.ArgDict(),
        help=('List of key value pairs to configure driver logging, where '
              'key is a package and value is the log4j log level. For '
              'example: root=FATAL,com.example=INFO'))

  @staticmethod
  def GetFilesByType(args):
    """Maps each staged-file category to the value parsed from args."""
    return dict(
        py_file=args.py_file,
        py_files=args.py_files,
        archives=args.archives,
        files=args.files,
        jars=args.jars)

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the pysparkJob member of the given job."""
    pyspark_job = messages.PySparkJob(
        mainPythonFileUri=files_by_type['py_file'],
        pythonFileUris=files_by_type['py_files'],
        jarFileUris=files_by_type['jars'],
        fileUris=files_by_type['files'],
        archiveUris=files_by_type['archives'],
        args=args.job_args or [],
        loggingConfig=logging_config)
    props = job_util.BuildJobProperties(args.properties, args.properties_file)
    if props:
      # Sorted so message comparisons in tests do not depend on ordering.
      pyspark_job.properties = encoding.DictToAdditionalPropertyMessage(
          props, messages.PySparkJob.PropertiesValue, sort_items=True)
    job.pysparkJob = pyspark_job

View File

@@ -0,0 +1,114 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for Spark Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class SparkBase(job_base.JobBase):
  """Shared logic for submitting a Java or Scala Spark job to a cluster."""

  @staticmethod
  def Args(parser):
    """Registers the flags used when submitting Spark jobs."""
    parser.add_argument(
        '--jars',
        metavar='JAR',
        default=[],
        type=arg_parsers.ArgList(),
        help=('Comma separated list of jar files to be provided to '
              'the executor and driver classpaths.'))
    parser.add_argument(
        '--files',
        metavar='FILE',
        default=[],
        type=arg_parsers.ArgList(),
        help=('Comma separated list of files to be placed in the '
              'working directory of both the app driver and executors.'))
    parser.add_argument(
        '--archives',
        metavar='ARCHIVE',
        default=[],
        type=arg_parsers.ArgList(),
        help=('Comma separated list of archives to be extracted into the '
              'working directory of each executor. Must be one of the '
              'following file formats: .zip, .tar, .tar.gz, or .tgz.'))
    parser.add_argument(
        'job_args',
        nargs=argparse.REMAINDER,
        help='Arguments to pass to the driver.')
    parser.add_argument(
        '--properties',
        metavar='PROPERTY=VALUE',
        type=arg_parsers.ArgDict(),
        help=('List of key value pairs to configure Spark. For a list '
              'of available properties, see: '
              'https://spark.apache.org/docs/latest/configuration.html'
              '#available-properties.'))
    parser.add_argument(
        '--properties-file', help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--driver-log-levels',
        metavar='PACKAGE=LEVEL',
        type=arg_parsers.ArgDict(),
        help=('List of package to log4j log level pairs to configure '
              'driver logging. For example: root=FATAL,com.example=INFO'))

  @staticmethod
  def GetFilesByType(args):
    """Maps each staged-file category to the value parsed from args."""
    return dict(
        main_jar=args.main_jar,
        jars=args.jars,
        archives=args.archives,
        files=args.files)

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the sparkJob member of the given job."""
    spark_job = messages.SparkJob(
        mainJarFileUri=files_by_type['main_jar'],
        mainClass=args.main_class,
        jarFileUris=files_by_type['jars'],
        fileUris=files_by_type['files'],
        archiveUris=files_by_type['archives'],
        args=args.job_args or [],
        loggingConfig=logging_config)
    props = job_util.BuildJobProperties(args.properties, args.properties_file)
    if props:
      # Sorted so message comparisons in tests do not depend on ordering.
      spark_job.properties = encoding.DictToAdditionalPropertyMessage(
          props, messages.SparkJob.PropertiesValue, sort_items=True)
    job.sparkJob = spark_job

View File

@@ -0,0 +1,104 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for SparkR Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class SparkRBase(job_base.JobBase):
  """Submit a SparkR job to a cluster.

  Shared argparse wiring and Job-message population for the SparkR job
  submission commands.
  """
  @staticmethod
  def Args(parser):
    """Performs command-line argument parsing specific to SparkR."""
    parser.add_argument('r_file', help='Main .R file to run as the driver.')
    parser.add_argument(
        '--files',
        type=arg_parsers.ArgList(),
        metavar='FILE',
        default=[],
        help='Comma separated list of files to be placed in the working '
        'directory of both the app driver and executors.')
    parser.add_argument(
        '--archives',
        type=arg_parsers.ArgList(),
        metavar='ARCHIVE',
        default=[],
        help=(
            'Comma separated list of archives to be extracted into the working '
            'directory of each executor. '
            'Must be one of the following file formats: .zip, .tar, .tar.gz, '
            'or .tgz.'))
    # REMAINDER collects everything after the recognized flags so arbitrary
    # trailing arguments are forwarded to the driver untouched.
    parser.add_argument(
        'job_args',
        nargs=argparse.REMAINDER,
        help='Arguments to pass to the driver.')
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PROPERTY=VALUE',
        help='List of key value pairs to configure SparkR. For a list of '
        'available properties, see: '
        'https://spark.apache.org/docs/latest/'
        'configuration.html#available-properties.')
    parser.add_argument(
        '--properties-file',
        help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=('List of key value pairs to configure driver logging, where key '
              'is a package and value is the log4j log level. For '
              'example: root=FATAL,com.example=INFO'))
  @staticmethod
  def GetFilesByType(args):
    """Returns a dict of files by their type (r_file, archives, files)."""
    return {
        'r_file': args.r_file,
        'archives': args.archives,
        'files': args.files
    }
  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the sparkRJob member of the given job.

    Args:
      messages: Dataproc API messages module.
      job: The messages.Job proto to mutate.
      files_by_type: Dict produced by GetFilesByType.
      logging_config: messages.LoggingConfig for driver log levels, or None.
      args: Parsed command-line namespace.
    """
    spark_r_job = messages.SparkRJob(
        args=args.job_args or [],
        archiveUris=files_by_type['archives'],
        fileUris=files_by_type['files'],
        mainRFileUri=files_by_type['r_file'],
        loggingConfig=logging_config,
    )
    job_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file
    )
    if job_properties:
      # Sort properties to ensure tests comparing messages not fail on
      # ordering. Use DictToAdditionalPropertyMessage (the map-field helper)
      # with sort_items=True, matching the Spark/SparkSql/Trino job builders;
      # the previous DictToMessage call produced an unsorted properties
      # message inconsistent with the sibling job types.
      spark_r_job.properties = encoding.DictToAdditionalPropertyMessage(
          job_properties, messages.SparkRJob.PropertiesValue, sort_items=True
      )
    job.sparkRJob = spark_r_job

View File

@@ -0,0 +1,106 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for Spark Sql Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class SparkSqlBase(job_base.JobBase):
  """Submit a Spark SQL job to a cluster.

  Shared argparse wiring and Job-message population for the Spark SQL job
  submission commands.
  """
  @staticmethod
  def Args(parser):
    """Parses command-line arguments specific to submitting SparkSql jobs."""
    # Exactly one driver source is required: inline queries or a script file.
    driver = parser.add_mutually_exclusive_group(required=True)
    driver.add_argument(
        '--execute', '-e',
        metavar='QUERY',
        dest='queries',
        action='append',
        default=[],
        help='A Spark SQL query to execute as part of the job.')
    driver.add_argument(
        '--file', '-f',
        help=('HCFS URI of file containing Spark SQL script to execute as '
              'the job.'))
    parser.add_argument(
        '--jars',
        type=arg_parsers.ArgList(),
        metavar='JAR',
        default=[],
        help=('Comma separated list of jar files to be provided to the '
              'executor and driver classpaths. May contain UDFs.'))
    # NOTE(review): the two help strings below say "Hive"; this looks like
    # copy-paste from the Hive job flags. Confirm intent before changing the
    # user-visible text.
    parser.add_argument(
        '--params',
        type=arg_parsers.ArgDict(),
        metavar='PARAM=VALUE',
        help='A list of key value pairs to set variables in the Hive queries.')
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PROPERTY=VALUE',
        help='A list of key value pairs to configure Hive.')
    parser.add_argument(
        '--properties-file',
        help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=('A list of package to log4j log level pairs to configure driver '
              'logging. For example: root=FATAL,com.example=INFO'))
  @staticmethod
  def GetFilesByType(args):
    """Returns a dict of files by their type (jars and the query file)."""
    return {
        'jars': args.jars,
        'file': args.file}
  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the sparkSqlJob member of the given job.

    Args:
      messages: Dataproc API messages module.
      job: The messages.Job proto to mutate.
      files_by_type: Dict produced by GetFilesByType.
      logging_config: messages.LoggingConfig for driver log levels, or None.
      args: Parsed command-line namespace.
    """
    spark_sql_job = messages.SparkSqlJob(
        jarFileUris=files_by_type['jars'],
        queryFileUri=files_by_type['file'],
        loggingConfig=logging_config,
    )
    # --execute and --file are mutually exclusive, so at most one of
    # queryList / queryFileUri ends up populated.
    if args.queries:
      spark_sql_job.queryList = messages.QueryList(queries=args.queries)
    if args.params:
      spark_sql_job.scriptVariables = encoding.DictToAdditionalPropertyMessage(
          args.params, messages.SparkSqlJob.ScriptVariablesValue
      )
    job_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file
    )
    if job_properties:
      # Sort properties to ensure tests comparing messages not fail on ordering.
      spark_sql_job.properties = encoding.DictToAdditionalPropertyMessage(
          job_properties, messages.SparkSqlJob.PropertiesValue, sort_items=True
      )
    job.sparkSqlJob = spark_sql_job

View File

@@ -0,0 +1,174 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for building the dataproc clusters CLI."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.util.args import labels_util
from googlecloudsdk.core import log
class JobSubmitter(base.Command):
  """Submit a job to a cluster.

  Base command for job submission: registers the flags shared by every job
  type and implements the submit-and-wait flow. Run() calls several methods
  (PopulateFilesByType, GetStagingDir, ValidateAndStageFiles) and reads args
  (id, bucket, async_) that are not defined/registered here -- presumably
  supplied by subclasses/mixins; confirm against the concrete command.
  """
  @classmethod
  def Args(cls, parser):
    """Register flags for this command."""
    labels_util.AddCreateLabelsFlags(parser)
    parser.add_argument(
        '--max-failures-per-hour',
        type=int,
        help=('Specifies the maximum number of times a job can be restarted '
              'per hour in event of failure. '
              'Default is 0 (no retries after job failure).'))
    parser.add_argument(
        '--max-failures-total',
        type=int,
        help=('Specifies the maximum total number of times a job can be '
              'restarted after the job fails. '
              'Default is 0 (no retries after job failure).'))
    parser.add_argument(
        '--driver-required-memory-mb',
        type=int,
        help=(
            'The memory allocation requested by the job driver in megabytes'
            ' (MB) for execution on the driver node group (it is used only by'
            ' clusters with a driver node group).'
        ),
    )
    parser.add_argument(
        '--driver-required-vcores',
        type=int,
        help=(
            'The vCPU allocation requested by the job driver for execution on'
            ' the driver node group (it is used only by clusters with a driver'
            ' node group).'
        ),
    )
    parser.add_argument(
        '--ttl',
        hidden=True,
        type=arg_parsers.Duration(),
        help=(
            'The maximum duration this job is allowed to run before being'
            ' killed automatically. Specified using a s, m, h, or d (seconds,'
            ' minutes, hours, or days) suffix. The minimum value is 10 minutes'
            ' (10m), and the maximum value is 14 days (14d) Run'
            ' [gcloud topic datetimes]'
            ' (https://cloud.google.com/sdk/gcloud/reference/topic/datetimes)'
            ' for information on duration formats.'
        ),
    )
    # Placement is either a named cluster or label-based selection -- exactly
    # one of the two must be given.
    cluster_placement = parser.add_mutually_exclusive_group(required=True)
    cluster_placement.add_argument(
        '--cluster', help='The Dataproc cluster to submit the job to.'
    )
    labels_util.GetCreateLabelsFlag(
        'Labels of Dataproc cluster on which to place the job.',
        'cluster-labels',
    ).AddToParser(cluster_placement)
  def Run(self, args):
    """This is what gets called when the user runs this command."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    # The request id doubles as the job id when the user did not pass --id.
    request_id = util.GetUniqueId()
    job_id = args.id if args.id else request_id
    # Don't use ResourceArgument, because --id is hidden by default
    job_ref = util.ParseJob(job_id, dataproc)
    # Collect the job's file references for staging (defined on JobBase).
    self.PopulateFilesByType(args)
    cluster = None
    if args.cluster is not None:
      # Resolve the named cluster; its config is passed to GetStagingDir.
      cluster_ref = util.ParseCluster(args.cluster, dataproc)
      request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
          projectId=cluster_ref.projectId,
          region=cluster_ref.region,
          clusterName=cluster_ref.clusterName)
      cluster = dataproc.client.projects_regions_clusters.Get(request)
    cluster_pool = None
    if args.cluster_labels is not None:
      # The special 'cluster-pool' label value is extracted and handled
      # separately from ordinary placement labels.
      if 'cluster-pool' in args.cluster_labels:
        cluster_pool = args.cluster_labels['cluster-pool']
    # NOTE(review): args.bucket is not registered in Args above; presumably
    # added by the concrete command -- confirm against the subclass.
    self._staging_dir = self.GetStagingDir(
        cluster, cluster_pool, job_ref.jobId, bucket=args.bucket)
    self.ValidateAndStageFiles()
    job = dataproc.messages.Job(
        reference=dataproc.messages.JobReference(
            projectId=job_ref.projectId, jobId=job_ref.jobId),
        placement=dataproc.messages.JobPlacement(clusterName=args.cluster))
    self.ConfigureJob(dataproc.messages, job, args)
    # NOTE(review): driver scheduling config is only attached when BOTH
    # driver resource flags are set; one flag alone is silently ignored.
    if args.driver_required_memory_mb and args.driver_required_vcores:
      driver_scheduling_config = dataproc.messages.DriverSchedulingConfig(
          memoryMb=args.driver_required_memory_mb,
          vcores=args.driver_required_vcores)
      job.driverSchedulingConfig = driver_scheduling_config
    if args.max_failures_per_hour or args.max_failures_total or args.ttl:
      scheduling = dataproc.messages.JobScheduling(
          maxFailuresPerHour=args.max_failures_per_hour
          if args.max_failures_per_hour
          else None,
          maxFailuresTotal=args.max_failures_total
          if args.max_failures_total
          else None,
          # ttl is sent as a seconds-suffixed duration string, e.g. '600s'.
          ttl=str(args.ttl) + 's' if args.ttl else None,
      )
      job.scheduling = scheduling
    request = dataproc.messages.DataprocProjectsRegionsJobsSubmitRequest(
        projectId=job_ref.projectId,
        region=job_ref.region,
        submitJobRequest=dataproc.messages.SubmitJobRequest(
            job=job,
            requestId=request_id))
    job = dataproc.client.projects_regions_jobs.Submit(request)
    log.status.Print('Job [{0}] submitted.'.format(job_id))
    if not args.async_:
      # Block, streaming driver output, until the job reaches DONE or ERROR.
      job = util.WaitForJobTermination(
          dataproc,
          job,
          job_ref,
          message='Waiting for job completion',
          goal_state=dataproc.messages.JobStatus.StateValueValuesEnum.DONE,
          error_state=dataproc.messages.JobStatus.StateValueValuesEnum.ERROR,
          stream_driver_log=True)
      log.status.Print('Job [{0}] finished successfully.'.format(job_id))
    return job
  @staticmethod
  def ConfigureJob(messages, job, args):
    """Add type-specific job configuration to job message."""
    # Parse labels (if present)
    job.labels = labels_util.ParseCreateArgs(args, messages.Job.LabelsValue)
    # Label-based cluster placement from the --cluster-labels flag.
    job.placement.clusterLabels = labels_util.ParseCreateArgs(
        args,
        messages.JobPlacement.ClusterLabelsValue,
        labels_dest='cluster_labels')

View File

@@ -0,0 +1,102 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for the Trino job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class TrinoBase(job_base.JobBase):
  """Submit a Trino job to a cluster.

  Shared argparse wiring and Job-message population for the Trino job
  submission commands.
  """
  @staticmethod
  def Args(parser):
    """Parses command line arguments specific to submitting Trino jobs."""
    # Exactly one driver source is required: inline queries or a script file.
    driver = parser.add_mutually_exclusive_group(required=True)
    driver.add_argument(
        '--execute',
        '-e',
        metavar='QUERY',
        dest='queries',
        action='append',
        default=[],
        help='A Trino query to execute.')
    driver.add_argument(
        '--file',
        '-f',
        help='HCFS URI of file containing the Trino script to execute.')
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PARAM=VALUE',
        help='A list of key value pairs to set Trino session properties.')
    parser.add_argument(
        '--properties-file',
        help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=('A list of package-to-log4j log level pairs to configure driver '
              'logging. For example: root=FATAL,com.example=INFO'))
    parser.add_argument(
        '--continue-on-failure',
        action='store_true',
        help='Whether to continue if a query fails.')
    parser.add_argument(
        '--query-output-format',
        help=('The query output display format. See the Trino documentation '
              'for supported output formats.'))
    parser.add_argument(
        '--client-tags',
        type=arg_parsers.ArgList(),
        metavar='CLIENT_TAG',
        help='A list of Trino client tags to attach to this query.')
  @staticmethod
  def GetFilesByType(args):
    """Returns a dict of files by their type (only the query file here)."""
    return {'file': args.file}
  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the trinoJob member of the given job.

    Args:
      messages: Dataproc API messages module.
      job: The messages.Job proto to mutate.
      files_by_type: Dict produced by GetFilesByType.
      logging_config: messages.LoggingConfig for driver log levels, or None.
      args: Parsed command-line namespace.
    """
    trino_job = messages.TrinoJob(
        continueOnFailure=args.continue_on_failure,
        queryFileUri=files_by_type['file'],
        loggingConfig=logging_config)
    # --execute and --file are mutually exclusive, so at most one of
    # queryList / queryFileUri ends up populated.
    if args.queries:
      trino_job.queryList = messages.QueryList(queries=args.queries)
    if args.query_output_format:
      trino_job.outputFormat = args.query_output_format
    if args.client_tags:
      trino_job.clientTags = args.client_tags
    job_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file)
    if job_properties:
      # Sort properties to ensure tests comparing messages not fail on ordering.
      trino_job.properties = encoding.DictToAdditionalPropertyMessage(
          job_properties, messages.TrinoJob.PropertiesValue, sort_items=True)
    job.trinoJob = trino_job

View File

@@ -0,0 +1,88 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper class for jobs."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import exceptions
from googlecloudsdk.api_lib.dataproc import storage_helpers
from googlecloudsdk.core import yaml
from googlecloudsdk.core.console import console_io
# Shared help text for the --properties-file flag, referenced from each job
# type's Args() registration (spark, spark-r, spark-sql, trino, ...).
PROPERTIES_FILE_HELP_TEXT = """\
Path to a local file or a file in a Cloud Storage bucket containing
configuration properties for the job. The client machine running this command
must have read permission to the file.
Specify properties in the form of property=value in the text file. For example:
```
# Properties to set for the job:
key1=value1
key2=value2
# Comment out properties not used.
# key3=value3
```
If a property is set in both `--properties` and `--properties-file`, the
value defined in `--properties` takes precedence.
"""
def BuildJobProperties(arg_properties, properties_file):
  """Build job properties.

  Merges properties from the arg_properties and properties_file. If a property
  is set in both, the value in arg_properties is used.

  Args:
    arg_properties: A dictionary of property=value pairs.
    properties_file: Path or URI to a text file with property=value lines
      and/or comments. File can be a local file or a gs:// file.

  Returns:
    A dictionary of merged properties.

  Raises:
    exceptions.Error: The properties file could not be read.
    exceptions.ParseError: The properties file contents could not be parsed.

  Example:
    BuildJobProperties({'foo':'bar'}, 'gs://test-bucket/job_properties.conf')
  """
  job_properties = {}
  if properties_file:
    try:
      if properties_file.startswith('gs://'):
        data = storage_helpers.ReadObject(properties_file)
      else:
        data = console_io.ReadFromFileOrStdin(properties_file, binary=False)
    except Exception as e:  # pylint: disable=broad-except
      raise exceptions.Error('Cannot read properties-file: {0}'.format(e))
    try:
      yaml.allow_duplicate_keys = True
      # Convert property=value lines to YAML mappings. Only the FIRST '=' on
      # each line is the key/value separator, so values that themselves
      # contain '=' (e.g. -Dflag=value Java options) survive intact; the old
      # replace-all behavior mangled such values.
      # NOTE(review): values containing ': ' can still confuse the YAML
      # parser -- pre-existing limitation of this format conversion.
      yaml_data = '\n'.join(
          line.replace('=', ': ', 1) for line in data.strip().splitlines())
      key_values = yaml.load(yaml_data, round_trip=True)
      if key_values:
        for key, value in key_values.items():
          job_properties[key] = value
    except Exception:
      raise exceptions.ParseError(
          'Cannot parse properties-file: {0}, '.format(properties_file) +
          'make sure file format is a text file with list of key=value')
  if arg_properties:
    # Flag-supplied properties take precedence over file-supplied ones.
    job_properties.update(arg_properties)
  return job_properties