feat: Add new gcloud commands, API clients, and third-party libraries across various services.

This commit is contained in:
2026-01-01 20:26:35 +01:00
parent 5e23cbece0
commit a19e592eb7
25221 changed files with 8324611 additions and 0 deletions

View File

@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*- #
# Copyright 2016 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@@ -0,0 +1,147 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory class for Batch message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.command_lib.dataproc.shared_messages import (
environment_config_factory as ecf,
)
from googlecloudsdk.command_lib.dataproc.shared_messages import (
runtime_config_factory as rcf,
)
from googlecloudsdk.command_lib.util.args import labels_util
class BatchMessageFactory(object):
  """Factory class for Batch message.

  Factory class for configuring argument parser and creating a Batch message
  from the parsed arguments.
  """

  # Error message templates used when the caller passes a bad batch payload.
  INVALID_BATCH_TYPE_ERR_MSG = 'Invalid batch job type: {}.'
  MISSING_BATCH_ERR_MSG = 'Missing batch job.'

  def __init__(
      self,
      dataproc,
      runtime_config_factory_override=None,
      environment_config_factory_override=None,
  ):
    """Builder class for Batch message.

    Batch message factory. Only the flags added in AddArguments are handled.
    Users need to provide the batch job type specific message during message
    creation.

    Args:
      dataproc: An api_lib.dataproc.Dataproc instance.
      runtime_config_factory_override: Override the default RuntimeConfigFactory
        instance.
      environment_config_factory_override: Override the default
        EnvironmentConfigFactory instance.
    """
    self.dataproc = dataproc

    # Construct available batch type to keyword mapping. The keyword is the
    # Batch proto field name the job message is assigned to in GetMessage.
    self._batch2key = {
        self.dataproc.messages.SparkBatch: 'sparkBatch',
        self.dataproc.messages.SparkRBatch: 'sparkRBatch',
        self.dataproc.messages.SparkSqlBatch: 'sparkSqlBatch',
        self.dataproc.messages.PySparkBatch: 'pysparkBatch',
        self.dataproc.messages.RayBatch: 'rayBatch',
    }

    self.runtime_config_factory = runtime_config_factory_override
    if not self.runtime_config_factory:
      self.runtime_config_factory = rcf.RuntimeConfigFactory(
          self.dataproc, include_autotuning=True, include_cohort=True
      )

    self.environment_config_factory = environment_config_factory_override
    if not self.environment_config_factory:
      self.environment_config_factory = ecf.EnvironmentConfigFactory(
          self.dataproc
      )

  def GetMessage(self, args, batch_job):
    """Creates a Batch message from given args.

    Creates a Batch message from given arguments. Only the arguments added in
    AddArguments are handled. Users need to provide the batch job type
    specific message during message creation.

    Args:
      args: Parsed argument.
      batch_job: Batch type job instance.

    Returns:
      A Batch message instance, or None if no field was populated.

    Raises:
      AttributeError: When batch_job is missing or is not one of the
        supported batch job message types.
    """
    if not batch_job:
      raise AttributeError(BatchMessageFactory.MISSING_BATCH_ERR_MSG)

    if not isinstance(batch_job, tuple(self._batch2key.keys())):
      raise AttributeError(
          BatchMessageFactory.INVALID_BATCH_TYPE_ERR_MSG.format(type(batch_job))
      )

    # Assign the job message to its matching Batch field (e.g. 'sparkBatch').
    kwargs = {}
    kwargs[self._batch2key[type(batch_job)]] = batch_job

    if args.labels:
      kwargs['labels'] = labels_util.ParseCreateArgs(
          args, self.dataproc.messages.Batch.LabelsValue
      )

    runtime_config = self.runtime_config_factory.GetMessage(args)
    if runtime_config:
      kwargs['runtimeConfig'] = runtime_config

    environment_config = self.environment_config_factory.GetMessage(args)
    if environment_config:
      kwargs['environmentConfig'] = environment_config

    if not kwargs:
      return None

    return self.dataproc.messages.Batch(**kwargs)
def AddArguments(parser):
  """Adds arguments related to Batch message.

  Add Batch arguments to the given parser. Job specific arguments are not
  handled, and need to be set during factory instantiation.

  Args:
    parser: An argument parser.
  """
  labels_util.AddCreateLabelsFlags(parser)
  _AddDependency(parser)
def _AddDependency(parser):
  """Adds flags owned by the runtime/environment config sub-factories."""
  rcf.AddArguments(parser, include_autotuning=True, include_cohort=True)
  ecf.AddArguments(parser)

View File

@@ -0,0 +1,86 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Batches submit command utility."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import sys
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.api_lib.dataproc.poller import gce_batch_poller
from googlecloudsdk.api_lib.dataproc.poller import rm_batch_poller
from googlecloudsdk.api_lib.util import waiter
from googlecloudsdk.command_lib.dataproc.batches import batch_version_util
from googlecloudsdk.command_lib.dataproc.batches import (
batches_create_request_factory)
from googlecloudsdk.core import log
def Submit(batch_workload_message, dataproc, args):
  """Submits a batch workload.

  Submits a batch workload and streams output if necessary.
  Make sure the parsed argument contains all the necessary arguments before
  calling. It should be fine if the arg parser was passed to
  BatchesCreateRequestFactory's AddArguments function previously.

  Args:
    batch_workload_message: A batch workload message. For example, a SparkBatch
      instance.
    dataproc: An api_lib.dataproc.Dataproc instance.
    args: Parsed arguments.

  Returns:
    Remote return value for a BatchesCreate request.
  """
  request = batches_create_request_factory.BatchesCreateRequestFactory(
      dataproc).GetRequest(args, batch_workload_message)
  batch_op = dataproc.client.projects_locations_batches.Create(request)
  log.status.Print('Batch [{}] submitted.'.format(request.batchId))

  # Surface any server-side warnings attached to the operation right away.
  metadata = util.ParseOperationJsonMetadata(
      batch_op.metadata, dataproc.messages.BatchOperationMetadata)
  for warning in metadata.warnings:
    log.warning(warning)

  if not args.async_:
    # Get the batch workload to obtain the resolved version.
    batch_ref = '{}/batches/{}'.format(request.parent, request.batchId)
    batch = dataproc.client.projects_locations_batches.Get(
        dataproc.messages.DataprocProjectsLocationsBatchesGetRequest(
            name=batch_ref
        )
    )
    # Resource-manager-based batches and GCE batches use different pollers.
    if batch_version_util.is_rm_batch(batch):
      poller = rm_batch_poller.RmBatchPoller(dataproc)
    else:
      poller = gce_batch_poller.GceBatchPoller(dataproc)
    # Poll every 5 seconds with no overall deadline (max_wait_ms=sys.maxsize)
    # until the batch reaches a terminal state.
    waiter.WaitFor(
        poller,
        batch_ref,
        max_wait_ms=sys.maxsize,
        sleep_ms=5000,
        wait_ceiling_ms=5000,
        exponential_sleep_multiplier=1.0,
        custom_tracker=None,
        tracker_update_func=poller.TrackerUpdateFunction,
    )
    log.status.Print('Batch [{}] finished.'.format(request.batchId))
  return batch_op

View File

@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*- #
# Copyright 2025 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility class for working with batch versions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
def is_rm_batch(batch):
  """Returns true if the batch is using resource manager based on the version."""
  version = batch.runtimeConfig.version
  # Non-Ray workloads on runtime 1.x/2.x run on GCE; everything else
  # (Ray batches, or newer runtime versions) uses resource manager.
  runs_on_gce = batch.rayBatch is None and version.startswith(('1', '2'))
  return not runs_on_gce

View File

@@ -0,0 +1,150 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory class for BatchesCreateRequest message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import re
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.dataproc.batches import batch_message_factory
class BatchesCreateRequestFactory(object):
  """Factory class handling BatchesCreateRequest message.

  Factory class for configuring the argument parser and creating a
  BatchesCreateRequest message from the parsed arguments.
  """

  def __init__(self, dataproc, batch_message_factory_override=None):
    """Factory for BatchesCreateRequest message.

    Only handles general submit flags added by this class. User needs to
    provide job specific message when creating the request message.

    Args:
      dataproc: An api_lib.dataproc.Dataproc instance.
      batch_message_factory_override: Override BatchMessageFactory instance.
    """
    self.dataproc = dataproc
    self.batch_message_factory = batch_message_factory_override
    if not self.batch_message_factory:
      self.batch_message_factory = (
          batch_message_factory.BatchMessageFactory(self.dataproc))

  def GetRequest(self, args, batch_job):
    """Creates a BatchesCreateRequest message.

    Creates a BatchesCreateRequest message. The factory only handles the
    arguments added in AddArguments function. User needs to provide job
    specific message instance.

    Args:
      args: Parsed arguments.
      batch_job: A batch job typed message instance.

    Returns:
      BatchesCreateRequest: A configured BatchesCreateRequest.
    """
    kwargs = {}

    # Overwrite region if location is provided.
    # NOTE: this mutates args.region so the concept parser below resolves
    # the parent from the location value.
    if hasattr(args, 'location') and args.location:
      args.region = args.location
    kwargs['parent'] = args.CONCEPTS.region.Parse().RelativeName()

    # Recommendation: Always set a request ID for a create batch request.
    kwargs['requestId'] = args.request_id
    if not kwargs['requestId']:
      kwargs['requestId'] = util.GetUniqueId()

    # This behavior conflicts with protobuf definition.
    # Remove this if auto assign batch ID on control plane is enabled.
    kwargs['batchId'] = args.batch
    if not kwargs['batchId']:
      # Fall back to the request ID so the batch always has a stable ID.
      kwargs['batchId'] = kwargs['requestId']

    kwargs['batch'] = self.batch_message_factory.GetMessage(args, batch_job)

    return self.dataproc.messages.DataprocProjectsLocationsBatchesCreateRequest(
        **kwargs)
def AddArguments(parser, api_version):
  """Add arguments related to BatchesCreateRequest message.

  Add BatchesCreateRequest arguments to parser. This only includes general
  arguments for all `batches submit` commands. Batch job type specific
  arguments are not included, and those messages need to be passed in during
  message construction (when calling GetMessage).

  Args:
    parser: An argument parser instance.
    api_version: Api version to use.
  """
  flags.AddProjectsLocationsResourceArg(parser, api_version)

  # 4-63 chars total: one leading and one trailing alphanumeric char plus
  # 2-61 middle chars, matching the documented length bound in the help text.
  batch_id_pattern = re.compile(r'^[a-z0-9][-a-z0-9]{2,61}[a-z0-9]$')
  parser.add_argument(
      '--batch',
      type=arg_parsers.CustomFunctionValidator(
          batch_id_pattern.match,
          (
              'Only lowercase letters (a-z), numbers (0-9), and hyphens (-) are'
              ' allowed. The length must be between 4 and 63 characters.'
          ),
      ),
      help=(
          'The ID of the batch job to submit. The ID must contain only'
          ' lowercase letters (a-z), numbers (0-9) and hyphens (-). The length'
          ' of the name must be between 4 and 63 characters. If this argument'
          ' is not provided, a random generated UUID will be used.'
      ),
  )

  request_id_pattern = re.compile(r'^[a-zA-Z0-9_-]{1,40}$')
  parser.add_argument(
      '--request-id',
      type=arg_parsers.CustomFunctionValidator(
          request_id_pattern.match,
          (
              'Only letters (a-z, A-Z), numbers (0-9), underscores (_), and'
              ' hyphens (-) are allowed. The length must not exceed 40'
              ' characters.'
          ),
      ),
      help=(
          'A unique ID that identifies the request. If the service '
          'receives two batch create requests with the same request_id, '
          'the second request is ignored and the operation that '
          'corresponds to the first batch created and stored in the '
          'backend is returned. '
          'Recommendation: Always set this value to a UUID. '
          'The value must contain only letters (a-z, A-Z), numbers (0-9), '
          'underscores (_), and hyphens (-). The maximum length is 40 '
          'characters.'
      ),
  )
  _AddDependency(parser)
def _AddDependency(parser):
  """Adds flags owned by the BatchMessageFactory dependency."""
  batch_message_factory.AddArguments(parser)

View File

@@ -0,0 +1,101 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory class for PySparkBatch message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.dataproc import local_file_uploader
class PySparkBatchFactory(object):
  """Factory class for PySparkBatch message."""

  def __init__(self, dataproc):
    """Factory class for PySparkBatch message.

    Args:
      dataproc: A Dataproc instance.
    """
    self.dataproc = dataproc

  def UploadLocalFilesAndGetMessage(self, args):
    """Uploads user local files and creates a PySparkBatch message.

    Uploads user local files and points the URIs of the local files to the
    uploaded URIs, then creates a PySparkBatch message from the parsed
    arguments.

    Args:
      args: Parsed arguments.

    Returns:
      PySparkBatch: A PySparkBatch message.

    Raises:
      AttributeError: Bucket is required to upload local files, but not
        specified.
    """
    kwargs = {'args': args.args} if args.args else {}

    # The uploader expects each value to be a list of URIs.
    dependencies = {'mainPythonFileUri': [args.MAIN_PYTHON_FILE]}
    for field, value in (
        ('pythonFileUris', args.py_files),
        ('jarFileUris', args.jars),
        ('fileUris', args.files),
        ('archiveUris', args.archives),
    ):
      if value:
        dependencies[field] = value

    if local_file_uploader.HasLocalFiles(dependencies):
      if not args.deps_bucket:
        raise AttributeError('--deps-bucket was not specified.')
      dependencies = local_file_uploader.Upload(args.deps_bucket, dependencies)

    # Unwrap mainPythonFileUri from its single-element list.
    dependencies['mainPythonFileUri'] = dependencies['mainPythonFileUri'][0]

    # Merge the dict first for compatibility; old Python versions don't
    # support multi unpacking of dictionaries.
    kwargs.update(dependencies)
    return self.dataproc.messages.PySparkBatch(**kwargs)
def AddArguments(parser):
  """Adds arguments related to PySparkBatch message."""
  # The Cloud Storage bucket flag (AddBucket) is required to upload workload
  # dependencies until there is a default place to upload user files.
  for add_flag in (
      flags.AddMainPythonFile,
      flags.AddPythonFiles,
      flags.AddJarFiles,
      flags.AddOtherFiles,
      flags.AddArchives,
      flags.AddArgs,
      flags.AddBucket,
  ):
    add_flag(parser)

View File

@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*- #
# Copyright 2024 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory class for RayBatch message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.dataproc import local_file_uploader
class RayBatchFactory(object):
  """Factory class for RayBatch message."""

  def __init__(self, dataproc):
    """Factory class for RayBatch message.

    Args:
      dataproc: A Dataproc instance.
    """
    self.dataproc = dataproc

  def UploadLocalFilesAndGetMessage(self, args):
    """Uploads user local files and creates a RayBatch message.

    Uploads user local files and points the URIs of the local files to the
    uploaded URIs, then creates a RayBatch message from the parsed arguments.

    Args:
      args: Parsed arguments.

    Returns:
      RayBatch: A RayBatch message.

    Raises:
      AttributeError: Bucket is required to upload local files, but not
        specified.
    """
    kwargs = {'args': args.args} if args.args else {}

    # The uploader expects each value to be a list of URIs.
    dependencies = {'mainPythonFileUri': [args.MAIN_PYTHON_FILE]}
    if local_file_uploader.HasLocalFiles(dependencies):
      if not args.deps_bucket:
        raise AttributeError('--deps-bucket was not specified.')
      dependencies = local_file_uploader.Upload(args.deps_bucket, dependencies)

    # Unwrap mainPythonFileUri from its single-element list.
    dependencies['mainPythonFileUri'] = dependencies['mainPythonFileUri'][0]

    # Merge the dict first for compatibility; old Python versions don't
    # support multi unpacking of dictionaries.
    kwargs.update(dependencies)
    return self.dataproc.messages.RayBatch(**kwargs)
def AddArguments(parser):
  """Adds arguments related to RayBatch message."""
  # The Cloud Storage bucket flag (AddBucket) is required to upload workload
  # dependencies until there is a default place to upload user files.
  for add_flag in (flags.AddMainPythonFile, flags.AddArgs, flags.AddBucket):
    add_flag(parser)

View File

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory class for SparkBatch message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.dataproc import local_file_uploader
class SparkBatchFactory(object):
  """Factory class for SparkBatch message."""

  def __init__(self, dataproc):
    """Factory class for SparkBatch message.

    Args:
      dataproc: A Dataproc instance.
    """
    self.dataproc = dataproc

  def UploadLocalFilesAndGetMessage(self, args):
    """Uploads local files and creates a SparkBatch message.

    Uploads user local files and changes the URIs of the local files to point
    to the uploaded URIs, then creates a SparkBatch message from the parsed
    arguments.

    Args:
      args: Parsed arguments.

    Returns:
      SparkBatch: A SparkBatch message.

    Raises:
      AttributeError: Main class and jar are missing, or both were provided.
        Bucket is required to upload local files, but not specified.
    """
    kwargs = {'args': args.args} if args.args else {}

    # Exactly one of --class / --jar must be set.
    has_main_class = bool(args.main_class)
    has_main_jar = bool(args.main_jar)
    if not has_main_class and not has_main_jar:
      raise AttributeError('Missing JVM main.')
    if has_main_class and has_main_jar:
      raise AttributeError("Can't provide both main class and jar.")

    dependencies = {}
    if has_main_class:
      kwargs['mainClass'] = args.main_class
    else:
      # The uploader expects each value to be a list of URIs.
      dependencies['mainJarFileUri'] = [args.main_jar]

    for field, value in (
        ('jarFileUris', args.jars),
        ('fileUris', args.files),
        ('archiveUris', args.archives),
    ):
      if value:
        dependencies[field] = value

    if local_file_uploader.HasLocalFiles(dependencies):
      if not args.deps_bucket:
        raise AttributeError('--deps-bucket was not specified.')
      dependencies = local_file_uploader.Upload(args.deps_bucket, dependencies)

    # Unwrap mainJarFileUri from its single-element list when present.
    if 'mainJarFileUri' in dependencies:
      dependencies['mainJarFileUri'] = dependencies['mainJarFileUri'][0]

    # Merge the dictionaries first for backward compatibility.
    kwargs.update(dependencies)
    return self.dataproc.messages.SparkBatch(**kwargs)
def AddArguments(parser):
  """Adds arguments related to SparkBatch message."""
  for add_flag in (
      flags.AddJvmMainMutex,
      flags.AddArgs,
      flags.AddJarFiles,
      flags.AddOtherFiles,
      flags.AddArchives,
      flags.AddBucket,
  ):
    add_flag(parser)

View File

@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory class for SparkRBatch message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.dataproc import local_file_uploader
class SparkRBatchFactory(object):
  """Factory class for SparkRBatch message."""

  def __init__(self, dataproc):
    """Factory class for SparkRBatch message.

    Args:
      dataproc: A Dataproc instance.
    """
    self.dataproc = dataproc

  def UploadLocalFilesAndGetMessage(self, args):
    """Uploads local files and creates a SparkRBatch message.

    Uploads user local files and changes local file URIs to point to the
    uploaded URIs, then creates a SparkRBatch message based on the parsed
    arguments.

    Args:
      args: Parsed arguments.

    Returns:
      A SparkRBatch message.

    Raises:
      AttributeError: Bucket is required to upload local files, but not
        specified.
    """
    kwargs = {'args': args.args} if args.args else {}

    # The uploader expects each value to be a list of URIs.
    dependencies = {'mainRFileUri': [args.MAIN_R_FILE]}
    for field, value in (('fileUris', args.files),
                         ('archiveUris', args.archives)):
      if value:
        dependencies[field] = value

    if local_file_uploader.HasLocalFiles(dependencies):
      if not args.deps_bucket:
        raise AttributeError('--deps-bucket was not specified.')
      dependencies = local_file_uploader.Upload(args.deps_bucket, dependencies)

    # Get mainRFileUri out of the list for message construction.
    dependencies['mainRFileUri'] = dependencies['mainRFileUri'][0]

    # Merge the dictionaries first for backward compatibility.
    kwargs.update(dependencies)
    return self.dataproc.messages.SparkRBatch(**kwargs)
def AddArguments(parser):
  """Adds arguments related to SparkRBatch message."""
  for add_flag in (
      flags.AddMainRFile,
      flags.AddArgs,
      flags.AddOtherFiles,
      flags.AddArchives,
      flags.AddBucket,
  ):
    add_flag(parser)

View File

@@ -0,0 +1,93 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory class for SparkSqlBatch message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.dataproc import local_file_uploader
class SparkSqlBatchFactory(object):
  """Factory class for SparkSqlBatch message."""

  def __init__(self, dataproc):
    """Factory class for SparkSqlBatch message.

    Args:
      dataproc: A Dataproc instance.
    """
    self.dataproc = dataproc

  def UploadLocalFilesAndGetMessage(self, args):
    """Uploads local files and creates a SparkSqlBatch message.

    Uploads user local files and changes the URIs of the local files to the
    uploaded URIs, then creates a SparkSqlBatch message.

    Args:
      args: Parsed arguments.

    Returns:
      A SparkSqlBatch message instance.

    Raises:
      AttributeError: Bucket is required to upload local files, but not
        specified.
    """
    kwargs = {}

    # The uploader expects each value to be a list of URIs.
    dependencies = {'queryFileUri': [args.SQL_SCRIPT]}
    if args.jars:
      dependencies['jarFileUris'] = args.jars

    if args.vars:
      # Script variables are carried as an additional-properties map message.
      kwargs['queryVariables'] = encoding.DictToAdditionalPropertyMessage(
          args.vars,
          self.dataproc.messages.SparkSqlBatch.QueryVariablesValue,
          sort_items=True)

    if local_file_uploader.HasLocalFiles(dependencies):
      if not args.deps_bucket:
        raise AttributeError('--deps-bucket was not specified.')
      dependencies = local_file_uploader.Upload(args.deps_bucket, dependencies)

    # Move the main SQL script out of its single-element list.
    dependencies['queryFileUri'] = dependencies['queryFileUri'][0]

    # Merge the dictionaries first for compatibility.
    kwargs.update(dependencies)
    return self.dataproc.messages.SparkSqlBatch(**kwargs)
def AddArguments(parser):
  """Adds arguments related to SparkSqlBatch message."""
  # The Cloud Storage bucket flag (AddBucket) is required to upload workload
  # dependencies until there is a default place to upload user files.
  for add_flag in (
      flags.AddMainSqlScript,
      flags.AddJarFiles,
      flags.AddSqlScriptVariables,
      flags.AddBucket,
  ):
    add_flag(parser)

View File

@@ -0,0 +1,929 @@
# -*- coding: utf-8 -*- #
# Copyright 2018 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Flags for workflow templates related commands."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
import json
from googlecloudsdk.calliope import actions
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope.concepts import concepts
from googlecloudsdk.calliope.concepts import deps
from googlecloudsdk.command_lib.util.concepts import concept_parsers
from googlecloudsdk.core import properties
import six
def _RegionAttributeConfig():
  """Builds the attribute config for the Dataproc region resource argument."""
  help_text = (
      'Dataproc region for the {resource}. Each Dataproc '
      'region constitutes an independent resource namespace constrained to '
      'deploying instances into Compute Engine zones inside the '
      'region. Overrides the default `dataproc/region` property '
      'value for this command invocation.')
  # Fall back to the dataproc/region property when the flag is not set.
  return concepts.ResourceParameterAttributeConfig(
      name='region',
      help_text=help_text,
      fallthroughs=[
          deps.PropertyFallthrough(properties.VALUES.dataproc.region)
      ])
def _LocationAttributeConfig():
  """Builds the attribute config for the Dataproc location resource argument."""
  help_text = (
      'Dataproc location for the {resource}. Each Dataproc '
      'location constitutes an independent resource namespace constrained '
      'to deploying instances into Compute Engine zones inside the '
      'location. Overrides the default `dataproc/location` property '
      'value for this command invocation.')
  # Fall back to the dataproc/location property when the flag is not set.
  return concepts.ResourceParameterAttributeConfig(
      name='location',
      help_text=help_text,
      fallthroughs=[
          deps.PropertyFallthrough(properties.VALUES.dataproc.location)
      ])
def AddRegionFlag(parser):
  """Adds the --region flag, backed by the dataproc/region property."""
  prop = properties.VALUES.dataproc.region
  # Don't set default, because it would override users' property setting.
  parser.add_argument(
      '--region', action=actions.StoreProperty(prop), help=prop.help_text)
def AddLocationFlag(parser):
  """Adds the --location flag, backed by the dataproc/location property."""
  prop = properties.VALUES.dataproc.location
  # Don't set default, because it would override user's property setting.
  parser.add_argument(
      '--location', action=actions.StoreProperty(prop), help=prop.help_text)
def AddProjectsLocationsResourceArg(parser, api_version):
  """Adds a resource arg for projects/{}/locations/{}.

  Args:
    parser: The argument parser.
    api_version: Dataproc API version to use for the resource spec.
  """
  spec = concepts.ResourceSpec(
      'dataproc.projects.locations',
      api_version=api_version,
      resource_name='region',
      disable_auto_completers=True,
      projectsId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
      locationsId=_RegionAttributeConfig())

  concept_parsers.ConceptParser.ForResource(
      '--region',
      spec,
      properties.VALUES.dataproc.region.help_text,
      required=True).AddToParser(parser)
def AddAsync(parser):
  """Adds async flag with our own help text."""
  help_text = ('Return immediately without waiting for the operation in '
               'progress to complete.')
  # Stored as args.async_ because `async` is a reserved word in Python 3.
  parser.add_argument(
      '--async', dest='async_', action='store_true', help=help_text)
def ClusterConfig():
  """Returns the attribute config for a Dataproc cluster name."""
  return concepts.ResourceParameterAttributeConfig(
      name='cluster',
      help_text='The Cluster name.',
  )
def _GetClusterResourceSpec(api_version):
  """Returns the resource spec for a Dataproc cluster."""
  return concepts.ResourceSpec(
      'dataproc.projects.regions.clusters',
      api_version=api_version,
      resource_name='cluster',
      disable_auto_completers=True,
      projectId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
      region=_RegionAttributeConfig(),
      clusterName=ClusterConfig(),
  )
def AddClusterResourceArg(parser, verb, api_version):
  """Adds a positional `cluster` resource argument to the parser."""
  resource_parser = concept_parsers.ConceptParser.ForResource(
      'cluster',
      _GetClusterResourceSpec(api_version),
      'The name of the cluster to {}.'.format(verb),
      required=True)
  resource_parser.AddToParser(parser)
def GkeClusterConfig():
  """Returns the attribute config for a GKE cluster path."""
  return concepts.ResourceParameterAttributeConfig(
      name='gke-cluster',
      help_text='The GKE Cluster path.',
  )
def _DataprocRegionFallthrough():
  """Returns fallthroughs: the --region flag, then the dataproc/region property."""
  return [
      deps.ArgFallthrough('--region'),
      deps.PropertyFallthrough(properties.VALUES.dataproc.region)
  ]
def _GkeLocationAttributeConfig():
  """Returns the attribute config for the GKE cluster location."""
  return concepts.ResourceParameterAttributeConfig(
      name='gke-cluster-location',
      help_text='GKE region for the {resource}.',
      fallthroughs=_DataprocRegionFallthrough())
def _GetGkeClusterResourceSpec():
  """Returns the resource spec for a GKE cluster."""
  return concepts.ResourceSpec(
      'container.projects.locations.clusters',
      resource_name='gke-cluster',
      projectsId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
      locationsId=_GkeLocationAttributeConfig(),
      clustersId=GkeClusterConfig(),
  )
def AddGkeClusterResourceArg(parser):
  """Adds the required --gke-cluster resource flag to the parser."""
  resource_help = 'The GKE cluster to install the Dataproc cluster on.'
  concept_parsers.ConceptParser.ForResource(
      '--gke-cluster',
      _GetGkeClusterResourceSpec(),
      resource_help,
      required=True).AddToParser(parser)
def MetastoreServiceConfig():
  """Returns the attribute config for a Dataproc Metastore service."""
  service_help = (
      'Dataproc Metastore Service to be used as an external metastore.')
  return concepts.ResourceParameterAttributeConfig(
      name='metastore-service', help_text=service_help)
def _MetastoreServiceLocationAttributeConfig():
  """Attribute config for a Metastore location, defaulting to the region."""
  return concepts.ResourceParameterAttributeConfig(
      fallthroughs=_DataprocRegionFallthrough(),
      name='metastore-service-location',
      help_text='Dataproc Metastore location for the {resource}.')
def _GetMetastoreServiceResourceSpec():
  """Builds the ResourceSpec for metastore.projects.locations.services."""
  return concepts.ResourceSpec(
      'metastore.projects.locations.services',
      resource_name='metastore-service',
      projectsId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
      locationsId=_MetastoreServiceLocationAttributeConfig(),
      servicesId=MetastoreServiceConfig(),
  )
def AddMetastoreServiceResourceArg(parser):
  """Adds the optional --metastore-service resource flag to the parser."""
  resource_help = (
      'Dataproc Metastore Service to be used as an external metastore.')
  concept_parsers.ConceptParser.ForResource(
      '--metastore-service',
      _GetMetastoreServiceResourceSpec(),
      resource_help,
  ).AddToParser(parser)
def HistoryServerClusterConfig():
  """Returns the attribute config for a Spark History Server cluster."""
  server_help = ('Spark History Server. '
                 'Resource name of an existing Dataproc cluster to act as a '
                 'Spark History Server for workloads run on the Cluster.')
  return concepts.ResourceParameterAttributeConfig(
      name='history-server-cluster', help_text=server_help)
def _HistoryServerClusterRegionAttributeConfig():
  """Attribute config for the History Server region (same as the cluster's)."""
  region_help = ('Compute Engine region for the {resource}. It must be the '
                 'same region as the Dataproc cluster that is being created.')
  return concepts.ResourceParameterAttributeConfig(
      name='history-server-cluster-region',
      help_text=region_help,
      fallthroughs=_DataprocRegionFallthrough())
def _GetHistoryServerClusterResourceSpec():
  """Builds the ResourceSpec for a History Server Dataproc cluster."""
  return concepts.ResourceSpec(
      'dataproc.projects.regions.clusters',
      resource_name='history-server-cluster',
      projectId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
      region=_HistoryServerClusterRegionAttributeConfig(),
      clusterName=HistoryServerClusterConfig(),
  )
def AddHistoryServerClusterResourceArg(parser):
  """Adds the optional --history-server-cluster resource flag."""
  resource_help = (
      'A Dataproc Cluster created as a History Server, see '
      'https://cloud.google.com/dataproc/docs/concepts/jobs/history-server')
  concept_parsers.ConceptParser.ForResource(
      '--history-server-cluster',
      _GetHistoryServerClusterResourceSpec(),
      resource_help,
  ).AddToParser(parser)
def AddZoneAndExcludedZonesFlags(parser, short_flags=True):
  """Add zone and excluded zones flag."""
  zone_group = parser.add_argument_group(mutex=True)
  # Expose the short -z alias only when the calling surface asks for it.
  zone_flags = ['--zone', '-z'] if short_flags else ['--zone']
  zone_group.add_argument(
      *zone_flags,
      help="""
      The compute zone (e.g. us-central1-a) for the cluster. If empty
      and --region is set to a value other than `global`, the server will
      pick a zone in the region.
      """,
      action=actions.StoreProperty(properties.VALUES.compute.zone)
  )
  zone_group.add_argument(
      '--auto-zone-exclude-zones',
      type=arg_parsers.ArgList(),
      default=[],
      metavar='ZONE',
      hidden=True,
      help="""
      A comma-separated list of compute zones (e.g. us-central1-a) to
      exclude when picking the zone for the cluster.
      """,
  )
def AddVersionFlag(parser):
  """Adds the optional integer --version flag for workflow templates."""
  version_help = 'The version of the workflow template.'
  parser.add_argument('--version', type=int, help=version_help)
def AddFileFlag(parser, input_type, action):
  """Adds a required --file flag, e.g. a template to run/export/import."""
  file_help = 'The YAML file containing the {0} to {1}'.format(
      input_type, action)
  parser.add_argument('--file', required=True, help=file_help)
def AddMainPythonFile(parser):
  """Adds the positional MAIN_PYTHON_FILE argument (the PySpark driver)."""
  driver_help = ('URI of the main Python file to use as the Spark driver. '
                 'Must be a ``.py\'\' file.')
  parser.add_argument('MAIN_PYTHON_FILE', help=driver_help)
def AddJvmMainMutex(parser):
  """Main class or main jar."""
  main_group = parser.add_mutually_exclusive_group(required=True)
  class_help = ('Class contains the main method of the job. '
                'The jar file that contains the class must be in the classpath '
                'or specified in `jar_files`.')
  main_group.add_argument('--class', dest='main_class', help=class_help)
  main_group.add_argument(
      '--jar', dest='main_jar', help='URI of the main jar file.')
def AddMainSqlScript(parser):
  """Adds the positional SQL_SCRIPT argument."""
  script_help = 'URI of the script that contains Spark SQL queries to execute.'
  parser.add_argument('SQL_SCRIPT', help=script_help)
def AddSqlScriptVariables(parser):
  """Adds the --vars flag mapping Spark SQL query variables to values."""
  vars_help = ('Mapping of query variable names to values (equivalent to the '
               'Spark SQL command: SET name="value";).')
  parser.add_argument(
      '--vars',
      metavar='NAME=VALUE',
      type=arg_parsers.ArgDict(),
      help=vars_help)
def AddJarFiles(parser):
  """Adds the --jars flag."""
  jars_help = ('Comma-separated list of jar files to be provided to the '
               'classpaths.')
  parser.add_argument(
      '--jars',
      metavar='JAR',
      default=[],
      type=arg_parsers.ArgList(),
      help=jars_help)
def AddMainRFile(parser):
  """Adds the positional MAIN_R_FILE argument (the SparkR driver)."""
  r_help = ('URI of the main R file to use as the driver. '
            'Must be a ``.R\'\' or ``.r\'\' file.')
  parser.add_argument('MAIN_R_FILE', help=r_help)
def AddPythonFiles(parser):
  """Adds the --py-files flag."""
  py_help = ('Comma-separated list of Python scripts to be passed to the '
             'PySpark framework. Supported file types: ``.py\'\', ``.egg\'\' '
             'and ``.zip.\'\'')
  parser.add_argument(
      '--py-files',
      metavar='PY',
      default=[],
      type=arg_parsers.ArgList(),
      help=py_help)
def AddOtherFiles(parser):
  """Adds the --files flag for files placed in the working directory."""
  parser.add_argument(
      '--files',
      metavar='FILE',
      default=[],
      type=arg_parsers.ArgList(),
      help='Files to be placed in the working directory.')
def AddArchives(parser):
  """Adds the --archives flag for archives extracted into the working dir."""
  archives_help = ('Archives to be extracted into the working directory. '
                   'Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip.')
  parser.add_argument(
      '--archives',
      metavar='ARCHIVE',
      default=[],
      type=arg_parsers.ArgList(),
      help=archives_help)
def AddArgs(parser):
  """Adds the trailing JOB_ARG arguments passed through to the driver."""
  parser.add_argument(
      'args',
      default=[],
      metavar='JOB_ARG',
      nargs=argparse.REMAINDER,
      help='Arguments to pass to the driver.')
def AddBucket(parser):
  """Cloud Storage bucket to upload workload dependencies."""
  bucket_help = ('A Cloud Storage bucket to upload workload '
                 'dependencies.')
  parser.add_argument('--deps-bucket', help=bucket_help)
def JobConfig():
  """Returns the attribute config for a Dataproc job ID."""
  return concepts.ResourceParameterAttributeConfig(
      help_text='The Job ID.', name='job')
def _GetJobResourceSpec(api_version):
  """Builds the ResourceSpec for dataproc.projects.regions.jobs.

  Args:
    api_version: str, the Dataproc API version to use, for example 'v1'.
  """
  return concepts.ResourceSpec(
      'dataproc.projects.regions.jobs',
      api_version=api_version,
      resource_name='job',
      disable_auto_completers=True,
      projectId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
      region=_RegionAttributeConfig(),
      jobId=JobConfig(),
  )
def AddJobResourceArg(parser, verb, api_version):
  """Adds a required positional 'job' resource argument to the parser."""
  job_help = 'The ID of the job to {0}.'.format(verb)
  concept_parsers.ConceptParser.ForResource(
      'job',
      _GetJobResourceSpec(api_version),
      job_help,
      required=True).AddToParser(parser)
def AddBatchResourceArg(parser, verb, api_version, use_location=False):
  """Adds batch resource argument to parser."""

  def BatchConfig():
    return concepts.ResourceParameterAttributeConfig(
        name='batch', help_text='Batch job ID.')

  def GetBatchResourceSpec(api_version):
    # Older surfaces use the region attribute; newer ones use location.
    if use_location:
      location_attribute = _LocationAttributeConfig()
    else:
      location_attribute = _RegionAttributeConfig()
    return concepts.ResourceSpec(
        'dataproc.projects.locations.batches',
        api_version=api_version,
        resource_name='batch',
        disable_auto_completers=True,
        projectsId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
        locationsId=location_attribute,
        batchesId=BatchConfig(),
    )

  batch_help = 'ID of the batch job to {0}.'.format(verb)
  concept_parsers.ConceptParser.ForResource(
      'batch',
      GetBatchResourceSpec(api_version),
      batch_help,
      required=True).AddToParser(parser)
def AddSessionResourceArg(parser, verb, api_version):
  """Adds session resource argument to parser."""

  def SessionConfig():
    return concepts.ResourceParameterAttributeConfig(
        name='session', help_text='Session ID.')

  def GetSessionResourceSpec(api_version):
    return concepts.ResourceSpec(
        'dataproc.projects.locations.sessions',
        api_version=api_version,
        resource_name='session',
        disable_auto_completers=True,
        projectsId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
        locationsId=_LocationAttributeConfig(),
        sessionsId=SessionConfig(),
    )

  session_help = 'ID of the session to {0}.'.format(verb)
  concept_parsers.ConceptParser.ForResource(
      'session',
      GetSessionResourceSpec(api_version),
      session_help,
      required=True).AddToParser(parser)
def AddNodeGroupResourceArg(parser, verb, api_version):
  """Adds node group resource argument to parser."""

  def NodeGroupConfig():
    return concepts.ResourceParameterAttributeConfig(
        name='node_group', help_text='Node group ID.')

  def GetNodeGroupResourceSpec(api_version):
    return concepts.ResourceSpec(
        'dataproc.projects.regions.clusters.nodeGroups',
        api_version=api_version,
        resource_name='node_group',
        disable_auto_completers=True,
        projectId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
        region=_RegionAttributeConfig(),
        clusterName=ClusterConfig(),
        nodeGroupsId=NodeGroupConfig(),
    )

  group_help = 'ID of the node group to {0}.'.format(verb)
  concept_parsers.ConceptParser.ForResource(
      'node_group',
      GetNodeGroupResourceSpec(api_version),
      group_help,
      required=True).AddToParser(parser)
def OperationConfig():
  """Returns the attribute config for a Dataproc operation ID."""
  return concepts.ResourceParameterAttributeConfig(
      help_text='The Operation ID.', name='operation')
def _GetOperationResourceSpec(api_version):
  """Builds the ResourceSpec for dataproc.projects.regions.operations.

  Args:
    api_version: str, the Dataproc API version to use, for example 'v1'.
  """
  return concepts.ResourceSpec(
      'dataproc.projects.regions.operations',
      api_version=api_version,
      resource_name='operation',
      disable_auto_completers=True,
      projectsId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
      regionsId=_RegionAttributeConfig(),
      operationsId=OperationConfig(),
  )
def AddOperationResourceArg(parser, verb, api_version):
  """Adds a required positional 'operation' resource argument to the parser."""
  operation_help = 'The ID of the operation to {0}.'.format(verb)
  concept_parsers.ConceptParser.ForResource(
      'operation',
      _GetOperationResourceSpec(api_version),
      operation_help,
      required=True).AddToParser(parser)
def AddTimeoutFlag(parser, default='10m'):
  """Adds the hidden --timeout flag (client-side operation timeout)."""
  timeout_help = (
      'Client side timeout on how long to wait for Dataproc operations. '
      'See $ gcloud topic datetimes for information on duration '
      'formats.')
  # This may be made visible or passed to the server in future.
  parser.add_argument(
      '--timeout',
      hidden=True,
      default=default,
      type=arg_parsers.Duration(),
      help=timeout_help)
def AddParametersFlag(parser):
  """Adds --parameters for instantiating a parameterized workflow template."""
  parser.add_argument(
      '--parameters',
      dest='parameters',
      metavar='PARAM=VALUE',
      type=arg_parsers.ArgDict(),
      help="""
          A map from parameter names to values that should be used for those
          parameters. A value must be provided for every configured parameter.
          Parameters can be configured when creating or updating a workflow
          template.
          """)
def AddMinCpuPlatformArgs(parser, include_driver_pool_args=False):
  """Add minimum CPU platform flags for both master and worker instances."""
  help_text = """\
      When specified, the VM is scheduled on the host with a specified CPU
      architecture or a more recent CPU platform that's available in that
      zone. To list available CPU platforms in a zone, run:
      $ gcloud compute zones describe ZONE
      CPU platform selection may not be available in a zone. Zones
      that support CPU platform selection provide an `availableCpuPlatforms`
      field, which contains the list of available CPU platforms in the zone
      (see [Availability of CPU platforms](/compute/docs/instances/specify-min-cpu-platform#availablezones)
      for more information).
      """
  flag_names = ['--master-min-cpu-platform', '--worker-min-cpu-platform']
  if include_driver_pool_args:
    flag_names.append('--driver-pool-min-cpu-platform')
  # All three flags share identical semantics; only the targeted pool differs.
  for flag_name in flag_names:
    parser.add_argument(
        flag_name, metavar='PLATFORM', required=False, help=help_text)
def AddComponentFlag(parser):
  """Add optional components flag."""
  help_text = """\
      List of optional components to be installed on cluster machines.
      The following page documents the optional components that can be
      installed:
      https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/optional-components.
      """
  # Component names are case-insensitive on input; normalized to upper case.
  parser.add_argument(
      '--optional-components',
      dest='components',
      metavar='COMPONENT',
      type=arg_parsers.ArgList(element_type=lambda val: val.upper()),
      help=help_text)
def TemplateAttributeConfig():
  """Returns the attribute config for a workflow template name."""
  return concepts.ResourceParameterAttributeConfig(
      help_text='The workflow template name.', name='template')
def _GetTemplateResourceSpec(api_version):
  """Builds the ResourceSpec for dataproc.projects.regions.workflowTemplates.

  Args:
    api_version: str, the Dataproc API version to use, for example 'v1'.
  """
  return concepts.ResourceSpec(
      'dataproc.projects.regions.workflowTemplates',
      api_version=api_version,
      resource_name='template',
      disable_auto_completers=True,
      projectsId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
      regionsId=_RegionAttributeConfig(),
      workflowTemplatesId=TemplateAttributeConfig(),
  )
def AddTemplateResourceArg(parser, verb, api_version, positional=True):
  """Adds a workflow template resource argument.

  Args:
    parser: the argparse parser for the command.
    verb: str, the verb to describe the resource, such as 'to update'.
    api_version: api version, for example v1
    positional: bool, if True, means that the instance ID is a positional
      rather than a flag.
  """
  arg_name = 'template' if positional else '--workflow-template'
  template_help = 'The name of the workflow template to {}.'.format(verb)
  concept_parsers.ConceptParser.ForResource(
      arg_name,
      _GetTemplateResourceSpec(api_version),
      template_help,
      required=True).AddToParser(parser)
def _AutoscalingPolicyResourceSpec(api_version):
  """Builds the ResourceSpec for dataproc.projects.regions.autoscalingPolicies.

  Args:
    api_version: str, the Dataproc API version to use, for example 'v1'.
  """
  return concepts.ResourceSpec(
      'dataproc.projects.regions.autoscalingPolicies',
      api_version=api_version,
      resource_name='autoscaling policy',
      disable_auto_completers=True,
      projectsId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
      regionsId=_RegionAttributeConfig(),
      autoscalingPoliciesId=concepts.ResourceParameterAttributeConfig(
          name='autoscaling_policy',
          help_text='The autoscaling policy id.',
      ),
  )
def _SessionTemplateResourceSpec(api_version):
  """Builds the ResourceSpec for dataproc.projects.locations.sessionTemplates.

  Args:
    api_version: str, the Dataproc API version to use, for example 'v1'.
  """
  return concepts.ResourceSpec(
      'dataproc.projects.locations.sessionTemplates',
      api_version=api_version,
      resource_name='session template',
      disable_auto_completers=True,
      projectsId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
      locationsId=_LocationAttributeConfig(),
      sessionTemplatesId=concepts.ResourceParameterAttributeConfig(
          name='session_template',
          help_text='The session template name.',
      ),
  )
def AddAutoscalingPolicyResourceArg(parser, verb, api_version):
  """Adds a positional autoscaling policy resource argument.

  Args:
    parser: the argparse parser for the command.
    verb: str, the verb to apply to the resource, such as 'to update'.
    api_version: api version, for example v1
  """
  policy_help = 'The autoscaling policy to {}.'.format(verb)
  concept_parsers.ConceptParser.ForResource(
      'autoscaling_policy',
      _AutoscalingPolicyResourceSpec(api_version),
      policy_help,
      required=True).AddToParser(parser)
def AddSessionTemplateResourceArg(parser, verb, api_version):
  """Adds a positional session template resource argument.

  Args:
    parser: The argparse parser for the command.
    verb: The verb to apply to the resource, such as 'to update'.
    api_version: api version, for example v1
  """
  template_help = 'The session template to {}.'.format(verb)
  concept_parsers.ConceptParser.ForResource(
      'session_template',
      _SessionTemplateResourceSpec(api_version),
      template_help,
      required=True).AddToParser(parser)
def AddAutoscalingPolicyResourceArgForCluster(parser, api_version):
  """Adds an optional --autoscaling-policy resource flag for cluster create.

  Args:
    parser: the argparse parser for the command.
    api_version: api version, for example v1
  """
  # The policy region never gets its own flag: the empty override suppresses
  # it and the fallthrough reuses the cluster's --region value instead.
  concept_parsers.ConceptParser.ForResource(
      '--autoscaling-policy',
      _AutoscalingPolicyResourceSpec(api_version),
      'The autoscaling policy to use.',
      command_level_fallthroughs={'region': ['--region']},
      flag_name_overrides={'region': ''},
      required=False).AddToParser(parser)
def AddListOperationsFormat(parser):
  """Registers transforms and the default table format for operations list."""
  transforms = {
      'operationState': _TransformOperationState,
      'operationTimestamp': _TransformOperationTimestamp,
      'operationType': _TransformOperationType,
      'operationWarnings': _TransformOperationWarnings,
  }
  parser.display_info.AddTransforms(transforms)
  list_format = ('table(name.segment():label=NAME, '
                 'metadata.operationTimestamp():label=TIMESTAMP,'
                 'metadata.operationType():label=TYPE, '
                 'metadata.operationState():label=STATE, '
                 'status.code.yesno(no=\'\'):label=ERROR, '
                 'metadata.operationWarnings():label=WARNINGS)')
  parser.display_info.AddFormat(list_format)
def _TransformOperationType(metadata):
"""Extract operation type from metadata."""
if 'operationType' in metadata:
return metadata['operationType']
elif 'graph' in metadata:
return 'WORKFLOW'
return ''
def _TransformOperationState(metadata):
"""Extract operation state from metadata."""
if 'status' in metadata:
return metadata['status']['state']
elif 'state' in metadata:
return metadata['state']
return ''
def _TransformOperationTimestamp(metadata):
"""Extract operation start timestamp from metadata."""
if 'statusHistory' in metadata:
return metadata['statusHistory'][0]['stateStartTime']
elif 'startTime' in metadata:
return metadata['startTime']
return ''
def _TransformOperationWarnings(metadata):
"""Returns a count of operations if any are present."""
if 'warnings' in metadata:
return len(metadata['warnings'])
return ''
def AddPersonalAuthSessionArgs(parser):
  """Adds the arguments for enabling personal auth sessions."""
  # Optional custom downscoping rules; when omitted the session falls back to
  # the project-level GCS-object boundary described in the help text.
  parser.add_argument(
      '--access-boundary',
      help="""
        The path to a JSON file specifying the credential access boundary for
        the personal auth session.
        If not specified, then the access boundary defaults to one that includes
        the following roles on the containing project:
        roles/storage.objectViewer
        roles/storage.objectCreator
        roles/storage.objectAdmin
        roles/storage.legacyBucketReader
        For more information, see:
        https://cloud.google.com/iam/docs/downscoping-short-lived-credentials.
        """)
  # Hidden escape hatch for hosts where the OpenSSL binary is not on PATH.
  parser.add_argument(
      '--openssl-command',
      hidden=True,
      help="""
        The full path to the command used to invoke the OpenSSL tool on this
        machine.
        """)
  # Defaults to True: the command keeps running and refreshes credentials.
  parser.add_argument(
      '--refresh-credentials',
      action='store_true',
      default=True,
      hidden=True,
      help="""
        Keep the command running to periodically refresh credentials.
        """)
def ProjectGcsObjectsAccessBoundary(project):
  """Get an access boundary limited to a project's GCS objects.

  Args:
    project: The project ID for the access boundary.

  Returns:
    A JSON formatted access boundary suitable for creating a downscoped token.
  """
  universe_domain = properties.VALUES.core.universe_domain.Get()
  boundary_rules = [{
      'availableResource':
          f'//cloudresourcemanager.{universe_domain}/projects/{project}',
      'availablePermissions': [
          'inRole:roles/storage.objectViewer',
          'inRole:roles/storage.objectCreator',
          'inRole:roles/storage.objectAdmin',
          'inRole:roles/storage.legacyBucketReader',
      ],
  }]
  access_boundary = {'access_boundary': {'accessBoundaryRules': boundary_rules}}
  return six.text_type(json.dumps(access_boundary))
def AddSizeFlag(parser):
  """Adds the required --size flag for resizing node groups.

  Args:
    parser: The argparse parser for the command.
  """
  parser.add_argument(
      '--size', type=int, required=True, help='New size for a node group.')
def AddGracefulDecommissionTimeoutFlag(parser):
  """Adds --graceful-decommission-timeout for node group scale-down resizes.

  Args:
    parser: The argparse parser for the command.
  """
  timeout_help = (
      'Graceful decommission timeout for a node group scale-down resize.')
  parser.add_argument(
      '--graceful-decommission-timeout', required=False, help=timeout_help)
def AddDriverPoolId(parser):
  """Adds the optional --driver-pool-id flag.

  Args:
    parser: The argparse parser for the command.
  """
  parser.add_argument(
      '--driver-pool-id',
      default=None,
      required=False,
      help=("""
          Custom identifier for the DRIVER Node Group being created. If not
          provided, a random string is generated.
          """))
def InstanceConfig():
  """Returns the attribute config for a Dataproc instance name."""
  return concepts.ResourceParameterAttributeConfig(
      help_text='The instance name.', name='instance')
def _GetInstanceResourceSpec(api_version):
  """Builds the ResourceSpec for an instance (clusters collection).

  Args:
    api_version: str, the Dataproc API version to use, for example 'v1'.
  """
  return concepts.ResourceSpec(
      'dataproc.projects.regions.clusters',
      api_version=api_version,
      resource_name='instance',
      disable_auto_completers=True,
      projectId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
      region=_RegionAttributeConfig(),
      clusterName=InstanceConfig(),
  )
def AddInstanceResourceArg(parser, verb, api_version):
  """Adds a required positional 'instance' resource argument to the parser."""
  instance_help = 'The name of the instance to {}.'.format(verb)
  concept_parsers.ConceptParser.ForResource(
      'instance',
      _GetInstanceResourceSpec(api_version),
      instance_help,
      required=True).AddToParser(parser)
def GdceClusterConfig():
  """Returns the attribute config for a GDCE cluster path."""
  return concepts.ResourceParameterAttributeConfig(
      help_text='The GDCE Cluster path.', name='gdce-cluster')
def _GdceLocationAttributeConfig():
  """Attribute config for a GDCE cluster location, defaulting to the region."""
  return concepts.ResourceParameterAttributeConfig(
      fallthroughs=_DataprocRegionFallthrough(),
      name='gdce-cluster-location',
      help_text='GDCE region for the {resource}.')
def AddGdceClusterResourceArg(parser):
  """Adds the required --gdce-cluster resource flag to the parser."""
  resource_help = 'The GDCE cluster to install the Dataproc instance on.'
  concept_parsers.ConceptParser.ForResource(
      '--gdce-cluster',
      _GetGdceClusterResourceSpec(),
      resource_help,
      required=True,
  ).AddToParser(parser)
def _GetGdceClusterResourceSpec():
  """Builds the ResourceSpec for edgecontainer.projects.locations.clusters."""
  return concepts.ResourceSpec(
      'edgecontainer.projects.locations.clusters',
      resource_name='gdce-cluster',
      projectsId=concepts.DEFAULT_PROJECT_ATTRIBUTE_CONFIG,
      locationsId=_GdceLocationAttributeConfig(),
      clustersId=GdceClusterConfig()
  )

View File

@@ -0,0 +1,407 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for building the dataproc clusters gke CLI."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import re
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import exceptions
def AddPoolsArg(parser):
  """Adds the repeatable --pools flag describing GKE node pools.

  Args:
    parser: the argparse parser for the command.
  """
  parser.add_argument(
      '--pools',
      type=arg_parsers.ArgDict(
          required_keys=[
              'name',
              'roles',
          ],
          spec={
              'name': str,
              'roles': str,
              'machineType': str,
              'preemptible': arg_parsers.ArgBoolean(),
              'localSsdCount': int,
              'accelerators': str,
              'minCpuPlatform': str,
              'bootDiskKmsKey': str,
              'locations': str,
              'min': int,
              'max': int,
          },
      ),
      action='append',
      default=[],
      metavar='KEY=VALUE[;VALUE]',
      # NOTE: the table below documents the ArgDict spec keys above; the
      # accelerators row previously said 'accelerator', which the spec rejects.
      help="""
          Each `--pools` flag represents a GKE node pool associated with
          the virtual cluster. It is comprised of a CSV in the form
          `KEY=VALUE[;VALUE]`, where certain keys may have multiple values.
          The following KEYs must be specified:
          -----------------------------------------------------------------------------------------------------------
          KEY Type Example Description
          ------ ---------------- ------------------------ ----------------------------------------------------------
          name string `my-node-pool` Name of the node pool.
          roles repeated string `default;spark-driver` Roles that this node pool should perform. Valid values are
          `default`, `controller`, `spark-driver`, `spark-executor`.
          -----------------------------------------------------------------------------------------------------------
          The following KEYs may be specified:
          ----------------------------------------------------------------------------------------------------------------------------------------------------------------
          KEY Type Example Description
          --------------- ---------------- --------------------------------------------- ---------------------------------------------------------------------------------
          machineType string `n1-standard-8` Compute Engine machine type to use.
          preemptible boolean `false` If true, then this node pool uses preemptible VMs.
          This cannot be true on the node pool with the `controllers` role
          (or `default` role if `controllers` role is not specified).
          localSsdCount int `2` The number of local SSDs to attach to each node.
          accelerators repeated string `nvidia-tesla-a100=1` Accelerators to attach to each node. In the format NAME=COUNT.
          minCpuPlatform string `Intel Skylake` Minimum CPU platform for each node.
          bootDiskKmsKey string `projects/project-id/locations/us-central1 The Customer Managed Encryption Key (CMEK) used to encrypt
          /keyRings/keyRing-name/cryptoKeys/key-name` the boot disk attached to each node in the node pool.
          locations repeated string `us-west1-a;us-west1-c` Zones within the location of the GKE cluster.
          All `--pools` flags for a Dataproc cluster must have identical locations.
          min int `0` Minimum number of nodes per zone that this node pool can scale down to.
          max int `10` Maximum number of nodes per zone that this node pool can scale up to.
          ----------------------------------------------------------------------------------------------------------------------------------------------------------------
          """)
def AddPoolsAlphaArg(parser):
  """Adds the alpha-track --pools flag (adds localNvmeSsdCount).

  Args:
    parser: the argparse parser for the command.
  """
  parser.add_argument(
      '--pools',
      type=arg_parsers.ArgDict(
          required_keys=[
              'name',
              'roles',
          ],
          spec={
              'name': str,
              'roles': str,
              'machineType': str,
              'preemptible': arg_parsers.ArgBoolean(),
              'localSsdCount': int,
              'localNvmeSsdCount': int,
              'accelerators': str,
              'minCpuPlatform': str,
              'bootDiskKmsKey': str,
              'locations': str,
              'min': int,
              'max': int,
          },
      ),
      action='append',
      default=[],
      metavar='KEY=VALUE[;VALUE]',
      # NOTE: table rows document the ArgDict spec keys above; the
      # accelerators row previously said 'accelerator', which the spec rejects.
      help="""
          Each `--pools` flag represents a GKE node pool associated with
          the virtual cluster. It is a comma-separated list in the form
          `KEY=VALUE[;VALUE]`, where certain keys may have multiple values.
          The following KEYs must be specified:
          -----------------------------------------------------------------------------------------------------------
          KEY Type Example Description
          ------ ---------------- ------------------------ ----------------------------------------------------------
          name string `my-node-pool` Name of the node pool.
          roles repeated string `default;spark-driver` Roles that each node pool will perform.
          [One Pool must have DEFAULT role] Valid values are
          `default`, `controller`, `spark-driver`, `spark-executor`.
          -----------------------------------------------------------------------------------------------------------
          The following KEYs may be specified:
          ----------------------------------------------------------------------------------------------------------------------------------------------------------------
          KEY Type Example Description
          --------------- ---------------- --------------------------------------------- ---------------------------------------------------------------------------------
          machineType string `n1-standard-8` Compute Engine machine type to use.
          preemptible boolean `false` If true, then this node pool uses preemptible VMs.
          This Must be `false` for a node pool with the CONTROLLER role or
          for a node pool with the DEFAULT role if no node pool has the CONTROLLER role.
          localSsdCount int `2` The number of local SSDs to attach to each node.
          localNvmeSsdCount int `2` The number of local NVMe SSDs to attach to each node.
          accelerators repeated string `nvidia-tesla-a100=1` Accelerators to attach to each node, in NODE=COUNT format.
          minCpuPlatform string `Intel Skylake` Minimum CPU platform for each node.
          bootDiskKmsKey string `projects/project-id/locations/us-central1 The Customer Managed Encryption Key (CMEK) used to encrypt
          /keyRings/keyRing-name/cryptoKeys/key-name` the boot disk attached to each node in the node pool.
          locations repeated string `us-west1-a;us-west1-c` Zones within the location of the GKE cluster.
          All `--pools` flags for a Dataproc cluster must have identical locations.
          min int `0` Minimum number of nodes per zone that this node pool can scale down to.
          max int `10` Maximum number of nodes per zone that this node pool can scale up to.
          ----------------------------------------------------------------------------------------------------------------------------------------------------------------
          """)
class GkeNodePoolTargetsParser():
  """Parses all the --pools flags into a list of GkeNodePoolTarget messages."""
  @staticmethod
  def Parse(dataproc, gke_cluster, arg_pools, support_shuffle_service=False):
    """Parses all the --pools flags into a list of GkeNodePoolTarget messages.
    Args:
      dataproc: The Dataproc API version to use for GkeNodePoolTarget messages.
      gke_cluster: The GKE cluster's relative name, for example,
        'projects/p1/locations/l1/clusters/c1'.
      arg_pools: The list of dict[str, any] generated from all --pools flags.
      support_shuffle_service: support shuffle service.
    Returns:
      A list of GkeNodePoolTargets message, one for each entry in the arg_pools
      list.
    """
    pools = [
        _GkeNodePoolTargetParser.Parse(dataproc, gke_cluster, arg_pool,
                                       support_shuffle_service)
        for arg_pool in arg_pools
    ]
    # Cross-pool validations: unique names, exclusive roles with exactly one
    # DEFAULT, identical locations, and well-formed CMEK key names.
    GkeNodePoolTargetsParser._ValidateUniqueNames(pools)
    GkeNodePoolTargetsParser._ValidateRoles(dataproc, pools)
    GkeNodePoolTargetsParser._ValidatePoolsHaveSameLocation(pools)
    GkeNodePoolTargetsParser._ValidateBootDiskKmsKeyPattern(pools)
    return pools
  @staticmethod
  def _ValidateUniqueNames(pools):
    """Validates that pools have unique names.

    Raises:
      exceptions.InvalidArgumentException: If two pools share a name.
    """
    used_names = set()
    for pool in pools:
      name = pool.nodePool
      if name in used_names:
        raise exceptions.InvalidArgumentException(
            '--pools', 'Pool name "%s" used more than once.' % name)
      used_names.add(name)
  @staticmethod
  def _ValidateRoles(dataproc, pools):
    """Validates that roles are exclusive and that one pool has DEFAULT.

    Raises:
      exceptions.InvalidArgumentException: If a role appears in more than one
        pool, or no pool carries the DEFAULT role.
    """
    if not pools:
      # The backend will automatically create the default pool.
      return
    # Each role may appear on at most one pool across the whole cluster.
    seen_roles = set()
    for pool in pools:
      for role in pool.roles:
        if role in seen_roles:
          raise exceptions.InvalidArgumentException(
              '--pools', 'Multiple pools contained the same role "%s".' % role)
        else:
          seen_roles.add(role)
    default = dataproc.messages.GkeNodePoolTarget.RolesValueListEntryValuesEnum(
        'DEFAULT')
    if default not in seen_roles:
      raise exceptions.InvalidArgumentException(
          '--pools',
          'If any pools are specified, then exactly one must have the '
          '"default" role.')
  @staticmethod
  def _ValidatePoolsHaveSameLocation(pools):
    """Validates that all pools specify an identical location.

    Raises:
      exceptions.InvalidArgumentException: If two pools with node pool configs
        declare different locations.
    """
    if not pools:
      return
    # The first pool with a config establishes the expected locations;
    # pools without a nodePoolConfig are not compared.
    initial_locations = None
    for pool in pools:
      if pool.nodePoolConfig is not None:
        locations = pool.nodePoolConfig.locations
        if initial_locations is None:
          initial_locations = locations
          continue
        elif initial_locations != locations:
          raise exceptions.InvalidArgumentException(
              '--pools', 'All pools must have identical locations.')
  @staticmethod
  def _ValidateBootDiskKmsKeyPattern(pools):
    """Validates that the bootDiskKmsKey matches the correct pattern.

    Raises:
      exceptions.InvalidArgumentException: If a specified bootDiskKmsKey does
        not look like a Cloud KMS crypto key resource name.
    """
    if not pools:
      return
    boot_disk_kms_key_pattern = re.compile(
        'projects/[^/]+/locations/[^/]+/keyRings/[^/]+/cryptoKeys/[^/]+')
    for pool in pools:
      # Only pools that actually set a bootDiskKmsKey are checked.
      if (pool.nodePoolConfig is
          None) or (pool.nodePoolConfig.config is None) or (
              pool.nodePoolConfig.config.bootDiskKmsKey is None):
        continue
      if not boot_disk_kms_key_pattern.match(
          pool.nodePoolConfig.config.bootDiskKmsKey):
        raise exceptions.InvalidArgumentException(
            '--pools', 'bootDiskKmsKey must match pattern: '
            'projects/[KEY_PROJECT_ID]/locations/[LOCATION]/keyRings/[RING_NAME]/cryptoKeys/[KEY_NAME]'
        )
class _GkeNodePoolTargetParser():
  """Helper to parse a --pools flag into a GkeNodePoolTarget message."""

  # Maps role names accepted on the command line to their API enum names.
  _ARG_ROLE_TO_API_ROLE = {
      'default': 'DEFAULT',
      'controller': 'CONTROLLER',
      'spark-driver': 'SPARK_DRIVER',
      'spark-executor': 'SPARK_EXECUTOR',
  }

  @staticmethod
  def Parse(dataproc, gke_cluster, arg_pool, support_shuffle_service=False):
    """Parses a --pools flag into a GkeNodePoolTarget message.

    Args:
      dataproc: The Dataproc API version to use for the GkeNodePoolTarget
        message.
      gke_cluster: The GKE cluster's relative name, for example,
        'projects/p1/locations/l1/clusters/c1'.
      arg_pool: The dict[str, any] generated from the --pools flag.
      support_shuffle_service: Whether the 'shuffle-service' role is allowed.

    Returns:
      A GkeNodePoolTarget message.
    """
    return _GkeNodePoolTargetParser._GkeNodePoolTargetFromArgPool(
        dataproc, gke_cluster, arg_pool, support_shuffle_service)

  @staticmethod
  def _GkeNodePoolTargetFromArgPool(dataproc,
                                    gke_cluster,
                                    arg_pool,
                                    support_shuffle_service=False):
    """Creates a GkeNodePoolTarget from a --pool argument."""
    return dataproc.messages.GkeNodePoolTarget(
        nodePool='{0}/nodePools/{1}'.format(gke_cluster, arg_pool['name']),
        roles=_GkeNodePoolTargetParser._SplitRoles(dataproc, arg_pool['roles'],
                                                   support_shuffle_service),
        nodePoolConfig=_GkeNodePoolTargetParser._GkeNodePoolConfigFromArgPool(
            dataproc, arg_pool))

  @staticmethod
  def _SplitRoles(dataproc, arg_roles, support_shuffle_service=False):
    """Splits the role string given as an argument into a list of Role enums.

    Args:
      dataproc: The Dataproc API version to use for the role enums.
      arg_roles: Semicolon-delimited, case-insensitive role names, for
        example 'default;spark-driver'.
      support_shuffle_service: Whether the 'shuffle-service' role is allowed.

    Returns:
      A list of RolesValueListEntryValuesEnum values.

    Raises:
      exceptions.InvalidArgumentException: If a role name is not recognized.
    """
    roles = []
    # Bug fix: this used to overwrite the support_shuffle_service parameter
    # with the (always-truthy) role dict, which made 'shuffle-service'
    # unconditionally accepted. Only extend the recognized roles when shuffle
    # service support is actually requested.
    defined_roles = _GkeNodePoolTargetParser._ARG_ROLE_TO_API_ROLE
    if support_shuffle_service:
      # Copy so the shared class-level dict is never mutated.
      defined_roles = defined_roles.copy()
      defined_roles.update({'shuffle-service': 'SHUFFLE_SERVICE'})
    for arg_role in arg_roles.split(';'):
      if arg_role.lower() not in defined_roles:
        raise exceptions.InvalidArgumentException(
            '--pools', 'Unrecognized role "%s".' % arg_role)
      roles.append(
          dataproc.messages.GkeNodePoolTarget.RolesValueListEntryValuesEnum(
              defined_roles[arg_role.lower()]))
    return roles

  @staticmethod
  def _GkeNodePoolConfigFromArgPool(dataproc, arg_pool):
    """Creates the GkeNodePoolConfig via the arguments specified in --pools.

    Returns:
      A GkeNodePoolConfig message, or None when no config field was set, so
      an empty message is not sent to the API.
    """
    config = dataproc.messages.GkeNodePoolConfig(
        config=_GkeNodePoolTargetParser._GkeNodeConfigFromArgPool(
            dataproc, arg_pool),
        autoscaling=_GkeNodePoolTargetParser
        ._GkeNodePoolAutoscalingConfigFromArgPool(dataproc, arg_pool))
    if 'locations' in arg_pool:
      config.locations = arg_pool['locations'].split(';')
    # Only send the config if at least one field was populated.
    if config != dataproc.messages.GkeNodePoolConfig():
      return config
    return None

  @staticmethod
  def _GkeNodeConfigFromArgPool(dataproc, arg_pool):
    """Creates the GkeNodeConfig via the arguments specified in --pools.

    Returns:
      A GkeNodeConfig message, or None when no node-level field was set.
    """
    pool = dataproc.messages.GkeNodeConfig()
    if 'machineType' in arg_pool:
      pool.machineType = arg_pool['machineType']
    if 'preemptible' in arg_pool:
      # The ArgDict's spec declares this as an ArgBoolean(), so it is a boolean.
      pool.preemptible = arg_pool['preemptible']
    if 'localSsdCount' in arg_pool:
      # The ArgDict's spec declares this as an int, so it is an int.
      pool.localSsdCount = arg_pool['localSsdCount']
    if 'localNvmeSsdCount' in arg_pool:
      pool.ephemeralStorageConfig = dataproc.messages.GkeEphemeralStorageConfig(
          localSsdCount=arg_pool['localNvmeSsdCount'])
    if 'accelerators' in arg_pool:
      pool.accelerators = _GkeNodePoolTargetParser._GkeNodePoolAcceleratorConfigFromArgPool(
          dataproc, arg_pool['accelerators'])
    if 'minCpuPlatform' in arg_pool:
      pool.minCpuPlatform = arg_pool['minCpuPlatform']
    if 'bootDiskKmsKey' in arg_pool:
      pool.bootDiskKmsKey = arg_pool['bootDiskKmsKey']
    # Only send the node config if at least one field was populated.
    if pool != dataproc.messages.GkeNodeConfig():
      return pool
    return None

  @staticmethod
  def _GkeNodePoolAcceleratorConfigFromArgPool(dataproc, arg_accelerators):
    """Creates the GkeNodePoolAcceleratorConfig via the arguments specified in --pools.

    Args:
      dataproc: The Dataproc API version to use for the messages.
      arg_accelerators: Semicolon-delimited 'TYPE=COUNT' accelerator specs.

    Returns:
      A list of GkeNodePoolAcceleratorConfig messages.

    Raises:
      exceptions.InvalidArgumentException: If a spec is malformed or its
        count is not an integer.
    """
    accelerators = []
    for arg_accelerator in arg_accelerators.split(';'):
      if '=' not in arg_accelerator:
        raise exceptions.InvalidArgumentException(
            '--pools', 'accelerators value "%s" does not match the expected '
            '"ACCELERATOR_TYPE=ACCELERATOR_VALUE" pattern.' % arg_accelerator)
      accelerator_type, count_string = arg_accelerator.split('=', 1)
      try:
        count = int(count_string)
        accelerators.append(
            dataproc.messages.GkeNodePoolAcceleratorConfig(
                acceleratorCount=count,
                acceleratorType=accelerator_type,
            ))
      except ValueError:
        raise exceptions.InvalidArgumentException(
            '--pools',
            'Unable to parse accelerators count "%s" as an integer.' %
            count_string)
    return accelerators

  @staticmethod
  def _GkeNodePoolAutoscalingConfigFromArgPool(dataproc, arg_pool):
    """Creates the GkeNodePoolAutoscalingConfig via the arguments specified in --pools.

    Returns:
      A GkeNodePoolAutoscalingConfig message, or None when neither bound was
      set.
    """
    config = dataproc.messages.GkeNodePoolAutoscalingConfig()
    if 'min' in arg_pool:
      # The ArgDict's spec declares this as an int, so it is an int.
      config.minNodeCount = arg_pool['min']
    if 'max' in arg_pool:
      # The ArgDict's spec declares this as an int, so it is an int.
      config.maxNodeCount = arg_pool['max']
    # Only send the autoscaling config if at least one bound was populated.
    if config != dataproc.messages.GkeNodePoolAutoscalingConfig():
      return config
    return None

View File

@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for setting up GKE workload identity."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import compute_helpers
from googlecloudsdk.api_lib.dataproc import iam_helpers
class GkeWorkloadIdentity():
  """Sets up GKE Workload Identity."""

  @staticmethod
  def UpdateGsaIamPolicy(project_id, gsa_email, k8s_namespace,
                         k8s_service_accounts):
    """Allow the k8s_service_accounts to use gsa_email via Workload Identity.

    Adds an IAM binding on the Google service account granting
    roles/iam.workloadIdentityUser to each Kubernetes service account in the
    project's workload identity pool.
    """
    resource = 'projects/-/serviceAccounts/{gsa_email}'.format(
        gsa_email=gsa_email)
    member_template = (
        'serviceAccount:{project_id}.svc.id.goog[{k8s_namespace}/{ksa}]')
    members = []
    for ksa in k8s_service_accounts:
      members.append(
          member_template.format(
              project_id=project_id, k8s_namespace=k8s_namespace, ksa=ksa))
    iam_helpers.AddIamPolicyBindings(resource, members,
                                     'roles/iam.workloadIdentityUser')
class DefaultDataprocDataPlaneServiceAccount():
  """Find the default Google Service Account used by the Dataproc data plane."""

  @staticmethod
  def Get(project_id):
    """Returns the project's default compute service account email."""
    default_gsa = compute_helpers.GetDefaultServiceAccount(project_id)
    return default_gsa

View File

@@ -0,0 +1,50 @@
# -*- coding: utf-8 -*- #
# Copyright 2023 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for converting Dataproc cluster to instance."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
def ConvertClusterToInstance(cluster):
  """Convert a dataproc cluster to instance object.

  Args:
    cluster: cluster returned from Dataproc service.

  Returns:
    Instance: instance dict represents resources installed on GDCE cluster.
  """
  gdce_config = (
      cluster.virtualClusterConfig.kubernetesClusterConfig.gdceClusterConfig)
  return {
      'instanceName': cluster.clusterName,
      'instanceUuid': cluster.clusterUuid,
      'projectId': cluster.projectId,
      'status': cluster.status,
      'gdcEdgeIdentityProvider': gdce_config.gdcEdgeIdentityProvider,
      'gdcEdgeMembershipTarget': gdce_config.gdcEdgeMembershipTarget,
      'gdcEdgeWorkloadIdentityPool': gdce_config.gdcEdgeWorkloadIdentityPool,
  }

View File

@@ -0,0 +1,133 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for building the dataproc clusters CLI."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import abc
import collections
import os
from apitools.base.py import encoding
from googlecloudsdk.api_lib.dataproc import constants
from googlecloudsdk.api_lib.dataproc import exceptions
from googlecloudsdk.api_lib.dataproc import storage_helpers
from googlecloudsdk.core import log
from googlecloudsdk.core.util import files
import six
import six.moves.urllib.parse
class JobBase(six.with_metaclass(abc.ABCMeta, object)):
  """Base class for Jobs.

  Tracks the files a job references, distinguishes local files from files
  that are already remote, and uploads the local ones to a GCS staging
  directory before submission.
  """

  def __init__(self, *args, **kwargs):
    super(JobBase, self).__init__(*args, **kwargs)
    # Maps a file "type" key (e.g. 'jars', 'archives') to a URI or a list of
    # URIs, as produced by GetFilesByType (defined by subclasses).
    self.files_by_type = {}
    # Local paths registered by _GetStagedFile; uploaded by
    # ValidateAndStageFiles.
    self.files_to_stage = []
    # GCS staging directory URI; None means no staging location has been
    # determined yet (see GetStagingDir).
    self._staging_dir = None

  def _GetStagedFile(self, file_str):
    """Validate file URI and register it for uploading if it is local.

    Args:
      file_str: A local path or remote URI referencing a job file.

    Returns:
      The URI the job should use: file_str unchanged for non-local files, or
      the GCS URI the file will be staged at for local files.

    Raises:
      files.Error: If a local file does not exist.
      exceptions.ArgumentError: If a local file must be staged but no staging
        directory could be determined.
    """
    drive, _ = os.path.splitdrive(file_str)
    uri = six.moves.urllib.parse.urlsplit(file_str, allow_fragments=False)
    # Determine the file is local to this machine if no scheme besides a drive
    # is passed. file:// URIs are interpreted as living on VMs.
    is_local = drive or not uri.scheme
    if not is_local:
      # Non-local files are already staged.
      # TODO(b/36057257): Validate scheme.
      return file_str
    if not os.path.exists(file_str):
      raise files.Error('File Not Found: [{0}].'.format(file_str))
    if self._staging_dir is None:
      # we raise this exception only if there are files to stage but the staging
      # location couldn't be determined. In case where files are already staged
      # this exception is not raised
      raise exceptions.ArgumentError(
          'Could not determine where to stage local file {0}. When submitting '
          'a job to a cluster selected via --cluster-labels, either\n'
          '- a staging bucket must be provided via the --bucket argument, or\n'
          '- all provided files must be non-local.'.format(file_str))
    basename = os.path.basename(file_str)
    # Record the local path so ValidateAndStageFiles uploads it later.
    self.files_to_stage.append(file_str)
    staged_file = six.moves.urllib.parse.urljoin(self._staging_dir, basename)
    return staged_file

  def ValidateAndStageFiles(self):
    """Validate file URIs and upload them if they are local.

    Rewrites self.files_by_type in place so every entry refers to a remote
    URI, then uploads all registered local files in one batch.
    """
    for file_type, file_or_files in six.iteritems(self.files_by_type):
      # TODO(b/36049793): Validate file suffixes.
      if not file_or_files:
        continue
      elif isinstance(file_or_files, six.string_types):
        # Single URI entry.
        self.files_by_type[file_type] = self._GetStagedFile(file_or_files)
      else:
        # List of URIs.
        staged_files = [self._GetStagedFile(f) for f in file_or_files]
        self.files_by_type[file_type] = staged_files
    if self.files_to_stage:
      log.info('Staging local files {0} to {1}.'.format(self.files_to_stage,
                                                        self._staging_dir))
      storage_helpers.Upload(self.files_to_stage, self._staging_dir)

  def GetStagingDir(self, cluster, cluster_pool, job_id, bucket=None):
    """Determine the GCS directory to stage job resources in.

    Args:
      cluster: The target cluster message, or None when not yet resolved
        (e.g. when selecting by --cluster-labels).
      cluster_pool: The target cluster pool identifier, or None. Takes
        precedence over the cluster's UUID in the directory path.
      job_id: The job id, used as part of the directory path.
      bucket: Explicit staging bucket name; when None the cluster's
        configured bucket is used.

    Returns:
      The gs:// staging directory URI, or None when no bucket could be
      determined.
    """
    if bucket is None and cluster is None:
      return None
    if bucket is None:
      # If bucket is not provided, fall back to cluster's staging bucket.
      if cluster.config:
        bucket = cluster.config.configBucket
      elif cluster.virtualClusterConfig:
        bucket = cluster.virtualClusterConfig.stagingBucket
      else:
        # This is only needed if the request needs to stage files. If it doesn't
        # everything will work. If it does need to stage files, then it will
        # fail with a message saying --bucket should be specified.
        return None
    environment = 'unresolved'
    if cluster is not None:
      environment = cluster.clusterUuid
    if cluster_pool is not None:
      environment = cluster_pool
    staging_dir = (
        'gs://{bucket}/{prefix}/{environment}/jobs/{job_id}/staging/'.format(
            bucket=bucket,
            prefix=constants.GCS_METADATA_PREFIX,
            environment=environment,
            job_id=job_id))
    return staging_dir

  def BuildLoggingConfig(self, messages, driver_logging):
    """Build LoggingConfig from parameters.

    Args:
      messages: The Dataproc messages module.
      driver_logging: Dict mapping package names to log level strings, or a
        falsy value when no driver logging was requested.

    Returns:
      A LoggingConfig message, or None when driver_logging is empty.
    """
    if not driver_logging:
      return None
    value_enum = (messages.LoggingConfig.DriverLogLevelsValue.
                  AdditionalProperty.ValueValueValuesEnum)
    config = collections.OrderedDict(
        [(key, value_enum(value)) for key, value in driver_logging.items()])
    return messages.LoggingConfig(
        driverLogLevels=encoding.DictToAdditionalPropertyMessage(
            config,
            messages.LoggingConfig.DriverLogLevelsValue))

  def PopulateFilesByType(self, args):
    # Merges the subclass's file mapping into files_by_type; GetFilesByType
    # is expected to be provided by subclasses.
    self.files_by_type.update(self.GetFilesByType(args))

View File

@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*- #
# Copyright 2023 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for Flink Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class FlinkBase(job_base.JobBase):
  """Submit a Java or Scala Flink job to a cluster."""

  @staticmethod
  def Args(parser):
    """Parses command-line arguments specific to submitting Flink jobs.

    Args:
      parser: The argparse parser to register the Flink flags on.
    """
    parser.add_argument(
        '--savepoint',
        help=(
            'HCFS URI of the savepoint that is used to refer to the state of '
            'the previously stopped job. The new job will resume previous '
            'state from there.'
        ),
    )
    parser.add_argument(
        '--jars',
        type=arg_parsers.ArgList(),
        metavar='JAR',
        default=[],
        help=(
            'Comma-separated list of jar files to provide to the '
            'task manager classpaths.'
        ),
    )
    parser.add_argument(
        'job_args',
        nargs=argparse.REMAINDER,
        help='The job arguments to pass.',
    )
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PROPERTY=VALUE',
        help=(
            'List of key=value pairs to configure Flink. For a list of '
            'available properties, see: '
            'https://nightlies.apache.org/flink/flink-docs-master/docs/deployment/config/.'
        ),
    )
    parser.add_argument(
        '--properties-file', help=job_util.PROPERTIES_FILE_HELP_TEXT
    )
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=(
            'List of package to log4j log level pairs to configure driver '
            'logging. For example: root=FATAL,com.example=INFO.'
        ),
    )

  @staticmethod
  def GetFilesByType(args):
    """Returns a dict of files by their type (main_jar, jars, etc.)."""
    files_by_type = {
        'main_jar': args.main_jar,
        'jars': args.jars,
    }
    return files_by_type

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the flinkJob member of the given job.

    Args:
      messages: The Dataproc messages module.
      job: The job message whose flinkJob field is populated.
      files_by_type: Dict of validated (possibly staged) file URIs by type.
      logging_config: LoggingConfig message for the driver, or None.
      args: The parsed command-line arguments.
    """
    merged_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file)
    flink_job = messages.FlinkJob(
        mainJarFileUri=files_by_type['main_jar'],
        mainClass=args.main_class,
        jarFileUris=files_by_type['jars'],
        args=args.job_args or [],
        savepointUri=args.savepoint,
        loggingConfig=logging_config)
    if merged_properties:
      # Sort properties to ensure tests comparing messages not fail on ordering.
      flink_job.properties = encoding.DictToAdditionalPropertyMessage(
          merged_properties, messages.FlinkJob.PropertiesValue,
          sort_items=True)
    job.flinkJob = flink_job

View File

@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for Hadoop Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class HadoopBase(job_base.JobBase):
  """Common functionality between release tracks."""

  @staticmethod
  def Args(parser):
    """Parses command-line arguments specific to submitting Hadoop jobs.

    Args:
      parser: The argparse parser to register the Hadoop flags on.
    """
    parser.add_argument(
        '--jars',
        type=arg_parsers.ArgList(),
        metavar='JAR',
        default=[],
        help=('Comma separated list of jar files to be provided to the MR and '
              'driver classpaths.'))
    parser.add_argument(
        '--files',
        type=arg_parsers.ArgList(),
        metavar='FILE',
        default=[],
        help='Comma separated list of file paths to be provided to the job. '
        'A file path can either be a path to a local file or a path '
        'to a file already in a Cloud Storage bucket.')
    parser.add_argument(
        '--archives',
        type=arg_parsers.ArgList(),
        metavar='ARCHIVE',
        default=[],
        help=('Comma separated list of archives to be provided to the job. '
              'must be one of the following file formats: .zip, .tar, .tar.gz, '
              'or .tgz.'))
    parser.add_argument(
        'job_args',
        nargs=argparse.REMAINDER,
        help='The arguments to pass to the driver.')
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PROPERTY=VALUE',
        help='A list of key value pairs to configure Hadoop.')
    parser.add_argument(
        '--properties-file',
        help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=('A list of package to log4j log level pairs to configure driver '
              'logging. For example: root=FATAL,com.example=INFO'))

  @staticmethod
  def GetFilesByType(args):
    """Returns a dict of files by their type (jars, archives, etc.)."""
    files_by_type = {
        'main_jar': args.main_jar,
        'jars': args.jars,
        'archives': args.archives,
        'files': args.files,
    }
    return files_by_type

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the hadoopJob member of the given job.

    Args:
      messages: The Dataproc messages module.
      job: The job message whose hadoopJob field is populated.
      files_by_type: Dict of validated (possibly staged) file URIs by type.
      logging_config: LoggingConfig message for the driver, or None.
      args: The parsed command-line arguments.
    """
    merged_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file)
    hadoop_job = messages.HadoopJob(
        mainJarFileUri=files_by_type['main_jar'],
        mainClass=args.main_class,
        jarFileUris=files_by_type['jars'],
        fileUris=files_by_type['files'],
        archiveUris=files_by_type['archives'],
        args=args.job_args or [],
        loggingConfig=logging_config)
    if merged_properties:
      # Sort properties to ensure tests comparing messages not fail on ordering.
      hadoop_job.properties = encoding.DictToAdditionalPropertyMessage(
          merged_properties, messages.HadoopJob.PropertiesValue,
          sort_items=True)
    job.hadoopJob = hadoop_job

View File

@@ -0,0 +1,99 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for Hive Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class HiveBase(job_base.JobBase):
  """Common functionality between release tracks."""

  @staticmethod
  def Args(parser):
    """Performs command line parsing specific to Hive.

    Args:
      parser: The argparse parser to register the Hive flags on.
    """
    driver = parser.add_mutually_exclusive_group(required=True)
    driver.add_argument(
        '--execute', '-e',
        metavar='QUERY',
        dest='queries',
        action='append',
        default=[],
        help='A Hive query to execute as part of the job.')
    driver.add_argument(
        '--file', '-f',
        help='HCFS URI of file containing Hive script to execute as the job.')
    parser.add_argument(
        '--jars',
        type=arg_parsers.ArgList(),
        metavar='JAR',
        default=[],
        help=('Comma separated list of jar files to be provided to the '
              'Hive and MR. May contain UDFs.'))
    parser.add_argument(
        '--params',
        type=arg_parsers.ArgDict(),
        metavar='PARAM=VALUE',
        help='A list of key value pairs to set variables in the Hive queries.')
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PROPERTY=VALUE',
        help='A list of key value pairs to configure Hive.')
    parser.add_argument(
        '--properties-file',
        help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--continue-on-failure',
        action='store_true',
        help='Whether to continue if a single query fails.')

  @staticmethod
  def GetFilesByType(args):
    """Returns a dict of files by their type (jars, file)."""
    files_by_type = {
        'jars': args.jars,
        'file': args.file,
    }
    return files_by_type

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, args):
    """Populates the hiveJob member of the given job.

    Args:
      messages: The Dataproc messages module.
      job: The job message whose hiveJob field is populated.
      files_by_type: Dict of validated (possibly staged) file URIs by type.
      args: The parsed command-line arguments.
    """
    merged_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file)
    hive_job = messages.HiveJob(
        continueOnFailure=args.continue_on_failure,
        jarFileUris=files_by_type['jars'],
        queryFileUri=files_by_type['file'])
    if args.queries:
      hive_job.queryList = messages.QueryList(queries=args.queries)
    if args.params:
      hive_job.scriptVariables = encoding.DictToAdditionalPropertyMessage(
          args.params, messages.HiveJob.ScriptVariablesValue)
    if merged_properties:
      # Sort properties to ensure tests comparing messages not fail on ordering.
      hive_job.properties = encoding.DictToAdditionalPropertyMessage(
          merged_properties, messages.HiveJob.PropertiesValue, sort_items=True)
    job.hiveJob = hive_job

View File

@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for Pig Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class PigBase(job_base.JobBase):
  """Submit a Pig job to a cluster."""

  @staticmethod
  def Args(parser):
    """Performs command-line argument parsing specific to Pig.

    Args:
      parser: The argparse parser to register the Pig flags on.
    """
    driver = parser.add_mutually_exclusive_group(required=True)
    driver.add_argument(
        '--execute', '-e',
        metavar='QUERY',
        dest='queries',
        action='append',
        default=[],
        help='A Pig query to execute as part of the job.')
    driver.add_argument(
        '--file', '-f',
        help='HCFS URI of file containing Pig script to execute as the job.')
    parser.add_argument(
        '--jars',
        type=arg_parsers.ArgList(),
        metavar='JAR',
        default=[],
        help=('Comma separated list of jar files to be provided to '
              'Pig and MR. May contain UDFs.'))
    parser.add_argument(
        '--params',
        type=arg_parsers.ArgDict(),
        metavar='PARAM=VALUE',
        help='A list of key value pairs to set variables in the Pig queries.')
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PROPERTY=VALUE',
        help='A list of key value pairs to configure Pig.')
    parser.add_argument(
        '--properties-file',
        help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--continue-on-failure',
        action='store_true',
        help='Whether to continue if a single query fails.')
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=('A list of package to log4j log level pairs to configure driver '
              'logging. For example: root=FATAL,com.example=INFO'))

  @staticmethod
  def GetFilesByType(args):
    """Returns a dict of files by their type (jars, file)."""
    files_by_type = {
        'jars': args.jars,
        'file': args.file,
    }
    return files_by_type

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the pigJob member of the given job.

    Args:
      messages: The Dataproc messages module.
      job: The job message whose pigJob field is populated.
      files_by_type: Dict of validated (possibly staged) file URIs by type.
      logging_config: LoggingConfig message for the driver, or None.
      args: The parsed command-line arguments.
    """
    merged_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file)
    pig_job = messages.PigJob(
        continueOnFailure=args.continue_on_failure,
        jarFileUris=files_by_type['jars'],
        queryFileUri=files_by_type['file'],
        loggingConfig=logging_config)
    if args.queries:
      pig_job.queryList = messages.QueryList(queries=args.queries)
    if args.params:
      pig_job.scriptVariables = encoding.DictToAdditionalPropertyMessage(
          args.params, messages.PigJob.ScriptVariablesValue)
    if merged_properties:
      # Sort properties to ensure tests comparing messages not fail on ordering.
      pig_job.properties = encoding.DictToAdditionalPropertyMessage(
          merged_properties, messages.PigJob.PropertiesValue, sort_items=True)
    job.pigJob = pig_job

View File

@@ -0,0 +1,102 @@
# -*- coding: utf-8 -*- #
# Copyright 2019 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for the Presto job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class PrestoBase(job_base.JobBase):
  """Submit a Presto job to a cluster."""

  @staticmethod
  def Args(parser):
    """Parses command line arguments specific to submitting Presto jobs.

    Args:
      parser: The argparse parser to register the Presto flags on.
    """
    driver = parser.add_mutually_exclusive_group(required=True)
    driver.add_argument(
        '--execute',
        '-e',
        metavar='QUERY',
        dest='queries',
        action='append',
        default=[],
        help='A Presto query to execute.')
    driver.add_argument(
        '--file',
        '-f',
        help='HCFS URI of file containing the Presto script to execute.')
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PARAM=VALUE',
        help='A list of key value pairs to set Presto session properties.')
    parser.add_argument(
        '--properties-file',
        help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=('A list of package-to-log4j log level pairs to configure driver '
              'logging. For example: root=FATAL,com.example=INFO'))
    parser.add_argument(
        '--continue-on-failure',
        action='store_true',
        help='Whether to continue if a query fails.')
    parser.add_argument(
        '--query-output-format',
        help=('The query output display format. See the Presto documentation '
              'for supported output formats.'))
    parser.add_argument(
        '--client-tags',
        type=arg_parsers.ArgList(),
        metavar='CLIENT_TAG',
        help='A list of Presto client tags to attach to this query.')

  @staticmethod
  def GetFilesByType(args):
    """Returns a dict of files by their type (file)."""
    files_by_type = {'file': args.file}
    return files_by_type

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the prestoJob member of the given job.

    Args:
      messages: The Dataproc messages module.
      job: The job message whose prestoJob field is populated.
      files_by_type: Dict of validated (possibly staged) file URIs by type.
      logging_config: LoggingConfig message for the driver, or None.
      args: The parsed command-line arguments.
    """
    merged_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file)
    presto_job = messages.PrestoJob(
        continueOnFailure=args.continue_on_failure,
        queryFileUri=files_by_type['file'],
        loggingConfig=logging_config)
    if args.queries:
      presto_job.queryList = messages.QueryList(queries=args.queries)
    if args.query_output_format:
      presto_job.outputFormat = args.query_output_format
    if args.client_tags:
      presto_job.clientTags = args.client_tags
    if merged_properties:
      # Sort properties to ensure tests comparing messages not fail on ordering.
      presto_job.properties = encoding.DictToAdditionalPropertyMessage(
          merged_properties, messages.PrestoJob.PropertiesValue,
          sort_items=True)
    job.prestoJob = presto_job

View File

@@ -0,0 +1,158 @@
# -*- coding: utf-8 -*- #
# Copyright 2023 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for PySpark Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
@base.Hidden
class PyFlinkBase(job_base.JobBase):
  """Submit a PyFlink job to a cluster."""

  @staticmethod
  def Args(parser):
    """Performs command-line argument parsing specific to PyFlink.

    Args:
      parser: The argparse parser to register the PyFlink flags on.
    """
    parser.add_argument(
        'py_file', help='HCFS URI of the main Python file.'
    )
    parser.add_argument(
        '--savepoint',
        help='HCFS URI of the savepoint that contains the saved job progress.',
    )
    parser.add_argument(
        '--py-files',
        type=arg_parsers.ArgList(),
        metavar='PY_FILE',
        default=[],
        help=(
            'Comma-separated list of custom Python files to provide to the'
            ' job. Supports standard resource file suffixes, such as'
            ' .py, .egg, .zip and .whl. This also supports passing a directory.'
        ),
    )
    parser.add_argument(
        '--py-requirements',
        help=(
            'A requirements.txt file that defines third-party dependencies.'
            ' These dependencies are installed and added to the PYTHONPATH of'
            ' the python UDF worker.'
        ),
    )
    parser.add_argument(
        '--py-module',
        # Fixed: the help text used to reference '--pyFiles', which is the
        # Flink CLI spelling; the gcloud flag is '--py-files'.
        help=(
            'Python module with program entry point. This option should be used'
            ' with --py-files.'
        ),
    )
    parser.add_argument(
        '--jars',
        type=arg_parsers.ArgList(),
        metavar='JAR',
        default=[],
        help=(
            'Comma-separated list of jar files to provide to the '
            'task manager classpaths.'
        ),
    )
    parser.add_argument(
        '--archives',
        type=arg_parsers.ArgList(),
        metavar='ARCHIVE',
        default=[],
        help=(
            'Comma-separated list of archives to be extracted into the working'
            ' directory of the python UDF worker. Must be one of the following '
            'file formats: .zip, .tar, .tar.gz, or .tgz.'
        ),
    )
    parser.add_argument(
        'job_args',
        nargs=argparse.REMAINDER,
        help='The job arguments to pass.',
    )
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PROPERTY=VALUE',
        help=(
            'List of key=value pairs to configure PyFlink. For a list of '
            'available properties, see: '
            'https://nightlies.apache.org/flink/flink-docs-master/docs/deployment/config/'
        ),
    )
    parser.add_argument(
        '--properties-file', help=job_util.PROPERTIES_FILE_HELP_TEXT
    )
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=(
            'List of key=value pairs to configure driver logging, where the key'
            ' is a package and the value is the log4j log level. For '
            'example: root=FATAL,com.example=INFO.'
        ),
    )

  @staticmethod
  def GetFilesByType(args):
    """Returns a dict of files by their type (py_file, py_files, etc.)."""
    return {
        'py_file': args.py_file,
        'py_files': args.py_files,
        'archives': args.archives,
        'py_requirements': args.py_requirements,
        'jars': args.jars,
    }

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the pyflinkJob member of the given job.

    Args:
      messages: The Dataproc messages module.
      job: The job message whose pyflinkJob field is populated.
      files_by_type: Dict of validated (possibly staged) file URIs by type.
      logging_config: LoggingConfig message for the driver, or None.
      args: The parsed command-line arguments.
    """
    pyflink_job = messages.PyFlinkJob(
        args=args.job_args or [],
        archiveUris=files_by_type['archives'],
        pythonFileUris=files_by_type['py_files'],
        jarFileUris=files_by_type['jars'],
        pythonRequirements=files_by_type['py_requirements'],
        pythonModule=args.py_module,
        mainPythonFileUri=files_by_type['py_file'],
        loggingConfig=logging_config,
        savepointUri=args.savepoint
    )
    job_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file
    )
    if job_properties:
      # Sort properties to ensure tests comparing messages not fail on ordering.
      pyflink_job.properties = encoding.DictToAdditionalPropertyMessage(
          job_properties, messages.PyFlinkJob.PropertiesValue, sort_items=True
      )
    job.pyflinkJob = pyflink_job

View File

@@ -0,0 +1,128 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for PySpark Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class PySparkBase(job_base.JobBase):
  """Submit a PySpark job to a cluster."""

  @staticmethod
  def Args(parser):
    """Performs command-line argument parsing specific to PySpark."""
    parser.add_argument(
        'py_file', help='Main .py file to run as the driver.')
    parser.add_argument(
        '--py-files',
        type=arg_parsers.ArgList(),
        metavar='PY_FILE',
        default=[],
        help=(
            'Comma separated list of Python files to be provided to the job. '
            'Must be one of the following file formats '
            '".py, .zip, or .egg".'
        ),
    )
    parser.add_argument(
        '--jars',
        type=arg_parsers.ArgList(),
        metavar='JAR',
        default=[],
        help=(
            'Comma separated list of jar files to be provided to the '
            'executor and driver classpaths.'
        ),
    )
    parser.add_argument(
        '--files',
        type=arg_parsers.ArgList(),
        metavar='FILE',
        default=[],
        help=(
            'Comma separated list of files to be placed in the working '
            'directory of both the app driver and executors.'
        ),
    )
    parser.add_argument(
        '--archives',
        type=arg_parsers.ArgList(),
        metavar='ARCHIVE',
        default=[],
        help=(
            'Comma separated list of archives to be extracted into the working '
            'directory of each executor. '
            'Must be one of the following file formats: .zip, .tar, .tar.gz, '
            'or .tgz.'
        ),
    )
    parser.add_argument(
        'job_args',
        nargs=argparse.REMAINDER,
        help='Arguments to pass to the driver.',
    )
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PROPERTY=VALUE',
        help=(
            'List of key value pairs to configure PySpark. For a list of '
            'available properties, see: '
            'https://spark.apache.org/docs/latest/'
            'configuration.html#available-properties.'
        ),
    )
    parser.add_argument(
        '--properties-file',
        help=job_util.PROPERTIES_FILE_HELP_TEXT,
    )
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=(
            'List of key value pairs to configure driver logging, where key '
            'is a package and value is the log4j log level. For '
            'example: root=FATAL,com.example=INFO'
        ),
    )

  @staticmethod
  def GetFilesByType(args):
    """Returns a dict mapping file-type keys to the parsed argument values."""
    return dict(
        py_file=args.py_file,
        py_files=args.py_files,
        archives=args.archives,
        files=args.files,
        jars=args.jars,
    )

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the pysparkJob member of the given job."""
    merged_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file)
    pyspark_job = messages.PySparkJob(
        mainPythonFileUri=files_by_type['py_file'],
        pythonFileUris=files_by_type['py_files'],
        jarFileUris=files_by_type['jars'],
        fileUris=files_by_type['files'],
        archiveUris=files_by_type['archives'],
        args=args.job_args or [],
        loggingConfig=logging_config,
    )
    if merged_properties:
      # Sorted so message comparisons in tests don't fail on dict ordering.
      pyspark_job.properties = encoding.DictToAdditionalPropertyMessage(
          merged_properties, messages.PySparkJob.PropertiesValue,
          sort_items=True)
    job.pysparkJob = pyspark_job

View File

@@ -0,0 +1,114 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for Spark Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class SparkBase(job_base.JobBase):
  """Submit a Java or Scala Spark job to a cluster."""

  @staticmethod
  def Args(parser):
    """Parses command-line arguments specific to submitting Spark jobs."""
    parser.add_argument(
        '--jars',
        type=arg_parsers.ArgList(),
        metavar='JAR',
        default=[],
        help=(
            'Comma separated list of jar files to be provided to the '
            'executor and driver classpaths.'
        ),
    )
    parser.add_argument(
        '--files',
        type=arg_parsers.ArgList(),
        metavar='FILE',
        default=[],
        help=(
            'Comma separated list of files to be placed in the working '
            'directory of both the app driver and executors.'
        ),
    )
    parser.add_argument(
        '--archives',
        type=arg_parsers.ArgList(),
        metavar='ARCHIVE',
        default=[],
        help=(
            'Comma separated list of archives to be extracted into the working '
            'directory of each executor. '
            'Must be one of the following file formats: .zip, .tar, .tar.gz, '
            'or .tgz.'
        ),
    )
    parser.add_argument(
        'job_args',
        nargs=argparse.REMAINDER,
        help='Arguments to pass to the driver.',
    )
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PROPERTY=VALUE',
        help=(
            'List of key value pairs to configure Spark. For a list of '
            'available properties, see: '
            'https://spark.apache.org/docs/latest/'
            'configuration.html#available-properties.'
        ),
    )
    parser.add_argument(
        '--properties-file',
        help=job_util.PROPERTIES_FILE_HELP_TEXT,
    )
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=(
            'List of package to log4j log level pairs to configure driver '
            'logging. For example: root=FATAL,com.example=INFO'
        ),
    )

  @staticmethod
  def GetFilesByType(args):
    """Returns a dict of files by their type (jars, archives, etc.)."""
    # NOTE(review): args.main_jar / args.main_class are not registered in
    # Args above — presumably added by the concrete submit command; confirm.
    return dict(
        main_jar=args.main_jar,
        jars=args.jars,
        archives=args.archives,
        files=args.files,
    )

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the sparkJob member of the given job."""
    merged_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file)
    spark_job = messages.SparkJob(
        mainJarFileUri=files_by_type['main_jar'],
        mainClass=args.main_class,
        jarFileUris=files_by_type['jars'],
        fileUris=files_by_type['files'],
        archiveUris=files_by_type['archives'],
        args=args.job_args or [],
        loggingConfig=logging_config,
    )
    if merged_properties:
      # Sorted so message comparisons in tests don't fail on dict ordering.
      spark_job.properties = encoding.DictToAdditionalPropertyMessage(
          merged_properties, messages.SparkJob.PropertiesValue,
          sort_items=True)
    job.sparkJob = spark_job

View File

@@ -0,0 +1,104 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for SparkR Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class SparkRBase(job_base.JobBase):
  """Submit a SparkR job to a cluster."""

  @staticmethod
  def Args(parser):
    """Performs command-line argument parsing specific to SparkR."""
    parser.add_argument('r_file', help='Main .R file to run as the driver.')
    parser.add_argument(
        '--files',
        type=arg_parsers.ArgList(),
        metavar='FILE',
        default=[],
        help='Comma separated list of files to be placed in the working '
        'directory of both the app driver and executors.')
    parser.add_argument(
        '--archives',
        type=arg_parsers.ArgList(),
        metavar='ARCHIVE',
        default=[],
        help=(
            'Comma separated list of archives to be extracted into the working '
            'directory of each executor. '
            'Must be one of the following file formats: .zip, .tar, .tar.gz, '
            'or .tgz.'))
    parser.add_argument(
        'job_args',
        nargs=argparse.REMAINDER,
        help='Arguments to pass to the driver.')
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PROPERTY=VALUE',
        help='List of key value pairs to configure SparkR. For a list of '
        'available properties, see: '
        'https://spark.apache.org/docs/latest/'
        'configuration.html#available-properties.')
    parser.add_argument(
        '--properties-file',
        help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=('List of key value pairs to configure driver logging, where key '
              'is a package and value is the log4j log level. For '
              'example: root=FATAL,com.example=INFO'))

  @staticmethod
  def GetFilesByType(args):
    """Returns a dict of files by their type (r_file, archives, files)."""
    return {
        'r_file': args.r_file,
        'archives': args.archives,
        'files': args.files
    }

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the sparkRJob member of the given job."""
    spark_r_job = messages.SparkRJob(
        args=args.job_args or [],
        archiveUris=files_by_type['archives'],
        fileUris=files_by_type['files'],
        mainRFileUri=files_by_type['r_file'],
        loggingConfig=logging_config,
    )
    job_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file
    )
    if job_properties:
      # Use DictToAdditionalPropertyMessage with sort_items=True, matching
      # every other job type in this package, so that tests comparing
      # messages do not fail on dict ordering. (Previously this used
      # encoding.DictToMessage, which produced unsorted properties.)
      spark_r_job.properties = encoding.DictToAdditionalPropertyMessage(
          job_properties, messages.SparkRJob.PropertiesValue, sort_items=True
      )
    job.sparkRJob = spark_r_job

View File

@@ -0,0 +1,106 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for Spark Sql Job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class SparkSqlBase(job_base.JobBase):
  """Submit a Spark SQL job to a cluster."""

  @staticmethod
  def Args(parser):
    """Parses command-line arguments specific to submitting SparkSql jobs."""
    driver = parser.add_mutually_exclusive_group(required=True)
    driver.add_argument(
        '--execute', '-e',
        metavar='QUERY',
        dest='queries',
        action='append',
        default=[],
        help='A Spark SQL query to execute as part of the job.')
    driver.add_argument(
        '--file', '-f',
        help=('HCFS URI of file containing Spark SQL script to execute as '
              'the job.'))
    parser.add_argument(
        '--jars',
        type=arg_parsers.ArgList(),
        metavar='JAR',
        default=[],
        help=('Comma separated list of jar files to be provided to the '
              'executor and driver classpaths. May contain UDFs.'))
    parser.add_argument(
        '--params',
        type=arg_parsers.ArgDict(),
        metavar='PARAM=VALUE',
        # Help text fixed: this flag sets variables in the Spark SQL queries
        # (the previous text said "Hive", a copy-paste from the Hive job).
        help='A list of key value pairs to set variables in the Spark SQL '
             'queries.')
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PROPERTY=VALUE',
        # Help text fixed: these properties configure Spark SQL, not Hive.
        help='A list of key value pairs to configure Spark SQL.')
    parser.add_argument(
        '--properties-file',
        help=job_util.PROPERTIES_FILE_HELP_TEXT)
    parser.add_argument(
        '--driver-log-levels',
        type=arg_parsers.ArgDict(),
        metavar='PACKAGE=LEVEL',
        help=('A list of package to log4j log level pairs to configure driver '
              'logging. For example: root=FATAL,com.example=INFO'))

  @staticmethod
  def GetFilesByType(args):
    """Returns a dict of files by their type (jars, file)."""
    return {
        'jars': args.jars,
        'file': args.file}

  @staticmethod
  def ConfigureJob(messages, job, files_by_type, logging_config, args):
    """Populates the sparkSqlJob member of the given job."""
    spark_sql_job = messages.SparkSqlJob(
        jarFileUris=files_by_type['jars'],
        queryFileUri=files_by_type['file'],
        loggingConfig=logging_config,
    )
    if args.queries:
      spark_sql_job.queryList = messages.QueryList(queries=args.queries)
    if args.params:
      spark_sql_job.scriptVariables = encoding.DictToAdditionalPropertyMessage(
          args.params, messages.SparkSqlJob.ScriptVariablesValue
      )
    job_properties = job_util.BuildJobProperties(
        args.properties, args.properties_file
    )
    if job_properties:
      # Sort properties to ensure tests comparing messages not fail on ordering.
      spark_sql_job.properties = encoding.DictToAdditionalPropertyMessage(
          job_properties, messages.SparkSqlJob.PropertiesValue, sort_items=True
      )
    job.sparkSqlJob = spark_sql_job

View File

@@ -0,0 +1,174 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for building the dataproc clusters CLI."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.util.args import labels_util
from googlecloudsdk.core import log
class JobSubmitter(base.Command):
  """Submit a job to a cluster."""

  @classmethod
  def Args(cls, parser):
    """Register flags for this command."""
    # --labels for the job resource itself (distinct from --cluster-labels).
    labels_util.AddCreateLabelsFlags(parser)
    parser.add_argument(
        '--max-failures-per-hour',
        type=int,
        help=('Specifies the maximum number of times a job can be restarted '
              'per hour in event of failure. '
              'Default is 0 (no retries after job failure).'))
    parser.add_argument(
        '--max-failures-total',
        type=int,
        help=('Specifies the maximum total number of times a job can be '
              'restarted after the job fails. '
              'Default is 0 (no retries after job failure).'))
    parser.add_argument(
        '--driver-required-memory-mb',
        type=int,
        help=(
            'The memory allocation requested by the job driver in megabytes'
            ' (MB) for execution on the driver node group (it is used only by'
            ' clusters with a driver node group).'
        ),
    )
    parser.add_argument(
        '--driver-required-vcores',
        type=int,
        help=(
            'The vCPU allocation requested by the job driver for execution on'
            ' the driver node group (it is used only by clusters with a driver'
            ' node group).'
        ),
    )
    parser.add_argument(
        '--ttl',
        hidden=True,
        type=arg_parsers.Duration(),
        help=(
            'The maximum duration this job is allowed to run before being'
            ' killed automatically. Specified using a s, m, h, or d (seconds,'
            ' minutes, hours, or days) suffix. The minimum value is 10 minutes'
            ' (10m), and the maximum value is 14 days (14d) Run'
            ' [gcloud topic datetimes]'
            ' (https://cloud.google.com/sdk/gcloud/reference/topic/datetimes)'
            ' for information on duration formats.'
        ),
    )
    # The job is placed either on a named cluster or on a cluster selected
    # by labels; exactly one of the two must be supplied.
    cluster_placement = parser.add_mutually_exclusive_group(required=True)
    cluster_placement.add_argument(
        '--cluster', help='The Dataproc cluster to submit the job to.'
    )
    labels_util.GetCreateLabelsFlag(
        'Labels of Dataproc cluster on which to place the job.',
        'cluster-labels',
    ).AddToParser(cluster_placement)

  def Run(self, args):
    """This is what gets called when the user runs this command."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    # Unique request id doubles as the job id when the user supplied none.
    request_id = util.GetUniqueId()
    # NOTE(review): args.id, args.bucket and args.async_ are not declared in
    # Args above — presumably registered by a subclass or shared flag module;
    # confirm against the concrete commands.
    job_id = args.id if args.id else request_id
    # Don't use ResourceArgument, because --id is hidden by default
    job_ref = util.ParseJob(job_id, dataproc)
    # Subclass hook: collects the job's files (jars, py files, ...) by type.
    self.PopulateFilesByType(args)
    cluster = None
    if args.cluster is not None:
      # Resolve the named cluster; its config is used to pick a staging dir.
      cluster_ref = util.ParseCluster(args.cluster, dataproc)
      request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
          projectId=cluster_ref.projectId,
          region=cluster_ref.region,
          clusterName=cluster_ref.clusterName)
      cluster = dataproc.client.projects_regions_clusters.Get(request)
    cluster_pool = None
    if args.cluster_labels is not None:
      if 'cluster-pool' in args.cluster_labels:
        cluster_pool = args.cluster_labels['cluster-pool']
    # Stage any local job files into the staging dir before building the
    # job message (GetStagingDir / ValidateAndStageFiles are subclass hooks).
    self._staging_dir = self.GetStagingDir(
        cluster, cluster_pool, job_ref.jobId, bucket=args.bucket)
    self.ValidateAndStageFiles()
    job = dataproc.messages.Job(
        reference=dataproc.messages.JobReference(
            projectId=job_ref.projectId, jobId=job_ref.jobId),
        placement=dataproc.messages.JobPlacement(clusterName=args.cluster))
    self.ConfigureJob(dataproc.messages, job, args)
    # Driver scheduling config is only attached when BOTH values are given.
    if args.driver_required_memory_mb and args.driver_required_vcores:
      driver_scheduling_config = dataproc.messages.DriverSchedulingConfig(
          memoryMb=args.driver_required_memory_mb,
          vcores=args.driver_required_vcores)
      job.driverSchedulingConfig = driver_scheduling_config
    if args.max_failures_per_hour or args.max_failures_total or args.ttl:
      scheduling = dataproc.messages.JobScheduling(
          maxFailuresPerHour=args.max_failures_per_hour
          if args.max_failures_per_hour
          else None,
          maxFailuresTotal=args.max_failures_total
          if args.max_failures_total
          else None,
          # args.ttl was parsed by arg_parsers.Duration() into seconds; the
          # API expects a duration string like '600s'.
          ttl=str(args.ttl) + 's' if args.ttl else None,
      )
      job.scheduling = scheduling
    request = dataproc.messages.DataprocProjectsRegionsJobsSubmitRequest(
        projectId=job_ref.projectId,
        region=job_ref.region,
        submitJobRequest=dataproc.messages.SubmitJobRequest(
            job=job,
            requestId=request_id))
    job = dataproc.client.projects_regions_jobs.Submit(request)
    log.status.Print('Job [{0}] submitted.'.format(job_id))
    if not args.async_:
      # Synchronous mode: block until DONE (or ERROR), streaming driver log.
      job = util.WaitForJobTermination(
          dataproc,
          job,
          job_ref,
          message='Waiting for job completion',
          goal_state=dataproc.messages.JobStatus.StateValueValuesEnum.DONE,
          error_state=dataproc.messages.JobStatus.StateValueValuesEnum.ERROR,
          stream_driver_log=True)
      log.status.Print('Job [{0}] finished successfully.'.format(job_id))
    return job

  @staticmethod
  def ConfigureJob(messages, job, args):
    """Add type-specific job configuration to job message."""
    # Parse labels (if present)
    job.labels = labels_util.ParseCreateArgs(args, messages.Job.LabelsValue)
    job.placement.clusterLabels = labels_util.ParseCreateArgs(
        args,
        messages.JobPlacement.ClusterLabelsValue,
        labels_dest='cluster_labels')

View File

@@ -0,0 +1,102 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for the Trino job."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.jobs import base as job_base
from googlecloudsdk.command_lib.dataproc.jobs import util as job_util
class TrinoBase(job_base.JobBase):
"""Submit a Trino job to a cluster."""
@staticmethod
def Args(parser):
"""Parses command line arguments specific to submitting Trino jobs."""
driver = parser.add_mutually_exclusive_group(required=True)
driver.add_argument(
'--execute',
'-e',
metavar='QUERY',
dest='queries',
action='append',
default=[],
help='A Trino query to execute.')
driver.add_argument(
'--file',
'-f',
help='HCFS URI of file containing the Trino script to execute.')
parser.add_argument(
'--properties',
type=arg_parsers.ArgDict(),
metavar='PARAM=VALUE',
help='A list of key value pairs to set Trino session properties.')
parser.add_argument(
'--properties-file',
help=job_util.PROPERTIES_FILE_HELP_TEXT)
parser.add_argument(
'--driver-log-levels',
type=arg_parsers.ArgDict(),
metavar='PACKAGE=LEVEL',
help=('A list of package-to-log4j log level pairs to configure driver '
'logging. For example: root=FATAL,com.example=INFO'))
parser.add_argument(
'--continue-on-failure',
action='store_true',
help='Whether to continue if a query fails.')
parser.add_argument(
'--query-output-format',
help=('The query output display format. See the Trino documentation '
'for supported output formats.'))
parser.add_argument(
'--client-tags',
type=arg_parsers.ArgList(),
metavar='CLIENT_TAG',
help='A list of Trino client tags to attach to this query.')
@staticmethod
def GetFilesByType(args):
return {'file': args.file}
@staticmethod
def ConfigureJob(messages, job, files_by_type, logging_config, args):
"""Populates the trinoJob member of the given job."""
trino_job = messages.TrinoJob(
continueOnFailure=args.continue_on_failure,
queryFileUri=files_by_type['file'],
loggingConfig=logging_config)
if args.queries:
trino_job.queryList = messages.QueryList(queries=args.queries)
if args.query_output_format:
trino_job.outputFormat = args.query_output_format
if args.client_tags:
trino_job.clientTags = args.client_tags
job_properties = job_util.BuildJobProperties(
args.properties, args.properties_file)
if job_properties:
# Sort properties to ensure tests comparing messages not fail on ordering.
trino_job.properties = encoding.DictToAdditionalPropertyMessage(
job_properties, messages.TrinoJob.PropertiesValue, sort_items=True)
job.trinoJob = trino_job

View File

@@ -0,0 +1,88 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper class for jobs."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import exceptions
from googlecloudsdk.api_lib.dataproc import storage_helpers
from googlecloudsdk.core import yaml
from googlecloudsdk.core.console import console_io
# Help text shared by the --properties-file flag of every job type in this
# package; BuildJobProperties below implements the described behavior.
PROPERTIES_FILE_HELP_TEXT = """\
Path to a local file or a file in a Cloud Storage bucket containing
configuration properties for the job. The client machine running this command
must have read permission to the file.
Specify properties in the form of property=value in the text file. For example:
```
# Properties to set for the job:
key1=value1
key2=value2
# Comment out properties not used.
# key3=value3
```
If a property is set in both `--properties` and `--properties-file`, the
value defined in `--properties` takes precedence.
"""
def BuildJobProperties(arg_properties, properties_file):
  """Builds a merged dict of job properties.

  Merges properties from arg_properties and properties_file. If a property
  is set in both, the value in arg_properties takes precedence.

  Args:
    arg_properties: A dictionary of property=value pairs.
    properties_file: Path or URI to a text file with property=value lines
      and/or comments. File can be a local file or a gs:// file.

  Returns:
    A dictionary of merged properties.

  Raises:
    exceptions.Error: If the properties file cannot be read.
    exceptions.ParseError: If the properties file cannot be parsed.

  Example:
    BuildJobProperties({'foo':'bar'}, 'gs://test-bucket/job_properties.conf')
  """
  job_properties = {}
  if properties_file:
    try:
      if properties_file.startswith('gs://'):
        data = storage_helpers.ReadObject(properties_file)
      else:
        data = console_io.ReadFromFileOrStdin(properties_file, binary=False)
    except Exception as e:
      raise exceptions.Error('Cannot read properties-file: {0}'.format(e))
    try:
      yaml.allow_duplicate_keys = True
      # Turn property=value lines into YAML 'key: value' lines. Only the
      # first '=' on each line is treated as the separator so that values
      # containing '=' (e.g. URLs with query strings) survive intact;
      # replacing every '=' corrupted such values.
      yaml_data = '\n'.join(
          line.replace('=', ': ', 1) for line in data.strip().splitlines())
      key_values = yaml.load(yaml_data, round_trip=True)
      if key_values:
        for key, value in key_values.items():
          job_properties[key] = value
    except Exception:
      raise exceptions.ParseError(
          'Cannot parse properties-file: {0}, '.format(properties_file) +
          'make sure file format is a text file with list of key=value')
  if arg_properties:
    # Command-line properties win over file-supplied ones.
    job_properties.update(arg_properties)
  return job_properties

View File

@@ -0,0 +1,151 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper class for uploading user files to GCS bucket."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import os
from googlecloudsdk.api_lib.dataproc import storage_helpers
from googlecloudsdk.core.console import console_io
import six
def Upload(bucket, files):
  """Uploads local files in *files* to the given GCS bucket.

  Only entries that refer to local files are uploaded; remote URIs are
  passed through unchanged. If the destination bucket doesn't exist, the
  user is prompted for a region and the bucket is created before the first
  upload.

  Args:
    bucket: The destination GCS bucket name.
    files: A dictionary of lists of files to upload. Field name of the lists
      won't cause any behavior difference, and the structure will be kept in
      the return value.

  Returns:
    A dictionary of lists of uri of the files. The structure is the same as
    the input files.

  Example:
    Upload('my-bucket', {'jar':['my-jar.jar']}
    > {'jar':['gs://my-bucket/dependencies/my-jar.jar']}
  """
  bucket_name = _ParseBucketName(bucket)
  destination = _FormDestinationUri(bucket_name)
  # The bucket is lazily checked/created at most once, right before the
  # first actual upload.
  bucket_checked = False
  result_files = {}
  for field, uris in files.items():
    rewritten_uris = []
    pending_local = []
    for uri in uris:
      if not _IsLocal(uri):
        # Remote URIs pass through untouched.
        rewritten_uris.append(uri)
        continue
      # Expand ~ and environment variables to a reference-able local path.
      local_path = os.path.expandvars(os.path.expanduser(uri))
      pending_local.append(local_path)
      rewritten_uris.append(_FormFileDestinationUri(destination, local_path))
    if pending_local:
      if not bucket_checked:
        bucket_checked = True
        _CreateBucketIfNotExists(bucket_name)
      storage_helpers.Upload(pending_local, destination)
    result_files[field] = rewritten_uris
  return result_files
def HasLocalFiles(files):
  """Determines whether the files argument contains any local file.

  Args:
    files: A dictionary of lists of file URIs to check.

  Returns:
    True if at least one of the files is local.

  Example:
    HasLocalFiles({'jar': ['my-jar.jar', 'gs://my-bucket/my-gcs-jar.jar']})
    -> True
  """
  return any(_IsLocal(uri) for uris in files.values() for uri in uris)
def _CreateBucketIfNotExists(bucket):
  """Creates a Cloud Storage bucket if it doesn't exist.

  Prompts the user for a region when the bucket has to be created.
  """
  if not storage_helpers.GetBucket(bucket):
    # Ask the user to pick a region for the new bucket.
    prompt = ('The bucket [{}] doesn\'t exist. Please enter a '
              'Cloud Storage region to create the bucket (leave empty to '
              'create in "global" region):'.format(bucket))
    region = console_io.PromptResponse(message=prompt)
    storage_helpers.CreateBucketIfNotExists(bucket, region)
def _ParseBucketName(name):
  """Normalizes a bucket name by stripping a leading 'gs://', if present.

  The api_lib helpers expect a bare bucket name without the scheme prefix.

  Args:
    name: gs bucket name string.

  Returns:
    A name string without 'gs://' prefix.
  """
  scheme = 'gs://'
  return name[len(scheme):] if name.startswith(scheme) else name
def _IsLocal(uri):
  """Checks if a given uri represents a local file.

  A URI is treated as local when it has a Windows drive letter or carries
  no URL scheme (no 'gs://', 'hdfs://', etc. prefix).
  """
  drive, _ = os.path.splitdrive(uri)
  split_result = six.moves.urllib.parse.urlsplit(uri, allow_fragments=False)
  return drive or not split_result.scheme
def _FormDestinationUri(bucket):
  """Returns the gs:// URI of the dependencies folder inside *bucket*."""
  return 'gs://' + bucket + '/dependencies'
def _FormFileDestinationUri(destination, uri):
  """Returns the destination URI of an uploaded local file.

  Joins the file's base name onto *destination*. Mirrors the uri logic in
  storage_helpers so the computed URI matches where the file is uploaded.
  """
  file_name = os.path.basename(uri)
  return os.path.join(destination, file_name)

View File

@@ -0,0 +1,63 @@
# -*- coding: utf-8 -*- #
# Copyright 2023 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory for JupyterConfig message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.command_lib.util.apis import arg_utils
class JupyterConfigFactory(object):
  """Factory for JupyterConfig message.

  Factory to add JupyterConfig message arguments to argument parser and create
  JupyterConfig message from parsed arguments.
  """

  def __init__(self, dataproc):
    """Factory for JupyterConfig message.

    Args:
      dataproc: A api_lib.dataproc.Dataproc instance.
    """
    self.dataproc = dataproc

  def GetMessage(self, args):
    """Builds a JupyterConfig message according to user settings.

    Args:
      args: Parsed arguments.

    Returns:
      JupyterConfig: A JupyterConfig message instance.
    """
    config = self.dataproc.messages.JupyterConfig()
    if not args.kernel:
      return config
    # Map the user-facing choice string onto the API enum value.
    config.kernel = arg_utils.ChoiceToEnum(
        args.kernel,
        self.dataproc.messages.JupyterConfig.KernelValueValuesEnum)
    return config
def AddArguments(parser):
  """Adds arguments related to JupyterConfig message to the given parser.

  Args:
    parser: An argument parser to extend with the --kernel flag.
  """
  parser.add_argument(
      '--kernel',
      choices=['python', 'scala'],
      help=('Jupyter kernel type. The value could be "python" or "scala".'))

View File

@@ -0,0 +1,139 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory class for Session message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.command_lib.dataproc.sessions import (
jupyter_config_factory as jcf)
from googlecloudsdk.command_lib.dataproc.shared_messages import (
environment_config_factory as ecf)
from googlecloudsdk.command_lib.dataproc.shared_messages import (
runtime_config_factory as rcf)
from googlecloudsdk.command_lib.util.args import labels_util
class SessionMessageFactory(object):
  """Factory class for Session message.

  Configures an argument parser and creates a Session message from the
  parsed arguments.
  """

  INVALID_SESSION_TYPE_ERR_MSG = 'Invalid session type: {}.'

  def __init__(self, dataproc, runtime_config_factory_override=None,
               environment_config_factory_override=None,
               jupyter_config_factory_override=None):
    """Builder class for Session message.

    Only the flags added in AddArguments are handled. Users need to provide
    a session type specific message during message creation.

    Args:
      dataproc: A api_lib.dataproc.Dataproc instance.
      runtime_config_factory_override: Override the default
        RuntimeConfigFactory instance.
      environment_config_factory_override: Override the default
        EnvironmentConfigFactory instance.
      jupyter_config_factory_override: Override the default
        JupyterConfigFactory instance.
    """
    self.dataproc = dataproc
    # Maps each supported session config message type to the Session field
    # that carries it.
    self._session2key = {self.dataproc.messages.JupyterConfig: 'jupyterSession'}

    self.runtime_config_factory = (
        runtime_config_factory_override
        or rcf.RuntimeConfigFactory(self.dataproc, use_config_property=True))
    self.environment_config_factory = (
        environment_config_factory_override
        or ecf.EnvironmentConfigFactory(self.dataproc))
    self.jupyter_config_factory = (
        jupyter_config_factory_override
        or jcf.JupyterConfigFactory(self.dataproc))

  def GetMessage(self, args):
    """Creates a Session message from given args.

    Only the arguments added in AddArguments are handled. Users need to
    provide a session type specific message during message creation.

    Args:
      args: Parsed argument.

    Returns:
      A Session message instance.

    Raises:
      AttributeError: When session is invalid.
    """
    arguments = {}

    # Raises KeyError via _session2key lookup if the config type is unknown.
    session_config = self.jupyter_config_factory.GetMessage(args)
    arguments[self._session2key[type(session_config)]] = session_config

    if args.labels:
      arguments['labels'] = labels_util.ParseCreateArgs(
          args, self.dataproc.messages.Session.LabelsValue)

    runtime_config = self.runtime_config_factory.GetMessage(args)
    if runtime_config:
      arguments['runtimeConfig'] = runtime_config

    environment_config = self.environment_config_factory.GetMessage(args)
    if environment_config:
      arguments['environmentConfig'] = environment_config

    arguments['name'] = args.CONCEPTS.session.Parse().RelativeName()

    if args.session_template:
      arguments['sessionTemplate'] = args.session_template

    if not arguments:
      return None
    return self.dataproc.messages.Session(**arguments)
def AddArguments(parser):
  """Adds arguments related to Session message.

  Adds general Session arguments to the given parser. Session type specific
  arguments are not handled here and need to be set during factory
  instantiation.

  Args:
    parser: A argument parser.
  """
  parser.add_argument(
      '--session_template',
      help="""The session template to use for creating the session.""",
  )
  labels_util.AddCreateLabelsFlags(parser)
  _AddDependency(parser)
def _AddDependency(parser):
  """Adds flags for the nested messages a Session is composed of."""
  rcf.AddArguments(parser, use_config_property=True)
  ecf.AddArguments(parser)
  jcf.AddArguments(parser)

View File

@@ -0,0 +1,123 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory class for SessionCreateRequest message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import re
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.sessions import session_message_factory
class SessionsCreateRequestFactory(object):
  """Factory class handling SessionsCreateRequest message.

  Configures an argument parser and creates a SessionsCreateRequest message
  from the parsed arguments.
  """

  def __init__(self, dataproc, session_message_factory_override=None):
    """Factory for SessionsCreateRequest message.

    Only handles general create flags added by this class. User needs to
    provide session specific message when creating the request message.

    Args:
      dataproc: A api_lib.dataproc.Dataproc instance.
      session_message_factory_override: Override SessionMessageFactory
        instance.
    """
    self.dataproc = dataproc
    factory = session_message_factory_override
    if not factory:
      factory = session_message_factory.SessionMessageFactory(self.dataproc)
    self.session_message_factory = factory

  def GetRequest(self, args):
    """Creates a SessionsCreateRequest message.

    The factory only handles the arguments added in the AddArguments
    function. User needs to provide a session specific message instance.

    Args:
      args: Parsed arguments.

    Returns:
      SessionsCreateRequest: A configured SessionsCreateRequest.
    """
    parent = args.CONCEPTS.session.Parse().Parent().RelativeName()
    # Fall back to a generated unique ID when the user didn't supply one.
    request_id = args.request_id or util.GetUniqueId()
    session = self.session_message_factory.GetMessage(args)
    return self.dataproc.messages.DataprocProjectsLocationsSessionsCreateRequest(
        parent=parent,
        requestId=request_id,
        sessionId=args.session,
        session=session,
    )
def AddArguments(parser):
  """Add arguments related to SessionsCreateRequest message.

  Only general arguments shared by all `sessions create` commands are added
  here. Session type specific arguments need to be passed in during message
  construction (when calling GetMessage).

  Args:
    parser: A argument parser instance.
  """
  # Server-side constraint: 1-40 chars from [a-zA-Z0-9_-].
  request_id_pattern = re.compile(r'^[a-zA-Z0-9_-]{1,40}$')
  parser.add_argument(
      '--request-id',
      type=arg_parsers.CustomFunctionValidator(request_id_pattern.match, (
          'Only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens'
          ' (-) are allowed. The length must not exceed 40 characters.')),
      help=('A unique ID that identifies the request. If the service '
            'receives two session create requests with the same request_id, '
            'the second request is ignored and the operation that '
            'corresponds to the first session created and stored in the '
            'backend is returned. '
            'Recommendation: Always set this value to a UUID. '
            'The value must contain only letters (a-z, A-Z), numbers (0-9), '
            'underscores (_), and hyphens (-). The maximum length is 40 '
            'characters.'))
  parser.add_argument(
      '--max-idle',
      type=arg_parsers.Duration(),
      help="""
          The duration after which an idle session will be automatically
          terminated, for example, "20m" or "2h". A session is considered idle
          if it has no active Spark applications and no active Jupyter kernels.
          Run [gcloud topic datetimes](https://cloud.google.com/sdk/gcloud/reference/topic/datetimes)
          for information on duration formats.""")
  _AddDependency(parser)
def _AddDependency(parser):
  """Adds flags for the Session message embedded in the create request."""
  session_message_factory.AddArguments(parser)

View File

@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*- #
# Copyright 2025 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory for SparkHistoryServerConfig message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.command_lib.util.apis import arg_utils
from googlecloudsdk.generated_clients.apis.dataproc.v1.dataproc_v1_messages import AuthenticationConfig as ac
class AuthenticationConfigFactory(object):
  """Factory for AuthenticationConfig message.

  Adds arguments to an argument parser and creates an AuthenticationConfig
  message from the parsed arguments.
  """

  def __init__(self, dataproc):
    """Factory class for AuthenticationConfig message.

    Args:
      dataproc: An api_lib.dataproc.Dataproc instance.
    """
    self.dataproc = dataproc

  def GetMessage(self, args):
    """Builds an AuthenticationConfig instance.

    Args:
      args: Parsed arguments.

    Returns:
      AuthenticationConfig: An AuthenticationConfig message instance.
      None if all fields are None.
    """
    auth_type = args.user_workload_authentication_type
    if not auth_type:
      return None
    enum_value = arg_utils.ChoiceToEnum(
        auth_type, ac.UserWorkloadAuthenticationTypeValueValuesEnum)
    return self.dataproc.messages.AuthenticationConfig(
        userWorkloadAuthenticationType=enum_value)
def AddArguments(parser):
  """Adds AuthenticationConfig related arguments to the parser.

  Args:
    parser: An argument parser to extend.
  """
  # Fix: docstring previously said "aprser" (typo for "parser").
  parser.add_argument(
      '--user-workload-authentication-type',
      help=(
          'Whether to use END_USER_CREDENTIALS or SERVICE_ACCOUNT to run'
          ' the workload.'
      ),
  )

View File

@@ -0,0 +1,99 @@
# -*- coding: utf-8 -*- #
# Copyright 2023 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory for AutotuningConfig message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.util.apis import arg_utils
from googlecloudsdk.generated_clients.apis.dataproc.v1.dataproc_v1_messages import AutotuningConfig as ac
class AutotuningConfigFactory(object):
  """Factory for AutotuningConfig message.

  Adds AutotuningConfig related arguments to an argument parser and creates
  an AutotuningConfig message from the parsed arguments.
  """

  def __init__(self, dataproc):
    """Factory for AutotuningConfig message.

    Args:
      dataproc: An api_lib.dataproc.Dataproc instance.
    """
    self.dataproc = dataproc

  def GetMessage(self, args):
    """Builds an AutotuningConfig message instance.

    Args:
      args: Parsed arguments.

    Returns:
      AutotuningConfig: An AutotuningConfig message instance. Returns
      None if all fields are None.
    """
    scenarios = []
    if args.autotuning_scenarios:
      # Explicit scenarios win over the blanket --enable-autotuning flag.
      scenarios = [
          arg_utils.ChoiceToEnum(choice, ac.ScenariosValueListEntryValuesEnum)
          for choice in args.autotuning_scenarios
      ]
    elif args.enable_autotuning:
      scenarios = [ac.ScenariosValueListEntryValuesEnum.AUTO]
    if not scenarios:
      return None
    return self.dataproc.messages.AutotuningConfig(scenarios=scenarios)
def AddArguments(parser):
  """Adds AutotuningConfig related arguments to the parser.

  Args:
    parser: An argument parser to extend.
  """
  # Offer every scenario except internal/unspecified values.
  scenario_choices = [
      arg_utils.EnumNameToChoice(str(sc))
      for sc in ac.ScenariosValueListEntryValuesEnum
      if sc
      not in [
          ac.ScenariosValueListEntryValuesEnum.SCENARIO_UNSPECIFIED,
          ac.ScenariosValueListEntryValuesEnum.BHJ,
          ac.ScenariosValueListEntryValuesEnum.NONE,
      ]
  ]
  scenarios_group = parser.add_mutually_exclusive_group(hidden=True)
  scenarios_group.add_argument(
      '--autotuning-scenarios',
      type=arg_parsers.ArgList(
          element_type=str,
          choices=scenario_choices,
      ),
      metavar='SCENARIO',
      default=[],
      help='Scenarios for which tunings are applied.',
      hidden=True,
  )
  scenarios_group.add_argument(
      '--enable-autotuning',
      action='store_true',
      default=False,
      # Fix: help text previously read "Enable autotuning got the workload."
      help='Enable autotuning for the workload.',
      hidden=True,
  )

View File

@@ -0,0 +1,92 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory for EnvironmentConfig message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.command_lib.dataproc.shared_messages import (
execution_config_factory as ecf)
from googlecloudsdk.command_lib.dataproc.shared_messages import (
peripherals_config_factory as pcf)
class EnvironmentConfigFactory(object):
  """Factory for EnvironmentConfig message.

  Adds EnvironmentConfig related arguments to an argument parser and creates
  an EnvironmentConfig message from the parsed arguments.
  """

  def __init__(self, dataproc, execution_config_factory_override=None,
               peripherals_config_factory_override=None):
    """Factory for EnvironmentConfig message.

    Args:
      dataproc: A api_lib.dataproc.Dataproc instance.
      execution_config_factory_override: Override the default
        ExecutionConfigFactory instance. This is a keyword argument.
      peripherals_config_factory_override: Override the default
        PeripheralsConfigFactory instance.
    """
    self.dataproc = dataproc
    self.execution_config_factory = (
        execution_config_factory_override
        or ecf.ExecutionConfigFactory(self.dataproc))
    self.peripherals_config_factory = (
        peripherals_config_factory_override
        or pcf.PeripheralsConfigFactory(self.dataproc))

  def GetMessage(self, args):
    """Builds an EnvironmentConfig message instance.

    Args:
      args: Parsed arguments.

    Returns:
      EnvironmentConfig: An EnvironmentConfig message instance. Returns None
      if all fields are None.
    """
    fields = {}

    execution_config = self.execution_config_factory.GetMessage(args)
    if execution_config:
      fields['executionConfig'] = execution_config

    peripherals_config = self.peripherals_config_factory.GetMessage(args)
    if peripherals_config:
      fields['peripheralsConfig'] = peripherals_config

    if not fields:
      return None
    return self.dataproc.messages.EnvironmentConfig(**fields)
def AddArguments(parser):
  """Adds EnvironmentConfig arguments to parser."""
  # EnvironmentConfig contributes no flags of its own; every field is
  # populated by the nested config factories.
  _AddDependency(parser)
def _AddDependency(parser):
  """Adds flags for the messages EnvironmentConfig is composed of."""
  ecf.AddArguments(parser)
  pcf.AddArguments(parser)

View File

@@ -0,0 +1,148 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory for ExecutionConfig message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.shared_messages import (
authentication_config_factory as acf)
import six
class ExecutionConfigFactory(object):
  """Factory for ExecutionConfig message.

  Adds ExecutionConfig related arguments to an argument parser and creates
  an ExecutionConfig message from the parsed arguments.
  """

  def __init__(self, dataproc, auth_config_factory_override=None):
    """Factory class for ExecutionConfig message.

    Args:
      dataproc: A api_lib.dataproc.Dataproc instance.
      auth_config_factory_override: Override the default
        AuthenticationConfigFactory instance. This is a keyword argument.
    """
    self.dataproc = dataproc
    self.auth_config_factory = (
        auth_config_factory_override
        or acf.AuthenticationConfigFactory(self.dataproc))

  def GetMessage(self, args):
    """Builds an ExecutionConfig instance.

    Builds an ExecutionConfig instance according to user settings.
    Returns None if all fields are None.

    Args:
      args: Parsed arguments.

    Returns:
      ExecutionConfig: A ExecutionConfig instance. None if all fields are
      None.
    """
    fields = {}

    if args.tags:
      fields['networkTags'] = args.tags
    if args.network:
      fields['networkUri'] = args.network
    if args.subnet:
      fields['subnetworkUri'] = args.subnet
    if args.service_account:
      fields['serviceAccount'] = args.service_account
    if args.kms_key:
      fields['kmsKey'] = args.kms_key

    # The API expects durations as a count of seconds with an 's' suffix.
    # --max-idle only exists on some surfaces, hence the getattr guard.
    if getattr(args, 'max_idle', None):
      fields['idleTtl'] = six.text_type(args.max_idle) + 's'
    if args.ttl:
      fields['ttl'] = six.text_type(args.ttl) + 's'

    if args.staging_bucket:
      fields['stagingBucket'] = args.staging_bucket

    auth_config = self.auth_config_factory.GetMessage(args)
    if auth_config:
      fields['authenticationConfig'] = auth_config

    if not fields:
      return None
    return self.dataproc.messages.ExecutionConfig(**fields)
def AddArguments(parser):
  """Adds ExecutionConfig related arguments to parser."""
  parser.add_argument(
      '--service-account',
      help='The IAM service account to be used for a batch/session job.')
  connectivity_group = parser.add_mutually_exclusive_group()
  connectivity_group.add_argument(
      '--network', help='Network URI to connect network to.')
  connectivity_group.add_argument(
      '--subnet',
      help=('Subnetwork URI to connect network to. Subnet must have Private '
            'Google Access enabled.'))
  parser.add_argument(
      '--tags',
      type=arg_parsers.ArgList(),
      metavar='TAGS',
      default=[],
      help='Network tags for traffic control.')
  parser.add_argument('--kms-key', help='Cloud KMS key to use for encryption.')
  parser.add_argument(
      '--staging-bucket',
      help="""\
      The Cloud Storage bucket to use to store job dependencies, config files,
      and job driver console output. If not specified, the default [staging bucket]
      (https://cloud.google.com/dataproc-serverless/docs/concepts/buckets) is used.
      """,
  )
  parser.add_argument(
      '--ttl',
      type=arg_parsers.Duration(),
      help="""
      The duration after the workload will be unconditionally terminated,
      for example, '20m' or '1h'. Run
      [gcloud topic datetimes](https://cloud.google.com/sdk/gcloud/reference/topic/datetimes)
      for information on duration formats.""",
  )
  acf.AddArguments(parser)

View File

@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory for PeripheralsConfig message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.command_lib.dataproc.shared_messages import (
spark_history_server_config_factory as shscf)
class PeripheralsConfigFactory(object):
  """Factory for PeripheralsConfig message.

  Adds related arguments to an argument parser and creates a
  PeripheralsConfig message from the parsed arguments.
  """

  def __init__(self, dataproc,
               spark_history_server_config_factory_override=None):
    """Factory class for PeripheralsConfig message.

    Args:
      dataproc: A api_lib.dataproc.Dataproc instance.
      spark_history_server_config_factory_override: Override the default
        SparkHistoryServerConfigFactory instance.
    """
    self.dataproc = dataproc
    self.spark_history_server_config_factory = (
        spark_history_server_config_factory_override
        or shscf.SparkHistoryServerConfigFactory(self.dataproc))

  def GetMessage(self, args):
    """Builds a PeripheralsConfig message.

    Args:
      args: Parsed arguments.

    Returns:
      PeripheralsConfig: A PeripheralsConfig message instance. None if all
      fields are None.
    """
    fields = {}

    if args.metastore_service:
      fields['metastoreService'] = args.metastore_service

    shs_config = self.spark_history_server_config_factory.GetMessage(args)
    if shs_config:
      fields['sparkHistoryServerConfig'] = shs_config

    if not fields:
      return None
    return self.dataproc.messages.PeripheralsConfig(**fields)
def AddArguments(parser):
  """Adds PeripheralsConfig related arguments to parser."""
  parser.add_argument(
      '--metastore-service',
      help=('Name of a Dataproc Metastore service to be used as an '
            'external metastore in the format: '
            '"projects/{project-id}/locations/{region}/services/'
            '{service-name}".'))
  _AddDependency(parser)
def _AddDependency(parser):
  """Adds flags for the messages PeripheralsConfig is composed of."""
  shscf.AddArguments(parser)

View File

@@ -0,0 +1,185 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory for RuntimeConfig message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import collections
from apitools.base.py import encoding
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc.shared_messages import autotuning_config_factory as standard_autotuning_config_factory
class RuntimeConfigFactory(object):
  """Factory for RuntimeConfig message.

  Adds RuntimeConfig message arguments to an argument parser and creates a
  RuntimeConfig message from the parsed arguments.
  """

  def __init__(
      self,
      dataproc,
      use_config_property=False,
      include_autotuning=False,
      include_cohort=False,
      autotuning_config_factory=None,
  ):
    """Factory for RuntimeConfig message.

    Args:
      dataproc: Api_lib.dataproc.Dataproc instance.
      use_config_property: Use --property instead of --properties
      include_autotuning: Add support for autotuning arguments.
      include_cohort: Add support for cohort argument.
      autotuning_config_factory: Override the standard AutotuningConfigFactory
        instance.
    """
    self.dataproc = dataproc
    self.use_config_property = use_config_property
    self.include_autotuning = include_autotuning
    self.include_cohort = include_cohort
    if autotuning_config_factory:
      self.autotuning_config_factory = autotuning_config_factory
    else:
      self.autotuning_config_factory = (
          standard_autotuning_config_factory.AutotuningConfigFactory(
              self.dataproc))

  def GetMessage(self, args):
    """Builds a RuntimeConfig message.

    Builds a RuntimeConfig message instance according to user settings.
    Returns None if all fields are None.

    Args:
      args: Parsed arguments.

    Returns:
      RuntimeConfig: A RuntimeConfig message instance. This function returns
      None if all fields are None.
    """
    fields = {}

    if args.container_image:
      fields['containerImage'] = args.container_image

    # Merge repeated --property dicts (or take --properties wholesale) into
    # one ordered mapping; later entries win on key collisions.
    merged_properties = collections.OrderedDict()
    if self.use_config_property:
      if args.property:
        for property_dict in args.property:
          merged_properties.update(property_dict)
    elif args.properties:
      merged_properties = args.properties
    if merged_properties:
      fields['properties'] = encoding.DictToAdditionalPropertyMessage(
          merged_properties,
          self.dataproc.messages.RuntimeConfig.PropertiesValue,
          sort_items=True,
      )

    if args.version:
      fields['version'] = args.version

    if self.include_autotuning:
      autotuning_config = self.autotuning_config_factory.GetMessage(args)
      if autotuning_config:
        fields['autotuningConfig'] = autotuning_config

    if self.include_cohort and args.cohort:
      fields['cohort'] = args.cohort

    if not fields:
      return None
    return self.dataproc.messages.RuntimeConfig(**fields)
def AddArguments(
    parser,
    use_config_property=False,
    include_autotuning=False,
    include_cohort=False,
):
  """Adds arguments related to RuntimeConfig message to the given parser."""
  parser.add_argument(
      '--container-image',
      help=(
          'Optional custom container image to use for the batch/session '
          'runtime environment. If not specified, a default container image '
          'will be used. The value should follow the container image naming '
          'format: {registry}/{repository}/{name}:{tag}, for example, '
          'gcr.io/my-project/my-image:1.2.3'
      ),
  )
  # Some surfaces take repeated --property flags; others take one
  # --properties dict.
  if use_config_property:
    parser.add_argument(
        '--property',
        type=arg_parsers.ArgDict(),
        action='append',
        metavar='PROPERTY=VALUE',
        help='Specifies configuration properties.',
    )
  else:
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        metavar='PROPERTY=VALUE',
        help="""\
        Specifies configuration properties for the workload. See
        [Dataproc Serverless for Spark documentation](https://cloud.google.com/dataproc-serverless/docs/concepts/properties)
        for the list of supported properties.""",
    )
  parser.add_argument(
      '--version',
      help=(
          'Optional runtime version. If not specified, a default '
          'version will be used.'
      ),
  )
  if include_cohort:
    cohort_flags = parser.add_mutually_exclusive_group(hidden=True)
    cohort_flags.add_argument(
        '--cohort',
        help=(
            'Cohort identifier. Identifies families of the workloads having the'
            ' similar structure and inputs, e.g. daily ETL jobs.'
        ),
        hidden=True,
    )
    cohort_flags.add_argument(
        '--autotuning-cohort',
        help=(
            'Autotuning cohort identifier. Identifies families of the workloads'
            ' having the similar structure and inputs, e.g. daily ETL jobs.'
        ),
        hidden=True,
    )
  _AddDependency(parser, include_autotuning)
def _AddDependency(parser, include_autotuning):
  """Adds flags for the messages RuntimeConfig optionally depends on."""
  if not include_autotuning:
    return
  standard_autotuning_config_factory.AddArguments(parser)

View File

@@ -0,0 +1,61 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Factory for SparkHistoryServerConfig message."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
class SparkHistoryServerConfigFactory(object):
  """Factory for SparkHistoryServerConfig message.

  Adds arguments to an argument parser and creates a SparkHistoryServerConfig
  message from the parsed arguments.
  """

  def __init__(self, dataproc):
    """Factory class for SparkHistoryServerConfig message.

    Args:
      dataproc: An api_lib.dataproc.Dataproc instance.
    """
    self.dataproc = dataproc

  def GetMessage(self, args):
    """Builds a SparkHistoryServerConfig instance.

    Args:
      args: Parsed arguments.

    Returns:
      SparkHistoryServerConfig: A SparkHistoryServerConfig message instance.
      None if all fields are None.
    """
    cluster = args.history_server_cluster
    if not cluster:
      return None
    return self.dataproc.messages.SparkHistoryServerConfig(
        dataprocCluster=cluster)
def AddArguments(parser):
  """Adds SparkHistoryServerConfig related arguments to the parser.

  Args:
    parser: An argument parser to extend.
  """
  # Fix: docstring previously said "aprser" (typo for "parser").
  parser.add_argument(
      '--history-server-cluster',
      help=('Spark History Server configuration for the batch/session job. '
            'Resource name of an existing Dataproc cluster to act as a '
            'Spark History Server for the workload in the format: "projects/'
            '{project_id}/regions/{region}/clusters/{cluster_name}".'))

View File

@@ -0,0 +1,286 @@
# -*- coding: utf-8 -*- #
# Copyright 2025 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module for user service account mapping API support.
Typical usage (update command):
# When defining arguments
user_sa_mapping_util.AddUpdateUserSaMappingFlags(parser)
# When running the command
user_sa_mapping_diff = user_sa_mapping_util.Diff.FromUpdateArgs(args)
if user_sa_mapping_diff.HasUpdates():
orig_resource = Get(...) # to prevent unnecessary Get calls
user_sa_mapping_update = user_sa_mapping_diff.Apply(user_sa_mapping_cls,
orig_resource.user_sa_mapping)
if user_sa_mapping_update.needs_update:
new_resource.user_sa_mapping = user_sa_mapping_update.user_sa_mapping
field_mask.append('user_sa_mapping')
Update(..., new_resource)
# Or alternatively, when running the command
user_sa_mapping_update = user_sa_mapping_util.ProcessUpdateArgsLazy(
args, user_sa_mapping_cls, lambda: Get(...).user_sa_mapping)
if user_sa_mapping_update.needs_update:
new_resource.user_sa_mapping = user_sa_mapping_update.user_sa_mapping
field_mask.append('user_sa_mapping')
Update(..., new_resource)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import arg_parsers
import six
def AddUpdateUserSaMappingFlags(parser):
  """Registers the user-to-service-account mapping flags for update commands.

  The flags form a mutually exclusive choice: either incremental edits
  (--add-user-mappings / --remove-user-mappings) or a wholesale replacement
  via --identity-config-file.

  Args:
    parser: The argparse parser to add the flags to.
  """
  # The two update styles cannot be combined in a single invocation.
  mutex_group = parser.add_group(mutex=True)
  incremental_group = mutex_group.add_group()
  incremental_group.add_argument(
      '--add-user-mappings',
      metavar='KEY=VALUE',
      type=arg_parsers.ArgDict(),
      action=arg_parsers.UpdateAction,
      help="""\
      List of user-to-service-account mappings to add to current mappings.
      If a mapping exists, its value is modified; otherwise, the new
      mapping is added.
      """,
  )
  incremental_group.add_argument(
      '--remove-user-mappings',
      metavar='KEY',
      type=arg_parsers.ArgList(),
      action=arg_parsers.UpdateAction,
      help="""\
      List of user-to-service-account mappings to remove from the
      current mappings. If a mapping does not exist, it is ignored.
      """,
  )
  mutex_group.add_argument(
      '--identity-config-file',
      help="""\
      Path to a YAML (or JSON) file that contains the configuration for [Secure Multi-Tenancy](/dataproc/docs/concepts/iam/sa-multi-tenancy)
      on the cluster. The path can be a Cloud Storage URL (example: 'gs://path/to/file')
      or a local filesystem path. The mappings provided in the file will overwrite existing mappings.
      The YAML file is formatted as follows:
      ```
      # Mapping header (first line) required.
      user_service_account_mapping:
      bob@company.com: service-account-bob@project.iam.gserviceaccount.com
      alice@company.com: service-account-alice@project.iam.gserviceaccount.com
      ```
      """,
  )
def GetAddUserSaMappingDictFromArgs(args):
  """Returns the user-to-service-account mappings requested for addition.

  Args:
    args: The parsed args.

  Returns:
    {str: str} or None, the value of the --add-user-mappings flag.
  """
  add_mappings = args.add_user_mappings
  return add_mappings
def GetRemoveUserSaMappingListFromArgs(args):
  """Returns the mapping keys requested for removal.

  Args:
    args: The parsed args.

  Returns:
    List[str] or None, the value of the --remove-user-mappings flag.
  """
  remove_keys = args.remove_user_mappings
  return remove_keys
class UpdateResult(object):
  """Outcome of applying a Diff to an existing mapping proto.

  Attributes:
    needs_update: bool, True iff the diff changed the existing user service
      account mapping proto. When False, the stored mapping is identical to
      the original and must not be read via the `user_sa_mapping` property.
  """

  def __init__(self, needs_update, user_sa_mapping):
    self.needs_update = needs_update
    self._user_sa_mapping = user_sa_mapping

  @property
  def user_sa_mapping(self):
    """The new user service account mapping.

    Raises:
      ValueError: if needs_update is False.
    """
    if self.needs_update:
      return self._user_sa_mapping
    raise ValueError(
        'If no update is needed (self.needs_update == False), '
        'checking user service account mapping is unnecessary.'
    )

  def GetOrNone(self):
    """Returns the new mapping if an update is needed, or None otherwise.

    NOTE: If this returns None, do not include the user service account
    mapping field in the update's field mask; doing so could inadvertently
    clear the mapping on the resource.
    """
    # Equivalent to reading the property and treating ValueError as "no
    # change" — branch directly instead of raising and catching.
    if self.needs_update:
      return self._user_sa_mapping
    return None
class Diff(object):
  """A set of edits (additions and removals) to a user service account mapping."""

  def __init__(self, add_user_mapping=None, remove_user_mapping=None):
    """Initialize a Diff.

    Args:
      add_user_mapping: {str: str}, mappings to add (or overwrite if the key
        already exists).
      remove_user_mapping: List[str], mapping keys to remove; unknown keys
        are ignored.
    """
    self._add_user_mapping = add_user_mapping
    self._remove_user_mapping = remove_user_mapping

  def _AddUserSaMapping(self, new_user_sa_mapping):
    # Merge into a copy so the caller's dict is never mutated.
    merged = dict(new_user_sa_mapping)
    merged.update(self._add_user_mapping)
    return merged

  def _RemoveUserSaMapping(self, new_user_sa_mapping):
    # Build a filtered copy; missing keys are silently ignored.
    removals = set(self._remove_user_mapping)
    return {
        key: value
        for key, value in new_user_sa_mapping.items()
        if key not in removals
    }

  def _GetExistingUserSaMappingDict(self, user_sa_mapping):
    # An absent proto is treated the same as an empty mapping.
    if not user_sa_mapping:
      return {}
    return {
        prop.key: prop.value for prop in user_sa_mapping.additionalProperties
    }

  def _PackageUserSaMapping(self, user_sa_mapping_cls, user_sa_mapping):
    """Converts a plain dict into the mapping message class.

    Args:
      user_sa_mapping_cls: The class to instantiate, which should have an
        `AdditionalProperty` inner class and an `additionalProperties`
        attribute.
      user_sa_mapping: A dictionary where keys are user identifiers and
        values are service account identifiers.

    Returns:
      An instance of `user_sa_mapping_cls` populated with the key-value
      pairs from `user_sa_mapping`.
    """
    # Sorted for test stability
    entries = [
        user_sa_mapping_cls.AdditionalProperty(key=key, value=value)
        for key, value in sorted(six.iteritems(user_sa_mapping))
    ]
    return user_sa_mapping_cls(additionalProperties=entries)

  def HasUpdates(self):
    """Returns True iff this diff contains any additions or removals."""
    return bool(self._add_user_mapping) or bool(self._remove_user_mapping)

  def Apply(self, user_sa_mapping_cls, existing_user_sa_mapping=None):
    """Apply this Diff to the existing user service account mapping.

    Args:
      user_sa_mapping_cls: type, the UserServiceAccountMappingValue class for
        the resource.
      existing_user_sa_mapping: UserServiceAccountMappingValue, the existing
        mapping message for the original resource (or None, which is treated
        the same as an empty mapping).

    Returns:
      UpdateResult, the result of applying the diff.
    """
    before = self._GetExistingUserSaMappingDict(existing_user_sa_mapping)
    after = dict(before)
    if self._add_user_mapping:
      after = self._AddUserSaMapping(after)
    if self._remove_user_mapping:
      after = self._RemoveUserSaMapping(after)
    return UpdateResult(
        after != before,
        self._PackageUserSaMapping(user_sa_mapping_cls, after),
    )

  @classmethod
  def FromUpdateArgs(cls, args):
    """Builds a Diff from the --add/--remove-user-mappings flag values."""
    return cls(args.add_user_mappings, args.remove_user_mappings)
def ProcessUpdateArgsLazy(args, user_sa_mapping_cls, orig_user_sa_mapping_thunk):
  """Returns the result of applying the diff constructed from args.

  Lazily fetches the original user service account mapping value: the thunk
  is only evaluated when the diff actually contains changes, avoiding an
  unnecessary Get call otherwise.

  Args:
    args: argparse.Namespace, the parsed arguments with add_user_mappings and
      remove_user_mappings.
    user_sa_mapping_cls: type, the UserServiceAccountMappingValue class for
      the new user service account mapping.
    orig_user_sa_mapping_thunk: callable, a thunk which will return the
      original user_service_account_mapping object when evaluated.

  Returns:
    UpdateResult: the result of applying the diff.
  """
  diff = Diff.FromUpdateArgs(args)
  if diff.HasUpdates():
    original = orig_user_sa_mapping_thunk()
  else:
    original = None
  return diff.Apply(user_sa_mapping_cls, original)

View File

@@ -0,0 +1,114 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for dataproc workflow template add-job CLI."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.util.args import labels_util
# Fields to filter on export: these server-populated fields are removed from
# a template dict by Filter() below.
TEMPLATE_FIELDS = ['id', 'name', 'version', 'createTime', 'updateTime']
def AddWorkflowTemplatesArgs(parser, api_version):
  """Register flags for this command.

  Adds the create-labels flags, the workflow-template resource argument, and
  the --step-id / --start-after ordering flags.

  Args:
    parser: The argparse parser to add the flags to.
    api_version: str, the Dataproc API version used for the template
      resource argument.
  """
  labels_util.AddCreateLabelsFlags(parser)
  flags.AddTemplateResourceArg(
      parser, 'add job to', api_version, positional=False
  )
  parser.add_argument(
      '--step-id',
      required=True,
      type=str,
      help='The step ID of the job in the workflow template.',
  )
  parser.add_argument(
      '--start-after',
      metavar='STEP_ID',
      type=arg_parsers.ArgList(element_type=str, min_length=1),
      help='(Optional) List of step IDs to start this job after.',
  )
def AddDagTimeoutFlag(parser, is_required):
  """Adds the --dag-timeout duration flag to the parser.

  Args:
    parser: The argparse parser to add the flag to.
    is_required: bool, whether the flag is mandatory.
  """
  parser.add_argument(
      '--dag-timeout',
      required=is_required,
      type=arg_parsers.Duration(),
      help="""\
      The duration for which a DAG of jobs can run before being
      auto-cancelled, such as "10m" or "16h".
      See $ gcloud topic datetimes for information on duration formats.
      """)
def AddKmsKeyFlag(parser, is_required):
  """Adds the --kms-key flag to the parser.

  Args:
    parser: The argparse parser to add the flag to.
    is_required: bool, whether the flag is mandatory.
  """
  parser.add_argument(
      '--kms-key',
      required=is_required,
      type=str,
      help="""\
      The KMS key used to encrypt sensitive data in the workflow template.
      """,
  )
def GenerateEncryptionConfig(kms_key, dataproc):
  """Builds a workflow-template encryption config for the given KMS key.

  Args:
    kms_key: str, the KMS key used to encrypt sensitive data in the template.
    dataproc: The Dataproc API wrapper that provides the messages module.

  Returns:
    A GoogleCloudDataprocV1WorkflowTemplateEncryptionConfig with kmsKey set.
  """
  config_cls = (
      dataproc.messages.GoogleCloudDataprocV1WorkflowTemplateEncryptionConfig
  )
  config = config_cls()
  config.kmsKey = kms_key
  return config
def CreateWorkflowTemplateOrderedJob(args, dataproc):
  """Create an ordered job for workflow template.

  Args:
    args: Parsed arguments carrying step_id and (optionally) start_after.
    dataproc: The Dataproc API wrapper that provides the messages module.

  Returns:
    An OrderedJob message with stepId set and, when --start-after was given,
    prerequisiteStepIds populated.
  """
  job = dataproc.messages.OrderedJob(stepId=args.step_id)
  # Prerequisites are only attached when the flag was actually supplied.
  if args.start_after:
    job.prerequisiteStepIds = args.start_after
  return job
def AddJobToWorkflowTemplate(args, dataproc, ordered_job):
  """Add an ordered job to the workflow template.

  Fetches the template named by args, appends the job to its job list, and
  writes the template back via the Update API.

  Args:
    args: Parsed arguments carrying the workflow_template resource and
      version.
    dataproc: The Dataproc API wrapper.
    ordered_job: The OrderedJob message to append.

  Returns:
    The response of the workflow-template Update call.
  """
  template_ref = args.CONCEPTS.workflow_template.Parse()
  workflow_template = dataproc.GetRegionsWorkflowTemplate(
      template_ref, args.version)
  # A freshly fetched template may have jobs=None; normalize before append.
  if workflow_template.jobs is None:
    workflow_template.jobs = [ordered_job]
  else:
    workflow_template.jobs.append(ordered_job)
  return dataproc.client.projects_regions_workflowTemplates.Update(
      workflow_template)
def ConfigureOrderedJob(messages, job, args):
  """Add type-specific job configuration to job message.

  Args:
    messages: The Dataproc messages module.
    job: The OrderedJob message to configure in place.
    args: Parsed arguments; label flags (if present) are read from here.
  """
  # Attach any labels supplied via the create-labels flags.
  parsed_labels = labels_util.ParseCreateArgs(
      args, messages.OrderedJob.LabelsValue)
  job.labels = parsed_labels
def Filter(template):
for field in TEMPLATE_FIELDS:
if field in template:
del template[field]