feat: Add new gcloud commands, API clients, and third-party libraries across various services.

This commit is contained in:
2026-01-01 20:26:35 +01:00
parent 5e23cbece0
commit a19e592eb7
25221 changed files with 8324611 additions and 0 deletions

View File

@@ -0,0 +1,54 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The command group for cloud dataproc clusters."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
@base.UniverseCompatible
@base.ReleaseTracks(
    base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA, base.ReleaseTrack.GA
)
class Clusters(base.Group):
  """Create and manage Dataproc clusters.

  Create and manage Dataproc clusters.

  ## EXAMPLES

  To create a cluster, run:

    $ {command} create my-cluster --region=us-central1

  To resize a cluster, run:

    $ {command} update my-cluster --region=us-central1 --num-workers 5

  To delete a cluster, run:

    $ {command} delete my-cluster --region=us-central1

  To view the details of a cluster, run:

    $ {command} describe my-cluster --region=us-central1

  To see the list of all clusters, run:

    $ {command} list
  """

View File

@@ -0,0 +1,177 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create cluster command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import compute_helpers
from googlecloudsdk.api_lib.dataproc import constants
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.calliope import base
from googlecloudsdk.calliope import exceptions
from googlecloudsdk.command_lib.dataproc import clusters
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.kms import resource_args as kms_resource_args
from googlecloudsdk.command_lib.util.apis import arg_utils
from googlecloudsdk.command_lib.util.args import labels_util
from googlecloudsdk.core import properties
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.GA)
class Create(base.CreateCommand):
  """Create a cluster."""

  # DEPRECATED Beta release track should no longer be used, Google Cloud
  # no longer supports it.
  # This flag gates the beta-only argument surface and request options used
  # throughout this class; the CreateBeta subclass flips it to True.
  BETA = False

  detailed_help = {
      'EXAMPLES': """\
          To create a cluster, run:

            $ {command} my-cluster --region=us-central1
          """
  }

  @classmethod
  def Args(cls, parser):
    """Registers the flags for the cluster create command.

    Args:
      parser: The argparse parser for the command.
    """
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    base.ASYNC_FLAG.AddToParser(parser)
    flags.AddClusterResourceArg(parser, 'create', dataproc.api_version)
    clusters.ArgsForClusterRef(
        parser,
        dataproc,
        cls.BETA,
        cls.ReleaseTrack() == base.ReleaseTrack.ALPHA,
        include_ttl_config=True,
        # GKE platform args are only supported in the default universe.
        include_gke_platform_args=cls.BETA and properties.IsDefaultUniverse(),
        # Driver pools are only supported in the default universe.
        include_driver_pool_args=properties.IsDefaultUniverse(),
    )
    # Add arguments for failure action for primary workers
    if not cls.BETA:
      parser.add_argument(
          '--action-on-failed-primary-workers',
          choices={
              'NO_ACTION': 'take no action',
              'DELETE': 'delete the failed primary workers',
              'FAILURE_ACTION_UNSPECIFIED': 'failure action is not specified'
          },
          type=arg_utils.ChoiceToEnumName,
          help="""
        Failure action to take when primary workers fail during cluster creation
        """)
    # Add gce-pd-kms-key args
    kms_flag_overrides = {
        'kms-key': '--gce-pd-kms-key',
        'kms-keyring': '--gce-pd-kms-key-keyring',
        'kms-location': '--gce-pd-kms-key-location',
        'kms-project': '--gce-pd-kms-key-project'
    }
    kms_resource_args.AddKmsKeyResourceArg(
        parser,
        'cluster',
        flag_overrides=kms_flag_overrides,
        name='--gce-pd-kms-key')

  @staticmethod
  def ValidateArgs(args):
    """Validates create flags, rejecting the legacy zero-workers property.

    Args:
      args: The parsed argument namespace.

    Raises:
      exceptions.InvalidArgumentException: If the deprecated allow-zero-workers
        property is supplied via --properties.
    """
    if constants.ALLOW_ZERO_WORKERS_PROPERTY in args.properties:
      raise exceptions.InvalidArgumentException(
          '--properties',
          'Instead of %s, use gcloud beta dataproc clusters create '
          '--single-node to deploy single node clusters' %
          constants.ALLOW_ZERO_WORKERS_PROPERTY)
    clusters.ValidateReservationAffinityGroup(args)

  def Run(self, args):
    """Builds the Cluster message from args and issues the create request."""
    self.ValidateArgs(args)
    dataproc = dp.Dataproc(self.ReleaseTrack())
    cluster_ref = args.CONCEPTS.cluster.Parse()
    compute_resources = compute_helpers.GetComputeResources(
        self.ReleaseTrack(), cluster_ref.clusterName, cluster_ref.region)
    cluster_config = clusters.GetClusterConfig(
        args,
        dataproc,
        cluster_ref.projectId,
        compute_resources,
        self.BETA,
        self.ReleaseTrack() == base.ReleaseTrack.ALPHA,
        include_ttl_config=True,
        include_gke_platform_args=self.BETA and properties.IsDefaultUniverse())
    action_on_failed_primary_workers = None
    if not self.BETA:
      # --action-on-failed-primary-workers is only registered on the non-beta
      # surface (see Args); convert the user's choice string to the enum.
      action_on_failed_primary_workers = arg_utils.ChoiceToEnum(
          args.action_on_failed_primary_workers,
          dataproc.messages.DataprocProjectsRegionsClustersCreateRequest
          .ActionOnFailedPrimaryWorkersValueValuesEnum)
    cluster = dataproc.messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)
    self.ConfigureCluster(dataproc.messages, args, cluster)
    return clusters.CreateCluster(
        dataproc,
        cluster_ref,
        cluster,
        args.async_,
        args.timeout,
        enable_create_on_gke=self.BETA,
        action_on_failed_primary_workers=action_on_failed_primary_workers)

  @staticmethod
  def ConfigureCluster(messages, args, cluster):
    """Performs any additional configuration of the cluster."""
    cluster.labels = labels_util.ParseCreateArgs(args,
                                                 messages.Cluster.LabelsValue)
# DEPRECATED Beta & Alpha release tracks should no longer be used, Google Cloud
# no longer supports them.
@base.ReleaseTracks(base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA)
class CreateBeta(Create):
  """Create a cluster."""

  # Enables the beta-only argument surface and request options in the shared
  # Create implementation.
  BETA = True

  @classmethod
  def Args(cls, parser):
    """Registers the GA flags plus the beta-only cluster flags."""
    super(CreateBeta, cls).Args(parser)
    clusters.BetaArgsForClusterRef(parser)

  @staticmethod
  def ValidateArgs(args):
    """Runs GA validation, then requires an explicit accelerator type."""
    Create.ValidateArgs(args)
    accelerator_flags = (
        ('--master-accelerator', args.master_accelerator),
        ('--worker-accelerator', args.worker_accelerator),
    )
    for flag, accelerator in accelerator_flags:
      if accelerator and 'type' not in accelerator:
        raise exceptions.InvalidArgumentException(
            flag, 'accelerator type must be specified. '
            'e.g. %s type=nvidia-tesla-k80,count=2' % flag)

View File

@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*- #
# Copyright 2018 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create a cluster from a file."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import clusters
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.export import util as export_util
from googlecloudsdk.core.console import console_io
@base.ReleaseTracks(base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA)
class CreateFromFile(base.CreateCommand):
  """Create a cluster from a file."""

  detailed_help = {
      'EXAMPLES': """
To create a cluster from a YAML file, run:

  $ {command} --file=cluster.yaml
"""
  }

  @classmethod
  def Args(cls, parser):
    """Registers the flags for creating a cluster from a file."""
    parser.add_argument(
        '--file',
        required=True,
        help="""
The path to a YAML file containing a Dataproc Cluster resource.

For more information, see:
https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#Cluster.
""")
    # TODO(b/80197067): Move defaults to a common location.
    flags.AddTimeoutFlag(parser, default='35m')
    flags.AddRegionFlag(parser)
    base.ASYNC_FLAG.AddToParser(parser)

  def Run(self, args):
    """Parses the file into a Cluster message and issues the create call."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    # '-' reads the resource from stdin.
    raw_yaml = console_io.ReadFromFileOrStdin(args.file or '-', binary=False)
    cluster_message = export_util.Import(
        message_type=dataproc.messages.Cluster, stream=raw_yaml)
    ref = util.ParseCluster(cluster_message.clusterName, dataproc)
    return clusters.CreateCluster(
        dataproc, ref, cluster_message, args.async_, args.timeout)

View File

@@ -0,0 +1,79 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Delete cluster command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.core import log
from googlecloudsdk.core.console import console_io
class Delete(base.DeleteCommand):
  """Delete a cluster."""

  detailed_help = {
      'EXAMPLES': """\
          To delete a cluster, run:

            $ {command} my-cluster --region=us-central1
          """,
  }

  @classmethod
  def Args(cls, parser):
    """Registers the flags for cluster deletion."""
    base.ASYNC_FLAG.AddToParser(parser)
    flags.AddTimeoutFlag(parser)
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    flags.AddClusterResourceArg(parser, 'delete', dataproc.api_version)

  def Run(self, args):
    """Prompts for confirmation, then deletes the cluster."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    ref = args.CONCEPTS.cluster.Parse()
    delete_request = dataproc.messages.DataprocProjectsRegionsClustersDeleteRequest(
        clusterName=ref.clusterName,
        region=ref.region,
        projectId=ref.projectId,
        requestId=util.GetUniqueId())
    # Deletion is destructive, so require explicit confirmation first.
    console_io.PromptContinue(
        message="The cluster '{0}' and all attached disks will be "
        'deleted.'.format(ref.clusterName),
        cancel_on_no=True,
        cancel_string='Deletion aborted by user.')
    operation = dataproc.client.projects_regions_clusters.Delete(delete_request)
    if args.async_:
      log.status.Print('Deleting [{0}] with operation [{1}].'.format(
          ref, operation.name))
      return operation
    finished_operation = util.WaitForOperation(
        dataproc,
        operation,
        message='Waiting for cluster deletion operation',
        timeout_s=args.timeout)
    log.DeletedResource(ref)
    return finished_operation

View File

@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Describe cluster command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import flags
class Describe(base.DescribeCommand):
  """View the details of a cluster."""

  detailed_help = {
      'EXAMPLES': """\
          To view the details of a cluster, run:

            $ {command} my-cluster --region=us-central1
          """,
  }

  @classmethod
  def Args(cls, parser):
    """Registers the cluster resource argument."""
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    flags.AddClusterResourceArg(parser, 'describe', dataproc.api_version)

  def Run(self, args):
    """Fetches and returns the Cluster resource."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    ref = args.CONCEPTS.cluster.Parse()
    get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=ref.projectId,
        region=ref.region,
        clusterName=ref.clusterName)
    return dataproc.client.projects_regions_clusters.Get(get_request)

View File

@@ -0,0 +1,195 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Diagnose cluster command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from apitools.base.py import encoding
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import exceptions
from googlecloudsdk.api_lib.dataproc import storage_helpers
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.calliope import actions
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.util.apis import arg_utils
from googlecloudsdk.core import log
from googlecloudsdk.core.util import retry
@base.UniverseCompatible
class Diagnose(base.Command):
  """Run a detailed diagnostic on a cluster."""

  detailed_help = {
      'EXAMPLES': """
          To diagnose a cluster, run:

            $ {command} my-cluster --region=us-central1
          """
  }

  @classmethod
  def Args(cls, parser):
    """Registers the flags for the diagnose command.

    Args:
      parser: The argparse parser for the command.
    """
    # 26m is backend timeout + 4m for safety buffer.
    flags.AddTimeoutFlag(parser, default='30m')
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    flags.AddClusterResourceArg(parser, 'diagnose', dataproc.api_version)
    Diagnose.addDiagnoseFlags(parser, dataproc)

  @staticmethod
  def _GetValidTarballAccessChoices(dataproc):
    """Returns the valid --tarball-access choice strings.

    The unspecified enum value is excluded because it is not a meaningful
    user-facing choice.
    """
    tarball_access_enums = (
        dataproc.messages.DiagnoseClusterRequest.TarballAccessValueValuesEnum
    )
    return [
        arg_utils.ChoiceToEnumName(n)
        for n in tarball_access_enums.names()
        if n != 'TARBALL_ACCESS_UNSPECIFIED'
    ]

  @staticmethod
  def addDiagnoseFlags(parser, dataproc):
    """Adds the diagnose-specific flags to the parser."""
    parser.add_argument(
        '--tarball-access',
        type=arg_utils.ChoiceToEnumName,
        choices=Diagnose._GetValidTarballAccessChoices(dataproc),
        help='Target access privileges for diagnostic tarball.')
    parser.add_argument(
        '--start-time',
        help='Time instant to start the diagnosis from (in ' +
        '%Y-%m-%dT%H:%M:%S.%fZ format).')
    parser.add_argument(
        '--end-time',
        help='Time instant to stop the diagnosis at (in ' +
        '%Y-%m-%dT%H:%M:%S.%fZ format).')
    # --job-id and --yarn-application-id are deprecated singular forms kept
    # hidden for backwards compatibility; the plural flags below replace them.
    parser.add_argument(
        '--job-id',
        hidden=True,
        help='The job on which to perform the diagnosis.',
        action=actions.DeprecationAction(
            '--job-id',
            warn=(
                'The {flag_name} option is deprecated and will be removed in'
                ' upcoming release; use --job-ids instead.'
            ),
            removed=False,
        ),
    )
    parser.add_argument(
        '--yarn-application-id',
        hidden=True,
        help='The yarn application on which to perform the diagnosis.',
        action=actions.DeprecationAction(
            '--yarn-application-id',
            warn=(
                'The {flag_name} option is deprecated and will be removed in'
                ' upcoming release; use --yarn-application-ids instead.'
            ),
            removed=False,
        ),
    )
    parser.add_argument(
        '--workers',
        hidden=True,
        help='A list of workers in the cluster to run the diagnostic script ' +
        'on.')
    parser.add_argument(
        '--job-ids',
        help='A list of jobs on which to perform the diagnosis.',
    )
    parser.add_argument(
        '--yarn-application-ids',
        help='A list of yarn applications on which to perform the diagnosis.',
    )
    parser.add_argument(
        '--tarball-gcs-dir',
        help='The output Cloud Storage directory for the diagnostic tarball. ' +
        'If not specified, a task-specific directory in the cluster\'s ' +
        'staging bucket will be used.'
    )

  def Run(self, args):
    """Runs the diagnose operation and streams the diagnostic output.

    Args:
      args: The parsed argument namespace.

    Returns:
      The Cloud Storage URI of the diagnostic tarball.

    Raises:
      exceptions.OperationError: If the finished operation has no response,
        or the response is missing the output URI.
    """
    dataproc = dp.Dataproc(self.ReleaseTrack())
    cluster_ref = args.CONCEPTS.cluster.Parse()
    diagnose_request = dataproc.messages.DiagnoseClusterRequest(
        job=args.job_id, yarnApplicationId=args.yarn_application_id
    )
    diagnose_request.diagnosisInterval = dataproc.messages.Interval(
        startTime=args.start_time,
        endTime=args.end_time
    )
    # The plural flags are comma-delimited strings; split them into the
    # repeated request fields.
    if args.job_ids is not None:
      diagnose_request.jobs.extend(args.job_ids.split(','))
    if args.yarn_application_ids is not None:
      diagnose_request.yarnApplicationIds.extend(
          args.yarn_application_ids.split(','))
    if args.workers is not None:
      diagnose_request.workers.extend(args.workers.split(','))
    if args.tarball_access is not None:
      tarball_access = arg_utils.ChoiceToEnum(
          args.tarball_access,
          dataproc.messages.DiagnoseClusterRequest.TarballAccessValueValuesEnum)
      diagnose_request.tarballAccess = tarball_access
    if args.tarball_gcs_dir is not None:
      diagnose_request.tarballGcsDir = args.tarball_gcs_dir
    request = dataproc.messages.DataprocProjectsRegionsClustersDiagnoseRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId,
        diagnoseClusterRequest=diagnose_request)
    operation = dataproc.client.projects_regions_clusters.Diagnose(request)
    # TODO(b/36052522): Stream output during polling.
    operation = util.WaitForOperation(
        dataproc,
        operation,
        message='Waiting for cluster diagnose operation',
        timeout_s=args.timeout)
    if not operation.response:
      raise exceptions.OperationError('Operation is missing response')
    properties = encoding.MessageToDict(operation.response)
    # Use .get() so that a response without outputUri raises the intended
    # OperationError below instead of an unhandled KeyError.
    output_uri = properties.get('outputUri')
    if not output_uri:
      raise exceptions.OperationError('Response is missing outputUri')
    log.err.Print('Output from diagnostic:')
    log.err.Print('-----------------------------------------------')
    driver_log_stream = storage_helpers.StorageObjectSeriesStream(
        output_uri)
    # A single read might not read whole stream. Try a few times.
    read_retrier = retry.Retryer(max_retrials=4, jitter_ms=None)
    try:
      read_retrier.RetryOnResult(
          lambda: driver_log_stream.ReadIntoWritable(log.err),
          sleep_ms=100,
          should_retry_if=lambda *_: driver_log_stream.open)
    except retry.MaxRetrialsException:
      log.warning(
          'Diagnostic finished successfully, '
          'but output did not finish streaming.')
    log.err.Print('-----------------------------------------------')
    return output_uri

View File

@@ -0,0 +1,229 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Enable a personal auth session on a cluster."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import time
# TODO(b/173821917): Once the Cloud SDK supports pytype, uncomment the
# following lines and then replace all of the un-annotated method signatures
# with their corresponding typed signatures that are commented out above them.
#
# import argparse
# from typing import Any, IO, List
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import exceptions
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.api_lib.util import waiter
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import clusters
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.core import log
from googlecloudsdk.core.console import console_io
from googlecloudsdk.core.console import progress_tracker
from googlecloudsdk.core.util import files
# def _inject_encrypted_credentials(dataproc: dp.Dataproc, project: str,
#                                   region: str, cluster_name: str,
#                                   cluster_uuid: str,
#                                   credentials_ciphertext: str) -> Any:
def _inject_encrypted_credentials(dataproc, project, region, cluster_name,
                                  cluster_uuid, credentials_ciphertext):
  """Inject credentials into the given cluster.

  The credentials must have already been encrypted before calling this method.

  Args:
    dataproc: The API client for calling into the Dataproc API.
    project: The project containing the cluster.
    region: The region where the cluster is located.
    cluster_name: The name of the cluster.
    cluster_uuid: The cluster UUID assigned by the Dataproc control plane.
    credentials_ciphertext: The (already encrypted) credentials to inject.

  Returns:
    An operation resource for the credential injection.
  """
  messages = dataproc.messages
  payload = messages.InjectCredentialsRequest(
      clusterUuid=cluster_uuid, credentialsCiphertext=credentials_ciphertext)
  inject_request = messages.DataprocProjectsRegionsClustersInjectCredentialsRequest(
      project='projects/' + project,
      region='regions/' + region,
      cluster='clusters/' + cluster_name,
      injectCredentialsRequest=payload)
  return dataproc.client.projects_regions_clusters.InjectCredentials(
      inject_request)
@base.DefaultUniverseOnly
@base.Hidden
@base.ReleaseTracks(
    base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA, base.ReleaseTrack.GA
)
class EnablePersonalAuthSession(base.Command):
  """Enable a personal auth session on a cluster."""

  detailed_help = {
      'EXAMPLES':
          """
          To enable a personal auth session, run:

            $ {command} my-cluster --region=us-central1
          """,
  }

  # def Args(cls, parser: argparse.ArgumentParser):
  @classmethod
  def Args(cls, parser):
    """Method called by Calliope to register flags for this command.

    Args:
      parser: An argparser parser used to register flags.
    """
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    flags.AddClusterResourceArg(parser, 'enable a personal auth session on',
                                dataproc.api_version)
    flags.AddPersonalAuthSessionArgs(parser)

  # def inject_credentials(
  #     self, dataproc: dp.Dataproc, project: str, region: str,
  #     cluster_name: str, cluster_uuid: str, cluster_key: str,
  #     access_boundary_json: str,
  #     operation_poller: waiter.CloudOperationPollerNoResources),
  #     openssl_executable: str:
  def inject_credentials(self, dataproc, project, region, cluster_name,
                         cluster_uuid, cluster_key, access_boundary_json,
                         operation_poller, openssl_executable):
    """Downscopes, encrypts, and injects credentials into the cluster.

    Args:
      dataproc: The API client for calling into the Dataproc API.
      project: The project containing the cluster.
      region: The region where the cluster is located.
      cluster_name: The name of the cluster.
      cluster_uuid: The cluster UUID assigned by the Dataproc control plane.
      cluster_key: The cluster public key used to encrypt the credentials.
      access_boundary_json: JSON access boundary used to downscope the
        injected credentials.
      operation_poller: Poller used to wait on the injection operation.
      openssl_executable: Path to the openssl binary (used with RSA keys);
        may be None when ECIES keys are in use.

    Raises:
      exceptions.PersonalAuthError: If downscoped credentials could not be
        obtained for the given access boundary.
    """
    downscoped_token = util.GetCredentials(access_boundary_json)
    if not downscoped_token:
      raise exceptions.PersonalAuthError(
          'Failure getting credentials to inject into {}'.format(cluster_name))
    credentials_ciphertext = util.PersonalAuthUtils().EncryptWithPublicKey(
        cluster_key, downscoped_token, openssl_executable)
    inject_operation = _inject_encrypted_credentials(dataproc, project, region,
                                                     cluster_name, cluster_uuid,
                                                     credentials_ciphertext)
    # A falsy operation means there is nothing to wait on.
    if inject_operation:
      waiter.WaitFor(operation_poller, inject_operation)

  # def Run(self, args: argparse.Namespace):
  def Run(self, args):
    """Injects credentials and optionally keeps refreshing them until interrupted."""
    message = ('A personal authentication session will propagate your personal '
               'credentials to the cluster, so make sure you trust the cluster '
               'and the user who created it.')
    console_io.PromptContinue(
        message=message,
        cancel_on_no=True,
        cancel_string='Enabling session aborted by user')
    dataproc = dp.Dataproc(self.ReleaseTrack())
    cluster_ref = args.CONCEPTS.cluster.Parse()
    project = cluster_ref.projectId
    region = cluster_ref.region
    cluster_name = cluster_ref.clusterName
    get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=project, region=region, clusterName=cluster_name)
    cluster = dataproc.client.projects_regions_clusters.Get(get_request)
    cluster_uuid = cluster.clusterUuid
    # The access boundary is either read from a user-supplied file or derived
    # from the project (GCS-objects boundary helper).
    if args.access_boundary:
      with files.FileReader(args.access_boundary) as abf:
        access_boundary_json = abf.read()
    else:
      access_boundary_json = flags.ProjectGcsObjectsAccessBoundary(project)
    # ECIES keys should be used by default. If tink libraries are absent from
    # the system then fallback to using RSA keys.
    cluster_key_type = 'ECIES' if util.PersonalAuthUtils(
    ).IsTinkLibraryInstalled() else 'RSA'
    cluster_key = None
    if cluster_key_type == 'ECIES':
      # Try to fetch ECIES keys from cluster control plane node's metadata.
      # If ECIES keys are not available then again fallback to RSA keys.
      cluster_key = clusters.ClusterKey(cluster, cluster_key_type)
      if not cluster_key:
        cluster_key_type = 'RSA'
    openssl_executable = None
    if cluster_key_type == 'RSA':
      cluster_key = clusters.ClusterKey(cluster, cluster_key_type)
      openssl_executable = args.openssl_command
      if not openssl_executable:
        try:
          openssl_executable = files.FindExecutableOnPath('openssl')
        except ValueError:
          log.fatal('Could not find openssl on your system. The enable-session '
                    'command requires openssl to be installed.')
    operation_poller = waiter.CloudOperationPollerNoResources(
        dataproc.client.projects_regions_operations,
        lambda operation: operation.name)
    try:
      if not cluster_key:
        raise exceptions.PersonalAuthError(
            'The cluster {} does not support personal auth.'.format(
                cluster_name))
      with progress_tracker.ProgressTracker(
          'Injecting initial credentials into the cluster {}'.format(
              cluster_name),
          autotick=True):
        self.inject_credentials(dataproc, project, region, cluster_name,
                                cluster_uuid, cluster_key, access_boundary_json,
                                operation_poller, openssl_executable)
      if not args.refresh_credentials:
        return
      update_message = (
          'Periodically refreshing credentials for cluster {}. This'
          ' will continue running until the command is interrupted'
      ).format(cluster_name)
      with progress_tracker.ProgressTracker(update_message, autotick=True):
        try:
          # Cluster keys are periodically regenerated, so fetch the latest
          # each time we inject credentials.
          cluster = dataproc.client.projects_regions_clusters.Get(get_request)
          cluster_key = clusters.ClusterKey(cluster, cluster_key_type)
          if not cluster_key:
            raise exceptions.PersonalAuthError(
                'The cluster {} does not support personal auth.'.format(
                    cluster_name))
          failure_count = 0
          # Refresh indefinitely: a successful injection resets the counter,
          # so only three *consecutive* failures end the loop.
          while failure_count < 3:
            try:
              time.sleep(30)
              self.inject_credentials(dataproc, project, region, cluster_name,
                                      cluster_uuid, cluster_key,
                                      access_boundary_json, operation_poller,
                                      openssl_executable)
              failure_count = 0
            except ValueError as err:
              log.error(err)
              failure_count += 1
          raise exceptions.PersonalAuthError(
              'Credential injection failed three times in a row, giving up...')
        except (console_io.OperationCancelledError, KeyboardInterrupt):
          # User interruption is the normal way to stop the refresh loop.
          return
    except exceptions.PersonalAuthError as err:
      log.error(err)
      return

View File

@@ -0,0 +1,121 @@
# -*- coding: utf-8 -*- #
# Copyright 2018 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Export cluster command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import sys
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import clusters
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.export import util as export_util
from googlecloudsdk.core.util import files
@base.ReleaseTracks(base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA,
                    base.ReleaseTrack.GA)
class Export(base.DescribeCommand):
  """Export a cluster.

  Exports an existing cluster's configuration to a file.
  This configuration can then be used to create new clusters using the import
  command.
  """

  detailed_help = {
      'EXAMPLES': """
To export a cluster to a YAML file, run:

  $ {command} my-cluster --region=us-central1 --destination=cluster.yaml

To export a cluster to standard output, run:

  $ {command} my-cluster --region=us-central1
"""
  }

  @classmethod
  def GetApiVersion(cls):
    """Returns the API version based on the release track."""
    return 'v1'

  @classmethod
  def Args(cls, parser):
    """Registers the cluster resource argument and the export flags."""
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    flags.AddClusterResourceArg(parser, 'export', dataproc.api_version)
    export_util.AddExportFlags(parser)

  def Run(self, args):
    """Fetches the cluster, strips non-importable fields, and exports it."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    ref = args.CONCEPTS.cluster.Parse()
    get_request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=ref.projectId,
        region=ref.region,
        clusterName=ref.clusterName)
    cluster = dataproc.client.projects_regions_clusters.Get(get_request)
    # Filter out Dataproc-generated labels and properties.
    clusters.DeleteGeneratedLabels(cluster, dataproc)
    clusters.DeleteGeneratedProperties(cluster, dataproc)
    RemoveNonImportableFields(cluster)
    destination = args.destination
    if destination:
      with files.FileWriter(destination) as output_stream:
        export_util.Export(message=cluster, stream=output_stream)
    else:
      export_util.Export(message=cluster, stream=sys.stdout)
# Note that this needs to be kept in sync with v1 clusters.proto.
def RemoveNonImportableFields(cluster):
  """Modifies cluster to exclude OUTPUT_ONLY and resource-identifying fields."""
  # Identity and server-populated fields never belong in a template.
  for attr in ('projectId', 'clusterName', 'status', 'clusterUuid', 'metrics'):
    setattr(cluster, attr, None)
  cluster.statusHistory = []
  config = cluster.config
  if config is None:
    return
  lifecycle = config.lifecycleConfig
  if lifecycle is not None:
    lifecycle.idleStartTime = None
    # This is an absolute time, so exclude it from cluster templates. Due to
    # b/152239418, even if a user specified a TTL (auto_delete_ttl) rather
    # than an absolute time, the API still returns the absolute time and does
    # not return auto_delete_ttl. So TTLs are effectively excluded from
    # templates, at least until that FR is resolved.
    lifecycle.autoDeleteTime = None
  group_configs = [
      config.masterConfig, config.workerConfig, config.secondaryWorkerConfig
  ]
  group_configs.extend(aux.nodeGroup.nodeGroupConfig
                       for aux in config.auxiliaryNodeGroups)
  for group_config in group_configs:
    if group_config is None:
      continue
    group_config.instanceNames = []
    group_config.isPreemptible = None
    group_config.managedGroupConfig = None

View File

@@ -0,0 +1,66 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Get IAM cluster policy command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import iam_helpers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import flags
@base.UniverseCompatible
@base.ReleaseTracks(
    base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA, base.ReleaseTrack.GA
)
class GetIamPolicy(base.ListCommand):
  """Get IAM policy for a cluster.

  Gets the IAM policy for a cluster, given a cluster name.

  ## EXAMPLES

  The following command prints the IAM policy for a cluster with the name
  `example-cluster-name-1`:

    $ {command} example-cluster-name-1
  """

  @classmethod
  def Args(cls, parser):
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    flags.AddClusterResourceArg(parser, 'retrieve the policy for',
                                dataproc.api_version)
    base.URI_FLAG.RemoveFromParser(parser)

  def Run(self, args):
    """Fetches the cluster's IAM policy at the maximum supported version."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    messages = dataproc.messages
    cluster_ref = args.CONCEPTS.cluster.Parse()
    # Request the highest policy version the client library understands so
    # conditional bindings are returned rather than dropped.
    policy_options = messages.GetPolicyOptions(
        requestedPolicyVersion=iam_helpers.MAX_LIBRARY_IAM_SUPPORTED_VERSION)
    get_request = messages.DataprocProjectsRegionsClustersGetIamPolicyRequest(
        resource=cluster_ref.RelativeName(),
        getIamPolicyRequest=messages.GetIamPolicyRequest(
            options=policy_options))
    return dataproc.client.projects_regions_clusters.GetIamPolicy(get_request)

View File

@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The command group for cloud dataproc GKE-based virtual clusters."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
# Command group `gcloud dataproc clusters gke`: creation-only surface for
# GKE-based virtual clusters; all other lifecycle operations are handled by
# the regular `gcloud dataproc clusters` commands (see docstring below).
@base.DefaultUniverseOnly
class Gke(base.Group):
  """Create Dataproc GKE-based virtual clusters.

  All interactions other than creation should be handled by
  "gcloud dataproc clusters" commands.

  ## EXAMPLES

  To create a cluster, run:

    $ {command} my-cluster --region='us-central1' --gke-cluster='my-gke-cluster'
      --spark-engine-version='latest' --pools='name=dp,roles=default'
  """

View File

@@ -0,0 +1,273 @@
# -*- coding: utf-8 -*- #
# Copyright 2021 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Create GKE-based virtual cluster command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import collections
from apitools.base.py import encoding
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import exceptions
from googlecloudsdk.api_lib.dataproc import gke_helpers
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import clusters
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.dataproc import gke_clusters
from googlecloudsdk.command_lib.dataproc import gke_workload_identity
from googlecloudsdk.command_lib.dataproc.gke_clusters import GkeNodePoolTargetsParser
from googlecloudsdk.core import log
@base.ReleaseTracks(base.ReleaseTrack.GA, base.ReleaseTrack.BETA)
class Create(base.CreateCommand):
  """Create a GKE-based virtual cluster."""

  detailed_help = {
      'EXAMPLES':
          """\
          Create a Dataproc on GKE cluster in us-central1 on a GKE cluster in
          the same project and region with default values:

            $ {command} my-cluster --region=us-central1 --gke-cluster=my-gke-cluster --spark-engine-version=latest --pools='name=dp,roles=default'

          Create a Dataproc on GKE cluster in us-central1 on a GKE cluster in
          the same project and zone us-central1-f with default values:

            $ {command} my-cluster --region=us-central1 --gke-cluster=my-gke-cluster --gke-cluster-location=us-central1-f --spark-engine-version=3.1 --pools='name=dp,roles=default'

          Create a Dataproc on GKE cluster in us-central1 with machine type
          'e2-standard-4', autoscaling 5-15 nodes per zone.

            $ {command} my-cluster --region='us-central1' --gke-cluster='projects/my-project/locations/us-central1/clusters/my-gke-cluster' --spark-engine-version=dataproc-1.5 --pools='name=dp-default,roles=default,machineType=e2-standard-4,min=5,max=15'

          Create a Dataproc on GKE cluster in us-central1 with two distinct
          node pools.

            $ {command} my-cluster --region='us-central1' --gke-cluster='my-gke-cluster' --spark-engine-version='dataproc-2.0' --pools='name=dp-default,roles=default,machineType=e2-standard-4' --pools='name=workers,roles=spark-drivers;spark-executors,machineType=n2-standard-8'
          """
  }

  # Whether this release track accepts the extended (alpha) set of node-pool
  # roles; overridden to True by CreateAlpha below.
  _support_shuffle_service = False

  @classmethod
  def Args(cls, parser):
    """Registers all flags for the create command."""
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    base.ASYNC_FLAG.AddToParser(parser)
    flags.AddClusterResourceArg(parser, 'create', dataproc.api_version)
    # 30m is backend timeout + 5m for safety buffer.
    flags.AddTimeoutFlag(parser, default='35m')
    parser.add_argument(
        '--spark-engine-version',
        required=True,
        help="""\
        The version of the Spark engine to run on this cluster.
        """)
    parser.add_argument(
        '--staging-bucket',
        help="""\
        The Cloud Storage bucket to use to stage job dependencies, miscellaneous
        config files, and job driver console output when using this cluster.
        """)
    parser.add_argument(
        '--properties',
        type=arg_parsers.ArgDict(),
        action=arg_parsers.UpdateAction,
        default={},
        metavar='PREFIX:PROPERTY=VALUE',
        help="""\
        Specifies configuration properties for installed packages, such as
        Spark. Properties are mapped to configuration files by specifying a
        prefix, such as "core:io.serializations".
        """)
    flags.AddGkeClusterResourceArg(parser)
    parser.add_argument(
        '--namespace',
        help="""\
        The name of the Kubernetes namespace to deploy Dataproc system
        components in. This namespace does not need to exist.
        """)
    # Alpha additionally supports shuffle-service node-pool roles.
    if cls._support_shuffle_service:
      gke_clusters.AddPoolsAlphaArg(parser)
    else:
      gke_clusters.AddPoolsArg(parser)
    parser.add_argument(
        '--setup-workload-identity',
        action='store_true',
        help="""\
        Sets up the GKE Workload Identity for your Dataproc on GKE cluster.
        Note that running this requires elevated permissions as it will
        manipulate IAM policies on the Google Service Accounts that will be
        used by your Dataproc on GKE cluster.
        """)
    flags.AddMetastoreServiceResourceArg(parser)
    flags.AddHistoryServerClusterResourceArg(parser)

  def Run(self, args):
    """Builds the virtual cluster config and issues the create request."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    cluster_ref = args.CONCEPTS.cluster.Parse()
    gke_cluster_ref = args.CONCEPTS.gke_cluster.Parse()
    metastore_service_ref = args.CONCEPTS.metastore_service.Parse()
    history_server_cluster_ref = args.CONCEPTS.history_server_cluster.Parse()
    virtual_cluster_config = Create._GetVirtualClusterConfig(
        dataproc, gke_cluster_ref, args, metastore_service_ref,
        history_server_cluster_ref)

    # Dataproc on GKE requires Workload Identity on the target GKE cluster;
    # fail fast with a targeted error if it is not enabled.
    Create._VerifyGkeClusterIsWorkloadIdentityEnabled(gke_cluster_ref)

    if args.setup_workload_identity:
      Create._SetupWorkloadIdentity(args, cluster_ref, gke_cluster_ref)

    cluster = dataproc.messages.Cluster(
        virtualClusterConfig=virtual_cluster_config,
        clusterName=cluster_ref.clusterName,
        projectId=cluster_ref.projectId)

    return clusters.CreateCluster(
        dataproc,
        cluster_ref,
        cluster,
        args.async_,
        args.timeout,
        # This refers to the old GKE beta.
        enable_create_on_gke=False,
        action_on_failed_primary_workers=None)

  @staticmethod
  def _GetVirtualClusterConfig(dataproc, gke_cluster_ref, args,
                               metastore_service_ref,
                               history_server_cluster_ref):
    """Get dataproc virtual cluster configuration for GKE based clusters.

    Args:
      dataproc: Dataproc object that contains client, messages, and resources
      gke_cluster_ref: GKE cluster reference.
      args: Arguments parsed from argparse.ArgParser.
      metastore_service_ref: Reference to a Dataproc Metastore Service.
      history_server_cluster_ref: Reference to a Dataproc history cluster.

    Returns:
      virtual_cluster_config: Dataproc virtual cluster configuration
    """
    # Component versions/properties are proto map fields; convert the plain
    # dicts into the generated AdditionalProperty wrappers.
    kubernetes_software_config = dataproc.messages.KubernetesSoftwareConfig(
        componentVersion=encoding.DictToAdditionalPropertyMessage(
            {'SPARK': args.spark_engine_version},
            dataproc.messages.KubernetesSoftwareConfig.ComponentVersionValue,
            sort_items=True))

    if args.properties:
      kubernetes_software_config.properties = encoding.DictToAdditionalPropertyMessage(
          args.properties,
          dataproc.messages.KubernetesSoftwareConfig.PropertiesValue,
          sort_items=True)

    pools = GkeNodePoolTargetsParser.Parse(dataproc,
                                           gke_cluster_ref.RelativeName(),
                                           args.pools)
    gke_cluster_config = dataproc.messages.GkeClusterConfig(
        gkeClusterTarget=gke_cluster_ref.RelativeName(), nodePoolTarget=pools)

    kubernetes_cluster_config = dataproc.messages.KubernetesClusterConfig(
        kubernetesNamespace=args.namespace,
        gkeClusterConfig=gke_cluster_config,
        kubernetesSoftwareConfig=kubernetes_software_config)

    # Auxiliary services (Metastore / Spark History Server) are optional and
    # only attached when at least one was requested.
    metastore_config = None
    if metastore_service_ref:
      metastore_config = dataproc.messages.MetastoreConfig(
          dataprocMetastoreService=metastore_service_ref.RelativeName())

    spark_history_server_config = None
    if history_server_cluster_ref:
      spark_history_server_config = dataproc.messages.SparkHistoryServerConfig(
          dataprocCluster=history_server_cluster_ref.RelativeName())

    auxiliary_services_config = None
    if metastore_config or spark_history_server_config:
      auxiliary_services_config = dataproc.messages.AuxiliaryServicesConfig(
          metastoreConfig=metastore_config,
          sparkHistoryServerConfig=spark_history_server_config)

    virtual_cluster_config = dataproc.messages.VirtualClusterConfig(
        stagingBucket=args.staging_bucket,
        kubernetesClusterConfig=kubernetes_cluster_config,
        auxiliaryServicesConfig=auxiliary_services_config)
    return virtual_cluster_config

  @staticmethod
  def _VerifyGkeClusterIsWorkloadIdentityEnabled(gke_cluster_ref):
    """Raises if the target GKE cluster does not have Workload Identity on."""
    workload_identity_enabled = gke_helpers.GetGkeClusterIsWorkloadIdentityEnabled(
        project=gke_cluster_ref.projectsId,
        location=gke_cluster_ref.locationsId,
        cluster=gke_cluster_ref.clustersId)
    if not workload_identity_enabled:
      raise exceptions.GkeClusterMissingWorkloadIdentityError(gke_cluster_ref)

  @staticmethod
  def _SetupWorkloadIdentity(args, cluster_ref, gke_cluster_ref):
    """Binds the agent/driver/executor KSAs to their GSAs via Workload Identity.

    Each component's GSA can be overridden through a dataproc property; any
    component without an override is mapped to the default data-plane GSA.
    """
    default_gsa_sentinel = None

    # Group the Kubernetes service accounts by the Google service account they
    # should impersonate, so each GSA's IAM policy is updated exactly once.
    gsa_to_ksas = collections.OrderedDict()
    agent_gsa = args.properties.get(
        'dataproc:dataproc.gke.agent.google-service-account',
        default_gsa_sentinel)
    gsa_to_ksas.setdefault(agent_gsa, []).append('agent')
    spark_driver_gsa = args.properties.get(
        'dataproc:dataproc.gke.spark.driver.google-service-account',
        default_gsa_sentinel)
    gsa_to_ksas.setdefault(spark_driver_gsa, []).append('spark-driver')
    spark_executor_gsa = args.properties.get(
        'dataproc:dataproc.gke.spark.executor.google-service-account',
        default_gsa_sentinel)
    gsa_to_ksas.setdefault(spark_executor_gsa, []).append('spark-executor')

    # Resolve the sentinel (no override) bucket to the project's default
    # data-plane GSA, merging with any explicit entry for the same GSA.
    if default_gsa_sentinel in gsa_to_ksas:
      ksas = gsa_to_ksas.pop(default_gsa_sentinel)
      default_gsa = (
          gke_workload_identity.DefaultDataprocDataPlaneServiceAccount.Get(
              gke_cluster_ref.projectsId))
      if default_gsa in gsa_to_ksas:
        gsa_to_ksas[default_gsa].extend(ksas)
      else:
        gsa_to_ksas[default_gsa] = ksas

    # KSAs live in the requested namespace, defaulting to the cluster name.
    k8s_namespace = args.namespace or cluster_ref.clusterName
    log.debug(
        'Setting up Workload Identity for the following GSA to KSAs %s in the "%s" namespace.',
        gsa_to_ksas, k8s_namespace)
    for gsa, ksas in gsa_to_ksas.items():
      gke_workload_identity.GkeWorkloadIdentity.UpdateGsaIamPolicy(
          project_id=gke_cluster_ref.projectsId,
          gsa_email=gsa,
          k8s_namespace=k8s_namespace,
          k8s_service_accounts=ksas)
@base.ReleaseTracks(base.ReleaseTrack.ALPHA)
class CreateAlpha(Create):
  # Alpha accepts the extended node-pool argument set (AddPoolsAlphaArg path
  # in Create.Args).
  _support_shuffle_service = True
  # Reuse the parent's help text verbatim; without this, the subclass would
  # have no docstring for help generation.
  __doc__ = Create.__doc__

View File

@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*- #
# Copyright 2018 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Import cluster command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import clusters
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.export import util as export_util
from googlecloudsdk.core.console import console_io
@base.ReleaseTracks(base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA,
                    base.ReleaseTrack.GA)
class Import(base.UpdateCommand):
  """Import a cluster.

  This will create a new cluster with the given configuration. If a cluster with
  this name already exists, an error will be thrown.
  """

  detailed_help = {
      'EXAMPLES': """
To import a cluster from a YAML file, run:

  $ {command} my-cluster --region=us-central1 --source=cluster.yaml

To import a cluster from standard output, run:

  $ {command} my-cluster --region=us-central1
"""
  }

  @classmethod
  def GetApiVersion(cls):
    """Returns the API version based on the release track."""
    return 'v1'

  @classmethod
  def Args(cls, parser):
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    flags.AddClusterResourceArg(parser, 'import', dataproc.api_version)
    export_util.AddImportFlags(parser)
    base.ASYNC_FLAG.AddToParser(parser)
    # 30m is backend timeout + 5m for safety buffer.
    flags.AddTimeoutFlag(parser, default='35m')

  def Run(self, args):
    """Deserializes the config and creates a brand-new cluster from it."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    # '-' means stdin when no --source file was given.
    raw_config = console_io.ReadFromFileOrStdin(args.source or '-',
                                                binary=False)
    cluster = export_util.Import(
        message_type=dataproc.messages.Cluster, stream=raw_config)

    # Identity comes from the resource argument, never from the file.
    cluster_ref = args.CONCEPTS.cluster.Parse()
    cluster.clusterName = cluster_ref.clusterName
    cluster.projectId = cluster_ref.projectId

    # Import only supports create, not update (for now).
    return clusters.CreateCluster(dataproc, cluster_ref, cluster, args.async_,
                                  args.timeout)

View File

@@ -0,0 +1,149 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""List cluster command."""
from typing import Any, Dict
from apitools.base.py import list_pager
from googlecloudsdk.api_lib.dataproc import constants
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.core import properties
def _HasScheduledDelete(cluster: Dict[str, Any]) -> str:
if 'config' in cluster and 'lifecycleConfig' in cluster['config']:
lifecycle_config = cluster['config']['lifecycleConfig']
if (
'idleDeleteTtl' in lifecycle_config
or 'autoDeleteTtl' in lifecycle_config
or 'autoDeleteTime' in lifecycle_config
):
return 'enabled'
return ''
def _HasScheduledStop(cluster: Dict[str, Any]) -> str:
if 'config' in cluster and 'lifecycleConfig' in cluster['config']:
lifecycle_config = cluster['config']['lifecycleConfig']
if (
'idleStopTtl' in lifecycle_config
or 'autoStopTtl' in lifecycle_config
or 'autoStopTime' in lifecycle_config
):
return 'enabled'
return ''
@base.UniverseCompatible
class List(base.ListCommand):
  """View a list of clusters in a project.

  View a list of clusters in a project. An optional filter can be used to
  constrain the clusters returned. Filters are case-sensitive and have the
  following syntax:

    field = value [AND [field = value]] ...

  where `field` is one of `status.state`, `clusterName`, or `labels.[KEY]`,
  and `[KEY]` is a label key. `value` can be ```*``` to match all values.
  `status.state` can be one of the following: `ACTIVE`, `INACTIVE`,
  `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE`
  contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE`
  contains the `DELETING` and `ERROR` states. `clusterName` is the name of the
  cluster provided at creation time. Only the logical `AND` operator is
  supported; space-separated items are treated as having an implicit `AND`
  operator.

  ## EXAMPLES

  To see the list of all clusters in Dataproc's 'us-central1' region, run:

    $ {command} --region='us-central1'

  To show a cluster in Dataproc's 'global' region with the name 'mycluster',
  run:

    $ {command} --region='global' --filter='clusterName = mycluster'

  To see the list of all clusters in Dataproc's 'global' region with specified
  labels, run:

    $ {command} --region='global' --filter='labels.env = staging AND
    labels.starred = *'

  To see a list of all active clusters in Dataproc's 'europe-west1' region with
  specified labels, run:

    $ {command} --region='europe-west1' --filter='status.state = ACTIVE AND
    labels.env = staging AND labels.starred = *'
  """

  @staticmethod
  def Args(parser):
    flags.AddRegionFlag(parser)
    base.URI_FLAG.RemoveFromParser(parser)
    base.PAGE_SIZE_FLAG.SetDefault(parser, constants.DEFAULT_PAGE_SIZE)
    # Table layout plus custom transforms for the lifecycle columns.
    parser.display_info.AddFormat("""
          table(
            clusterName:label=NAME,
            config.gceClusterConfig.yesno(yes=GCE, no=GKE):label=PLATFORM,
            config.workerConfig.numInstances:label=PRIMARY_WORKER_COUNT,
            config.secondaryWorkerConfig.numInstances:label=SECONDARY_WORKER_COUNT,
            status.state:label=STATUS,
            config.firstof(
                gkeClusterConfig.namespacedGkeDeploymentTarget.targetGkeCluster,
                gceClusterConfig.zoneUri
            ).scope('locations').segment(0):label=ZONE,
            has_scheduled_delete():label=SCHEDULED_DELETE,
            has_scheduled_stop():label=SCHEDULED_STOP
          )
    """)
    parser.display_info.AddTransforms({
        'has_scheduled_delete': _HasScheduledDelete,
        'has_scheduled_stop': _HasScheduledStop,
    })

  def Run(self, args):
    """Streams clusters from the List API, one page at a time."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    list_request = self.GetRequest(
        dataproc.messages,
        properties.VALUES.core.project.GetOrFail(),
        util.ResolveRegion(),
        args,
    )
    return list_pager.YieldFromList(
        dataproc.client.projects_regions_clusters,
        list_request,
        limit=args.limit,
        field='clusters',
        batch_size=args.page_size,
        batch_size_attribute='pageSize',
    )

  @staticmethod
  def GetRequest(messages, project, region, args):
    """Builds the List request, moving args.filter to the backend filter."""
    # By default args.filter is also applied client-side to whatever the
    # backend returns; null it out so the filter runs server-side only.
    backend_filter = None
    if args.filter:
      backend_filter, args.filter = args.filter, None
    return messages.DataprocProjectsRegionsClustersListRequest(
        projectId=project, region=region, filter=backend_filter
    )

View File

@@ -0,0 +1,277 @@
# -*- coding: utf-8 -*- #
# Copyright 2022 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Repair cluster command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.calliope import exceptions
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.util.apis import arg_utils
from googlecloudsdk.core import log
from googlecloudsdk.core.console import console_io
import six
@base.UniverseCompatible
@base.ReleaseTracks(
    base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA, base.ReleaseTrack.GA
)
@base.Hidden
class Repair(base.Command):
  """Repair a cluster."""

  detailed_help = {
      'EXAMPLES': """
          To repair an ERROR_DUE_TO_UPDATE cluster back to RUNNING state, run:

            $ {command} my-cluster --region=us-central1 \
          --cluster-repair-action=REPAIR_ERROR_DUE_TO_UPDATE_CLUSTER

          To repair a cluster by deleting faulty primary worker nodes, run:

            $ {command} my-cluster --region=us-central1 \
          --node-pool id=PRIMARY_WORKER_POOL,repair-action=delete,instance-names="w-1;w-10"

          To repair a cluster by deleting faulty secondary worker nodes, run:

            $ {command} my-cluster --region=us-central1 \
          --node-pool id=SECONDARY_WORKER_POOL,repair-action=delete,instance-names="sw-1;sw-10"

          To repair a cluster by deleting faulty nodes from different pools, run:

            $ {command} my-cluster --region=us-central1 \
          --node-pool id=PRIMARY_WORKER_POOL,repair-action=delete,instance-names="w-1;w-10" \
          --node-pool id=SECONDARY_WORKER_POOL,repair-action=delete,instance-names="sw-1;sw-10"
          """,
  }

  @classmethod
  def Args(cls, parser):
    """Parse arguments for repair command."""
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    base.ASYNC_FLAG.AddToParser(parser)
    flags.AddTimeoutFlag(parser)
    flags.AddClusterResourceArg(parser, 'repair', dataproc.api_version)
    parser.add_argument(
        '--dataproc-super-user',
        # BUG FIX: this previously used `type=bool`, which treats ANY
        # non-empty string as True (bool('false') is True), so
        # `--dataproc-super-user=false` silently enabled superuser mode.
        # ArgBoolean parses the textual true/false values correctly while
        # keeping the `--flag=VALUE` calling convention.
        type=arg_parsers.ArgBoolean(),
        default=False,
        help=(
            'Whether to use Dataproc superuser permissions. If true, IAM checks'
            ' for `dataproc.googleapis.com/clusters.repair` permission,'
            ' otherwise `dataproc.googleapis.com/clusters.update`'
        ),
        hidden=True,
    )
    # Exactly one repair target must be chosen: either a cluster-level action
    # or one or more node-pool actions.
    repair_target = parser.add_argument_group(mutex=True, required=True)
    repair_target.add_argument(
        '--cluster-repair-action',
        help="""
        `--cluster-repair-action` flag indicates the repair operation is at
        the cluster level. `--node-pool` flag should not be specified with
        this flag. Valid values : {}.
        """.format(cls._GetValidClusterRepairActionChoices(dataproc)),
    )
    node_pool = repair_target.add_argument_group(help='Node pool flags')
    node_pool.add_argument(
        '--node-pool',
        type=arg_parsers.ArgDict(
            required_keys=['id', 'repair-action', 'instance-names'],
            spec={
                'id': str,
                'repair-action': cls._GetParseNodePoolRepairActionFunc(
                    dataproc
                ),
                # Instance names are ';'-separated because ',' delimits the
                # ArgDict entries themselves.
                'instance-names': arg_parsers.ArgList(custom_delim_char=';'),
            },
        ),
        action='append',
        default=[],
        metavar='id=ID,repair-action=REPAIR_ACTION,instance-names="INSTANCE_NAME1[;INSTANCE_NAME2]"',
        help="""
        Each `--node-pool` flag represents either the primary or secondary
        worker pool associated with the cluster and an action on specified
        nodes.
        *id:*::: Valid values : {}.
        *repair-action:*::: Valid values : {}.
        """.format(
            cls._GetValidNodePoolIdChoices(),
            cls._GetValidNodePoolRepairActionChoices(dataproc),
        ),
    )
    node_pool.add_argument(
        '--graceful-decommission-timeout',
        type=arg_parsers.Duration(lower_bound='0s', upper_bound='1d'),
        help="""
        The graceful decommission timeout for decommissioning Node Managers
        in the cluster, used when removing nodes. Graceful decommissioning
        allows removing nodes from the cluster without interrupting jobs in
        progress. Timeout specifies how long to wait for jobs in progress to
        finish before forcefully removing nodes (and potentially
        interrupting jobs). Timeout defaults to 0 if not set (for forceful
        decommission), and the maximum allowed timeout is 1 day.
        See $ gcloud topic datetimes for information on duration formats.
        """,
    )

  @classmethod
  def _GetParseNodePoolRepairActionFunc(cls, dataproc):
    """Get the function to verify node pool repair-action values."""

    def _ParseNodePoolRepairActionFunc(repair_action=None):
      # Converts the CLI choice string into the proto enum, rejecting values
      # outside the valid choice list.
      return arg_utils.ChoiceToEnum(
          repair_action,
          dataproc.messages.NodePool.RepairActionValueValuesEnum,
          item_type='NODE_POOL_REPAIR_ACTION',
          valid_choices=cls._GetValidNodePoolRepairActionChoices(dataproc),
      )

    return _ParseNodePoolRepairActionFunc

  @classmethod
  def _GetValidNodePoolIdChoices(cls):
    """Get list of valid node-pool id values."""
    return ['PRIMARY_WORKER_POOL', 'SECONDARY_WORKER_POOL']

  @classmethod
  def _GetValidNodePoolRepairActionChoices(cls, dataproc):
    """Get list of valid REPAIR_ACTION values."""
    repair_action_enums = dataproc.messages.NodePool.RepairActionValueValuesEnum
    return [
        arg_utils.ChoiceToEnumName(n)
        for n in repair_action_enums.names()
        if n != 'REPAIR_ACTION_UNSPECIFIED'
    ]

  @classmethod
  def _GetValidClusterRepairActionChoices(cls, dataproc):
    """Get list of valid CLUSTER_REPAIR_ACTION values."""
    repair_action_enums = (
        dataproc.messages.ClusterToRepair.ClusterRepairActionValueValuesEnum
    )
    return [
        arg_utils.ChoiceToEnumName(n)
        for n in repair_action_enums.names()
        if n != 'CLUSTER_REPAIR_ACTION_UNSPECIFIED'
    ]

  def _ParseClusterRepairAction(self, dataproc, cluster_repair_action):
    """Converts the --cluster-repair-action choice into the proto enum."""
    return arg_utils.ChoiceToEnum(
        cluster_repair_action,
        dataproc.messages.ClusterToRepair.ClusterRepairActionValueValuesEnum,
        item_type='CLUSTER_REPAIR_ACTION',
        valid_choices=self._GetValidClusterRepairActionChoices(dataproc),
    )

  def _ParseNodePool(self, dataproc, node_pool):
    """Parses a single --node-pool flag into a NodePool message."""
    return dataproc.messages.NodePool(
        id=node_pool['id'],
        repairAction=node_pool['repair-action'],
        instanceNames=node_pool['instance-names'])

  def _ParseNodePools(self, dataproc, args_node_pools):
    """Parses all --node-pool flags into a list of NodePool messages."""
    pools = [
        self._ParseNodePool(dataproc, node_pool)
        for node_pool in args_node_pools
    ]
    self._ValidateNodePoolIds(pools)
    return pools

  def _ValidateNodePoolIds(self, node_pools):
    """Validates that pool ids are known and not repeated.

    Raises:
      exceptions.InvalidArgumentException: on an unknown or duplicate id.
    """
    valid_ids = self._GetValidNodePoolIdChoices()
    for node_pool in node_pools:
      node_pool_id = node_pool.id
      if node_pool_id not in valid_ids:
        raise exceptions.InvalidArgumentException(
            '--node-pool',
            'Node pool ID "{}" is not one of {}'.format(node_pool_id,
                                                        valid_ids))
    unique_ids = set()
    for node_pool in node_pools:
      node_pool_id = node_pool.id
      if node_pool_id in unique_ids:
        raise exceptions.InvalidArgumentException(
            '--node-pool',
            'Node pool id "{}" used more than once.'.format(node_pool_id))
      unique_ids.add(node_pool_id)

  def Run(self, args):
    """Builds the RepairCluster request and runs (or returns) the operation."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    cluster_ref = args.CONCEPTS.cluster.Parse()

    repair_cluster_request = None
    if args.node_pool:
      repair_cluster_request = dataproc.messages.RepairClusterRequest(
          requestId=util.GetUniqueId(),
          nodePools=self._ParseNodePools(dataproc, args.node_pool),
          dataprocSuperUser=args.dataproc_super_user,
      )
      if args.graceful_decommission_timeout is not None:
        # The API expects a duration string in seconds, e.g. '600s'.
        repair_cluster_request.gracefulDecommissionTimeout = (
            six.text_type(args.graceful_decommission_timeout) + 's'
        )
      # Node repair is destructive; require explicit confirmation.
      console_io.PromptContinue(
          message=(
              "The specified nodes in cluster '{0}' and all"
              ' attached disks will be deleted.'.format(cluster_ref.clusterName)
          ),
          cancel_on_no=True,
          cancel_string='Repair canceled by user.',
      )
    if args.cluster_repair_action:
      repair_cluster_request = dataproc.messages.RepairClusterRequest(
          requestId=util.GetUniqueId(),
          cluster=dataproc.messages.ClusterToRepair(
              clusterRepairAction=self._ParseClusterRepairAction(
                  dataproc, args.cluster_repair_action
              )
          ),
          dataprocSuperUser=args.dataproc_super_user,
      )

    request = dataproc.messages.DataprocProjectsRegionsClustersRepairRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId,
        repairClusterRequest=repair_cluster_request,
    )
    operation = dataproc.client.projects_regions_clusters.Repair(request)

    if args.async_:
      log.status.write('Repairing [{0}] with operation [{1}].'.format(
          cluster_ref, operation.name))
      return operation

    return util.WaitForOperation(
        dataproc,
        operation,
        message="Waiting for cluster '{0}' repair to finish.".format(
            cluster_ref.clusterName),
        timeout_s=args.timeout)

View File

@@ -0,0 +1,71 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Set IAM cluster policy command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import iam_helpers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.iam import iam_util
@base.UniverseCompatible
@base.ReleaseTracks(
    base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA, base.ReleaseTrack.GA
)
class SetIamPolicy(base.Command):
  """Set IAM policy for a cluster.

  Sets the IAM policy for a cluster, given a cluster name and the policy.

  ## EXAMPLES

  The following command sets the IAM policy for a cluster with the name
  `example-cluster-name-1` using policy.yaml:

    $ {command} example-cluster-name-1 policy.yaml
  """

  @classmethod
  def Args(cls, parser):
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    flags.AddClusterResourceArg(parser, 'set the policy on',
                                dataproc.api_version)
    parser.add_argument(
        'policy_file',
        metavar='POLICY_FILE',
        help="""\
        Path to a local JSON or YAML formatted file containing a valid policy.
        """)

  def Run(self, args):
    """Parses the policy file and applies it to the named cluster."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    messages = dataproc.messages
    # Parse the user-supplied file into a Policy message, then pin the
    # highest policy version the client library supports.
    new_policy = iam_util.ParsePolicyFile(args.policy_file, messages.Policy)
    new_policy.version = iam_helpers.MAX_LIBRARY_IAM_SUPPORTED_VERSION
    cluster_ref = args.CONCEPTS.cluster.Parse()
    set_request = messages.DataprocProjectsRegionsClustersSetIamPolicyRequest(
        resource=cluster_ref.RelativeName(),
        setIamPolicyRequest=messages.SetIamPolicyRequest(policy=new_policy))
    return dataproc.client.projects_regions_clusters.SetIamPolicy(set_request)

View File

@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Start cluster command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.core import log
@base.ReleaseTracks(base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA,
                    base.ReleaseTrack.GA)
class Start(base.Command):
  """Start a cluster."""

  detailed_help = {
      'EXAMPLES': """
To start a cluster, run:

  $ {command} my-cluster --region=us-central1
""",
  }

  @classmethod
  def Args(cls, parser):
    """Registers --async, the operation timeout, and the cluster resource."""
    base.ASYNC_FLAG.AddToParser(parser)
    flags.AddTimeoutFlag(parser)
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    flags.AddClusterResourceArg(parser, 'start', dataproc.api_version)

  def Run(self, args):
    """Issues the Start RPC and, unless --async, waits for completion."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    cluster_ref = args.CONCEPTS.cluster.Parse()
    msgs = dataproc.messages
    # The unique request id makes this RPC safe to retry server-side.
    request = msgs.DataprocProjectsRegionsClustersStartRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId,
        startClusterRequest=msgs.StartClusterRequest(
            requestId=util.GetUniqueId()))
    operation = dataproc.client.projects_regions_clusters.Start(request)
    if args.async_:
      # Fire-and-forget: report the operation and hand it back immediately.
      log.status.write('Starting [{0}] with operation [{1}].'.format(
          cluster_ref, operation.name))
      return operation
    return util.WaitForOperation(
        dataproc,
        operation,
        message="Waiting for cluster '{0}' to start.".format(
            cluster_ref.clusterName),
        timeout_s=args.timeout)

View File

@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*- #
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Stop cluster command."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.core import log
@base.ReleaseTracks(base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA,
                    base.ReleaseTrack.GA)
class Stop(base.Command):
  """Stop a cluster."""

  detailed_help = {
      'EXAMPLES': """
To stop a cluster, run:

  $ {command} my-cluster --region=us-central1
""",
  }

  @classmethod
  def Args(cls, parser):
    """Registers --async, the operation timeout, and the cluster resource."""
    base.ASYNC_FLAG.AddToParser(parser)
    flags.AddTimeoutFlag(parser)
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    flags.AddClusterResourceArg(parser, 'stop', dataproc.api_version)

  def Run(self, args):
    """Issues the Stop RPC and, unless --async, waits for completion."""
    dataproc = dp.Dataproc(self.ReleaseTrack())
    cluster_ref = args.CONCEPTS.cluster.Parse()
    msgs = dataproc.messages
    # The unique request id makes this RPC safe to retry server-side.
    request = msgs.DataprocProjectsRegionsClustersStopRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId,
        stopClusterRequest=msgs.StopClusterRequest(
            requestId=util.GetUniqueId()))
    operation = dataproc.client.projects_regions_clusters.Stop(request)
    if args.async_:
      # Fire-and-forget: report the operation and hand it back immediately.
      log.status.write('Stopping [{0}] with operation [{1}].'.format(
          cluster_ref, operation.name))
      return operation
    return util.WaitForOperation(
        dataproc,
        operation,
        message="Waiting for cluster '{0}' to stop.".format(
            cluster_ref.clusterName),
        timeout_s=args.timeout)

View File

@@ -0,0 +1,619 @@
# -*- coding: utf-8 -*- #
# Copyright 2015 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Update cluster command."""
from googlecloudsdk.api_lib.dataproc import constants as dataproc_constants
from googlecloudsdk.api_lib.dataproc import dataproc as dp
from googlecloudsdk.api_lib.dataproc import exceptions
from googlecloudsdk.api_lib.dataproc import util
from googlecloudsdk.calliope import actions
from googlecloudsdk.calliope import arg_parsers
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.dataproc import clusters
from googlecloudsdk.command_lib.dataproc import flags
from googlecloudsdk.command_lib.dataproc.utils import user_sa_mapping_util
from googlecloudsdk.command_lib.util.args import labels_util
from googlecloudsdk.core import log
from googlecloudsdk.core.util import times
import six
@base.UniverseCompatible
class Update(base.UpdateCommand):
  """Update labels and/or the number of worker nodes in a cluster.

  Update the number of worker nodes and/or the labels in a cluster.

  ## EXAMPLES

  To resize a cluster, run:

    $ {command} my-cluster --region=us-central1 --num-workers=5

  To change the number preemptible workers in a cluster, run:

    $ {command} my-cluster --region=us-central1 --num-preemptible-workers=5

  To add the label 'customer=acme' to a cluster, run:

    $ {command} my-cluster --region=us-central1 --update-labels=customer=acme

  To update the label 'customer=ackme' to 'customer=acme', run:

    $ {command} my-cluster --region=us-central1 --update-labels=customer=acme

  To remove the label whose key is 'customer', run:

    $ {command} my-cluster --region=us-central1 --remove-labels=customer
  """

  @classmethod
  def Args(cls, parser):
    """Registers all update flags on the given parser."""
    dataproc = dp.Dataproc(cls.ReleaseTrack())
    base.ASYNC_FLAG.AddToParser(parser)
    # Allow the user to specify new labels as well as update/remove existing
    # ones.
    labels_util.AddUpdateLabelsFlags(parser)
    # Graceful decommissioning timeouts can be up to 24 hours + add 1 hour for
    # deleting VMs, etc.
    flags.AddTimeoutFlag(parser, default='25h')
    flags.AddClusterResourceArg(parser, 'update', dataproc.api_version)
    parser.add_argument(
        '--num-workers',
        type=int,
        help='The new number of worker nodes in the cluster.')
    # --num-preemptible-workers is the deprecated spelling of
    # --num-secondary-workers; the mutex group ensures only one is given.
    num_secondary_workers = parser.add_argument_group(mutex=True)
    num_secondary_workers.add_argument(
        '--num-preemptible-workers',
        action=actions.DeprecationAction(
            '--num-preemptible-workers',
            warn=('The `--num-preemptible-workers` flag is deprecated. '
                  'Use the `--num-secondary-workers` flag instead.')),
        type=int,
        hidden=True,
        help='The new number of preemptible worker nodes in the cluster.')
    num_secondary_workers.add_argument(
        '--num-secondary-workers',
        type=int,
        help='The new number of secondary worker nodes in the cluster.')
    parser.add_argument(
        '--graceful-decommission-timeout',
        type=arg_parsers.Duration(lower_bound='0s', upper_bound='1d'),
        help="""
              The graceful decommission timeout for decommissioning Node Managers
              in the cluster, used when removing nodes. Graceful decommissioning
              allows removing nodes from the cluster without interrupting jobs in
              progress. Timeout specifies how long to wait for jobs in progress to
              finish before forcefully removing nodes (and potentially
              interrupting jobs). Timeout defaults to 0 if not set (for forceful
              decommission), and the maximum allowed timeout is 1 day.
              See $ gcloud topic datetimes for information on duration formats.
              """,
    )
    parser.add_argument(
        '--min-secondary-worker-fraction',
        help=(
            'Minimum fraction of new secondary worker nodes added in a scale up'
            ' update operation, required to update the cluster. If it is not'
            ' met, cluster updation will rollback the addition of secondary'
            ' workers. Must be a decimal value between 0 and 1. Defaults to'
            ' 0.0001.'
        ),
        type=float,
    )
    _AddAlphaArguments(parser, cls.ReleaseTrack())
    # --max-idle / --no-max-idle are the hidden legacy spellings of
    # --delete-max-idle / --no-delete-max-idle (see the comment in Run).
    idle_delete_group = parser.add_mutually_exclusive_group()
    idle_delete_group.add_argument(
        '--max-idle',
        type=arg_parsers.Duration(),
        hidden=True,
        help="""\
        The duration after the last job completes to auto-delete the cluster,
        such as "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)
    idle_delete_group.add_argument(
        '--no-max-idle',
        action='store_true',
        hidden=True,
        help="""\
        Cancels the cluster auto-deletion by cluster idle duration (configured
        by --max-idle flag)
        """,
    )
    idle_delete_group.add_argument(
        '--delete-max-idle',
        type=arg_parsers.Duration(),
        help="""\
        The duration after the last job completes to auto-delete the cluster,
        such as "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)
    idle_delete_group.add_argument(
        '--no-delete-max-idle',
        action='store_true',
        help="""\
        Cancels the cluster auto-deletion by cluster idle duration (configured
        by --delete-max-idle flag)
        """)
    # Auto-delete by age: hidden legacy --max-age/--expiration-time/--no-max-age
    # plus their visible --delete-* replacements.
    auto_delete_group = parser.add_mutually_exclusive_group()
    auto_delete_group.add_argument(
        '--max-age',
        type=arg_parsers.Duration(),
        hidden=True,
        help="""\
        The lifespan of the cluster, with auto-deletion upon completion,
        "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)
    auto_delete_group.add_argument(
        '--expiration-time',
        type=arg_parsers.Datetime.Parse,
        hidden=True,
        help="""\
        The time when the cluster will be auto-deleted, such as
        "2017-08-29T18:52:51.142Z". See $ gcloud topic datetimes for
        information on time formats.
        """)
    auto_delete_group.add_argument(
        '--no-max-age',
        action='store_true',
        hidden=True,
        help="""\
        Cancels the cluster auto-deletion by maximum cluster age (configured by
        --max-age or --expiration-time flags)
        """)
    auto_delete_group.add_argument(
        '--delete-max-age',
        type=arg_parsers.Duration(),
        help="""\
        The lifespan of the cluster with auto-deletion upon completion,
        such as "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)
    auto_delete_group.add_argument(
        '--delete-expiration-time',
        type=arg_parsers.Datetime.Parse,
        help="""\
        The time when the cluster will be auto-deleted, such as
        "2017-08-29T18:52:51.142Z". See $ gcloud topic datetimes for
        information on time formats.
        """)
    auto_delete_group.add_argument(
        '--no-delete-max-age',
        action='store_true',
        help="""\
        Cancels the cluster auto-deletion by maximum cluster age (configured
        by --delete-max-age or --delete-expiration-time flags)
        """)
    # Auto-stop counterparts of the delete flags above.
    idle_stop_group = parser.add_mutually_exclusive_group()
    idle_stop_group.add_argument(
        '--stop-max-idle',
        type=arg_parsers.Duration(),
        help="""\
        The duration after the last job completes to auto-stop the cluster,
        such as "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)
    idle_stop_group.add_argument(
        '--no-stop-max-idle',
        action='store_true',
        help="""\
        Cancels the cluster auto-stop by cluster idle duration (configured
        by --stop-max-idle flag)
        """)
    auto_stop_group = parser.add_mutually_exclusive_group()
    auto_stop_group.add_argument(
        '--stop-max-age',
        type=arg_parsers.Duration(),
        help="""\
        The lifespan of the cluster, with auto-stop upon completion,
        such as "2h" or "1d".
        See $ gcloud topic datetimes for information on duration formats.
        """)
    auto_stop_group.add_argument(
        '--stop-expiration-time',
        type=arg_parsers.Datetime.Parse,
        help="""\
        The time when the cluster will be auto-stopped, such as
        "2017-08-29T18:52:51.142Z". See $ gcloud topic datetimes for
        information on time formats.
        """)
    auto_stop_group.add_argument(
        '--no-stop-max-age',
        action='store_true',
        help="""\
        Cancels the cluster auto-stop by maximum cluster age (configured by
        --stop-max-age or --stop-expiration-time flags)
        """)
    # Can only specify one of --autoscaling-policy or --disable-autoscaling
    autoscaling_group = parser.add_mutually_exclusive_group()
    flags.AddAutoscalingPolicyResourceArgForCluster(
        autoscaling_group, api_version='v1')
    autoscaling_group.add_argument(
        '--disable-autoscaling',
        action='store_true',
        help="""\
        Disable autoscaling, if it is enabled. This is an alias for passing the
        empty string to --autoscaling-policy'.
        """)
    user_sa_mapping_util.AddUpdateUserSaMappingFlags(parser)

  def Run(self, args):
    """Builds a sparse patch from the parsed flags and applies it.

    Args:
      args: The parsed command-line arguments.

    Returns:
      The updated Cluster resource, or None when --async is set (the
      in-flight operation is only logged, not awaited).

    Raises:
      exceptions.ArgumentError: If no updatable parameter was specified, or
        a user-service-account-mapping flag was used on a cluster that is
        not multi-tenant.
    """
    dataproc = dp.Dataproc(self.ReleaseTrack())
    cluster_ref = args.CONCEPTS.cluster.Parse()
    # Sparse config holding only the fields being changed; the update mask
    # accumulated in changed_fields tells the API which fields to apply.
    cluster_config = dataproc.messages.ClusterConfig()
    changed_fields = []
    has_changes = False
    no_update_error_msg = (
        'Must specify at least one cluster parameter to update.'
    )
    if args.num_workers is not None:
      worker_config = dataproc.messages.InstanceGroupConfig(
          numInstances=args.num_workers)
      cluster_config.workerConfig = worker_config
      changed_fields.append('config.worker_config.num_instances')
      has_changes = True
    # The deprecated --num-preemptible-workers takes precedence when given
    # (the two flags are mutually exclusive, so at most one is non-None).
    num_secondary_workers = _FirstNonNone(args.num_preemptible_workers,
                                          args.num_secondary_workers)
    if num_secondary_workers is not None:
      worker_config = dataproc.messages.InstanceGroupConfig(
          numInstances=num_secondary_workers)
      cluster_config.secondaryWorkerConfig = worker_config
      changed_fields.append(
          'config.secondary_worker_config.num_instances')
      has_changes = True
    if args.min_secondary_worker_fraction is not None:
      # Rebuild the secondary worker config, preserving numInstances when it
      # was set just above.
      if cluster_config.secondaryWorkerConfig is None:
        worker_config = dataproc.messages.InstanceGroupConfig(
            startupConfig=dataproc.messages.StartupConfig(
                requiredRegistrationFraction=(
                    args.min_secondary_worker_fraction
                )
            )
        )
      else:
        worker_config = dataproc.messages.InstanceGroupConfig(
            numInstances=num_secondary_workers,
            startupConfig=dataproc.messages.StartupConfig(
                requiredRegistrationFraction=(
                    args.min_secondary_worker_fraction
                )
            ),
        )
      cluster_config.secondaryWorkerConfig = worker_config
      changed_fields.append(
          'config.secondary_worker_config.startup_config.required_registration_fraction'
      )
      has_changes = True
    if self.ReleaseTrack() == base.ReleaseTrack.ALPHA:
      # ALPHA-only flag registered by _AddAlphaArguments; same
      # rebuild-and-preserve pattern as the fraction handling above.
      if args.secondary_worker_standard_capacity_base is not None:
        if cluster_config.secondaryWorkerConfig is None:
          worker_config = dataproc.messages.InstanceGroupConfig(
              instanceFlexibilityPolicy=dataproc.messages.InstanceFlexibilityPolicy(
                  provisioningModelMix=dataproc.messages.ProvisioningModelMix(
                      standardCapacityBase=args.secondary_worker_standard_capacity_base
                  )))
        else:
          worker_config = dataproc.messages.InstanceGroupConfig(
              numInstances=num_secondary_workers,
              startupConfig=cluster_config.secondaryWorkerConfig.startupConfig,
              instanceFlexibilityPolicy=dataproc.messages.InstanceFlexibilityPolicy(
                  provisioningModelMix=dataproc.messages.ProvisioningModelMix(
                      standardCapacityBase=args.secondary_worker_standard_capacity_base
                  )
              )
          )
        cluster_config.secondaryWorkerConfig = worker_config
        changed_fields.append(
            'config.secondary_worker_config.instance_flexibility_policy.provisioning_model_mix.standard_capacity_base'
        )
        has_changes = True
    if args.autoscaling_policy:
      cluster_config.autoscalingConfig = dataproc.messages.AutoscalingConfig(
          policyUri=args.CONCEPTS.autoscaling_policy.Parse().RelativeName())
      changed_fields.append('config.autoscaling_config.policy_uri')
      has_changes = True
    elif args.autoscaling_policy == '' or args.disable_autoscaling:  # pylint: disable=g-explicit-bool-comparison
      # Disabling autoscaling. Don't need to explicitly set
      # cluster_config.autoscaling_config to None.
      changed_fields.append('config.autoscaling_config.policy_uri')
      has_changes = True
    lifecycle_config = dataproc.messages.LifecycleConfig()
    changed_config = False
    # Flags max_age, expiration_time, max_idle, no_max_age, no_max_idle are
    # hidden, but still supported. They are replaced with new flags
    # delete_max_age, delete_expiration_time, delete_max_idle,
    # no_delete_max_age and no_delete_max_idle.
    if args.max_age is not None:
      lifecycle_config.autoDeleteTtl = six.text_type(args.max_age) + 's'
      changed_fields.append('config.lifecycle_config.auto_delete_ttl')
      changed_config = True
    if args.expiration_time is not None:
      lifecycle_config.autoDeleteTime = times.FormatDateTime(
          args.expiration_time)
      changed_fields.append('config.lifecycle_config.auto_delete_time')
      changed_config = True
    if args.max_idle is not None:
      lifecycle_config.idleDeleteTtl = six.text_type(args.max_idle) + 's'
      changed_fields.append('config.lifecycle_config.idle_delete_ttl')
      changed_config = True
    if args.no_max_age:
      lifecycle_config.autoDeleteTtl = None
      changed_fields.append('config.lifecycle_config.auto_delete_ttl')
      changed_config = True
    if args.no_max_idle:
      lifecycle_config.idleDeleteTtl = None
      changed_fields.append('config.lifecycle_config.idle_delete_ttl')
      changed_config = True
    if args.delete_max_age is not None:
      lifecycle_config.autoDeleteTtl = (
          six.text_type(args.delete_max_age) + 's'
      )
      changed_fields.append('config.lifecycle_config.auto_delete_ttl')
      changed_config = True
    if args.delete_expiration_time is not None:
      lifecycle_config.autoDeleteTime = times.FormatDateTime(
          args.delete_expiration_time
      )
      changed_fields.append('config.lifecycle_config.auto_delete_time')
      changed_config = True
    if args.delete_max_idle is not None:
      lifecycle_config.idleDeleteTtl = (
          six.text_type(args.delete_max_idle) + 's'
      )
      changed_fields.append('config.lifecycle_config.idle_delete_ttl')
      changed_config = True
    if args.no_delete_max_age:
      lifecycle_config.autoDeleteTtl = None
      changed_fields.append('config.lifecycle_config.auto_delete_ttl')
      changed_config = True
    if args.no_delete_max_idle:
      lifecycle_config.idleDeleteTtl = None
      changed_fields.append('config.lifecycle_config.idle_delete_ttl')
      changed_config = True
    if args.stop_max_age is not None:
      lifecycle_config.autoStopTtl = six.text_type(args.stop_max_age) + 's'
      changed_fields.append('config.lifecycle_config.auto_stop_ttl')
      changed_config = True
    if args.stop_expiration_time is not None:
      lifecycle_config.autoStopTime = times.FormatDateTime(
          args.stop_expiration_time)
      changed_fields.append('config.lifecycle_config.auto_stop_time')
      changed_config = True
    if args.stop_max_idle is not None:
      lifecycle_config.idleStopTtl = six.text_type(args.stop_max_idle) + 's'
      changed_fields.append('config.lifecycle_config.idle_stop_ttl')
      changed_config = True
    if args.no_stop_max_age:
      lifecycle_config.autoStopTtl = None
      changed_fields.append('config.lifecycle_config.auto_stop_ttl')
      changed_config = True
    if args.no_stop_max_idle:
      lifecycle_config.idleStopTtl = None
      changed_fields.append('config.lifecycle_config.idle_stop_ttl')
      changed_config = True
    if changed_config:
      cluster_config.lifecycleConfig = lifecycle_config
      has_changes = True
    # Put in a thunk so we only make this call if needed
    def _GetCurrentLabels():
      # We need to fetch cluster first so we know what the labels look like. The
      # labels_util will fill out the proto for us with all the updates and
      # removals, but first we need to provide the current state of the labels
      current_cluster = _GetCurrentCluster(dataproc, cluster_ref)
      return current_cluster.labels
    labels_update = labels_util.ProcessUpdateArgsLazy(
        args, dataproc.messages.Cluster.LabelsValue,
        orig_labels_thunk=_GetCurrentLabels)
    if labels_update.needs_update:
      has_changes = True
      changed_fields.append('labels')
    labels = labels_update.GetOrNone()
    # Lazily fetches the existing user -> service account mapping, if any;
    # only invoked when a mapping flag was actually passed.
    def _GetCurrentUserServiceAccountMapping():
      current_cluster = _GetCurrentCluster(dataproc, cluster_ref)
      if (
          current_cluster.config.securityConfig
          and current_cluster.config.securityConfig.identityConfig
      ):
        return (
            current_cluster.config.securityConfig.identityConfig.userServiceAccountMapping
        )
      return None
    # Writes the mapping into the sparse config, creating the intermediate
    # SecurityConfig/IdentityConfig messages as needed.
    def _UpdateSecurityConfig(cluster_config, user_sa_mapping):
      if cluster_config.securityConfig is None:
        cluster_config.securityConfig = dataproc.messages.SecurityConfig()
      if cluster_config.securityConfig.identityConfig is None:
        cluster_config.securityConfig.identityConfig = (
            dataproc.messages.IdentityConfig()
        )
      cluster_config.securityConfig.identityConfig.userServiceAccountMapping = (
          user_sa_mapping
      )
    if args.add_user_mappings or args.remove_user_mappings:
      # Incremental add/remove of user -> service account mappings; only
      # valid on multi-tenant clusters.
      if not _IsMultitenancyCluster(_GetCurrentCluster(dataproc, cluster_ref)):
        raise exceptions.ArgumentError(
            'User service account mapping can only be updated for multi-tenant'
            ' clusters.'
        )
      user_sa_mapping_update = user_sa_mapping_util.ProcessUpdateArgsLazy(
          args,
          dataproc.messages.IdentityConfig.UserServiceAccountMappingValue,
          orig_user_sa_mapping_thunk=_GetCurrentUserServiceAccountMapping,
      )
      if user_sa_mapping_update.needs_update:
        changed_fields.append(
            'config.security_config.identity_config.user_service_account_mapping'
        )
        has_changes = True
      else:
        # No effective change: enrich the error shown if nothing else in
        # this command produced an update either.
        if args.add_user_mappings:
          no_update_error_msg += (
              ' User to add is already present in service account mapping.'
          )
        if args.remove_user_mappings:
          no_update_error_msg += (
              ' User to remove is not present in service account mapping.'
          )
      user_sa_mapping = user_sa_mapping_update.GetOrNone()
      if user_sa_mapping:
        _UpdateSecurityConfig(cluster_config, user_sa_mapping)
    elif args.identity_config_file:
      # Wholesale replacement of the identity config from a local file.
      if not _IsMultitenancyCluster(_GetCurrentCluster(dataproc, cluster_ref)):
        raise exceptions.ArgumentError(
            'User service account mapping can only be updated for multi-tenant'
            ' clusters.'
        )
      if cluster_config.securityConfig is None:
        cluster_config.securityConfig = dataproc.messages.SecurityConfig()
      cluster_config.securityConfig.identityConfig = (
          clusters.ParseIdentityConfigFile(dataproc, args.identity_config_file)
      )
      changed_fields.append(
          'config.security_config.identity_config.user_service_account_mapping'
      )
      has_changes = True
    if not has_changes:
      raise exceptions.ArgumentError(no_update_error_msg)
    cluster = dataproc.messages.Cluster(
        config=cluster_config,
        clusterName=cluster_ref.clusterName,
        labels=labels,
        projectId=cluster_ref.projectId)
    request = dataproc.messages.DataprocProjectsRegionsClustersPatchRequest(
        clusterName=cluster_ref.clusterName,
        region=cluster_ref.region,
        projectId=cluster_ref.projectId,
        cluster=cluster,
        updateMask=','.join(changed_fields),
        requestId=util.GetUniqueId())
    if args.graceful_decommission_timeout is not None:
      request.gracefulDecommissionTimeout = (
          six.text_type(args.graceful_decommission_timeout) + 's')
    operation = dataproc.client.projects_regions_clusters.Patch(request)
    if args.async_:
      log.status.write(
          'Updating [{0}] with operation [{1}].'.format(
              cluster_ref, operation.name))
      return
    util.WaitForOperation(
        dataproc,
        operation,
        message='Waiting for cluster update operation',
        timeout_s=args.timeout)
    # Re-fetch so the returned resource reflects the applied update.
    request = dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
        projectId=cluster_ref.projectId,
        region=cluster_ref.region,
        clusterName=cluster_ref.clusterName)
    cluster = dataproc.client.projects_regions_clusters.Get(request)
    log.UpdatedResource(cluster_ref)
    return cluster
def _FirstNonNone(first, second):
return first if first is not None else second
def _AddAlphaArguments(parser, release_track):
  """Registers flags that are only available in the ALPHA release track."""
  if release_track != base.ReleaseTrack.ALPHA:
    return
  parser.add_argument(
      '--secondary-worker-standard-capacity-base',
      type=int,
      help="""
        The number of standard VMs in the Spot and Standard Mix
        feature.
        """,
  )
def _IsMultitenancyCluster(cluster) -> bool:
"""Checks if the cluster is a multi-tenant cluster.
Args:
cluster: The cluster configuration.
Returns:
True if the cluster is a multi-tenant cluster, False otherwise.
"""
config = cluster.config
if config and config.softwareConfig and config.softwareConfig.properties:
props = config.softwareConfig.properties
for prop in props.additionalProperties:
if (
prop.key == dataproc_constants.ENABLE_DYNAMIC_MULTI_TENANCY_PROPERTY
and prop.value.lower() == 'true'
):
return True
return False
def _GetCurrentCluster(dataproc, cluster_ref):
"""Retrieves the current cluster configuration.
Args:
dataproc: The Dataproc API client.
cluster_ref: The reference to the cluster.
Returns:
The current cluster configuration.
"""
# This is used for labels and auxiliary_node_pool_configs
get_cluster_request = (
dataproc.messages.DataprocProjectsRegionsClustersGetRequest(
projectId=cluster_ref.projectId,
region=cluster_ref.region,
clusterName=cluster_ref.clusterName,
)
)
current_cluster = dataproc.client.projects_regions_clusters.Get(
get_cluster_request
)
return current_cluster