# Declarative command spec for updating a Dataplex task resource.
#
# Each entry under `arguments.params` declares one command-line flag
# (`arg_name`) and the field of the googleCloudDataplexV1Task request message
# it populates (`api_field`). `group` entries bundle related flags; a group
# marked `mutex: true` accepts at most one of its members.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost; it follows the `api_field` paths and the
# `# ...` group markers. Confirm against the canonical file before relying on it.
- release_tracks: [ALPHA, GA]
  help_text:
    brief: |
      Update a Dataplex task resource.
    description: |
      Update a Dataplex task resource with the given configurations.

    examples: |
      To update a Dataplex task `test-task` within lake `test-lake` in location `us-central1` and
      change the description to `Updated Description`, run:

          $ {command} projects/test-project/locations/us-central1/lakes/test-lake/tasks/test-task \
              --description='Updated Description'
  request:
    # NOTE(review): `collection` is placed beside `ALPHA` (not under it) so that
    # it applies to every release track listed above — confirm.
    ALPHA:
      api_version: v1
    collection: dataplex.projects.locations.lakes.tasks
  arguments:
    resource:
      help_text: |
        Arguments and flags that specify the Dataplex Task you want to update.
      spec: !REF googlecloudsdk.command_lib.dataplex.resources:task
    params:
    - arg_name: description
      api_field: googleCloudDataplexV1Task.description
      help_text: |
        Description of the task.
    - arg_name: display-name
      api_field: googleCloudDataplexV1Task.displayName
      help_text: |
        Display name of the task.
    # Flags that populate googleCloudDataplexV1Task.triggerSpec.
    - group:  # trigger_spec
        help_text: |
          Spec related to how often and when a task should be triggered.
        params:
        - arg_name: trigger-start-time
          api_field: googleCloudDataplexV1Task.triggerSpec.startTime
          help_text: |
            The first run of the task will be after this time.
            If not specified, the task will run shortly after being submitted if
            ON_DEMAND and based on the schedule if RECURRING.
        - arg_name: trigger-disabled
          api_field: googleCloudDataplexV1Task.triggerSpec.disabled
          type: bool
          default: false
          help_text: |
            Prevent the task from executing.
            This does not cancel already running tasks. It is intended to temporarily
            disable RECURRING tasks.
        # NOTE(review): the flag is spelled "retires" although the API field is
        # `maxRetries`. The spelling is left untouched because renaming it
        # would change the public flag name.
        - arg_name: trigger-max-retires
          type: int
          api_field: googleCloudDataplexV1Task.triggerSpec.maxRetries
          help_text: |
            Number of retry attempts before aborting.
            Set to zero to never attempt to retry a failed task.
        - arg_name: trigger-schedule
          api_field: googleCloudDataplexV1Task.triggerSpec.schedule
          help_text: |
            Cron schedule (https://en.wikipedia.org/wiki/Cron) for running
            tasks periodically.
    # Flags that populate googleCloudDataplexV1Task.executionSpec.
    - group:  # execution_spec
        help_text: |
          Spec related to how a task is executed.
        params:
        - arg_name: execution-args
          api_field: googleCloudDataplexV1Task.executionSpec.args.additionalProperties
          metavar: KEY=VALUE
          # Each KEY=VALUE pair becomes one key/value entry of the map field.
          type:
            arg_dict:
              flatten: true
              spec:
              - api_field: key
              - api_field: value
          help_text: |
            The arguments to pass to the task.
            The args can use placeholders of the format ${placeholder} as
            part of key/value string. These will be interpolated before passing the
            args to the driver. Currently supported placeholders:
            - ${task_id}
            - ${job_time}
            To pass positional args, set the key as TASK_ARGS. The value should be a
            comma-separated string of all the positional arguments. To use a
            delimiter other than comma, refer to
            https://cloud.google.com/sdk/gcloud/reference/topic/escaping. In case of
            other keys being present in the args, then TASK_ARGS will be passed as
            the last argument.
        - arg_name: execution-service-account
          api_field: googleCloudDataplexV1Task.executionSpec.serviceAccount
          help_text: |
            Service account to use to execute a task.
            If not provided, the default Compute service account for the project is used.
        - arg_name: execution-project
          api_field: googleCloudDataplexV1Task.executionSpec.project
          help_text: |
            The project in which jobs are run.
            By default, the project containing the Lake is used.
            If a project is provided, the --execution-service-account must belong to this same
            project.
        - arg_name: max-job-execution-lifetime
          api_field: googleCloudDataplexV1Task.executionSpec.maxJobExecutionLifetime
          help_text: |
            The maximum duration before the job execution expires.
        - arg_name: kms-key
          api_field: googleCloudDataplexV1Task.executionSpec.kmsKey
          help_text: |
            The Cloud KMS key to use for encryption, of the form:
            projects/{project_number}/locations/{location_id}/keyRings/{key-ring-name}/cryptoKeys/{key-name}
    # Exactly one kind of task payload may be configured: notebook or Spark.
    - group:  # task_config
        mutex: true
        help_text: |
          Select which task you want to schedule and provide the required arguments:-
          - spark tasks
          - notebook tasks
        params:
        # Flags that populate googleCloudDataplexV1Task.notebook.
        - group:  # notebook_task_config
            help_text: |
              Config related to running custom Notebook tasks.
            params:
            # NOTE(review): declared `repeated: true` although the help text
            # ends with a single "Path to input notebook" — confirm the field
            # is really meant to accept a list.
            - arg_name: notebook
              repeated: true
              api_field: googleCloudDataplexV1Task.notebook.notebook
              help_text: |
                Google Cloud Storage URIs of the notebook file or the path to a Notebook Content.
                Path to input notebook.
            - arg_name: notebook-file-uris
              repeated: true
              api_field: googleCloudDataplexV1Task.notebook.fileUris
              help_text: |
                Google Cloud Storage URIs of files to be placed in the working directory of each
                executor.
            - arg_name: notebook-archive-uris
              repeated: true
              api_field: googleCloudDataplexV1Task.notebook.archiveUris
              help_text: |
                Google Cloud Storage URIs of archives to be extracted into the working directory of
                each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and
                .zip.
            - group:  # notebook_infrastructure_spec
                params:
                - group:  # notebook_batch_compute_resource
                    help_text: |
                      Compute resources needed for a Task when using Dataproc Serverless.
                    params:
                    - arg_name: notebook-batch-executors-count
                      api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.batch.executorsCount
                      type: int
                      help_text: |
                        Total number of job executors.
                    - arg_name: notebook-batch-max-executors-count
                      api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.batch.maxExecutorsCount
                      type: int
                      help_text: |
                        Max configurable executors.
                        If max_executors_count > executors_count, then auto-scaling is enabled.
                - group:  # notebook_container_image_runtime
                    help_text: |
                      Container Image Runtime Configuration.
                    params:
                    - arg_name: notebook-container-image
                      api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.containerImage.image
                      help_text: |
                        Optional custom container image for the job.
                    - arg_name: notebook-container-image-java-jars
                      api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.containerImage.javaJars
                      repeated: true
                      help_text: |
                        A list of Java JARS to add to the classpath.
                        Valid input includes Cloud Storage URIs to Jar binaries.
                        For example, gs://bucket-name/my/path/to/file.jar
                    - arg_name: notebook-container-image-properties
                      api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.containerImage.properties.additionalProperties
                      metavar: KEY=VALUE
                      type:
                        arg_dict:
                          flatten: true
                          spec:
                          - api_field: key
                          - api_field: value
                      help_text: |
                        Override to common configuration of open source components installed on
                        the Dataproc cluster.
                        The properties to set on daemon config files.
                        Property keys are specified in `prefix:property` format, for example
                        `core:hadoop.tmp.dir`.
                        For more information, see [Cluster
                        properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
                - group:  # notebook_vpc_network
                    help_text: |
                      Cloud VPC Network used to run the infrastructure.
                    params:
                    # Network and sub-network are alternatives; tags sit outside
                    # the mutex group and may accompany either.
                    - group:
                        mutex: true
                        help_text: |
                          The Cloud VPC network identifier.
                        params:
                        - arg_name: notebook-vpc-network-name
                          api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.vpcNetwork.network
                          help_text: |
                            The Cloud VPC network in which the job is run. By default, the Cloud
                            VPC network named Default within the project is used.
                        - arg_name: notebook-vpc-sub-network-name
                          api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.vpcNetwork.subNetwork
                          help_text: |
                            The Cloud VPC sub-network in which the job is run.
                    - arg_name: notebook-vpc-network-tags
                      api_field: googleCloudDataplexV1Task.notebook.infrastructureSpec.vpcNetwork.networkTags
                      repeated: true
                      help_text: |
                        List of network tags to apply to the job.
        # Flags that populate googleCloudDataplexV1Task.spark.
        - group:  # spark_task_config
            help_text: |
              Config related to running custom Spark tasks.
            params:
            # Only one driver source may be given per task.
            - group:  # driver
                mutex: true
                help_text: |
                  The specification of the main method to call to drive the
                  job. Specify either the jar file that contains the main class or the
                  main class name.
                params:
                - arg_name: spark-main-jar-file-uri
                  api_field: googleCloudDataplexV1Task.spark.mainJarFileUri
                  help_text: |
                    The Google Cloud Storage URI of the jar file that contains the main class.
                    The execution args are passed in as a sequence of named process
                    arguments (`--key=value`).
                # The original help text stopped mid-sentence after
                # "specified in"; it is completed from the API description of
                # the `mainClass` field.
                - arg_name: spark-main-class
                  api_field: googleCloudDataplexV1Task.spark.mainClass
                  help_text: |
                    The name of the driver's main class. The jar file that contains the
                    class must be in the default CLASSPATH or specified in
                    `jar_file_uris`.
                - arg_name: spark-python-script-file
                  api_field: googleCloudDataplexV1Task.spark.pythonScriptFile
                  help_text: |
                    The Google Cloud Storage URI of the main Python file to use as the driver. Must
                    be a .py file.
                - arg_name: spark-sql-script-file
                  api_field: googleCloudDataplexV1Task.spark.sqlScriptFile
                  help_text: |
                    A reference to a query file. This can be the Google Cloud Storage URI of the query file
                    or it can be the path to a SqlScript Content.
                - arg_name: spark-sql-script
                  api_field: googleCloudDataplexV1Task.spark.sqlScript
                  help_text: |
                    The SQL query text.
            - arg_name: spark-file-uris
              repeated: true
              api_field: googleCloudDataplexV1Task.spark.fileUris
              help_text: |
                Google Cloud Storage URIs of files to be placed in the working directory of each
                executor.
            - arg_name: spark-archive-uris
              repeated: true
              api_field: googleCloudDataplexV1Task.spark.archiveUris
              help_text: |
                Google Cloud Storage URIs of archives to be extracted into the working directory of
                each executor. Supported file types: .jar, .tar, .tar.gz, .tgz, and
                .zip.
            - group:  # infrastructure_spec
                params:
                - group:  # batch_compute_resource
                    help_text: |
                      Compute resources needed for a Task when using Dataproc Serverless.
                    params:
                    - arg_name: batch-executors-count
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.batch.executorsCount
                      type: int
                      help_text: |
                        Total number of job executors.
                    - arg_name: batch-max-executors-count
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.batch.maxExecutorsCount
                      type: int
                      help_text: |
                        Max configurable executors.
                        If max_executors_count > executors_count, then auto-scaling is enabled.
                - group:  # container_image_runtime
                    help_text: |
                      Container Image Runtime Configuration.
                    params:
                    - arg_name: container-image
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.containerImage.image
                      help_text: |
                        Optional custom container image for the job.
                    - arg_name: container-image-java-jars
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.containerImage.javaJars
                      repeated: true
                      help_text: |
                        A list of Java JARS to add to the classpath.
                        Valid input includes Cloud Storage URIs to Jar binaries.
                        For example, gs://bucket-name/my/path/to/file.jar
                    - arg_name: container-image-python-packages
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.containerImage.pythonPackages
                      repeated: true
                      help_text: |
                        A list of python packages to be installed.
                        Valid formats include Cloud Storage URI to a PIP installable library.
                        For example, gs://bucket-name/my/path/to/lib.tar.gz
                    - arg_name: container-image-properties
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.containerImage.properties.additionalProperties
                      metavar: KEY=VALUE
                      type:
                        arg_dict:
                          flatten: true
                          spec:
                          - api_field: key
                          - api_field: value
                      help_text: |
                        Override to common configuration of open source components installed on
                        the Dataproc cluster.
                        The properties to set on daemon config files.
                        Property keys are specified in `prefix:property` format, for example
                        `core:hadoop.tmp.dir`.
                        For more information, see [Cluster
                        properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
                - group:  # vpc-network
                    help_text: |
                      Cloud VPC Network used to run the infrastructure.
                    params:
                    # `mutex: true` added to match the notebook VPC group in
                    # this file, which treats network and sub-network as
                    # alternatives.
                    # NOTE(review): confirm the API rejects both being set.
                    - group:
                        mutex: true
                        help_text: |
                          The Cloud VPC network identifier.
                        params:
                        - arg_name: vpc-network-name
                          api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.vpcNetwork.network
                          help_text: |
                            The Cloud VPC network in which the job is run. By default, the Cloud
                            VPC network named Default within the project is used.
                        - arg_name: vpc-sub-network-name
                          api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.vpcNetwork.subNetwork
                          help_text: |
                            The Cloud VPC sub-network in which the job is run.
                    - arg_name: vpc-network-tags
                      api_field: googleCloudDataplexV1Task.spark.infrastructureSpec.vpcNetwork.networkTags
                      repeated: true
                      help_text: |
                        List of network tags to apply to the job.
    labels:
      api_field: googleCloudDataplexV1Task.labels
  async:
    collection: dataplex.projects.locations.operations
  update:
    # b/136698204
    read_modify_update: true