# -*- coding: utf-8 -*- # # Copyright 2024 Google LLC. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Create a Flink job from a Java jar.""" from __future__ import absolute_import from __future__ import division from __future__ import unicode_literals import os from googlecloudsdk.api_lib.managed_flink import util as flink_util from googlecloudsdk.api_lib.util import waiter from googlecloudsdk.calliope import base from googlecloudsdk.calliope import exceptions from googlecloudsdk.command_lib.managed_flink import flags from googlecloudsdk.command_lib.managed_flink import flink_backend from googlecloudsdk.command_lib.util.args import common_args from googlecloudsdk.core import exceptions as core_exceptions from googlecloudsdk.core import log from googlecloudsdk.core import properties from googlecloudsdk.core import resources from googlecloudsdk.core import yaml from googlecloudsdk.core.util import encoding from googlecloudsdk.core.util import files from googlecloudsdk.core.util import platforms def GetJobType(job_type, job_file): """Returns the job type based on the job_type and job_file.""" if job_type == 'auto': job_type = None if job_file.endswith('.py'): job_type = 'python' elif job_file.endswith('.sql'): job_type = 'sql' elif job_file.endswith('.jar'): job_type = 'jar' if not job_type: raise UnknownJobType( 'Unable to determine type of job [{}]. Job input files must end in' ) return job_type def GetInputType(job_file): """Returns the input type based on the job_file.""" input_type = 'file://' # format is: # ar:///// if job_file.startswith('ar://') or job_file.startswith('artifactregistry://'): input_type = 'ar://' return input_type class UnknownJobType(core_exceptions.Error): """Raised when the job type cannot be determined.""" @base.DefaultUniverseOnly @base.ReleaseTracks(base.ReleaseTrack.ALPHA) class Create(base.BinaryBackedCommand): """Create a Flink job from a Java jar.""" detailed_help = { 'EXAMPLES': """ To create a Flink job from a Java jar, run: $ {command} my-job.jar --project=my-project --location=us-central1 """, } def _JobSubmitResponseHandler(self, response, job_type, temp_dir, args): """Process results of BinaryOperation Execution.""" if response.stdout and (args.show_output or job_type == 'sql'): log.Print(response.stdout) if response.stderr: log.status.Print(response.stderr) if response.failed: return None jobgraph = os.path.join(temp_dir, 'jobgraph.bin') if not os.path.exists(jobgraph): return None jobspec = os.path.join(temp_dir, 'jobspec.yaml') if not os.path.exists(jobspec): return None with files.FileReader(jobspec) as f: jobspec_json = yaml.load(f) files_to_upload = list() files_to_upload.append(os.path.join(temp_dir, 'jobgraph.bin')) for jar in jobspec_json['job']['jars']: if jar.startswith('file:'): files_to_upload.append(jar[5:]) dest = flink_backend.Upload( files_to_upload, os.path.join(args.staging_location, jobspec_json['job']['id']), ) msg = flink_util.GetMessagesModule(self.ReleaseTrack()) jobspec = msg.JobSpec( jobName='{0}'.format(jobspec_json['job']['name']), jobGraphUri=dest[os.path.join(temp_dir, 'jobgraph.bin')], jarUris=[dest[jar[5:]] for jar in jobspec_json['job']['jars']], ) # Configure optional arguments if args.name: jobspec.displayName = args.name if args.network: config = msg.NetworkConfig(vpc=args.network) if args.subnetwork: config.subnetwork = args.subnetwork jobspec.networkConfig = config # Configure autotuning mode autotuning_config = msg.AutotuningConfig() if args.autotuning_mode == 'fixed': autotuning_config.fixed = msg.Fixed(parallelism=args.parallelism) else: autotuning_config.throughputBased = msg.Elastic( parallelism=args.min_parallelism, minParallelism=args.min_parallelism, maxParallelism=args.max_parallelism, ) jobspec.autotuningConfig = autotuning_config job = msg.Job(name=jobspec_json['job']['id'], jobSpec=jobspec) if args.deployment: job.deploymentId = args.deployment create = msg.ManagedflinkProjectsLocationsJobsCreateRequest( parent='projects/{0}/locations/{1}'.format( properties.VALUES.core.project.Get(required=True), args.location ), jobId=jobspec_json['job']['id'], job=job, ) if args.show_output: log.Print(create) if args.dry_run: return response flink_client = flink_util.FlinkClient(self.ReleaseTrack()) create_op = flink_client.client.projects_locations_jobs.Create(create) if args.show_output: log.Print(create_op) log.Print('Create request issued for [{0}]'.format(create.jobId)) if args.async_submit: return response create_op_ref = resources.REGISTRY.Parse( create_op.name, collection='managedflink.projects.locations.operations' ) waiter.WaitFor( waiter.CloudOperationPoller( flink_client.client.projects_locations_jobs, flink_client.client.projects_locations_operations, ), create_op_ref, 'Waiting for operations [{0}] to complete...'.format(create_op.name), ) return response @staticmethod def Args(parser): # Common arguments common_args.ProjectArgument( help_text_to_overwrite='Project to run the job in.' ).AddToParser(parser) # Specific arguments flags.AddDeploymentArgument( parser, help_text_to_overwrite='Deployment to run the job in.' ) flags.AddShowOutputArgument(parser) flags.AddDryRunArgument(parser) flags.AddAsyncArgument(parser) flags.AddMainClassArgument(parser) flags.AddExtraJarsArgument(parser) flags.AddLocationArgument(parser) flags.AddStagingLocationArgument(parser) flags.AddAutotuningModeArgument(parser) flags.AddJobJarArgument(parser) flags.AddJobTypeArgument(parser) flags.AddNameArgument(parser) flags.AddFixedParallelismArgs(parser) flags.AddElasticParallelismArgs(parser) flags.AddNetworkConfigArgs(parser) flags.AddWorkloadIdentityArgument(parser) flags.AddJobArgsCollector(parser) flags.AddPythonVirtualEnvArgument(parser) flags.AddExtraArchivesArgument(parser) def Run(self, args): current_os = platforms.OperatingSystem.Current() if current_os is platforms.OperatingSystem.WINDOWS: raise exceptions.ToolException('Job creation not supported on Windows.') # Determing the job_file input method input_type = GetInputType(args.job) # Make sure the job file exists if input_type == 'file://' and not os.path.exists(args.job): raise exceptions.InvalidArgumentException( 'JAR|PY|SQL', 'Job definition [{0}] does not exist.'.format(args.job), ) # Determine the job type job_type = GetJobType(args.job_type, args.job) # Make sure both network arguments are set if at least one is present. if args.network: if not args.subnetwork: raise exceptions.InvalidArgumentException( 'network-config-subnetwork', '--network-config-subnetwork must be set if --network-config-vpc is' ' set.', ) elif args.subnetwork: if not args.network: raise exceptions.InvalidArgumentException( 'network-config-vpc', '--network-config-vpc must be set if --network-config-subnetwork is' ' set.', ) if args.workload_identity and args.deployment: raise exceptions.InvalidArgumentException( 'workload-identity', '--workload-identity cannot be set if --deployment is set.', ) # Validate that autotuning arguments are consistent flink_backend.ValidateAutotuning( args.autotuning_mode, args.min_parallelism, args.max_parallelism, args.parallelism, ) # Validate the staging location if not args.staging_location.startswith('gs://'): raise exceptions.InvalidArgumentException( 'staging-location', 'Staging location must be of the form gs:///.', ) flink_backend.CheckStagingLocation(args.staging_location) # Validate the python virtualenv if job_type == 'python': if not args.python_venv: raise exceptions.InvalidArgumentException( 'python-venv', 'Python virtualenv must be set if job type is python.', ) if not args.python_venv.startswith('gs://'): raise exceptions.InvalidArgumentException( 'python-venv', 'Python Virtualenv location must be of the form' ' gs:///.', ) env = dict() env['CLOUDSDK_MANAGEDFLINK_JOB_TYPE'] = job_type if job_type == 'python': if args.extra_jars: env['HADOOP_CLASSPATH'] = ':'.join(args.extra_jars) elif job_type == 'jar': if args.extra_jars: env['HADOOP_CLASSPATH'] = ':'.join(args.extra_jars) # If there is a HADOOP_CLASSPATH environment variable, add it to the env # variable so it doesn't get overwritten. if env.get('HADOOP_CLASSPATH') and encoding.GetEncodedValue( os.environ, 'HADOOP_CLASSPATH' ): env['HADOOP_CLASSPATH'] = ':'.join([ env.get('HADOOP_CLASSPATH'), encoding.GetEncodedValue(os.environ, 'HADOOP_CLASSPATH'), ]) # Dry run if args.dry_run: env['CLOUDSDK_MANAGEDFLINK_DRY_RUN'] = 'true' env['CLOUDSDK_MANAGEDFLINK_ECHO_CMD'] = 'true' with files.TemporaryDirectory() as temp_dir: jar_path = args.job if input_type == 'ar://': jar_name, registry = flink_backend.CreateRegistryFromArtifactUri( args.job ) log.Print( 'Downloading {0} file from Artifact Registry...'.format(jar_name) ) jar_path = os.path.join(temp_dir, jar_name.split('/')[-1]) # Will throw an HTTPError if the file does not exist. flink_backend.DownloadJarFromArtifactRegistry( dest_path=jar_path, artifact_jar_path=registry.RelativeName() ) log.debug('Successfully downloaded the file to ' + jar_path) command_executor = flink_backend.FlinkClientWrapper() response = command_executor( command='run', job_type=job_type, jar=jar_path, target='gcloud', deployment=args.deployment, staging_location=args.staging_location, autotuning_mode=args.autotuning_mode, temp_dir=temp_dir, network=args.network, subnetwork=args.subnetwork, name=args.name, location=args.location, main_class=args.main_class, extra_jars=args.extra_jars, extra_args=args.job_args, extra_archives=args.archives, python_venv=args.python_venv, env=flink_backend.GetEnvArgsForCommand(env), ) return self._JobSubmitResponseHandler(response, job_type, temp_dir, args)