509 lines
20 KiB
Python
509 lines
20 KiB
Python
# -*- coding: utf-8 -*- #
|
|
# Copyright 2017 Google LLC. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Command to run an Airflow CLI sub-command in an environment."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import unicode_literals
|
|
|
|
import argparse
|
|
import random
|
|
import re
|
|
import time
|
|
|
|
from googlecloudsdk.api_lib.composer import environments_util as environments_api_util
|
|
from googlecloudsdk.api_lib.composer import util as api_util
|
|
from googlecloudsdk.calliope import base
|
|
from googlecloudsdk.command_lib.composer import image_versions_util as image_versions_command_util
|
|
from googlecloudsdk.command_lib.composer import resource_args
|
|
from googlecloudsdk.command_lib.composer import util as command_util
|
|
from googlecloudsdk.core import execution_utils
|
|
from googlecloudsdk.core import log
|
|
from googlecloudsdk.core.console import console_io
|
|
|
|
# Substring used to look up the GKE pod in which Airflow commands are run,
# and the name of the container inside that pod that `kubectl exec` targets.
WORKER_POD_SUBSTR = 'airflow-worker'
WORKER_CONTAINER = 'airflow-worker'

# Confirmation message shown before running a deprecated subcommand; the `{}`
# placeholder is filled with the subcommand name.
DEPRECATION_WARNING = ('Because Cloud Composer manages the Airflow metadata '
                       'database for your environment, support for the Airflow '
                       '`{}` subcommand is being deprecated. '
                       'To avoid issues related to Airflow metadata, we '
                       'recommend that you do not use this subcommand unless '
                       'you understand the outcome.')

# Output polling parameters for commands executed through the Composer API.
# Initial delay between two polls; also restored whenever a poll returns output.
DEFAULT_POLL_TIME_SECONDS = 2
# Number of consecutive polling/stop failures after which the command gives up.
MAX_CONSECUTIVE_POLL_ERRORS = 10
# Upper bound for the delay reached through exponential backoff.
MAX_POLL_TIME_SECONDS = 30
# Factor by which the delay grows after a poll that returned no output.
EXP_BACKOFF_MULTIPLIER = 1.75
# The delay of every poll is shifted by a random value in [-jitter, +jitter].
POLL_JITTER_SECONDS = 0.5
|
|
|
|
|
|
@base.DefaultUniverseOnly
class Run(base.Command):
  """Run an Airflow sub-command remotely in a Cloud Composer environment.

  Executes an Airflow CLI sub-command remotely in an environment. If the
  sub-command takes flags, separate the environment name from the sub-command
  and its flags with ``--''. This command waits for the sub-command to
  complete; its exit code will match the sub-command's exit code.

  Note: Airflow CLI sub-command syntax differs between Airflow 1 and Airflow 2.
  Refer to the Airflow CLI reference documentation for more details.

  ## EXAMPLES

  The following command in environments with Airflow 2:

    {command} myenv dags trigger -- some_dag --run_id=foo

  is equivalent to running the following command from a shell inside the
  *myenv* environment:

    airflow dags trigger --run_id=foo some_dag

  The same command, but for environments with Airflow 1.10.14+:

    {command} myenv trigger_dag -- some_dag --run_id=foo

  is equivalent to running the following command from a shell inside the
  *myenv* environment:

    airflow trigger_dag some_dag --run_id=foo

  The following command (for environments with Airflow 1.10.14+):

    {command} myenv dags list

  is equivalent to running the following command from a shell inside the
  *myenv* environment:

    airflow dags list
  """

  # Maps every Airflow subcommand accepted by this command to its entry in the
  # shared allowlist; the entries expose `from_version`, `to_version` and
  # `allowed_nested_subcommands`.
  SUBCOMMAND_ALLOWLIST = command_util.SUBCOMMAND_ALLOWLIST
|
|
|
|
@classmethod
def Args(cls, parser):
  """Registers the arguments accepted by the command.

  Adds the environment resource argument followed by three positionals: the
  Airflow `subcommand` (restricted to the keys of SUBCOMMAND_ALLOWLIST), an
  optional `subcommand_nested`, and `cmd_args`, which collects the rest of
  the command line.

  Args:
    parser: the argument parser to which the arguments are added.
  """
  resource_args.AddEnvironmentResourceArg(
      parser, 'in which to run an Airflow command')

  doc_url = 'https://airflow.apache.org/docs/apache-airflow/stable/cli-and-env-variables-ref.html'
  # One entry per allowed subcommand, rendered as `name [from, to)`; a missing
  # bound of the version range is rendered as `**`.
  supported_subcommands = sorted(
      '{} [{}, {})'.format(cmd, r.from_version or '**', r.to_version or '**')
      for cmd, r in cls.SUBCOMMAND_ALLOWLIST.items())
  parser.add_argument(
      'subcommand',
      metavar='SUBCOMMAND',
      choices=list(cls.SUBCOMMAND_ALLOWLIST),
      help=('The Airflow CLI subcommand to run. Available subcommands '
            'include (listed with Airflow versions that support): {} '
            '(see {} for more info).').format(
                ', '.join(supported_subcommands), doc_url))

  # Add information about restricted nested subcommands.
  # Some subcommands only allow certain nested subcommands.
  # Written with for loops to reduce complexity. [g-complex-comprehension]
  allowed_nested_subcommands_help = []
  for sub_cmd, r in cls.SUBCOMMAND_ALLOWLIST.items():
    # An empty allowed_nested_subcommands means that every nested subcommand
    # is allowed, so there is nothing to list for this subcommand.
    if not r.allowed_nested_subcommands:
      continue
    nested_names = ', '.join(sorted(r.allowed_nested_subcommands.keys()))
    allowed_nested_subcommands_help.append(
        '- {}: {}'.format(sub_cmd, nested_names))
  # Add an additional element stating that all other subcommands are allowed
  allowed_nested_subcommands_help.append(
      '- all other subcommands: all nested subcommands are allowed'
  )
  parser.add_argument(
      'subcommand_nested',
      metavar='SUBCOMMAND_NESTED',
      nargs=argparse.OPTIONAL,
      help=(
          'Additional subcommand in case it is nested. '
          'The following is a list of allowed nested subcommands:\n'
          '{}'
      ).format('\n'.join(allowed_nested_subcommands_help)),
  )
  parser.add_argument(
      'cmd_args',
      metavar='CMD_ARGS',
      nargs=argparse.REMAINDER,
      help='Command line arguments to the subcommand.',
      example='{command} myenv trigger_dag -- some_dag --run_id=foo')
|
|
|
|
def BypassConfirmationPrompt(self, args, airflow_version):
  """Bypasses confirmations with "yes" responses.

  Prevents certain Airflow CLI subcommands from presenting a confirmation
  prompt (which can make the gcloud CLI stop responding). When necessary,
  `--yes` is appended to `args.cmd_args`, which is modified in place.

  Args:
    args: argparse.Namespace, An object that contains the values for the
      arguments specified in the .Args() method.
    airflow_version: String, an Airflow semantic version.
  """
  # Value is the lowest Airflow version for which this command needs to bypass
  # the confirmation prompt; None means that every version needs it.
  prompting_subcommands = {
      'backfill': '1.10.6',
      'delete_dag': None,
      ('dags', 'backfill'): None,
      ('dags', 'delete'): None,
      ('tasks', 'clear'): None,
      ('db', 'clean'): None,
  }

  # Handle nested commands like "dags list". There are two ways to execute
  # nested Airflow subcommands via gcloud:
  # 1. {command} myenv dags delete -- dag_id
  # 2. {command} myenv dags -- delete dag_id
  subcommand_two_level = self._GetSubcommandTwoLevel(args)

  # Nested commands are keyed by the (subcommand, nested) pair and one-level
  # commands by the subcommand name alone, so try both forms.
  if subcommand_two_level in prompting_subcommands:
    lowest_version = prompting_subcommands[subcommand_two_level]
  elif subcommand_two_level[0] in prompting_subcommands:
    lowest_version = prompting_subcommands[subcommand_two_level[0]]
  else:
    return

  if (lowest_version is not None and
      image_versions_command_util.CompareVersions(
          airflow_version, lowest_version) < 0):
    return

  cmd_args = args.cmd_args or []
  # The user already answered the prompt explicitly.
  if '-y' in cmd_args or '--yes' in cmd_args:
    return
  cmd_args.append('--yes')
  args.cmd_args = cmd_args
|
|
|
|
def CheckForRequiredCmdArgs(self, args):
  """Prevents running Airflow CLI commands without required arguments.

  A required flag counts as present when it is passed either as a separate
  token (`--password secret`) or in the `flag=value` form
  (`--password=secret`).

  Args:
    args: argparse.Namespace, An object that contains the values for the
      arguments specified in the .Args() method.

  Raises:
    command_util.Error: if none of the flags of a required group is present
      in `args.cmd_args`.
  """
  # Dict values are lists of flag groups; at least one flag of every group is
  # required.
  # E.g. for "users create" subcommand, one of the "-p", "--password" or
  # "--use-random-password" will be required.
  required_cmd_args = {
      ('users', 'create'): [['-p', '--password', '--use-random-password']],
  }

  def _StringifyRequiredCmdArgs(cmd_args):
    quoted_args = ['"{}"'.format(a) for a in cmd_args]
    return '[{}]'.format(', '.join(quoted_args))

  def _IsFlagPresent(flag, cmd_args):
    # Matches `flag` as its own token as well as the `flag=value` spelling.
    return any(a == flag or a.startswith(flag + '=') for a in cmd_args)

  subcommand_two_level = self._GetSubcommandTwoLevel(args)
  given_cmd_args = args.cmd_args or []

  # For now `required_cmd_args` contains only two-level Airflow commands,
  # but potentially in the future it could be extended for one-level
  # commands as well, and this code will have to be updated appropriately.
  for flag_group in required_cmd_args.get(subcommand_two_level, []):
    if any(_IsFlagPresent(flag, given_cmd_args) for flag in flag_group):
      continue
    raise command_util.Error(
        'The subcommand "{}" requires one of the following command line '
        'arguments: {}.'.format(
            ' '.join(subcommand_two_level),
            _StringifyRequiredCmdArgs(flag_group)))
|
|
|
|
def DeprecationWarningPrompt(self, args):
  """Asks for confirmation before running a deprecated subcommand.

  Args:
    args: argparse.Namespace, An object that contains the values for the
      arguments specified in the .Args() method.

  Returns:
    True when `args.subcommand` is not listed in
    command_util.SUBCOMMAND_DEPRECATION, otherwise the value returned by
    console_io.PromptContinue (called with cancel_on_no=True).
  """
  if args.subcommand not in command_util.SUBCOMMAND_DEPRECATION:
    return True
  return console_io.PromptContinue(
      message=DEPRECATION_WARNING.format(args.subcommand),
      default=False,
      cancel_on_no=True)
|
|
|
|
def _GetSubcommandTwoLevel(self, args):
  """Extract and return two level nested Airflow subcommand in unified shape.

  There are two ways to execute nested Airflow subcommands via gcloud, e.g.:
  1. {command} myenv users create -- -u User
  2. {command} myenv users -- create -u User
  The method returns ('users', 'create') in both cases.

  It is possible that first element of args.cmd_args will not be a nested
  subcommand, but that is ok as it will not break entire logic.
  So, essentially the result can be ('info', '--anonymize').

  Args:
    args: argparse.Namespace, An object that contains the values for the
      arguments specified in the .Args() method.

  Returns:
    A (subcommand, nested) tuple. `nested` is args.subcommand_nested when it
    is set, otherwise the first element of args.cmd_args, otherwise None.
  """
  if args.subcommand_nested:
    return (args.subcommand, args.subcommand_nested)
  if args.cmd_args:
    return (args.subcommand, args.cmd_args[0])
  return (args.subcommand, None)
|
|
|
|
def CheckSubcommandAirflowSupport(self, args, airflow_version):
  """Checks that the requested subcommand is allowed for an Airflow version.

  The top-level subcommand is validated against the version range of its
  SUBCOMMAND_ALLOWLIST entry. When that entry restricts nested subcommands,
  the nested subcommand must be one of them and must be within its own
  version range as well.

  Args:
    args: argparse.Namespace, An object that contains the values for the
      arguments specified in the .Args() method.
    airflow_version: String, an Airflow semantic version.

  Raises:
    command_util.Error: if the subcommand, or its nested subcommand, is not
      supported for `airflow_version`.
  """

  def _RaiseLackOfSupportError(command):
    raise command_util.Error(
        'The subcommand "{}" is not supported for Composer environments'
        ' with Airflow version {}.'.format(command, airflow_version),)

  def _CheckIsSupportedSubcommand(command, allowlist_entry):
    if not image_versions_command_util.IsVersionInRange(
        airflow_version, allowlist_entry.from_version,
        allowlist_entry.to_version):
      _RaiseLackOfSupportError(command)

  subcommand, subcommand_nested = self._GetSubcommandTwoLevel(args)
  subcommand_entry = self.SUBCOMMAND_ALLOWLIST[args.subcommand]
  _CheckIsSupportedSubcommand(subcommand, subcommand_entry)

  allowed_nested = subcommand_entry.allowed_nested_subcommands
  # No restriction on nested subcommands: nothing more to verify.
  if not allowed_nested:
    return

  two_level_subcommand_string = '{} {}'.format(subcommand, subcommand_nested)
  if subcommand_nested not in allowed_nested:
    _RaiseLackOfSupportError(two_level_subcommand_string)
  _CheckIsSupportedSubcommand(
      two_level_subcommand_string, allowed_nested[subcommand_nested])
|
|
|
|
def CheckSubcommandNestedAirflowSupport(self, args, airflow_version):
  """Rejects nested subcommands on Airflow versions older than 1.10.14.

  Only an explicitly passed `subcommand_nested` is considered; a nested
  subcommand passed through `cmd_args` is not checked here.

  Args:
    args: argparse.Namespace, An object that contains the values for the
      arguments specified in the .Args() method.
    airflow_version: String, an Airflow semantic version.

  Raises:
    command_util.Error: if a nested subcommand is given and the version is
      outside the range starting at 1.10.14.
  """
  if not args.subcommand_nested:
    return
  if image_versions_command_util.IsVersionInRange(
      airflow_version, '1.10.14', None):
    return
  raise command_util.Error(
      'Nested subcommands are supported only for Composer environments '
      'with Airflow version 1.10.14 or higher.')
|
|
|
|
def ConvertKubectlError(self, error, env_obj):
  """Adds a private-environment hint to a kubectl error when applicable.

  Args:
    error: the error raised while running kubectl.
    env_obj: Cloud Composer Environment object.

  Returns:
    `error` itself when the environment is not a private environment;
    otherwise a new command_util.Error whose message is the original message
    followed by a pointer to the private IP access documentation.
  """
  private_config = env_obj.config.privateEnvironmentConfig
  if not (private_config and private_config.enablePrivateEnvironment):
    return error
  return command_util.Error(
      str(error)
      + ' Make sure you have followed'
      ' https://cloud.google.com/composer/docs/how-to/accessing/airflow-cli#private-ip'
      ' to enable access to your private Cloud Composer environment from'
      ' your machine.'
  )
|
|
|
|
def _ExtractAirflowVersion(self, image_version):
  """Returns the Airflow version embedded in a Composer image version.

  Args:
    image_version: String, an image version that contains an
      `-airflow-<version>` part, where `<version>` consists of digits and
      dots.

  Returns:
    The `<version>` part of the first `-airflow-<version>` occurrence.

  Raises:
    command_util.Error: if `image_version` has no `-airflow-<version>` part.
  """
  match = re.search(r'-airflow-([\d\.]+)', image_version or '')
  if not match:
    raise command_util.Error(
        'Cannot extract the Airflow version from image version "{}".'.format(
            image_version))
  return match.group(1)
|
|
|
|
def _RunKubectl(self, args, env_obj):
  """Runs Airflow command using kubectl on the GKE Cluster.

  In this mode the command is executed by connecting to the cluster and
  running `kubectl exec` in the worker container of a worker pod.
  It requires access to GKE Control plane.

  Args:
    args: argparse.Namespace, An object that contains the values for the
      arguments specified in the .Args() method.
    env_obj: Cloud Composer Environment object.

  Raises:
    command_util.Error: if the subcommand is not supported for the Airflow
      version of the environment.
    command_util.KubectlError: if kubectl fails; for private environments the
      error is replaced by the one built in ConvertKubectlError.
  """
  cluster_id = env_obj.config.gkeCluster
  cluster_location_id = command_util.ExtractGkeClusterLocationId(env_obj)

  # NOTE(review): .Args() registers no argument named 'no-tty', so this looks
  # like it is always True - confirm whether a `no_tty` flag was intended.
  tty = 'no-tty' not in args

  with command_util.TemporaryKubeconfig(
      cluster_location_id, cluster_id, None
  ):
    try:
      image_version = env_obj.config.softwareConfig.imageVersion
      airflow_version = self._ExtractAirflowVersion(image_version)

      self.CheckSubcommandAirflowSupport(args, airflow_version)
      self.CheckSubcommandNestedAirflowSupport(args, airflow_version)

      kubectl_ns = command_util.FetchKubectlNamespace(image_version)
      pod = command_util.GetGkePod(
          pod_substr=WORKER_POD_SUBSTR, kubectl_namespace=kubectl_ns)

      log.status.Print(
          'Executing within the following Kubernetes cluster namespace: '
          '{}'.format(kubectl_ns))
      # May append `--yes` to args.cmd_args, so it has to run before the
      # Airflow command line is assembled.
      self.BypassConfirmationPrompt(args, airflow_version)

      airflow_command = ['airflow', args.subcommand]
      if args.subcommand_nested:
        airflow_command.append(args.subcommand_nested)
      if args.cmd_args:
        airflow_command.extend(args.cmd_args)

      kubectl_args = ['exec', pod, '--stdin']
      if tty:
        kubectl_args.append('--tty')
      kubectl_args.extend(['--container', WORKER_CONTAINER, '--'])
      kubectl_args.extend(airflow_command)

      command_util.RunKubectlCommand(
          command_util.AddKubectlNamespace(kubectl_ns, kubectl_args),
          out_func=log.out.Print)
    except command_util.KubectlError as e:
      raise self.ConvertKubectlError(e, env_obj)
|
|
|
|
def _RunApi(self, args, env_obj):
  """Runs Airflow command using the Composer API and prints its output.

  Starts the command with ExecuteAirflowCommand and then polls its output
  until the API reports the end of the output. The delay between polls grows
  exponentially (up to MAX_POLL_TIME_SECONDS) while no output arrives and is
  reset once output is received. The first ctrl-c requests a stop of the
  command, a second one requests a forced stop and ends the polling.

  Args:
    args: argparse.Namespace, An object that contains the values for the
      arguments specified in the .Args() method.
    env_obj: Cloud Composer Environment object.

  Raises:
    command_util.Error: if the subcommand is not supported, if no execution
      id is returned, or after MAX_CONSECUTIVE_POLL_ERRORS consecutive
      polling failures.
    SystemExit: with the exit code of the Airflow command when that code is
      non-zero.
  """
  image_version = env_obj.config.softwareConfig.imageVersion
  airflow_version = self._ExtractAirflowVersion(image_version)
  env_ref = args.CONCEPTS.environment.Parse()

  self.CheckSubcommandAirflowSupport(args, airflow_version)
  self.CheckSubcommandNestedAirflowSupport(args, airflow_version)
  self.BypassConfirmationPrompt(args, airflow_version)

  cmd = [args.subcommand]
  if args.subcommand_nested:
    cmd.append(args.subcommand_nested)
  if args.cmd_args:
    cmd.extend(args.cmd_args)
  log.status.Print(
      'Executing the command: [ airflow {} ]...'.format(' '.join(cmd))
  )
  execute_result = environments_api_util.ExecuteAirflowCommand(
      command=args.subcommand,
      subcommand=args.subcommand_nested or '',
      parameters=args.cmd_args or [],
      environment_ref=env_ref,
      release_track=self.ReleaseTrack(),
  )
  # Guard against an empty response as well as a missing execution id.
  if not execute_result or not execute_result.executionId:
    raise command_util.Error(
        'Cannot execute subcommand for environment. Got empty execution Id.'
    )
  log.status.Print(
      'Command has been started. execution_id={}'.format(
          execute_result.executionId
      )
  )
  log.status.Print('Use ctrl-c to interrupt the command')

  output_end = False
  next_line = 1
  consecutive_poll_errors = 0
  wait_time_seconds = DEFAULT_POLL_TIME_SECONDS
  poll_result = None
  interrupted = False
  force_stop = False
  while not output_end and not force_stop:
    lines = None
    try:
      with execution_utils.RaisesKeyboardInterrupt():
        time.sleep(
            wait_time_seconds
            + random.uniform(-POLL_JITTER_SECONDS, POLL_JITTER_SECONDS)
        )
        poll_result = environments_api_util.PollAirflowCommand(
            execution_id=execute_result.executionId,
            pod_name=execute_result.pod,
            pod_namespace=execute_result.podNamespace,
            next_line_number=next_line,
            environment_ref=env_ref,
            release_track=self.ReleaseTrack(),
        )
        consecutive_poll_errors = 0
        output_end = poll_result.outputEnd
        lines = poll_result.output
        lines.sort(key=lambda line: line.lineNumber)
    except KeyboardInterrupt:
      log.status.Print('Interrupting the command...')
      try:
        log.debug('Stopping the airflow command...')
        # The first interrupt asks for a regular stop; any later interrupt
        # asks for a forced one.
        stop_result = environments_api_util.StopAirflowCommand(
            execution_id=execute_result.executionId,
            pod_name=execute_result.pod,
            force=interrupted,
            pod_namespace=execute_result.podNamespace,
            environment_ref=env_ref,
            release_track=self.ReleaseTrack(),
        )
        log.debug('Stop airflow command result...'+str(stop_result))
        if stop_result and stop_result.output:
          for line in stop_result.output:
            log.Print(line)
        if interrupted:
          force_stop = True
        interrupted = True
      except Exception:  # pylint:disable=broad-except
        # Best effort: a failed stop request is counted as a polling error
        # and polling continues. SystemExit is deliberately not swallowed.
        log.debug('Error during stopping airflow command. Retrying polling')
        consecutive_poll_errors += 1
    except Exception as e:  # pylint:disable=broad-except
      log.debug('Error while polling the airflow command: {}'.format(e))
      consecutive_poll_errors += 1

    if consecutive_poll_errors >= MAX_CONSECUTIVE_POLL_ERRORS:
      raise command_util.Error('Cannot fetch airflow command status.')

    if not lines:
      # Nothing new to print: back off before the next poll.
      wait_time_seconds = min(
          wait_time_seconds * EXP_BACKOFF_MULTIPLIER, MAX_POLL_TIME_SECONDS
      )
    else:
      wait_time_seconds = DEFAULT_POLL_TIME_SECONDS
      for line in lines:
        log.Print(line.content if line.content else '')
      # `lines` is sorted by line number, so the last one is the highest.
      next_line = lines[-1].lineNumber + 1

  if poll_result and poll_result.exitInfo and poll_result.exitInfo.exitCode:
    if poll_result.exitInfo.error:
      log.error('Error message: {}'.format(poll_result.exitInfo.error))
    log.error('Command exit code: {}'.format(poll_result.exitInfo.exitCode))
    # The `exit` builtin is provided by the `site` module for interactive
    # use and may be missing; raising SystemExit works in every interpreter.
    raise SystemExit(poll_result.exitInfo.exitCode)
|
|
|
|
def Run(self, args):
  """Runs the requested Airflow subcommand in the environment.

  After the deprecation prompt and the required-argument check, the
  environment is fetched and must be in the RUNNING state. The command is
  then executed through the Composer API when the image version of the
  environment is compatible with it, and through kubectl otherwise.

  Args:
    args: argparse.Namespace, An object that contains the values for the
      arguments specified in the .Args() method.

  Raises:
    command_util.Error: if the environment is not in the RUNNING state.
  """
  self.DeprecationWarningPrompt(args)
  self.CheckForRequiredCmdArgs(args)

  release_track = self.ReleaseTrack()
  messages = api_util.GetMessagesModule(release_track=release_track)
  running_state = messages.Environment.StateValueValuesEnum.RUNNING

  env_ref = args.CONCEPTS.environment.Parse()
  env_obj = environments_api_util.Get(env_ref, release_track=release_track)

  if env_obj.state != running_state:
    raise command_util.Error(
        'Cannot execute subcommand for environment in state {}. '
        'Must be RUNNING.'.format(env_obj.state)
    )

  use_api = image_versions_command_util.IsVersionAirflowCommandsApiCompatible(
      image_version=env_obj.config.softwareConfig.imageVersion
  )
  if use_api:
    self._RunApi(args, env_obj)
  else:
    self._RunKubectl(args, env_obj)
|