feat: Add new gcloud commands, API clients, and third-party libraries across various services.

This commit is contained in:
2026-01-01 20:26:35 +01:00
parent 5e23cbece0
commit a19e592eb7
25221 changed files with 8324611 additions and 0 deletions

View File

@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*- #
# Copyright 2017 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Debugging tools for Compute Engine virtual machine instances."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from googlecloudsdk.calliope import base
@base.UniverseCompatible
@base.ReleaseTracks(base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA,
                    base.ReleaseTrack.GA)
class Diagnose(base.Group):
  """Debugging tools for Compute Engine virtual machine instances."""

  # Surfaces this group under the instances category of `gcloud compute`.
  category = base.INSTANCES_CATEGORY

  detailed_help = {
      'brief': 'Debugging tools for Compute Engine virtual machine instances.',
  }

View File

@@ -0,0 +1,279 @@
# -*- coding: utf-8 -*- #
# Copyright 2018 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Triggers instance to gather logs and upload them to a GCS Bucket."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import base64
import datetime
import json
import time
from apitools.base.py.exceptions import HttpError
from googlecloudsdk.api_lib.cloudresourcemanager import projects_api
from googlecloudsdk.api_lib.compute import base_classes
from googlecloudsdk.api_lib.compute.diagnose import diagnose_utils
from googlecloudsdk.calliope import base
from googlecloudsdk.command_lib.compute.instances import flags as instance_flags
from googlecloudsdk.command_lib.projects import util as projects_util
from googlecloudsdk.command_lib.util import time_util
from googlecloudsdk.core import log
from googlecloudsdk.core import properties
import six
_DIAGNOSTICS_METADATA_KEY = 'diagnostics'
_SERVICE_ACCOUNT_NAME = 'gce-diagnostics-extract-logs'
_GCS_LOGS_BUCKET_PREFIX = 'diagnostics_logs_project'
_SUCCESS_MSG = """Log collection has begun.
It may take several minutes for this operation to complete.
Logs will be made available shortly at:
gs://{0}/{1}
Status has been sent to the serial port and can be viewed by running:
gcloud compute instances get-serial-port-output $VM-NAME$ \
--project=$PROJECT$ --zone=$ZONE$"""
DETAILED_HELP = {
'EXAMPLES':
"""\
To export logs and upload them to a Cloud Storage Bucket, run:
$ {command} example-instance --zone=us-central1
""",
}
_SERVICE_ACCOUNT_TOKEN_CREATOR_ROLE_MISSING_MSG = """
To use this feature you must grant the iam.serviceAccountTokenCreator role on the project.
For more information please refer to Collecting diagnostic information:
[https://cloud.google.com/compute/docs/instances/collecting-diagnostic-information]
"""
@base.ReleaseTracks(base.ReleaseTrack.ALPHA, base.ReleaseTrack.BETA,
                    base.ReleaseTrack.GA)
class ExportLogs(base_classes.BaseCommand):
  """Triggers instance to gather logs and upload them to a Cloud Storage Bucket.

  Gathers logs from a running Compute Engine VM and exports them to a Google
  Cloud Storage Bucket. Outputs a path to the logs within the Bucket.
  """

  detailed_help = DETAILED_HELP

  @classmethod
  def Args(cls, parser):
    """See base class."""
    instance_flags.INSTANCE_ARG.AddArgument(parser)
    parser.add_argument(
        '--collect-process-traces',
        action='store_true',
        help=('Collect a 10 minute trace of the running system. On Windows, '
              'this utilizes Windows Performance Recorder. It records CPU, '
              'disk, file, and network activity during that time.'))
    parser.display_info.AddFormat('none')

  def Run(self, args):
    """See base class."""
    self._diagnose_client = diagnose_utils.DiagnoseClient()
    instance_ref = self._ResolveInstance(args)
    project = properties.VALUES.core.project.Get(required=True)
    # Build everything the VM agent needs: a service account able to sign
    # blobs, a bucket to hold the logs, and a resumable signed upload URL.
    service_account = self._GetDiagnosticsServiceAccount(project)
    expiration_time = self._GetSignedUrlExpiration()
    bucket = self._GetLogBucket(project)
    log_path = self._GetLogPath(instance_ref.instance)
    url = self._CreateResumableSignedUrl(service_account, expiration_time,
                                         bucket, log_path)
    # Writing this metadata key is what actually triggers log collection on
    # the instance; the guest agent watches for it.
    diagnostics_entry = self._ConstructDiagnosticsKeyEntry(
        url, args.collect_process_traces)
    self._diagnose_client.UpdateMetadata(
        project, instance_ref, _DIAGNOSTICS_METADATA_KEY, diagnostics_entry)
    log.Print(_SUCCESS_MSG.format(bucket, log_path))
    return {'bucket': bucket, 'logPath': log_path, 'signedUrl': url}

  def _CreateResumableSignedUrl(self, service_account, expiration, bucket,
                                filepath):
    """Make a resumable signed url using the SignBlob API of a Service Account.

    This creates a signed url that can be used by another program to upload a
    single file to the specified bucket with the specified file name.

    Args:
      service_account: The email of a service account that has permissions to
        sign a blob and create files within GCS Buckets.
      expiration: The time at which the returned signed url becomes invalid,
        measured in seconds since the epoch.
      bucket: The name of the bucket the signed url will point to.
      filepath: The name or relative path the file will have within the bucket
        once uploaded.

    Returns:
      A string url that can be used until its expiration to upload a file.
    """
    # Canonical string for a resumable-upload signed URL (POST with the
    # x-goog-resumable:start extension header).
    url_data = six.ensure_binary(
        'POST\n\n\n{0}\nx-goog-resumable:start\n/{1}/{2}'.format(
            expiration, bucket, filepath))
    try:
      signed_blob = self._diagnose_client.SignBlob(service_account, url_data)
    except HttpError as e:
      # 403 here almost always means the caller lacks the Service Account
      # Token Creator role; print the remediation hint before re-raising.
      if e.status_code == 403:
        log.Print(_SERVICE_ACCOUNT_TOKEN_CREATOR_ROLE_MISSING_MSG)
      raise
    signature = six.ensure_binary(signed_blob)
    encoded_sig = base64.b64encode(signature)
    url = ('https://storage.googleapis.com/'
           '{0}/{1}?GoogleAccessId={2}&Expires={3}&Signature={4}')
    # The base64 signature may contain '+' and '/', so it must be URL-quoted.
    url_suffix = six.moves.urllib.parse.quote_plus(encoded_sig)
    return url.format(bucket, filepath, service_account, expiration, url_suffix)

  def _GetDiagnosticsServiceAccount(self, project):
    """Locates or creates a service account with the correct permissions.

    Attempts to locate the service account meant for creating the signed url.
    If not found, it will subsequently create the service account. It will then
    give the service account the correct IAM permissions to create a signed url
    to a GCS Bucket.

    Args:
      project: The project to search for the service account in.

    Returns:
      A string email of the service account to use.
    """
    # Search for service account by name.
    service_account = None
    for account in self._diagnose_client.ListServiceAccounts(project):
      if account.email.startswith('{}@'.format(_SERVICE_ACCOUNT_NAME)):
        service_account = account.email
    if service_account is None:
      service_account = self._diagnose_client.CreateServiceAccount(
          project, _SERVICE_ACCOUNT_NAME)
    # We can apply the correct IAM permissions for accessing the GCS Bucket
    # regardless of whether or not the account already has them.
    project_ref = projects_util.ParseProject(project)
    service_account_ref = 'serviceAccount:{}'.format(service_account)
    projects_api.AddIamPolicyBinding(project_ref, service_account_ref,
                                     'roles/storage.objectCreator')
    projects_api.AddIamPolicyBinding(project_ref, service_account_ref,
                                     'roles/storage.objectViewer')
    return service_account

  def _GetSignedUrlExpiration(self, hours=1):
    """Generate a string expiration time based on some hours in the future.

    Args:
      hours: The number of hours in the future for your timestamp to represent.

    Returns:
      A string timestamp measured in seconds since the epoch.
    """
    expiration = datetime.datetime.now() + datetime.timedelta(hours=hours)
    expiration_seconds = time.mktime(expiration.timetuple())
    return six.text_type(int(expiration_seconds))

  def _GetLogBucket(self, project_id):
    """Locates or creates the GCS Bucket for logs associated with the project.

    Args:
      project_id: The id number of the project the bucket is associated with.

    Returns:
      The name of the GCS Bucket.
    """
    project_number = self._GetProjectNumber(project_id)
    bucket_name = '{}_{}'.format(_GCS_LOGS_BUCKET_PREFIX, project_number)
    bucket = self._diagnose_client.FindBucket(project_id, bucket_name)
    if bucket is None:
      bucket = self._diagnose_client.CreateBucketWithLifecycle(days_to_live=10)
      bucket.name = bucket_name
      suffix = 0
      # We can't guarantee that our chosen bucket name isn't already taken, so
      # we may have to try multiple suffixes before we generate a unique name.
      bucket_name_taken = True
      while bucket_name_taken:
        try:
          self._diagnose_client.InsertBucket(project_id, bucket)
          bucket_name_taken = False
        except HttpError as e:
          # Error 409 means that bucket name already exists.
          if e.status_code != 409:
            raise
          bucket.name = '{}_{}'.format(bucket_name, suffix)
          suffix += 1
    return bucket.name

  def _GetProjectNumber(self, project_id):
    """Converts a project id to a project number."""
    project_ref = projects_util.ParseProject(project_id)
    project = projects_api.Get(project_ref)
    return project.projectNumber

  def _GetLogPath(self, instance):
    """Creates a timestamped filename that should be realistically unique."""
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    return '{}-logs-{}.zip'.format(instance, timestamp)

  def _ResolveInstance(self, args):
    """Resolves the arguments into an instance.

    Args:
      args: The command line arguments.

    Returns:
      An instance reference to a VM.
    """
    holder = base_classes.ComputeApiHolder(self.ReleaseTrack())
    compute_client = holder.client
    resources = holder.resources
    instance_ref = instance_flags.INSTANCE_ARG.ResolveAsResource(
        args,
        resources,
        scope_lister=instance_flags.GetInstanceZoneScopeLister(compute_client))
    return instance_ref

  def _ConstructDiagnosticsKeyEntry(self, signed_url, trace):
    """Generates a JSON String that is a command for the VM to extract the logs.

    Args:
      signed_url: The url where the logs can be uploaded.
      trace: Whether or not to take a 10 minute trace on the VM.

    Returns:
      A JSON String that can be written to the metadata server to trigger the
      extraction of logs.
    """
    # The agent ignores entries older than this expiration (300 seconds), so
    # stale metadata cannot re-trigger a collection.
    expire_str = time_util.CalculateExpiration(300)
    diagnostics_key_data = {
        'signedUrl': signed_url,
        'trace': trace,
        'expireOn': expire_str
    }
    return json.dumps(diagnostics_key_data, sort_keys=True)

View File

@@ -0,0 +1,440 @@
# -*- coding: utf-8 -*- #
# Copyright 2017 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Routes to/from Compute Engine VMs."""
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
import argparse
import io
import os
import re
from googlecloudsdk.api_lib.compute import base_classes
from googlecloudsdk.api_lib.compute import lister
from googlecloudsdk.calliope import exceptions
from googlecloudsdk.command_lib.compute import ssh_utils
from googlecloudsdk.command_lib.compute.diagnose import external_helper
from googlecloudsdk.command_lib.compute.diagnose import internal_helpers
from googlecloudsdk.command_lib.util.ssh import ssh
from googlecloudsdk.core import log
from googlecloudsdk.core import properties
from googlecloudsdk.core.console import console_io
from googlecloudsdk.core.util import files
import six
DETAILED_HELP = {
'EXAMPLES':
"""\
To route to/from Compute Engine virtual machine instances, run:
$ {command}
""",
}
class Routes(base_classes.BaseCommand):
  """Routes to/from Compute Engine virtual machine instances.

  Routes to/from Compute Engine virtual machine instances.

  NOTE: The name filtering will cycle through all the VMs in the project.
  Depending on the size of the project, this could be a considerable amount
  of work.

  If that is the case, use the --regexp flag to filter down the amount
  of VMs considered in the filtering.
  """

  detailed_help = DETAILED_HELP

  @classmethod
  def Args(cls, parser):
    # All flag handling is delegated to the _RoutesArgs helper below.
    _RoutesArgs.Args(parser)

  def Run(self, args):
    """Default run method implementation."""
    super(Routes, self).Run(args)
    self._use_accounts_service = False
    holder = base_classes.ComputeApiHolder(self.ReleaseTrack())
    resource_registry = holder.resources
    ssh_helper = ssh_utils.BaseSSHCLIHelper()
    ssh_helper.Run(args)
    # We store always needed commands non-changing fields
    self._args = args
    self._ssh_helper = ssh_helper
    # We obtain generic parameters of the call
    project = properties.VALUES.core.project.GetOrFail()
    filters = _RoutesArgs.GetFilters(args)
    instances = _RoutesQueries.ObtainInstances(
        args.names,
        service=self.compute.instances,
        project=project,
        zones=args.zones,
        filters=filters,
        http=self.http,
        batch_url=self.batch_url)
    user = args.user
    if not user:
      user = ssh.GetDefaultSshUsername()
    # We unpack the flags
    dry_run = args.dry_run
    reverse_traceroute = args.reverse_traceroute
    traceroute_args = args.traceroute_args
    external_route_ip = args.external_route_ip
    internal_helpers.PrintHeader(instances)
    prompt = 'The following VMs will be tracerouted.'
    # Dry runs never prompt; a declined prompt aborts before any traceroute.
    if instances and not dry_run and not console_io.PromptContinue(prompt):
      return
    # Sometimes the prompt would appear after the instance data
    log.out.flush()
    for instance in instances:
      header = 'Checking instance %s' % instance.name
      log.out.Print(header)
      log.out.Print('-' * len(header))
      try:
        self.TracerouteInstance(instance, traceroute_args, dry_run,
                                resource_registry)
      except exceptions.ToolException as e:
        # A failed forward traceroute skips the reverse pass for this VM
        # but continues with the remaining instances.
        log.error('Error routing to instance')
        log.error(six.text_type(e))
        continue
      if reverse_traceroute:
        try:
          has_traceroute = self.CheckTraceroute(instance, user, dry_run,
                                                resource_registry)
          if has_traceroute:
            # We obtain the self ip
            if not external_route_ip:
              external_route_ip = self.ObtainSelfIp(instance, user, dry_run,
                                                    resource_registry)
            if external_route_ip:
              self.ReverseTracerouteInstance(instance, user, external_route_ip,
                                             traceroute_args, dry_run,
                                             resource_registry)
            else:
              log.out.Print('Unable to obtain self ip. Aborting.')
          else:
            log.out.Print(
                'Please make sure traceroute is installed in PATH to move on.')
        except ssh.CommandError as e:
          log.error(six.text_type(e))
      log.out.Print('')  # Separator

  ###########################################################
  # Traceroute Invocations
  ###########################################################

  def TracerouteInstance(self, instance, traceroute_args, dry_run,
                         resource_registry):
    """Runs a traceroute from localhost to a GCE VM.

    Args:
      instance: Compute Engine VM.
      traceroute_args: Additional traceroute args to be passed on.
      dry_run: Whether to only print commands instead of running them.
      resource_registry: gcloud class used for obtaining data from the
        resources.
    """
    instance_string = internal_helpers.GetInstanceNetworkTitleString(instance)
    log.out.Print('>>> Tracerouting to %s' % instance_string)
    external_ip = ssh_utils.GetExternalIPAddress(instance)
    cmd = ['traceroute', external_ip]
    if traceroute_args:
      cmd += traceroute_args
    if dry_run:
      external_helper.DryRunLog(' '.join(cmd))
    else:
      external_helper.RunSubprocess(proc_name='Traceroute', command_list=cmd)
      log.out.Print('>>>')

  def ReverseTracerouteInstance(self, instance, user, external_route_ip,
                                traceroute_args, dry_run, resource_registry):
    """Runs a traceroute from a GCE VM to localhost.

    Args:
      instance: Compute Engine VM.
      user: The user to use to SSH into the instance.
      external_route_ip: the ip to which traceroute from the VM
      traceroute_args: Additional traceroute args to be passed on.
      dry_run: Whether to only print commands instead of running them.
      resource_registry: gcloud class used for obtaining data from the
        resources.

    Raises:
      ssh.CommandError: there was an error running a SSH command
    """
    instance_string = internal_helpers.GetInstanceNetworkTitleString(instance)
    log.out.Print('<<< Reverse tracerouting from %s' % instance_string)
    # Necessary because the order of commands in the output
    # would be wrong otherwise (the ssh command will output by its own)
    log.out.flush()
    if dry_run:
      # Placeholder keeps the dry-run command readable without a real IP.
      external_route_ip = '<SELF-IP>'
    cmd = ['traceroute', external_route_ip]
    if traceroute_args:
      cmd += traceroute_args
    external_helper.RunSSHCommandToInstance(
        command_list=cmd,
        instance=instance,
        user=user,
        args=self._args,
        ssh_helper=self._ssh_helper,
        dry_run=dry_run)
    # This identifier is a simple delimiter of each traceroute run
    if not dry_run:
      log.out.Print('<<<')

  def CheckTraceroute(self, instance, user, dry_run, resource_registry):
    """Checks whether the instance has traceroute in PATH.

    Args:
      instance: Compute Engine VM.
      user: The user to use to SSH into the instance.
      dry_run: Whether to only print commands instead of running them.
      resource_registry: gcloud class used for obtaining data from the
        resources.

    Returns:
      True if the instance has traceroute in PATH,
      False otherwise

    Raises:
      ssh.CommandError: there was an error running a SSH command
    """
    instance_string = internal_helpers.GetInstanceNetworkTitleString(instance)
    log.out.write('Checking traceroute for %s: ' % instance_string)
    if dry_run:
      log.out.Print('[DRY-RUN] No command executed.')
      log.out.flush()
    cmd = ['which', 'traceroute']
    try:
      # This command is silent
      with files.FileWriter(os.devnull) as dev_null:
        return_code = external_helper.RunSSHCommandToInstance(
            command_list=cmd,
            instance=instance,
            user=user,
            args=self._args,
            ssh_helper=self._ssh_helper,
            explicit_output_file=dev_null,
            dry_run=dry_run)
    except Exception as e:  # pylint: disable=broad-except
      log.out.write(six.text_type(e))
      log.out.write('\n')  # Close the open print stmt
      log.out.flush()
      raise ssh.CommandError(' '.join(cmd), six.text_type(e))
    if return_code == 0:
      log.out.Print('Traceroute found in PATH')
    else:
      log.out.Print('Traceroute not found in PATH')
    log.out.flush()
    return return_code == 0

  def ObtainSelfIp(self, instance, user, dry_run, resource_registry):
    """Returns the localhost ip as seen from the VM.

    Args:
      instance: Compute Engine VM.
      user: The user to use to SSH into the instance.
      dry_run: Whether to only print commands instead of running them.
      resource_registry: gcloud class used for obtaining data from the
        resources.

    Returns:
      A string containing the local ip,
      None if the obtaining was unsuccessful

    Raises:
      ssh.CommandError: there was an error running a SSH command
    """
    instance_string = internal_helpers.GetInstanceNetworkTitleString(instance)
    log.out.write('Obtaining self ip from %s: ' % instance_string)
    # Sometimes this call will appear after the actual result
    log.out.flush()
    if dry_run:
      log.out.Print('<SELF-IP>')
    temp = io.BytesIO()
    # $SSH_CLIENT on the VM contains the client's address as seen remotely.
    cmd = ['echo', '$SSH_CLIENT']
    try:
      external_helper.RunSSHCommandToInstance(
          command_list=cmd,
          instance=instance,
          user=user,
          args=self._args,
          ssh_helper=self._ssh_helper,
          explicit_output_file=temp,
          dry_run=dry_run)
    except Exception as e:  # pylint: disable=broad-except
      log.out.write('\n')  # Close the open print stmt
      log.out.flush()
      raise ssh.CommandError(' '.join(cmd), six.text_type(e))
    who_am_i_str = temp.getvalue().decode('utf-8')
    # Extract the first IPv4-looking token from the command output.
    result = re.search(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})', who_am_i_str)
    if result:
      res = result.group(1)
      log.out.Print(res)
      log.out.flush()
      return res
    return None

  @property
  def resource_type(self):
    return 'instances'
class _RoutesArgs(object):
"""Helper to setting and getting values for the args."""
@classmethod
def Args(cls, parser):
"""Creates the flags stmts for the command."""
# Gives us the basic SSH flags
ssh_utils.BaseSSHCLIHelper.Args(parser)
base_classes.ZonalLister.Args(parser)
# SSH flag
parser.add_argument(
'--container',
help="""\
The name or ID of a container inside of the virtual machine instance
to connect to. This only applies to virtual machines that are using
a Container-Optimized OS virtual machine image.
For more information, see
[](https://cloud.google.com/compute/docs/containers)
""")
parser.add_argument(
'--external-route-ip',
default=None,
help=
('For reverse traceroute, this will be the ip given to the VM instance '
'to traceroute to. This will override all obtained ips.'))
parser.add_argument(
'--reverse-traceroute',
action='store_true',
help='If enabled, will also run traceroute from the VM to the host')
# SSH flag
parser.add_argument(
'--ssh-flag',
action='append',
help="""\
Additional flags to be passed to *ssh(1)*. It is recommended that flags
be passed using an assignment operator and quotes. This flag will
replace occurences of ``%USER%'' and ``%INSTANCE%'' with their
dereferenced values. Example:
$ {command} example-instance --zone us-central1-a \
--ssh-flag="-vvv" --ssh-flag="-L 80:%INSTANCE%:80"
is equivalent to passing the flags ``--vvv'' and ``-L
80:162.222.181.197:80'' to *ssh(1)* if the external IP address of
'example-instance' is 162.222.181.197.
""")
parser.add_argument(
'--user',
help="""\
User for login to the selected VMs.
If not specified, the default user will be used.
""")
parser.add_argument(
'traceroute_args',
nargs=argparse.REMAINDER,
help="""\
Flags and positionals passed to the underlying traceroute call.
""",
example="""\
$ {command} example-instance -- -w 0.5 -q 5 42
""")
@classmethod
def GetFilters(cls, args):
filters = []
if args.regexp:
filters.append('name eq %s' % args.regexp)
if not filters:
return None
filters = ' AND '.join(filters)
return filters
class _RoutesQueries(object):
  """Helper for getting instance queries using the gcloud SDK."""

  @classmethod
  def ObtainInstances(cls, names, **kwargs):
    """Returns a list of instances according to the flags.

    Fetches every zonal instance matching the server-side filters, then
    narrows the result locally by the requested names: an exact instance-name
    match wins, otherwise a substring match on the instance name, otherwise a
    substring match on the instance selfLink.

    Args:
      names: Iterable of (partial) instance names; empty selects everything.
      **kwargs: service, project, zones, filters, http and batch_url values
        forwarded to lister.GetZonalResources.

    Returns:
      The list of matching instance resources.
    """
    errors = []
    instances = list(
        lister.GetZonalResources(
            service=kwargs['service'],
            project=kwargs['project'],
            requested_zones=kwargs['zones'],
            filter_expr=kwargs['filters'],
            http=kwargs['http'],
            batch_url=kwargs['batch_url'],
            errors=errors))
    # Without names there is nothing to filter on.
    if not names:
      return instances

    def _BestMatch(name):
      # Exact name match has priority over loose matches; among loose
      # matches, a hit on the instance name beats a hit on the selfLink.
      loose_name_match = None
      loose_link_match = None
      for candidate in instances:
        if candidate.name == name:
          return candidate
        if name in candidate.name:
          loose_name_match = candidate
        elif name in candidate.selfLink:
          loose_link_match = candidate
      if loose_name_match is not None:
        return loose_name_match
      return loose_link_match

    matched = []
    for name in names:
      selection = _BestMatch(name)
      if selection is not None:
        matched.append(selection)
    return matched