191 lines
7.3 KiB
Python
191 lines
7.3 KiB
Python
# -*- coding: utf-8 -*- #
|
|
# Copyright 2021 Google LLC. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Implementation of hash command for getting formatted file hashes."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import unicode_literals
|
|
|
|
import base64
|
|
import binascii
|
|
|
|
from googlecloudsdk.calliope import base
|
|
from googlecloudsdk.command_lib.storage import encryption_util
|
|
from googlecloudsdk.command_lib.storage import errors
|
|
from googlecloudsdk.command_lib.storage import fast_crc32c_util
|
|
from googlecloudsdk.command_lib.storage import flags
|
|
from googlecloudsdk.command_lib.storage import hash_util
|
|
from googlecloudsdk.command_lib.storage import storage_url
|
|
from googlecloudsdk.command_lib.storage import wildcard_iterator
|
|
from googlecloudsdk.command_lib.storage.resources import resource_reference
|
|
from googlecloudsdk.command_lib.util import crc32c
|
|
from googlecloudsdk.core import log
|
|
|
|
_DIGEST_FORMAT_KEY = 'digest_format'
|
|
_CRC32C_HASH_KEY = 'crc32c_hash'
|
|
_MD5_HASH_KEY = 'md5_hash'
|
|
_URL_KEY = 'url'
|
|
|
|
|
|
def _convert_base64_to_hex(base64_string):
|
|
"""Converts base64 hash digest to hex-formatted hash digest string."""
|
|
if base64_string is None:
|
|
return None
|
|
return binascii.hexlify(
|
|
base64.b64decode(
|
|
base64_string.strip('\n"\'').encode('utf-8'))).decode('utf-8')
|
|
|
|
|
|
def _is_object_or_file_resource(resource):
  """Returns True if resource is an ObjectResource or FileObjectResource."""
  accepted_types = (
      resource_reference.ObjectResource,
      resource_reference.FileObjectResource,
  )
  return isinstance(resource, accepted_types)
|
|
|
|
|
|
def _get_resource_iterator(url_strings):
  """Wildcard matches and recurses into top-level of buckets.

  Args:
    url_strings (Iterable[str]): Local or cloud URL strings to expand.

  Yields:
    ObjectResource or FileObjectResource instances matched by the URLs. A
    match that is a cloud bucket is expanded one level, to the ObjectResource
    instances matched by `<bucket>/*`.

  Raises:
    errors.InvalidUrlError: Once all URLs have been processed, if none of them
      yielded any resource.
  """
  any_url_matched = False
  for url_string in url_strings:
    wildcard_expanded_iterator = wildcard_iterator.get_wildcard_iterator(
        url_string,
        error_on_missing_key=False,
        fetch_encrypted_object_hashes=True)
    this_url_matched = False
    for wildcard_expanded_resource in wildcard_expanded_iterator:
      if _is_object_or_file_resource(wildcard_expanded_resource):
        any_url_matched = this_url_matched = True
        yield wildcard_expanded_resource
      elif (isinstance(wildcard_expanded_resource.storage_url,
                       storage_url.CloudUrl) and
            wildcard_expanded_resource.storage_url.is_bucket()):
        # Use the same listing options as the top-level expansion above so
        # that `gs://bucket` and `gs://bucket/*` are handled identically;
        # previously fetch_encrypted_object_hashes was omitted here.
        bucket_expanded_iterator = wildcard_iterator.get_wildcard_iterator(
            wildcard_expanded_resource.storage_url.join('*').url_string,
            error_on_missing_key=False,
            fetch_encrypted_object_hashes=True)
        for bucket_expanded_resource in bucket_expanded_iterator:
          # Only objects are yielded from a bucket expansion; any other
          # resource type found directly under the bucket is skipped.
          if isinstance(bucket_expanded_resource,
                        resource_reference.ObjectResource):
            any_url_matched = this_url_matched = True
            yield bucket_expanded_resource
    if not this_url_matched:
      log.warning('No matches found for {}'.format(url_string))
  if not any_url_matched:
    raise errors.InvalidUrlError('No URLS matched.')
|
|
|
|
|
|
@base.UniverseCompatible
class Hash(base.Command):
  """Calculates hashes on local or cloud files."""

  detailed_help = {
      'DESCRIPTION':
          """
      Calculates hashes on local or cloud files that can be used to compare with
      "gcloud storage ls -L" output. If a specific hash option is not provided,
      this command calculates all gcloud storage-supported hashes for the file.

      Note that gcloud storage automatically performs hash validation when
      uploading or downloading files, so this command is only needed if you want
      to write a script that separately checks the hash for some reason.

      If you calculate a CRC32C hash for the file without a precompiled
      google-crc32c installation, hashing will be very slow.
      """,
      'EXAMPLES':
          """

      To get the MD5 and CRC32C hash digest of a cloud object in Base64 format:

        $ {command} gs://bucket/object

      To get just the MD5 hash digest of a local object in hex format:

        $ {command} /dir/object.txt --skip-crc32c --hex
      """,
  }

  @staticmethod
  def Args(parser):
    """Registers the URL positional and the hash-related flags on parser."""
    parser.add_argument(
        'urls',
        nargs='+',
        help='Local or cloud URLs of objects to hash.',
    )
    parser.add_argument(
        '--hex',
        action='store_true',
        help=(
            'Output hash digests in hex format. By default, digests are'
            ' displayed in base64.'
        ),
    )

    # The two skip flags are mutually exclusive, so at least one hash is
    # always reported.
    mutually_exclusive_skips = parser.add_group(mutex=True)
    mutually_exclusive_skips.add_argument(
        '--skip-crc32c',
        action='store_true',
        help='Skip CRC32C hash calculation. Useful if command is running slow.',
    )
    mutually_exclusive_skips.add_argument(
        '--skip-md5',
        action='store_true',
        help='Skip MD5 hash calculation. Useful if command is running slow.',
    )

    flags.add_encryption_flags(parser, command_only_reads_data=True)

    flags.add_additional_headers_flag(parser)

  def Run(self, args):
    """Yields one dict of formatted hash digests per matched object or file.

    Args:
      args: Parsed command arguments; reads urls, hex, skip_crc32c and
        skip_md5, and is passed to encryption_util.initialize_key_store.

    Yields:
      Dicts holding the digest format, the resource URL, and the CRC32C and/or
      MD5 digest unless the corresponding skip flag was given. Cloud objects
      that carry neither hash are skipped with a warning.
    """
    encryption_util.initialize_key_store(args)

    include_crc32c = not args.skip_crc32c
    include_md5 = not args.skip_md5

    if include_crc32c:
      # Log which CRC32C backend is in use.
      if fast_crc32c_util.should_use_gcloud_crc32c():
        implementation_name = 'gcloud-crc32c (Go binary)'
      elif crc32c.IS_FAST_GOOGLE_CRC32C_AVAILABLE:
        implementation_name = 'google-crc32c (Python binary)'
      else:
        implementation_name = 'crcmod (slow pure Python implementation)'
      log.info('CRC32C implementation: {}'.format(implementation_name))

    # Cloud objects expose base64 digest strings; local files produce hash
    # objects. Choose one formatter for each source up front.
    if args.hex:
      digest_format = 'hex'
      render_cloud_digest = _convert_base64_to_hex

      def render_file_digest(hash_object):
        return hash_object.hexdigest()
    else:
      digest_format = 'base64'
      render_file_digest = hash_util.get_base64_hash_digest_string

      def render_cloud_digest(digest):
        return digest

    for resource in _get_resource_iterator(args.urls):
      is_cloud_object = isinstance(resource, resource_reference.ObjectResource)

      if (is_cloud_object and resource.crc32c_hash is None and
          resource.md5_hash is None):
        log.warning('No hashes found for {}'.format(resource))
        continue

      record = {_DIGEST_FORMAT_KEY: digest_format}

      if is_cloud_object:
        record[_URL_KEY] = resource.storage_url.versionless_url_string
        if include_crc32c:
          record[_CRC32C_HASH_KEY] = render_cloud_digest(resource.crc32c_hash)
        if include_md5:
          record[_MD5_HASH_KEY] = render_cloud_digest(resource.md5_hash)
      else:  # FileObjectResource
        local_path = resource.storage_url.resource_name
        record[_URL_KEY] = local_path
        if include_crc32c:
          crc32c_hash_object = hash_util.get_hash_from_file(
              local_path, hash_util.HashAlgorithm.CRC32C)
          record[_CRC32C_HASH_KEY] = render_file_digest(crc32c_hash_object)
        if include_md5:
          md5_hash_object = hash_util.get_hash_from_file(
              local_path, hash_util.HashAlgorithm.MD5)
          record[_MD5_HASH_KEY] = render_file_digest(md5_hash_object)

      yield record
|