393 lines
13 KiB
Python
393 lines
13 KiB
Python
# -*- coding: utf-8 -*- #
|
|
# Copyright 2023 Google LLC. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Implementation of Unix-like cat command for cloud storage providers."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import unicode_literals
|
|
|
|
import collections
|
|
import enum
|
|
|
|
from googlecloudsdk.api_lib.storage import cloud_api
|
|
from googlecloudsdk.calliope import arg_parsers
|
|
from googlecloudsdk.calliope import base
|
|
from googlecloudsdk.command_lib.storage import errors
|
|
from googlecloudsdk.command_lib.storage import errors_util
|
|
from googlecloudsdk.command_lib.storage import flags
|
|
from googlecloudsdk.command_lib.storage import stdin_iterator
|
|
from googlecloudsdk.command_lib.storage import storage_url
|
|
from googlecloudsdk.command_lib.storage import user_request_args_factory
|
|
from googlecloudsdk.command_lib.storage import wildcard_iterator
|
|
from googlecloudsdk.command_lib.storage.tasks import task_executor
|
|
from googlecloudsdk.command_lib.storage.tasks import task_graph_executor
|
|
from googlecloudsdk.command_lib.storage.tasks import task_status
|
|
from googlecloudsdk.command_lib.storage.tasks.buckets import restore_bucket_task
|
|
from googlecloudsdk.command_lib.storage.tasks.objects import bulk_restore_objects_task
|
|
from googlecloudsdk.command_lib.storage.tasks.objects import restore_object_task
|
|
from googlecloudsdk.core import log
|
|
|
|
|
|
_BULK_RESTORE_FLAGS = [
|
|
'allow_overwrite',
|
|
'created_after_time',
|
|
'created_before_time',
|
|
'deleted_after_time',
|
|
'deleted_before_time',
|
|
]
|
|
|
|
_INVALID_BUCKET_RESTORE_FLAGS = [
|
|
'all_versions',
|
|
'allow_overwrite',
|
|
'created_after_time',
|
|
'created_before_time',
|
|
'deleted_after_time',
|
|
'deleted_before_time',
|
|
'asyncronous',
|
|
]
|
|
|
|
_SYNCHRONOUS_RESTORE_FLAGS = ['all_versions']
|
|
|
|
|
|
class ExecutionMode(enum.Enum):
|
|
ASYNCHRONOUS = 'Asynchronous'
|
|
SYNCHRONOUS = 'Synchronous'
|
|
|
|
|
|
def _raise_if_invalid_flag_combination(args, execution_mode, invalid_flags):
|
|
"""Raises error if invalid combination of flags found in user input.
|
|
|
|
Args:
|
|
args (parser_extensions.Namespace): User input object.
|
|
execution_mode (ExecutionMode): Determined by presence of --async flag.
|
|
invalid_flags (list[str]): Flags as `args` attributes.
|
|
|
|
Raises:
|
|
error.Error: Flag incompatible with execution mode.
|
|
"""
|
|
for invalid_flag in invalid_flags:
|
|
if getattr(args, invalid_flag):
|
|
raise errors.Error(
|
|
'{} execution does not support flag: {}.'
|
|
' See help text with --help.'.format(
|
|
execution_mode.value, invalid_flag
|
|
)
|
|
)
|
|
|
|
|
|
def _raise_error_if_invalid_flags_for_bucket_restore(url, args):
|
|
"""Raises error if invalid combination of flags found in user input for bucket restore.
|
|
|
|
Args:
|
|
url: CloudUrl object.
|
|
args: (parser_extensions.Namespace): User input object.
|
|
|
|
Raises:
|
|
Error: Flags incompatible with execution mode.
|
|
"""
|
|
if _is_bucket_restore(url):
|
|
# Bucket restore doesn't work with flags that are specific to objects.
|
|
invalid_flags_found = []
|
|
for invalid_flag in _INVALID_BUCKET_RESTORE_FLAGS:
|
|
if getattr(args, invalid_flag):
|
|
invalid_flags_found.append(invalid_flag)
|
|
|
|
if invalid_flags_found:
|
|
raise errors.Error(
|
|
'Bucket restore does not support the following flags: {}.'
|
|
' See help text with --help.'.format(', '.join(invalid_flags_found))
|
|
)
|
|
|
|
|
|
def _is_bucket_restore(url):
|
|
return (
|
|
isinstance(url, storage_url.CloudUrl)
|
|
and url.is_bucket()
|
|
and not wildcard_iterator.contains_wildcard(url.url_string)
|
|
)
|
|
|
|
|
|
def _url_iterator(args):
|
|
"""Extracts, validates, and yields URLs."""
|
|
for url_string in stdin_iterator.get_urls_iterable(
|
|
args.urls, args.read_paths_from_stdin
|
|
):
|
|
url = storage_url.storage_url_from_string(
|
|
url_string, is_bucket_gen_parsing_allowed=True
|
|
)
|
|
_raise_error_if_invalid_flags_for_bucket_restore(url, args)
|
|
errors_util.raise_error_if_not_gcs(args.command_path, url)
|
|
yield url
|
|
|
|
|
|
def _async_restore_task_iterator(args, user_request_args):
|
|
"""Yields non-blocking restore tasks."""
|
|
bucket_to_globs = collections.defaultdict(list)
|
|
for url in _url_iterator(args):
|
|
if not wildcard_iterator.contains_wildcard(url.url_string):
|
|
log.warning(
|
|
'Bulk restores are long operations. For restoring a single'
|
|
' object, you should probably use a synchronous restore'
|
|
' without the --async flag. URL without wildcards: {}'.format(url)
|
|
)
|
|
bucket_to_globs[storage_url.CloudUrl(url.scheme, url.bucket_name)].append(
|
|
url.resource_name
|
|
)
|
|
|
|
for bucket_url, object_globs in bucket_to_globs.items():
|
|
yield bulk_restore_objects_task.BulkRestoreObjectsTask(
|
|
bucket_url,
|
|
object_globs,
|
|
allow_overwrite=args.allow_overwrite,
|
|
created_after_time=args.created_after_time,
|
|
created_before_time=args.created_before_time,
|
|
deleted_after_time=args.deleted_after_time,
|
|
deleted_before_time=args.deleted_before_time,
|
|
user_request_args=user_request_args,
|
|
)
|
|
|
|
|
|
def _sync_restore_task_iterator(args, fields_scope, user_request_args):
|
|
"""Yields blocking restore tasks."""
|
|
last_resource = None
|
|
for url in _url_iterator(args):
|
|
if _is_bucket_restore(url):
|
|
yield restore_bucket_task.RestoreBucketTask(url)
|
|
continue
|
|
resources = list(
|
|
wildcard_iterator.get_wildcard_iterator(
|
|
url.url_string,
|
|
fields_scope=fields_scope,
|
|
object_state=cloud_api.ObjectState.SOFT_DELETED,
|
|
files_only=True,
|
|
)
|
|
)
|
|
if not resources:
|
|
raise errors.InvalidUrlError(
|
|
'The following URLs matched no objects:\n-{}'.format(url.url_string)
|
|
)
|
|
for resource in resources:
|
|
if args.all_versions:
|
|
yield restore_object_task.RestoreObjectTask(resource, user_request_args)
|
|
else:
|
|
if (
|
|
last_resource
|
|
and last_resource.storage_url.versionless_url_string
|
|
!= resource.storage_url.versionless_url_string
|
|
):
|
|
yield restore_object_task.RestoreObjectTask(
|
|
last_resource, user_request_args
|
|
)
|
|
last_resource = resource
|
|
if last_resource:
|
|
yield restore_object_task.RestoreObjectTask(
|
|
last_resource, user_request_args
|
|
)
|
|
|
|
|
|
def _restore_task_iterator(args):
|
|
"""Yields restore tasks."""
|
|
if args.preserve_acl:
|
|
fields_scope = cloud_api.FieldsScope.FULL
|
|
else:
|
|
fields_scope = cloud_api.FieldsScope.SHORT
|
|
user_request_args = (
|
|
user_request_args_factory.get_user_request_args_from_command_args(
|
|
args, metadata_type=user_request_args_factory.MetadataType.OBJECT
|
|
)
|
|
)
|
|
if args.asyncronous:
|
|
return _async_restore_task_iterator(args, user_request_args)
|
|
return _sync_restore_task_iterator(args, fields_scope, user_request_args)
|
|
|
|
|
|
@base.UniverseCompatible
|
|
class Restore(base.Command):
|
|
"""Restore one or more soft-deleted objects."""
|
|
|
|
# TODO(b/292075826): Update docstring and help once bucket restore supported.
|
|
detailed_help = {
|
|
'DESCRIPTION': """
|
|
The restore command restores soft-deleted resources:
|
|
|
|
$ {command} url...
|
|
|
|
""",
|
|
'EXAMPLES': """
|
|
|
|
Restore soft-deleted version of bucket with generations:
|
|
|
|
$ {command} gs://bucket#123
|
|
|
|
Restore several soft-deleted buckets with generations:
|
|
|
|
$ {command} gs://bucket1#123 gs://bucket2#456
|
|
|
|
Restore latest soft-deleted version of object in a bucket.
|
|
|
|
$ {command} gs://bucket/file1.txt
|
|
|
|
Restore a specific soft-deleted version of object in a bucket by specifying the generation.
|
|
|
|
$ {command} gs://bucket/file1.txt#123
|
|
|
|
Restore all soft-deleted versions of object in a bucket.
|
|
|
|
$ {command} gs://bucket/file1.txt --all-versions
|
|
|
|
Restore several objects in a bucket (with or without generation):
|
|
|
|
$ {command} gs://bucket/file1.txt gs://bucket/file2.txt#456
|
|
|
|
Restore the latest soft-deleted version of all text objects in a bucket:
|
|
|
|
$ {command} gs://bucket/**.txt
|
|
|
|
Restore a list of objects read from stdin (with or without generation):
|
|
|
|
$ cat list-of-files.txt | {command} --read-paths-from-stdin
|
|
|
|
Restore object with its original ACL policy:
|
|
|
|
$ {command} gs://bucket/file1.txt --preserve-acl
|
|
|
|
Restore all objects in a bucket asynchronously:
|
|
|
|
$ {command} gs://bucket/** --async
|
|
|
|
Restore all text files in a bucket asynchronously:
|
|
|
|
$ {command} gs://bucket/**.txt --async
|
|
|
|
Restore objects created within a specific time range:
|
|
|
|
$ {command} gs://bucket/** --async \
|
|
--created-after-time="2023-01-01T00:00:00Z" \
|
|
--created-before-time="2023-01-31T23:59:59Z"
|
|
|
|
Restore objects soft-deleted within a specific time range:
|
|
|
|
$ {command} gs://bucket/** --async \
|
|
--deleted-after-time="2023-01-01T00:00:00Z" \
|
|
--deleted-before-time="2023-01-31T23:59:59Z"
|
|
|
|
Restore objects using a combination of creation and deletion time filters:
|
|
|
|
$ {command} gs://bucket/** --async --allow-overwrite \
|
|
--created-after-time="2023-01-01T00:00:00Z" \
|
|
--deleted-after-time="2023-01-01T00:00:00Z"
|
|
|
|
This command filters the objects that were live at 2023-01-01T00:00:00Z
|
|
and then soft-deleted afterwards. This combination of filters is
|
|
especially helpful if there is a period of erroneous overwrites.
|
|
They allow you to go back to the point just before the overwrites began.
|
|
You will also need to set the `--allow-overwrite` option to true.
|
|
""",
|
|
}
|
|
|
|
@staticmethod
|
|
def Args(parser):
|
|
parser.add_argument('urls', nargs='*', help='The url of objects to list.')
|
|
flags.add_precondition_flags(parser)
|
|
flags.add_preserve_acl_flag(parser)
|
|
flags.add_read_paths_from_stdin_flag(parser)
|
|
|
|
synchronous_restore_flag_group = parser.add_group(
|
|
help='SYNCHRONOUS RESTORE OPTIONS'
|
|
)
|
|
synchronous_restore_flag_group.add_argument(
|
|
'-a',
|
|
'--all-versions',
|
|
action='store_true',
|
|
help=(
|
|
'Restores all versions of soft-deleted objects.'
|
|
'\n\nThis flag is only useful for buckets with [object versioning]'
|
|
' (https://cloud.google.com/storage/docs/object-versioning)'
|
|
' enabled. In this case, the latest soft-deleted version will'
|
|
' become live and the previous generations will become noncurrent.'
|
|
'\n\nIf versioning is disabled, the latest soft-deleted version'
|
|
' will become live and previous generations will be soft-deleted'
|
|
' again.'
|
|
'\n\nThis flag disables parallelism to preserve version order.'
|
|
),
|
|
)
|
|
|
|
parser.add_argument(
|
|
'--async',
|
|
# Can't create `async` attribute because "async" is a keyword.
|
|
dest='asyncronous',
|
|
action='store_true',
|
|
help=(
|
|
'Initiates an asynchronous bulk restore operation on the specified'
|
|
' bucket.'
|
|
),
|
|
)
|
|
bulk_restore_flag_group = parser.add_group(help='BULK RESTORE OPTIONS')
|
|
bulk_restore_flag_group.add_argument(
|
|
'--created-after-time',
|
|
type=arg_parsers.Datetime.Parse,
|
|
help='Restores only the objects that were created after this time.',
|
|
)
|
|
bulk_restore_flag_group.add_argument(
|
|
'--created-before-time',
|
|
type=arg_parsers.Datetime.Parse,
|
|
help='Restores only the objects that were created before this time.',
|
|
)
|
|
bulk_restore_flag_group.add_argument(
|
|
'--allow-overwrite',
|
|
action='store_true',
|
|
help=(
|
|
'If included, live objects will be overwritten. If versioning is'
|
|
' enabled, this will result in a noncurrent object. If versioning'
|
|
' is not enabled, this will result in a soft-deleted object.'
|
|
),
|
|
)
|
|
bulk_restore_flag_group.add_argument(
|
|
'--deleted-after-time',
|
|
type=arg_parsers.Datetime.Parse,
|
|
help=(
|
|
'Restores only the objects that were soft-deleted after this time.'
|
|
),
|
|
)
|
|
bulk_restore_flag_group.add_argument(
|
|
'--deleted-before-time',
|
|
type=arg_parsers.Datetime.Parse,
|
|
help=(
|
|
'Restores only the objects that were soft-deleted before this time.'
|
|
),
|
|
)
|
|
|
|
def Run(self, args):
|
|
# TODO(b/383682645): Add support for restoring managed folders.
|
|
task_status_queue = task_graph_executor.multiprocessing_context.Queue()
|
|
|
|
if args.asyncronous:
|
|
_raise_if_invalid_flag_combination(
|
|
args, ExecutionMode.ASYNCHRONOUS, _SYNCHRONOUS_RESTORE_FLAGS
|
|
)
|
|
else:
|
|
_raise_if_invalid_flag_combination(
|
|
args, ExecutionMode.SYNCHRONOUS, _BULK_RESTORE_FLAGS
|
|
)
|
|
|
|
self.exit_code = task_executor.execute_tasks(
|
|
task_iterator=_restore_task_iterator(args),
|
|
parallelizable=not args.all_versions,
|
|
task_status_queue=task_status_queue,
|
|
progress_manager_args=task_status.ProgressManagerArgs(
|
|
increment_type=task_status.IncrementType.INTEGER, manifest_path=None
|
|
),
|
|
)
|