# -*- coding: utf-8 -*-
# Copyright 2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Integration tests for cp command."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import ast
import base64
import binascii
import datetime
import gzip
import logging
import os
import pickle
import pkgutil
import random
import re
import stat
import string
import sys
import threading
from unittest import mock

from apitools.base.py import exceptions as apitools_exceptions
import boto
from boto import storage_uri
from boto.exception import ResumableTransferDisposition
from boto.exception import StorageResponseError
from boto.storage_uri import BucketStorageUri

from gslib import command
from gslib import exception
from gslib import name_expansion
from gslib.cloud_api import ResumableUploadStartOverException
from gslib.commands.config import DEFAULT_SLICED_OBJECT_DOWNLOAD_THRESHOLD
from gslib.commands.cp import ShimTranslatePredefinedAclSubOptForCopy
from gslib.cs_api_map import ApiSelector
from gslib.daisy_chain_wrapper import _DEFAULT_DOWNLOAD_CHUNK_SIZE
from gslib.discard_messages_queue import DiscardMessagesQueue
from gslib.exception import InvalidUrlError
from gslib.gcs_json_api import GcsJsonApi
from gslib.parallel_tracker_file import ObjectFromTracker
from gslib.parallel_tracker_file import WriteParallelUploadTrackerFile
from gslib.project_id import PopulateProjectId
from gslib.storage_url import StorageUrlFromString
from gslib.tests.rewrite_helper import EnsureRewriteResumeCallbackHandler
from gslib.tests.rewrite_helper import HaltingRewriteCallbackHandler
from gslib.tests.rewrite_helper import RewriteHaltException
import gslib.tests.testcase as testcase
from gslib.tests.testcase.base import NotParallelizable
from gslib.tests.testcase.integration_testcase import SkipForGS
from gslib.tests.testcase.integration_testcase import SkipForS3
from gslib.tests.testcase.integration_testcase import SkipForXML
from gslib.tests.testcase.integration_testcase import SkipForJSON
from gslib.tests.util import AuthorizeProjectToUseTestingKmsKey
from gslib.tests.util import BuildErrorRegex
from gslib.tests.util import GenerationFromURI as urigen
from gslib.tests.util import HaltingCopyCallbackHandler
from gslib.tests.util import HaltOneComponentCopyCallbackHandler
from gslib.tests.util import HAS_GS_PORT
from gslib.tests.util import HAS_S3_CREDS
from gslib.tests.util import KmsTestingResources
from gslib.tests.util import ObjectToURI as suri
from gslib.tests.util import ORPHANED_FILE
from gslib.tests.util import POSIX_GID_ERROR
from gslib.tests.util import POSIX_INSUFFICIENT_ACCESS_ERROR
from gslib.tests.util import POSIX_MODE_ERROR
from gslib.tests.util import POSIX_UID_ERROR
from gslib.tests.util import SequentialAndParallelTransfer
from gslib.tests.util import SetBotoConfigForTest
from gslib.tests.util import SetEnvironmentForTest
from gslib.tests.util import TailSet
from gslib.tests.util import TEST_ENCRYPTION_KEY1
from gslib.tests.util import TEST_ENCRYPTION_KEY1_SHA256_B64
from gslib.tests.util import TEST_ENCRYPTION_KEY2
from gslib.tests.util import TEST_ENCRYPTION_KEY3
from gslib.tests.util import unittest
from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
from gslib.tracker_file import DeleteTrackerFile
from gslib.tracker_file import GetRewriteTrackerFilePath
from gslib.tracker_file import GetSlicedDownloadTrackerFilePaths
from gslib.ui_controller import BytesToFixedWidthString
from gslib.utils import hashing_helper
from gslib.utils.boto_util import UsingCrcmodExtension
from gslib.utils.constants import START_CALLBACK_PER_BYTES
from gslib.utils.constants import UTF8
from gslib.utils.copy_helper import GetTrackerFilePath
from gslib.utils.copy_helper import PARALLEL_UPLOAD_STATIC_SALT
from gslib.utils.copy_helper import PARALLEL_UPLOAD_TEMP_NAMESPACE
from gslib.utils.copy_helper import TrackerFileType
from gslib.utils.hashing_helper import CalculateB64EncodedMd5FromContents
from gslib.utils.hashing_helper import CalculateMd5FromContents
from gslib.utils.hashing_helper import GetMd5
from gslib.utils.metadata_util import CreateCustomMetadata
from gslib.utils.posix_util import GID_ATTR
from gslib.utils.posix_util import MODE_ATTR
from gslib.utils.posix_util import NA_ID
from gslib.utils.posix_util import NA_MODE
from gslib.utils.posix_util import UID_ATTR
from gslib.utils.posix_util import ParseAndSetPOSIXAttributes
from gslib.utils.posix_util import ValidateFilePermissionAccess
from gslib.utils.posix_util import ValidatePOSIXMode
from gslib.utils.retry_util import Retry
from gslib.utils.system_util import IS_WINDOWS
from gslib.utils.text_util import get_random_ascii_chars
from gslib.utils.unit_util import EIGHT_MIB
from gslib.utils.unit_util import HumanReadableToBytes
from gslib.utils.unit_util import MakeHumanReadable
from gslib.utils.unit_util import ONE_KIB
from gslib.utils.unit_util import ONE_MIB
from gslib.utils import shim_util

import six
from six.moves import http_client
from six.moves import range

if six.PY3:
  long = int  # pylint: disable=redefined-builtin,invalid-name

# These POSIX-specific variables aren't defined for Windows.
# pylint: disable=g-import-not-at-top
if not IS_WINDOWS:
  from gslib.tests import util
  from gslib.tests.util import DEFAULT_MODE
  from gslib.tests.util import GetInvalidGid
  from gslib.tests.util import GetNonPrimaryGid
  from gslib.tests.util import GetPrimaryGid
  from gslib.tests.util import INVALID_UID
  from gslib.tests.util import USER_ID
# pylint: enable=g-import-not-at-top

# (status_code, error_prefix, error_substring)
_GCLOUD_STORAGE_GZIP_FLAG_CONFLICT_OUTPUT = (
    2, 'ERROR',
    'At most one of --gzip-in-flight | --gzip-in-flight-all | --gzip-local |'
    ' --gzip-local-all can be specified')


def TestCpMvPOSIXBucketToLocalErrors(cls, bucket_uri, obj, tmpdir, is_cp=True):
|
|
"""Helper function for preserve_posix_errors tests in test_cp and test_mv.
|
|
|
|
Args:
|
|
cls: An instance of either TestCp or TestMv.
|
|
bucket_uri: The uri of the bucket that the object is in.
|
|
obj: The object to run the tests on.
|
|
tmpdir: The local file path to cp to.
|
|
    is_cp: True if the calling test suite is cp; False if it is mv.
|
|
"""
|
|
error_key = 'error_regex'
|
|
if cls._use_gcloud_storage:
|
|
insufficient_access_error = no_read_access_error = re.compile(
|
|
r"User \d+ owns file, but owner does not have read permission")
|
|
missing_gid_error = re.compile(
|
|
r"GID in .* metadata doesn't exist on current system")
|
|
missing_uid_error = re.compile(
|
|
r"UID in .* metadata doesn't exist on current system")
|
|
else:
|
|
insufficient_access_error = BuildErrorRegex(
|
|
obj, POSIX_INSUFFICIENT_ACCESS_ERROR)
|
|
missing_gid_error = BuildErrorRegex(obj, POSIX_GID_ERROR)
|
|
missing_uid_error = BuildErrorRegex(obj, POSIX_UID_ERROR)
|
|
no_read_access_error = BuildErrorRegex(obj, POSIX_MODE_ERROR)
|
|
|
|
# A dict of test_name: attrs_dict.
|
|
# attrs_dict holds the different attributes that we want for the object in a
|
|
# specific test.
|
|
# To minimize potential test flakes from the system's GID mapping changing
|
|
# mid-test, we use the GID-related methods that fetch GID info each time,
|
|
# rather than reusing the LazyWrapper-wrapped constants across operations.
|
|
test_params = {
|
|
'test1': {
|
|
MODE_ATTR: '333',
|
|
error_key: no_read_access_error,
|
|
},
|
|
'test2': {
|
|
GID_ATTR: GetInvalidGid,
|
|
error_key: missing_gid_error,
|
|
},
|
|
'test3': {
|
|
GID_ATTR: GetInvalidGid,
|
|
MODE_ATTR: '420',
|
|
error_key: missing_gid_error,
|
|
},
|
|
'test4': {
|
|
UID_ATTR: INVALID_UID,
|
|
error_key: missing_uid_error,
|
|
},
|
|
'test5': {
|
|
UID_ATTR: INVALID_UID,
|
|
MODE_ATTR: '530',
|
|
error_key: missing_uid_error,
|
|
},
|
|
'test6': {
|
|
UID_ATTR: INVALID_UID,
|
|
GID_ATTR: GetInvalidGid,
|
|
error_key: missing_uid_error,
|
|
},
|
|
'test7': {
|
|
UID_ATTR: INVALID_UID,
|
|
GID_ATTR: GetInvalidGid,
|
|
MODE_ATTR: '640',
|
|
error_key: missing_uid_error,
|
|
},
|
|
'test8': {
|
|
UID_ATTR: INVALID_UID,
|
|
GID_ATTR: GetPrimaryGid,
|
|
error_key: missing_uid_error,
|
|
},
|
|
'test9': {
|
|
UID_ATTR: INVALID_UID,
|
|
GID_ATTR: GetNonPrimaryGid,
|
|
error_key: missing_uid_error,
|
|
},
|
|
'test10': {
|
|
UID_ATTR: INVALID_UID,
|
|
GID_ATTR: GetPrimaryGid,
|
|
MODE_ATTR: '640',
|
|
error_key: missing_uid_error,
|
|
},
|
|
'test11': {
|
|
UID_ATTR: INVALID_UID,
|
|
GID_ATTR: GetNonPrimaryGid,
|
|
MODE_ATTR: '640',
|
|
error_key: missing_uid_error,
|
|
},
|
|
'test12': {
|
|
UID_ATTR: USER_ID,
|
|
GID_ATTR: GetInvalidGid,
|
|
error_key: missing_gid_error,
|
|
},
|
|
'test13': {
|
|
UID_ATTR: USER_ID,
|
|
GID_ATTR: GetInvalidGid,
|
|
MODE_ATTR: '640',
|
|
error_key: missing_gid_error,
|
|
},
|
|
'test14': {
|
|
GID_ATTR: GetPrimaryGid,
|
|
MODE_ATTR: '240',
|
|
error_key: insufficient_access_error,
|
|
}
|
|
}
|
|
  # The test_name loop variable below can be used to help debug the test if
  # there is a problem.
|
|
for test_name, attrs_dict in six.iteritems(test_params):
|
|
cls.ClearPOSIXMetadata(obj)
|
|
|
|
# Attributes default to None if they are not in attrs_dict; some attrs are
|
|
# functions or LazyWrapper objects that should be called.
|
|
uid = attrs_dict.get(UID_ATTR)
|
|
if uid is not None and callable(uid):
|
|
uid = uid()
|
|
|
|
gid = attrs_dict.get(GID_ATTR)
|
|
if gid is not None and callable(gid):
|
|
gid = gid()
|
|
|
|
mode = attrs_dict.get(MODE_ATTR)
|
|
|
|
cls.SetPOSIXMetadata(cls.default_provider,
|
|
bucket_uri.bucket_name,
|
|
obj.object_name,
|
|
uid=uid,
|
|
gid=gid,
|
|
mode=mode)
|
|
stderr = cls.RunGsUtil([
|
|
'cp' if is_cp else 'mv', '-P',
|
|
suri(bucket_uri, obj.object_name), tmpdir
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
|
|
if cls._use_gcloud_storage:
|
|
general_posix_error = 'ERROR'
|
|
else:
|
|
general_posix_error = ORPHANED_FILE
|
|
cls.assertIn(
|
|
general_posix_error, stderr,
|
|
'Error during test "%s": %s not found in stderr:\n%s' %
|
|
(test_name, general_posix_error, stderr))
|
|
|
|
error_regex = attrs_dict[error_key]
|
|
cls.assertTrue(
|
|
error_regex.search(stderr),
|
|
'Test %s did not match expected error; could not find a match for '
|
|
'%s\n\nin stderr:\n%s' % (test_name, error_regex.pattern, stderr))
|
|
listing1 = TailSet(suri(bucket_uri), cls.FlatListBucket(bucket_uri))
|
|
listing2 = TailSet(tmpdir, cls.FlatListDir(tmpdir))
|
|
# Bucket should have un-altered content.
|
|
cls.assertEqual(listing1, set(['/%s' % obj.object_name]))
|
|
# Dir should have un-altered content.
|
|
cls.assertEqual(listing2, set(['']))
|
|
|
|
|
|
def TestCpMvPOSIXBucketToLocalNoErrors(cls, bucket_uri, tmpdir, is_cp=True):
|
|
"""Helper function for preserve_posix_no_errors tests in test_cp and test_mv.
|
|
|
|
Args:
|
|
cls: An instance of either TestCp or TestMv.
|
|
bucket_uri: The uri of the bucket that the object is in.
|
|
tmpdir: The local file path to cp to.
|
|
    is_cp: True if the calling test suite is cp; False if it is mv.
|
|
"""
|
|
primary_gid = os.stat(tmpdir).st_gid
|
|
non_primary_gid = util.GetNonPrimaryGid()
|
|
test_params = {
|
|
'obj1': {
|
|
GID_ATTR: primary_gid
|
|
},
|
|
'obj2': {
|
|
GID_ATTR: non_primary_gid
|
|
},
|
|
'obj3': {
|
|
GID_ATTR: primary_gid,
|
|
MODE_ATTR: '440'
|
|
},
|
|
'obj4': {
|
|
GID_ATTR: non_primary_gid,
|
|
MODE_ATTR: '444'
|
|
},
|
|
'obj5': {
|
|
UID_ATTR: USER_ID
|
|
},
|
|
'obj6': {
|
|
UID_ATTR: USER_ID,
|
|
MODE_ATTR: '420'
|
|
},
|
|
'obj7': {
|
|
UID_ATTR: USER_ID,
|
|
GID_ATTR: primary_gid
|
|
},
|
|
'obj8': {
|
|
UID_ATTR: USER_ID,
|
|
GID_ATTR: non_primary_gid
|
|
},
|
|
'obj9': {
|
|
UID_ATTR: USER_ID,
|
|
GID_ATTR: primary_gid,
|
|
MODE_ATTR: '433'
|
|
},
|
|
'obj10': {
|
|
UID_ATTR: USER_ID,
|
|
GID_ATTR: non_primary_gid,
|
|
MODE_ATTR: '442'
|
|
}
|
|
}
|
|
for obj_name, attrs_dict in six.iteritems(test_params):
|
|
uid = attrs_dict.get(UID_ATTR)
|
|
gid = attrs_dict.get(GID_ATTR)
|
|
mode = attrs_dict.get(MODE_ATTR)
|
|
cls.CreateObject(bucket_uri=bucket_uri,
|
|
object_name=obj_name,
|
|
contents=obj_name.encode(UTF8),
|
|
uid=uid,
|
|
gid=gid,
|
|
mode=mode)
|
|
for obj_name in six.iterkeys(test_params):
|
|
    # Move objects one at a time to avoid eventual listing consistency issues.
|
|
cls.RunGsUtil(
|
|
['cp' if is_cp else 'mv', '-P',
|
|
suri(bucket_uri, obj_name), tmpdir])
|
|
listing = TailSet(tmpdir, cls.FlatListDir(tmpdir))
|
|
cls.assertEqual(
|
|
listing,
|
|
set([
|
|
'/obj1', '/obj2', '/obj3', '/obj4', '/obj5', '/obj6', '/obj7',
|
|
'/obj8', '/obj9', '/obj10'
|
|
]))
|
|
cls.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj1'),
|
|
gid=primary_gid,
|
|
mode=DEFAULT_MODE)
|
|
cls.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj2'),
|
|
gid=non_primary_gid,
|
|
mode=DEFAULT_MODE)
|
|
cls.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj3'),
|
|
gid=primary_gid,
|
|
mode=0o440)
|
|
cls.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj4'),
|
|
gid=non_primary_gid,
|
|
mode=0o444)
|
|
cls.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj5'),
|
|
uid=USER_ID,
|
|
gid=primary_gid,
|
|
mode=DEFAULT_MODE)
|
|
cls.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj6'),
|
|
uid=USER_ID,
|
|
gid=primary_gid,
|
|
mode=0o420)
|
|
cls.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj7'),
|
|
uid=USER_ID,
|
|
gid=primary_gid,
|
|
mode=DEFAULT_MODE)
|
|
cls.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj8'),
|
|
uid=USER_ID,
|
|
gid=non_primary_gid,
|
|
mode=DEFAULT_MODE)
|
|
cls.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj9'),
|
|
uid=USER_ID,
|
|
gid=primary_gid,
|
|
mode=0o433)
|
|
cls.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj10'),
|
|
uid=USER_ID,
|
|
gid=non_primary_gid,
|
|
mode=0o442)
|
|
|
|
|
|
def TestCpMvPOSIXLocalToBucketNoErrors(cls, bucket_uri, is_cp=True):
|
|
"""Helper function for testing local to bucket POSIX preservation.
|
|
|
|
Args:
|
|
cls: An instance of either TestCp or TestMv.
|
|
bucket_uri: The uri of the bucket to cp/mv to.
|
|
    is_cp: True if the calling test suite is cp; False if it is mv.
|
|
"""
|
|
primary_gid = os.getgid()
|
|
non_primary_gid = util.GetNonPrimaryGid()
|
|
test_params = {
|
|
'obj1': {
|
|
GID_ATTR: primary_gid
|
|
},
|
|
'obj2': {
|
|
GID_ATTR: non_primary_gid
|
|
},
|
|
'obj3': {
|
|
GID_ATTR: primary_gid,
|
|
MODE_ATTR: '440'
|
|
},
|
|
'obj4': {
|
|
GID_ATTR: non_primary_gid,
|
|
MODE_ATTR: '444'
|
|
},
|
|
'obj5': {
|
|
UID_ATTR: USER_ID
|
|
},
|
|
'obj6': {
|
|
UID_ATTR: USER_ID,
|
|
MODE_ATTR: '420'
|
|
},
|
|
'obj7': {
|
|
UID_ATTR: USER_ID,
|
|
GID_ATTR: primary_gid
|
|
},
|
|
'obj8': {
|
|
UID_ATTR: USER_ID,
|
|
GID_ATTR: non_primary_gid
|
|
},
|
|
'obj9': {
|
|
UID_ATTR: USER_ID,
|
|
GID_ATTR: primary_gid,
|
|
MODE_ATTR: '433'
|
|
},
|
|
'obj10': {
|
|
UID_ATTR: USER_ID,
|
|
GID_ATTR: non_primary_gid,
|
|
MODE_ATTR: '442'
|
|
}
|
|
}
|
|
for obj_name, attrs_dict in six.iteritems(test_params):
|
|
uid = attrs_dict.get(UID_ATTR, NA_ID)
|
|
gid = attrs_dict.get(GID_ATTR, NA_ID)
|
|
mode = attrs_dict.get(MODE_ATTR, NA_MODE)
|
|
if mode != NA_MODE:
|
|
ValidatePOSIXMode(int(mode, 8))
|
|
ValidateFilePermissionAccess(obj_name,
|
|
uid=uid,
|
|
gid=int(gid),
|
|
mode=int(mode))
|
|
fpath = cls.CreateTempFile(contents=b'foo', uid=uid, gid=gid, mode=mode)
|
|
cls.RunGsUtil(
|
|
['cp' if is_cp else 'mv', '-P', fpath,
|
|
suri(bucket_uri, obj_name)])
|
|
if uid != NA_ID:
|
|
cls.VerifyObjectCustomAttribute(bucket_uri.bucket_name, obj_name,
|
|
UID_ATTR, str(uid))
|
|
if gid != NA_ID:
|
|
cls.VerifyObjectCustomAttribute(bucket_uri.bucket_name, obj_name,
|
|
GID_ATTR, str(gid))
|
|
if mode != NA_MODE:
|
|
cls.VerifyObjectCustomAttribute(bucket_uri.bucket_name, obj_name,
|
|
MODE_ATTR, str(mode))
|
|
|
|
|
|
def _ReadContentsFromFifo(fifo_path, list_for_output):
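  """Reads the full contents of the fifo and appends them to list_for_output."""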
|
|
with open(fifo_path, 'rb') as f:
|
|
list_for_output.append(f.read())
|
|
|
|
|
|
def _WriteContentsToFifo(contents, fifo_path):
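  """Writes contents to the fifo; blocks until a reader opens the other end."""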
|
|
with open(fifo_path, 'wb') as f:
|
|
f.write(contents)
|
|
|
|
|
|
class _JSONForceHTTPErrorCopyCallbackHandler(object):
|
|
"""Test callback handler that raises an arbitrary HTTP error exception."""
|
|
|
|
def __init__(self, startover_at_byte, http_error_num):
|
|
self._startover_at_byte = startover_at_byte
|
|
self._http_error_num = http_error_num
|
|
self.started_over_once = False
|
|
|
|
# pylint: disable=invalid-name
|
|
def call(self, total_bytes_transferred, total_size):
|
|
"""Forcibly exits if the transfer has passed the halting point."""
|
|
if (total_bytes_transferred >= self._startover_at_byte and
|
|
not self.started_over_once):
|
|
sys.stderr.write('Forcing HTTP error %s after byte %s. '
|
|
'%s/%s transferred.\r\n' %
|
|
(self._http_error_num, self._startover_at_byte,
|
|
MakeHumanReadable(total_bytes_transferred),
|
|
MakeHumanReadable(total_size)))
|
|
self.started_over_once = True
|
|
raise apitools_exceptions.HttpError({'status': self._http_error_num},
|
|
None, None)
|
|
|
|
|
|
class _XMLResumableUploadStartOverCopyCallbackHandler(object):
|
|
"""Test callback handler that raises start-over exception during upload."""
|
|
|
|
def __init__(self, startover_at_byte):
|
|
self._startover_at_byte = startover_at_byte
|
|
self.started_over_once = False
|
|
|
|
# pylint: disable=invalid-name
|
|
def call(self, total_bytes_transferred, total_size):
|
|
"""Forcibly exits if the transfer has passed the halting point."""
|
|
if (total_bytes_transferred >= self._startover_at_byte and
|
|
not self.started_over_once):
|
|
sys.stderr.write(
|
|
'Forcing ResumableUpload start over error after byte %s. '
|
|
'%s/%s transferred.\r\n' %
|
|
(self._startover_at_byte, MakeHumanReadable(total_bytes_transferred),
|
|
MakeHumanReadable(total_size)))
|
|
self.started_over_once = True
|
|
raise boto.exception.ResumableUploadException(
|
|
'Forcing upload start over', ResumableTransferDisposition.START_OVER)
|
|
|
|
|
|
class _DeleteBucketThenStartOverCopyCallbackHandler(object):
|
|
"""Test callback handler that deletes bucket then raises start-over."""
|
|
|
|
def __init__(self, startover_at_byte, bucket_uri):
|
|
self._startover_at_byte = startover_at_byte
|
|
self._bucket_uri = bucket_uri
|
|
self.started_over_once = False
|
|
|
|
# pylint: disable=invalid-name
|
|
def call(self, total_bytes_transferred, total_size):
|
|
"""Forcibly exits if the transfer has passed the halting point."""
|
|
if (total_bytes_transferred >= self._startover_at_byte and
|
|
not self.started_over_once):
|
|
sys.stderr.write('Deleting bucket (%s)' % (self._bucket_uri.bucket_name))
|
|
|
|
@Retry(StorageResponseError, tries=5, timeout_secs=1)
|
|
def DeleteBucket():
|
|
bucket_list = list(self._bucket_uri.list_bucket(all_versions=True))
|
|
for k in bucket_list:
|
|
self._bucket_uri.get_bucket().delete_key(k.name,
|
|
version_id=k.version_id)
|
|
self._bucket_uri.delete_bucket()
|
|
|
|
DeleteBucket()
|
|
sys.stderr.write(
|
|
'Forcing ResumableUpload start over error after byte %s. '
|
|
'%s/%s transferred.\r\n' %
|
|
(self._startover_at_byte, MakeHumanReadable(total_bytes_transferred),
|
|
MakeHumanReadable(total_size)))
|
|
self.started_over_once = True
|
|
raise ResumableUploadStartOverException('Artificially forcing start-over')
|
|
|
|
|
|
class _ResumableUploadRetryHandler(object):
|
|
"""Test callback handler for causing retries during a resumable transfer."""
|
|
|
|
def __init__(self,
|
|
retry_at_byte,
|
|
exception_to_raise,
|
|
exc_args,
|
|
num_retries=1):
|
|
self._retry_at_byte = retry_at_byte
|
|
self._exception_to_raise = exception_to_raise
|
|
self._exception_args = exc_args
|
|
self._num_retries = num_retries
|
|
|
|
self._retries_made = 0
|
|
|
|
# pylint: disable=invalid-name
|
|
def call(self, total_bytes_transferred, unused_total_size):
|
|
"""Cause a single retry at the retry point."""
|
|
if (total_bytes_transferred >= self._retry_at_byte and
|
|
self._retries_made < self._num_retries):
|
|
self._retries_made += 1
|
|
raise self._exception_to_raise(*self._exception_args)
|
|
|
|
|
|
class TestCp(testcase.GsUtilIntegrationTestCase):
|
|
"""Integration tests for cp command."""
|
|
|
|
# For tests that artificially halt, we need to ensure at least one callback
|
|
# occurs.
|
|
halt_size = START_CALLBACK_PER_BYTES * 2
|
|
|
|
def _get_test_file(self, name):
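    """Creates a temp file populated from the named gslib test data file."""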
|
|
contents = pkgutil.get_data('gslib', 'tests/test_data/%s' % name)
|
|
return self.CreateTempFile(file_name=name, contents=contents)
|
|
|
|
def _CpWithFifoViaGsUtilAndAppendOutputToList(self, src_path_tuple, dst_path,
|
|
list_for_return_value,
|
|
**kwargs):
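    """Runs gsutil cp and appends the run's output to list_for_return_value."""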
|
|
arg_list = ['cp']
|
|
arg_list.extend(src_path_tuple)
|
|
arg_list.append(dst_path)
|
|
# Append stderr, stdout, or return status (if specified in kwargs) to the
|
|
# given list.
|
|
list_for_return_value.append(self.RunGsUtil(arg_list, **kwargs))
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_noclobber(self):
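    """Tests that cp -n does not clobber an existing destination object or file."""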
|
|
key_uri = self.CreateObject(contents=b'foo')
|
|
fpath = self.CreateTempFile(contents=b'bar')
|
|
stderr = self.RunGsUtil(
|
|
['cp', '-n', fpath, suri(key_uri)], return_stderr=True)
|
|
self.assertRegex(stderr, r'Skipping.*: {}'.format(re.escape(suri(key_uri))))
|
|
self.assertEqual(key_uri.get_contents_as_string(), b'foo')
|
|
stderr = self.RunGsUtil(['cp', '-n', suri(key_uri), fpath],
|
|
return_stderr=True)
|
|
with open(fpath, 'rb') as f:
|
|
self.assertRegex(stderr, r'Skipping.*: {}'.format(re.escape(suri(f))))
|
|
self.assertEqual(f.read(), b'bar')
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_noclobber_different_size(self):
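    """Tests that cp -n skips an existing destination even when sizes differ."""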
|
|
key_uri = self.CreateObject(contents=b'foo')
|
|
fpath = self.CreateTempFile(contents=b'quux')
|
|
stderr = self.RunGsUtil(
|
|
['cp', '-n', fpath, suri(key_uri)], return_stderr=True)
|
|
self.assertRegex(stderr, r'Skipping.*: {}'.format(re.escape(suri(key_uri))))
|
|
self.assertEqual(key_uri.get_contents_as_string(), b'foo')
|
|
stderr = self.RunGsUtil(['cp', '-n', suri(key_uri), fpath],
|
|
return_stderr=True)
|
|
with open(fpath, 'rb') as f:
|
|
self.assertRegex(stderr, r'Skipping.*: {}'.format(re.escape(suri(f))))
|
|
self.assertEqual(f.read(), b'quux')
|
|
|
|
def test_dest_bucket_not_exist(self):
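    """Tests that cp to a nonexistent bucket fails with a not-found error."""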
|
|
fpath = self.CreateTempFile(contents=b'foo')
|
|
invalid_bucket_uri = ('%s://%s' %
|
|
(self.default_provider, self.nonexistent_bucket_name))
|
|
# TODO(b/135780661): Remove retry after bug resolved
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check():
|
|
stderr = self.RunGsUtil(['cp', fpath, invalid_bucket_uri],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
if self._use_gcloud_storage:
|
|
self.assertIn('not found: 404', stderr)
|
|
else:
|
|
self.assertIn('does not exist', stderr)
|
|
|
|
_Check()
|
|
|
|
def test_copy_in_cloud_noclobber(self):
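    """Tests an in-the-cloud copy and that a repeat cp -n skips the object."""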
|
|
bucket1_uri = self.CreateBucket()
|
|
bucket2_uri = self.CreateBucket()
|
|
key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents=b'foo')
|
|
stderr = self.RunGsUtil(
|
|
['cp', suri(key_uri), suri(bucket2_uri)], return_stderr=True)
|
|
# Rewrite API may output an additional 'Copying' progress notification.
|
|
self.assertGreaterEqual(stderr.count('Copying'), 1)
|
|
self.assertLessEqual(stderr.count('Copying'), 2)
|
|
stderr = self.RunGsUtil(
|
|
['cp', '-n', suri(key_uri),
|
|
suri(bucket2_uri)], return_stderr=True)
|
|
self.assertRegex(
|
|
stderr, r'Skipping.*: {}'.format(suri(bucket2_uri,
|
|
key_uri.object_name)))
|
|
|
|
@SequentialAndParallelTransfer
|
|
@SkipForXML('Boto library does not handle objects with .. in them.')
|
|
def test_skip_object_with_parent_directory_symbol_in_name(self):
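    """Tests that cp skips objects whose names would escape the destination."""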
|
|
bucket_uri = self.CreateBucket()
|
|
key_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='dir/../../../file',
|
|
contents=b'data',
|
|
prefer_json_api=True)
|
|
self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='file2',
|
|
contents=b'data')
|
|
directory = self.CreateTempDir()
|
|
|
|
stderr = self.RunGsUtil(
|
|
['cp', '-r', suri(bucket_uri), directory], return_stderr=True)
|
|
|
|
# By default, deletes in the tearDown method run with the XML API. Boto
|
|
# does not handle names with '..', so we need to delete problematic
|
|
# objects with the json API. Delete happens before assertions, in case they
|
|
# raise errors and prevent cleanup.
|
|
self.json_api.DeleteObject(bucket_uri.bucket_name, key_uri.object_name)
|
|
|
|
self.assertIn(
|
|
'Skipping copy of source URL %s because it would be copied '
|
|
'outside the expected destination directory: %s.' %
|
|
(suri(key_uri), os.path.abspath(directory)), stderr)
|
|
self.assertFalse(os.path.exists(os.path.join(directory, 'file')))
|
|
self.assertTrue(
|
|
os.path.exists(os.path.join(directory, bucket_uri.bucket_name,
|
|
'file2')))
|
|
|
|
@SequentialAndParallelTransfer
|
|
@SkipForXML('Boto library does not handle objects with .. in them.')
|
|
def test_skip_parent_directory_symbol_in_name_is_reflected_in_manifest(self):
|
|
bucket_uri = self.CreateBucket()
|
|
key_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='dir/../../../file',
|
|
contents=b'data',
|
|
prefer_json_api=True)
|
|
directory = self.CreateTempDir()
|
|
log_path = os.path.join(directory, 'log.csv')
|
|
|
|
stderr = self.RunGsUtil(
|
|
['cp', '-r', '-L', log_path,
|
|
suri(bucket_uri), directory],
|
|
return_stderr=True)
|
|
|
|
# By default, deletes in the tearDown method run with the XML API. Boto
|
|
# does not handle names with '..', so we need to delete problematic
|
|
# objects with the json API. Delete happens before assertions, in case they
|
|
# raise errors and prevent cleanup.
|
|
self.json_api.DeleteObject(bucket_uri.bucket_name, key_uri.object_name)
|
|
|
|
self.assertIn(
|
|
'Skipping copy of source URL %s because it would be copied '
|
|
'outside the expected destination directory: %s.' %
|
|
(suri(key_uri), os.path.abspath(directory)), stderr)
|
|
self.assertFalse(os.path.exists(os.path.join(directory, 'file')))
|
|
with open(log_path, 'r') as f:
|
|
lines = f.readlines()
|
|
results = lines[1].strip().split(',')
|
|
self.assertEqual(results[0], suri(key_uri)) # The 'Source' column.
|
|
self.assertEqual(results[8], 'skip') # The 'Result' column.
|
|
|
|
@SequentialAndParallelTransfer
|
|
@SkipForXML('Boto library does not handle objects with .. in them.')
|
|
@unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.')
|
|
def test_skip_parent_directory_symbol_object_with_symlink_destination(self):
|
|
bucket_uri = self.CreateBucket()
|
|
key_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='dir/../../../file',
|
|
contents=b'data',
|
|
prefer_json_api=True)
|
|
second_key_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='file2',
|
|
contents=b'data')
|
|
|
|
directory = self.CreateTempDir()
|
|
linked_destination = os.path.join(directory, 'linked_destination')
|
|
destination = os.path.join(directory, 'destination')
|
|
os.mkdir(destination)
|
|
os.symlink(destination, linked_destination)
|
|
|
|
stderr = self.RunGsUtil([
|
|
'-D', 'cp', '-r',
|
|
suri(bucket_uri),
|
|
suri(second_key_uri), linked_destination
|
|
],
|
|
return_stderr=True)
|
|
|
|
# By default, deletes in the tearDown method run with the XML API. Boto
|
|
# does not handle names with '..', so we need to delete problematic
|
|
# objects with the json API. Delete happens before assertions, in case they
|
|
# raise errors and prevent cleanup.
|
|
self.json_api.DeleteObject(bucket_uri.bucket_name, key_uri.object_name)
|
|
|
|
self.assertIn(
|
|
'Skipping copy of source URL %s because it would be copied '
|
|
'outside the expected destination directory: %s.' %
|
|
(suri(key_uri), linked_destination), stderr)
|
|
self.assertFalse(os.path.exists(os.path.join(linked_destination, 'file')))
|
|
self.assertTrue(os.path.exists(os.path.join(linked_destination, 'file2')))
|
|
|
|
@unittest.skipIf(IS_WINDOWS, 'os.mkfifo not available on Windows.')
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_from_local_file_to_fifo(self):
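    """Tests copying a local file to a named pipe (fifo)."""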
|
|
contents = b'bar'
|
|
fifo_path = self.CreateTempFifo()
|
|
file_path = self.CreateTempFile(contents=contents)
|
|
list_for_output = []
|
|
|
|
read_thread = threading.Thread(target=_ReadContentsFromFifo,
|
|
args=(fifo_path, list_for_output))
|
|
read_thread.start()
|
|
write_thread = threading.Thread(
|
|
target=self._CpWithFifoViaGsUtilAndAppendOutputToList,
|
|
args=((file_path,), fifo_path, []))
|
|
write_thread.start()
|
|
write_thread.join(120)
|
|
read_thread.join(120)
|
|
if not list_for_output:
|
|
self.fail('Reading/writing to the fifo timed out.')
|
|
self.assertEqual(list_for_output[0].strip(), contents)
|
|
|
|
@unittest.skipIf(IS_WINDOWS, 'os.mkfifo not available on Windows.')
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_from_one_object_to_fifo(self):
|
|
fifo_path = self.CreateTempFifo()
|
|
bucket_uri = self.CreateBucket()
|
|
contents = b'bar'
|
|
obj_uri = self.CreateObject(bucket_uri=bucket_uri, contents=contents)
|
|
list_for_output = []
|
|
|
|
read_thread = threading.Thread(target=_ReadContentsFromFifo,
|
|
args=(fifo_path, list_for_output))
|
|
read_thread.start()
|
|
write_thread = threading.Thread(
|
|
target=self._CpWithFifoViaGsUtilAndAppendOutputToList,
|
|
args=((suri(obj_uri),), fifo_path, []))
|
|
write_thread.start()
|
|
write_thread.join(120)
|
|
read_thread.join(120)
|
|
if not list_for_output:
|
|
self.fail('Reading/writing to the fifo timed out.')
|
|
self.assertEqual(list_for_output[0].strip(), contents)
|
|
|
|
@unittest.skipIf(IS_WINDOWS, 'os.mkfifo not available on Windows.')
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_from_multiple_objects_to_fifo(self):
|
|
fifo_path = self.CreateTempFifo()
|
|
bucket_uri = self.CreateBucket()
|
|
contents1 = b'foo and bar'
|
|
contents2 = b'baz and qux'
|
|
obj1_uri = self.CreateObject(bucket_uri=bucket_uri, contents=contents1)
|
|
obj2_uri = self.CreateObject(bucket_uri=bucket_uri, contents=contents2)
|
|
list_for_output = []
|
|
|
|
read_thread = threading.Thread(target=_ReadContentsFromFifo,
|
|
args=(fifo_path, list_for_output))
|
|
read_thread.start()
|
|
write_thread = threading.Thread(
|
|
target=self._CpWithFifoViaGsUtilAndAppendOutputToList,
|
|
args=((suri(obj1_uri), suri(obj2_uri)), fifo_path, []))
|
|
write_thread.start()
|
|
write_thread.join(120)
|
|
read_thread.join(120)
|
|
if not list_for_output:
|
|
self.fail('Reading/writing to the fifo timed out.')
|
|
self.assertIn(contents1, list_for_output[0])
|
|
self.assertIn(contents2, list_for_output[0])
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_streaming(self):
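    """Tests streaming an upload from stdin to an object."""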
|
|
bucket_uri = self.CreateBucket()
|
|
stderr = self.RunGsUtil(
|
|
['cp', '-', '%s' % suri(bucket_uri, 'foo')],
|
|
stdin='bar',
|
|
return_stderr=True)
|
|
if self._use_gcloud_storage:
|
|
self.assertIn('Copying file://- to ' + suri(bucket_uri, 'foo'), stderr)
|
|
else:
|
|
self.assertIn('Copying from <STDIN>', stderr)
|
|
key_uri = self.StorageUriCloneReplaceName(bucket_uri, 'foo')
|
|
self.assertEqual(key_uri.get_contents_as_string(), b'bar')
|
|
|
|
@unittest.skipIf(IS_WINDOWS, 'os.mkfifo not available on Windows.')
|
|
@SequentialAndParallelTransfer
|
|
def test_streaming_from_fifo_to_object(self):
|
|
bucket_uri = self.CreateBucket()
|
|
fifo_path = self.CreateTempFifo()
|
|
object_name = 'foo'
|
|
object_contents = b'bar'
|
|
list_for_output = []
|
|
|
|
# Start writer in the background, which won't finish until a corresponding
|
|
# read operation is performed on the fifo.
|
|
write_thread = threading.Thread(target=_WriteContentsToFifo,
|
|
args=(object_contents, fifo_path))
|
|
write_thread.start()
|
|
    # The fifo requires both a pending read and a pending write before either
    # operation will complete. Regardless of which operation occurs first, the
    # corresponding subsequent operation will unblock the first one. We run
    # gsutil in a thread so that it can time out rather than hang forever if
    # the write thread fails.
|
|
read_thread = threading.Thread(
|
|
target=self._CpWithFifoViaGsUtilAndAppendOutputToList,
|
|
args=((fifo_path,), suri(bucket_uri, object_name), list_for_output),
|
|
kwargs={'return_stderr': True})
|
|
read_thread.start()
|
|
|
|
read_thread.join(120)
|
|
write_thread.join(120)
|
|
if not list_for_output:
|
|
self.fail('Reading/writing to the fifo timed out.')
|
|
|
|
if self._use_gcloud_storage:
|
|
self.assertIn(
|
|
'Copying file://{} to {}'.format(fifo_path,
|
|
suri(bucket_uri, object_name)),
|
|
list_for_output[0])
|
|
else:
|
|
self.assertIn('Copying from named pipe', list_for_output[0])
|
|
|
|
key_uri = self.StorageUriCloneReplaceName(bucket_uri, object_name)
|
|
self.assertEqual(key_uri.get_contents_as_string(), object_contents)
|
|
|
|
@unittest.skipIf(IS_WINDOWS, 'os.mkfifo not available on Windows.')
|
|
@SequentialAndParallelTransfer
|
|
def test_streaming_from_fifo_to_stdout(self):
|
|
fifo_path = self.CreateTempFifo()
|
|
contents = b'bar'
|
|
list_for_output = []
|
|
|
|
write_thread = threading.Thread(target=_WriteContentsToFifo,
|
|
args=(contents, fifo_path))
|
|
write_thread.start()
|
|
read_thread = threading.Thread(
|
|
target=self._CpWithFifoViaGsUtilAndAppendOutputToList,
|
|
args=((fifo_path,), '-', list_for_output),
|
|
kwargs={'return_stdout': True})
|
|
read_thread.start()
|
|
read_thread.join(120)
|
|
write_thread.join(120)
|
|
if not list_for_output:
|
|
self.fail('Reading/writing to the fifo timed out.')
|
|
self.assertEqual(list_for_output[0].strip().encode('ascii'), contents)
|
|
|
|
@unittest.skipIf(IS_WINDOWS, 'os.mkfifo not available on Windows.')
|
|
@SequentialAndParallelTransfer
|
|
def test_streaming_from_stdout_to_fifo(self):
|
|
fifo_path = self.CreateTempFifo()
|
|
contents = b'bar'
|
|
list_for_output = []
|
|
list_for_gsutil_output = []
|
|
|
|
read_thread = threading.Thread(target=_ReadContentsFromFifo,
|
|
args=(fifo_path, list_for_output))
|
|
read_thread.start()
|
|
write_thread = threading.Thread(
|
|
target=self._CpWithFifoViaGsUtilAndAppendOutputToList,
|
|
args=(('-',), fifo_path, list_for_gsutil_output),
|
|
kwargs={
|
|
'return_stderr': True,
|
|
'stdin': contents
|
|
})
|
|
write_thread.start()
|
|
write_thread.join(120)
|
|
read_thread.join(120)
|
|
if not list_for_output:
|
|
self.fail('Reading/writing to the fifo timed out.')
|
|
self.assertEqual(list_for_output[0].strip(), contents)
|
|
|
|
def test_streaming_multiple_arguments(self):
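    """Tests that streaming from stdin rejects multiple source URLs."""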
|
|
bucket_uri = self.CreateBucket()
|
|
stderr = self.RunGsUtil(['cp', '-', '-', suri(bucket_uri)],
|
|
stdin='bar',
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
if self._use_gcloud_storage:
|
|
self.assertIn(
|
|
'Multiple URL strings are not supported when transferring'
|
|
' from stdin.', stderr)
|
|
else:
|
|
self.assertIn('Multiple URL strings are not supported with streaming',
|
|
stderr)
|
|
|
|
# TODO: Implement a way to test both with and without using magic file.
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_detect_content_type(self):
|
|
"""Tests local detection of content type."""
|
|
bucket_uri = self.CreateBucket()
|
|
dsturi = suri(bucket_uri, 'foo')
|
|
|
|
self.RunGsUtil(['cp', self._get_test_file('test.mp3'), dsturi])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check1():
|
|
stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
|
|
if IS_WINDOWS:
|
|
self.assertTrue(
|
|
re.search(r'Content-Type:\s+audio/x-mpg', stdout) or
|
|
re.search(r'Content-Type:\s+audio/mpeg', stdout))
|
|
else:
|
|
self.assertRegex(stdout, r'Content-Type:\s+audio/mpeg')
|
|
|
|
_Check1()
|
|
|
|
self.RunGsUtil(['cp', self._get_test_file('test.gif'), dsturi])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check2():
|
|
stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Type:\s+image/gif')
|
|
|
|
_Check2()
|
|
|
|
def test_content_type_override_default(self):
|
|
"""Tests overriding content type with the default value."""
|
|
bucket_uri = self.CreateBucket()
|
|
dsturi = suri(bucket_uri, 'foo')
|
|
|
|
self.RunGsUtil(
|
|
['-h', 'Content-Type:', 'cp',
|
|
self._get_test_file('test.mp3'), dsturi])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check1():
|
|
stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Type:\s+application/octet-stream')
|
|
|
|
_Check1()
|
|
|
|
self.RunGsUtil(
|
|
['-h', 'Content-Type:', 'cp',
|
|
self._get_test_file('test.gif'), dsturi])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check2():
|
|
stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Type:\s+application/octet-stream')
|
|
|
|
_Check2()
|
|
|
|
def test_content_type_override(self):
|
|
"""Tests overriding content type with a value."""
|
|
bucket_uri = self.CreateBucket()
|
|
dsturi = suri(bucket_uri, 'foo')
|
|
|
|
self.RunGsUtil([
|
|
'-h', 'Content-Type:text/plain', 'cp',
|
|
self._get_test_file('test.mp3'), dsturi
|
|
])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check1():
|
|
stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Type:\s+text/plain')
|
|
|
|
_Check1()
|
|
|
|
self.RunGsUtil([
|
|
'-h', 'Content-Type:text/plain', 'cp',
|
|
self._get_test_file('test.gif'), dsturi
|
|
])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check2():
|
|
stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Type:\s+text/plain')
|
|
|
|
_Check2()
|
|
|
|
@unittest.skipIf(IS_WINDOWS, 'magicfile is not available on Windows.')
|
|
@SequentialAndParallelTransfer
|
|
def test_magicfile_override(self):
|
|
"""Tests content type override with magicfile value."""
|
|
bucket_uri = self.CreateBucket()
|
|
dsturi = suri(bucket_uri, 'foo')
|
|
fpath = self.CreateTempFile(contents=b'foo/bar\n')
|
|
self.RunGsUtil(['cp', fpath, dsturi])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check1():
|
|
stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
|
|
use_magicfile = boto.config.getbool('GSUtil', 'use_magicfile', False)
|
|
content_type = ('text/plain'
|
|
if use_magicfile else 'application/octet-stream')
|
|
self.assertRegex(stdout, r'Content-Type:\s+%s' % content_type)
|
|
|
|
_Check1()
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_content_type_mismatches(self):
|
|
"""Tests overriding content type when it does not match the file type."""
|
|
bucket_uri = self.CreateBucket()
|
|
dsturi = suri(bucket_uri, 'foo')
|
|
fpath = self.CreateTempFile(contents=b'foo/bar\n')
|
|
|
|
self.RunGsUtil([
|
|
'-h', 'Content-Type:image/gif', 'cp',
|
|
self._get_test_file('test.mp3'), dsturi
|
|
])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check1():
|
|
stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Type:\s+image/gif')
|
|
|
|
_Check1()
|
|
|
|
self.RunGsUtil([
|
|
'-h', 'Content-Type:image/gif', 'cp',
|
|
self._get_test_file('test.gif'), dsturi
|
|
])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check2():
|
|
stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Type:\s+image/gif')
|
|
|
|
_Check2()
|
|
|
|
self.RunGsUtil(['-h', 'Content-Type:image/gif', 'cp', fpath, dsturi])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check3():
|
|
stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Type:\s+image/gif')
|
|
|
|
_Check3()
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_content_type_header_case_insensitive(self):
|
|
"""Tests that content type header is treated with case insensitivity."""
|
|
bucket_uri = self.CreateBucket()
|
|
dsturi = suri(bucket_uri, 'foo')
|
|
fpath = self._get_test_file('test.gif')
|
|
|
|
self.RunGsUtil(['-h', 'content-Type:text/plain', 'cp', fpath, dsturi])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check1():
|
|
stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Type:\s+text/plain')
|
|
self.assertNotRegex(stdout, r'image/gif')
|
|
|
|
_Check1()
|
|
|
|
self.RunGsUtil([
|
|
'-h', 'CONTENT-TYPE:image/gif', '-h', 'content-type:image/gif', 'cp',
|
|
fpath, dsturi
|
|
])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check2():
|
|
stdout = self.RunGsUtil(['ls', '-L', dsturi], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Type:\s+image/gif')
|
|
self.assertNotRegex(stdout, r'image/gif,\s*image/gif')
|
|
|
|
_Check2()
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_other_headers(self):
|
|
"""Tests that non-content-type headers are applied successfully on copy."""
|
|
bucket_uri = self.CreateBucket()
|
|
dst_uri = suri(bucket_uri, 'foo')
|
|
fpath = self._get_test_file('test.gif')
|
|
|
|
self.RunGsUtil([
|
|
'-h', 'Cache-Control:public,max-age=12', '-h',
|
|
'x-%s-meta-1:abcd' % self.provider_custom_meta, 'cp', fpath, dst_uri
|
|
])
|
|
|
|
stdout = self.RunGsUtil(['ls', '-L', dst_uri], return_stdout=True)
|
|
self.assertRegex(stdout, r'Cache-Control\s*:\s*public,max-age=12')
|
|
|
|
self.assertRegex(stdout, r'Metadata:\s*1:\s*abcd')
|
|
|
|
dst_uri2 = suri(bucket_uri, 'bar')
|
|
self.RunGsUtil(['cp', dst_uri, dst_uri2])
|
|
# Ensure metadata was preserved across copy.
|
|
stdout = self.RunGsUtil(['ls', '-L', dst_uri2], return_stdout=True)
|
|
self.assertRegex(stdout, r'Cache-Control\s*:\s*public,max-age=12')
|
|
self.assertRegex(stdout, r'Metadata:\s*1:\s*abcd')
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_request_reason_header(self):
|
|
"""Test that x-goog-request-header can be set using the environment variable."""
|
|
os.environ['CLOUDSDK_CORE_REQUEST_REASON'] = 'b/this_is_env_reason'
|
|
bucket_uri = self.CreateBucket()
|
|
dst_uri = suri(bucket_uri, 'foo')
|
|
fpath = self._get_test_file('test.gif')
|
|
    # Ensure the x-goog-request-reason header is set in the cp command.
|
|
stderr = self.RunGsUtil(['-DD', 'cp', fpath, dst_uri], return_stderr=True)
|
|
|
|
if self._use_gcloud_storage:
|
|
reason_regex = r"b'X-Goog-Request-Reason': b'b/this_is_env_reason'"
|
|
else:
|
|
reason_regex = r"'x-goog-request-reason': 'b/this_is_env_reason'"
|
|
|
|
self.assertRegex(stderr, reason_regex)
|
|
    # Ensure the x-goog-request-reason header is set in the ls command.
|
|
stderr = self.RunGsUtil(['-DD', 'ls', '-L', dst_uri], return_stderr=True)
|
|
self.assertRegex(stderr, reason_regex)
|
|
|
|
@SequentialAndParallelTransfer
|
|
@SkipForXML('XML APIs use a different debug log format.')
|
|
def test_request_reason_header_persists_multiple_requests_json(self):
|
|
"""Test that x-goog-request-header works when cp sends multiple requests."""
|
|
os.environ['CLOUDSDK_CORE_REQUEST_REASON'] = 'b/this_is_env_reason'
|
|
bucket_uri = self.CreateBucket()
|
|
dst_uri = suri(bucket_uri, 'foo')
|
|
fpath = self._get_test_file('test.gif')
|
|
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', '0')
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil(['-DD', 'cp', fpath, dst_uri], return_stderr=True)
|
|
|
|
if self._use_gcloud_storage:
|
|
reason_regex = r'X-Goog-Request-Reason\': b\'b/this_is_env_reason'
|
|
else:
|
|
reason_regex = r'x-goog-request-reason\': \'b/this_is_env_reason'
|
|
|
|
self.assertRegex(
|
|
stderr,
|
|
# POST follows GET request. Both need the request-reason header.
|
|
r'GET[\s\S]*' + reason_regex + r'[\s\S]*POST[\s\S]*' + reason_regex)
|
|
|
|
@SequentialAndParallelTransfer
|
|
@SkipForJSON('JSON API uses a different debug log format.')
|
|
def test_request_reason_header_persists_multiple_requests_xml(self):
|
|
"""Test that x-goog-request-header works when cp sends multiple requests."""
|
|
os.environ['CLOUDSDK_CORE_REQUEST_REASON'] = 'b/this_is_env_reason'
|
|
bucket_uri = self.CreateBucket()
|
|
dst_uri = suri(bucket_uri, 'foo')
|
|
fpath = self._get_test_file('test.gif')
|
|
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', '0')
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil(['-D', 'cp', fpath, dst_uri], return_stderr=True)
|
|
|
|
reason_regex = (
|
|
r'Final headers: \{[\s\S]*\''
|
|
r'x-goog-request-reason\': \'b/this_is_env_reason\'[\s\S]*}')
|
|
|
|
# Pattern should match twice since two requests should have a reason header.
|
|
self.assertRegex(stderr, reason_regex + r'[\s\S]*' + reason_regex)
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_versioning(self):
|
|
"""Tests copy with versioning."""
|
|
bucket_uri = self.CreateVersionedBucket()
|
|
k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents=b'data2')
|
|
k2_uri = self.CreateObject(bucket_uri=bucket_uri, contents=b'data1')
|
|
g1 = urigen(k2_uri)
|
|
self.RunGsUtil(['cp', suri(k1_uri), suri(k2_uri)])
|
|
k2_uri = self.StorageUriCloneReplaceName(bucket_uri, k2_uri.object_name)
|
|
k2_uri = self.StorageUriCloneReplaceKey(bucket_uri, k2_uri.get_key())
|
|
g2 = urigen(k2_uri)
|
|
self.StorageUriSetContentsFromString(k2_uri, 'data3')
|
|
g3 = urigen(k2_uri)
|
|
|
|
fpath = self.CreateTempFile()
|
|
# Check to make sure current version is data3.
|
|
self.RunGsUtil(['cp', k2_uri.versionless_uri, fpath])
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), b'data3')
|
|
|
|
# Check contents of all three versions
|
|
self.RunGsUtil(['cp', '%s#%s' % (k2_uri.versionless_uri, g1), fpath])
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), b'data1')
|
|
self.RunGsUtil(['cp', '%s#%s' % (k2_uri.versionless_uri, g2), fpath])
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), b'data2')
|
|
self.RunGsUtil(['cp', '%s#%s' % (k2_uri.versionless_uri, g3), fpath])
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), b'data3')
|
|
|
|
# Copy first version to current and verify.
|
|
self.RunGsUtil(
|
|
['cp',
|
|
'%s#%s' % (k2_uri.versionless_uri, g1), k2_uri.versionless_uri])
|
|
self.RunGsUtil(['cp', k2_uri.versionless_uri, fpath])
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), b'data1')
|
|
|
|
# Attempt to specify a version-specific URI for destination.
|
|
stderr = self.RunGsUtil(['cp', fpath, k2_uri.uri],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
if self._use_gcloud_storage:
|
|
self.assertIn(
|
|
'destination argument of the cp command cannot'
|
|
' be a version-specific URL', stderr)
|
|
else:
|
|
self.assertIn('cannot be the destination for gsutil cp', stderr)
|
|
|
|
def test_versioning_no_parallelism(self):
|
|
"""Tests that copy all-versions errors when parallelism is enabled."""
|
|
# TODO(b/135780661): Remove retry after bug resolved
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check():
|
|
stderr = self.RunGsUtil([
|
|
'-m', 'cp', '-A',
|
|
suri(self.nonexistent_bucket_name, 'foo'),
|
|
suri(self.nonexistent_bucket_name, 'bar')
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
if self._use_gcloud_storage:
|
|
self.assertIn('sequential instead of parallel task execution', stderr)
|
|
else:
|
|
self.assertIn('-m option is not supported with the cp -A flag', stderr)
|
|
|
|
_Check()
|
|
|
|
@SkipForS3('S3 lists versioned objects in reverse timestamp order.')
|
|
def test_recursive_copying_versioned_bucket(self):
|
|
"""Tests cp -R with versioned buckets."""
|
|
bucket1_uri = self.CreateVersionedBucket()
|
|
bucket2_uri = self.CreateVersionedBucket()
|
|
bucket3_uri = self.CreateVersionedBucket()
|
|
|
|
# Write two versions of an object to the bucket1.
|
|
v1_uri = self.CreateObject(bucket_uri=bucket1_uri,
|
|
object_name='k',
|
|
contents=b'data0')
|
|
self.CreateObject(bucket_uri=bucket1_uri,
|
|
object_name='k',
|
|
contents=b'longer_data1',
|
|
gs_idempotent_generation=urigen(v1_uri))
|
|
|
|
self.AssertNObjectsInBucket(bucket1_uri, 2, versioned=True)
|
|
self.AssertNObjectsInBucket(bucket2_uri, 0, versioned=True)
|
|
self.AssertNObjectsInBucket(bucket3_uri, 0, versioned=True)
|
|
|
|
# Recursively copy to second versioned bucket.
|
|
# -A flag should copy all versions in order.
|
|
self.RunGsUtil(
|
|
['cp', '-R', '-A',
|
|
suri(bucket1_uri, '*'),
|
|
suri(bucket2_uri)])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check2():
|
|
"""Validates the results of the cp -R."""
|
|
listing1 = self.RunGsUtil(['ls', '-la', suri(bucket1_uri)],
|
|
return_stdout=True).split('\n')
|
|
listing2 = self.RunGsUtil(['ls', '-la', suri(bucket2_uri)],
|
|
return_stdout=True).split('\n')
|
|
# 2 lines of listing output, 1 summary line, 1 empty line from \n split.
|
|
self.assertEqual(len(listing1), 4)
|
|
self.assertEqual(len(listing2), 4)
|
|
|
|
# First object in each bucket should match in size and version-less name.
|
|
size1, _, uri_str1, _ = listing1[0].split()
|
|
self.assertEqual(size1, str(len('data0')))
|
|
self.assertEqual(storage_uri(uri_str1).object_name, 'k')
|
|
size2, _, uri_str2, _ = listing2[0].split()
|
|
self.assertEqual(size2, str(len('data0')))
|
|
self.assertEqual(storage_uri(uri_str2).object_name, 'k')
|
|
|
|
# Similarly for second object in each bucket.
|
|
size1, _, uri_str1, _ = listing1[1].split()
|
|
self.assertEqual(size1, str(len('longer_data1')))
|
|
self.assertEqual(storage_uri(uri_str1).object_name, 'k')
|
|
size2, _, uri_str2, _ = listing2[1].split()
|
|
self.assertEqual(size2, str(len('longer_data1')))
|
|
self.assertEqual(storage_uri(uri_str2).object_name, 'k')
|
|
|
|
_Check2()
|
|
|
|
# Recursively copy to second versioned bucket with no -A flag.
|
|
# This should copy only the live object.
|
|
self.RunGsUtil(['cp', '-R', suri(bucket1_uri, '*'), suri(bucket3_uri)])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check3():
|
|
"""Validates the results of the cp -R."""
|
|
listing1 = self.RunGsUtil(['ls', '-la', suri(bucket1_uri)],
|
|
return_stdout=True).split('\n')
|
|
listing2 = self.RunGsUtil(['ls', '-la', suri(bucket3_uri)],
|
|
return_stdout=True).split('\n')
|
|
# 2 lines of listing output, 1 summary line, 1 empty line from \n split.
|
|
self.assertEqual(len(listing1), 4)
|
|
      # 1 line of listing output, 1 summary line, 1 empty line from \n split.
|
|
self.assertEqual(len(listing2), 3)
|
|
|
|
# Live (second) object in bucket 1 should match the single live object.
|
|
size1, _, uri_str1, _ = listing2[0].split()
|
|
self.assertEqual(size1, str(len('longer_data1')))
|
|
self.assertEqual(storage_uri(uri_str1).object_name, 'k')
|
|
|
|
_Check3()
|
|
|
|
@SequentialAndParallelTransfer
|
|
@SkipForS3('Preconditions not supported for S3.')
|
|
def test_cp_generation_zero_match(self):
|
|
"""Tests that cp handles an object-not-exists precondition header."""
|
|
bucket_uri = self.CreateBucket()
|
|
fpath1 = self.CreateTempFile(contents=b'data1')
|
|
# Match 0 means only write the object if it doesn't already exist.
|
|
gen_match_header = 'x-goog-if-generation-match:0'
|
|
|
|
# First copy should succeed.
|
|
# TODO: This can fail (rarely) if the server returns a 5xx but actually
|
|
# commits the bytes. If we add restarts on small uploads, handle this
|
|
# case.
|
|
self.RunGsUtil(['-h', gen_match_header, 'cp', fpath1, suri(bucket_uri)])
|
|
|
|
# Second copy should fail with a precondition error.
|
|
stderr = self.RunGsUtil(
|
|
['-h', gen_match_header, 'cp', fpath1,
|
|
suri(bucket_uri)],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
if self._use_gcloud_storage:
|
|
self.assertIn(
|
|
'HTTPError 412: At least one of the pre-conditions you specified'
|
|
' did not hold.', stderr)
|
|
else:
|
|
self.assertIn('PreconditionException', stderr)
|
|
|
|
@SequentialAndParallelTransfer
|
|
@SkipForS3('Preconditions not supported for S3.')
|
|
def test_cp_v_generation_match(self):
|
|
"""Tests that cp -v option handles the if-generation-match header."""
|
|
bucket_uri = self.CreateVersionedBucket()
|
|
k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents=b'data1')
|
|
g1 = k1_uri.generation
|
|
|
|
tmpdir = self.CreateTempDir()
|
|
fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents=b'data2')
|
|
|
|
gen_match_header = 'x-goog-if-generation-match:%s' % g1
|
|
# First copy should succeed.
|
|
self.RunGsUtil(['-h', gen_match_header, 'cp', fpath1, suri(k1_uri)])
|
|
|
|
# Second copy should fail the precondition.
|
|
stderr = self.RunGsUtil(
|
|
['-h', gen_match_header, 'cp', fpath1,
|
|
suri(k1_uri)],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
|
|
if self._use_gcloud_storage:
|
|
self.assertIn('pre-condition', stderr)
|
|
else:
|
|
self.assertIn('PreconditionException', stderr)
|
|
|
|
    # Specifying a generation with -n should fail before the request hits the
    # server.
|
|
stderr = self.RunGsUtil(
|
|
['-h', gen_match_header, 'cp', '-n', fpath1,
|
|
suri(k1_uri)],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
|
|
if self._use_gcloud_storage:
|
|
self.assertIn(
|
|
'Cannot specify both generation precondition and no-clobber', stderr)
|
|
else:
|
|
self.assertIn('ArgumentException', stderr)
|
|
self.assertIn(
|
|
'Specifying x-goog-if-generation-match is not supported '
|
|
'with cp -n', stderr)
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_nv(self):
|
|
"""Tests that cp -nv works when skipping existing file."""
|
|
bucket_uri = self.CreateVersionedBucket()
|
|
k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents=b'data1')
|
|
|
|
tmpdir = self.CreateTempDir()
|
|
fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents=b'data2')
|
|
|
|
# First copy should succeed.
|
|
self.RunGsUtil(['cp', '-nv', fpath1, suri(k1_uri)])
|
|
|
|
# Second copy should skip copying.
|
|
stderr = self.RunGsUtil(
|
|
['cp', '-nv', fpath1, suri(k1_uri)], return_stderr=True)
|
|
self.assertIn('Skipping existing', stderr)
|
|
|
|
@SequentialAndParallelTransfer
|
|
@SkipForS3('S3 lists versioned objects in reverse timestamp order.')
|
|
def test_cp_v_option(self):
|
|
""""Tests that cp -v returns the created object's version-specific URI."""
|
|
bucket_uri = self.CreateVersionedBucket()
|
|
k1_uri = self.CreateObject(bucket_uri=bucket_uri, contents=b'data1')
|
|
k2_uri = self.CreateObject(bucket_uri=bucket_uri, contents=b'data2')
|
|
|
|
# Case 1: Upload file to object using one-shot PUT.
|
|
tmpdir = self.CreateTempDir()
|
|
fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents=b'data1')
|
|
self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri)
|
|
|
|
# Case 2: Upload file to object using resumable upload.
|
|
size_threshold = ONE_KIB
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold',
|
|
str(size_threshold))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
file_as_string = os.urandom(size_threshold)
|
|
tmpdir = self.CreateTempDir()
|
|
fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents=file_as_string)
|
|
self._run_cp_minus_v_test('-v', fpath1, k2_uri.uri)
|
|
|
|
# Case 3: Upload stream to object.
|
|
self._run_cp_minus_v_test('-v', '-', k2_uri.uri)
|
|
|
|
# Case 4: Download object to file. For this case we just expect output of
|
|
# gsutil cp -v to be the URI of the file.
|
|
tmpdir = self.CreateTempDir()
|
|
fpath1 = self.CreateTempFile(tmpdir=tmpdir)
|
|
dst_uri = storage_uri(fpath1)
|
|
stderr = self.RunGsUtil(
|
|
['cp', '-v', suri(k1_uri), suri(dst_uri)], return_stderr=True)
|
|
    # TODO: Add ordering assertion (should be in stderr.split('\n')[-2]) back
|
|
# once both the creation and status messages are handled by the UI thread.
|
|
self.assertIn('Created: %s\n' % dst_uri.uri, stderr)
|
|
|
|
# Case 5: Daisy-chain from object to object.
|
|
self._run_cp_minus_v_test('-Dv', k1_uri.uri, k2_uri.uri)
|
|
|
|
# Case 6: Copy object to object in-the-cloud.
|
|
self._run_cp_minus_v_test('-v', k1_uri.uri, k2_uri.uri)
|
|
|
|
def _run_cp_minus_v_test(self, opt, src_str, dst_str):
|
|
"""Runs cp -v with the options and validates the results."""
|
|
stderr = self.RunGsUtil(['cp', opt, src_str, dst_str], return_stderr=True)
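    # cp -v is expected to log a line such as (illustrative):
    #   Created: gs://bucket/object#1361471234567890
    # The regex below captures that version-specific URI.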
|
|
match = re.search(r'Created: (.*)\n', stderr)
|
|
self.assertIsNotNone(match)
|
|
created_uri = match.group(1)
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check1():
|
|
stdout = self.RunGsUtil(['ls', '-a', dst_str], return_stdout=True)
|
|
lines = stdout.split('\n')
|
|
      # Final (most recent) object should match the "Created:" URI. This is
      # in the second-to-last line (the last line is '\n').
|
|
self.assertGreater(len(lines), 2)
|
|
self.assertEqual(created_uri, lines[-2])
|
|
|
|
_Check1()
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_stdin_args(self):
|
|
"""Tests cp with the -I option."""
|
|
tmpdir = self.CreateTempDir()
|
|
fpath1 = self.CreateTempFile(tmpdir=tmpdir, contents=b'data1')
|
|
fpath2 = self.CreateTempFile(tmpdir=tmpdir, contents=b'data2')
|
|
bucket_uri = self.CreateBucket()
|
|
self.RunGsUtil(['cp', '-I', suri(bucket_uri)],
|
|
stdin='\n'.join((fpath1, fpath2)))
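    # cp -I reads one source path per line from stdin (simulated via the stdin
    # argument above). From a shell this is typically fed by a pipe, e.g.
    # (illustrative): cat filelist | gsutil -m cp -I gs://my-bucket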
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check1():
|
|
stdout = self.RunGsUtil(['ls', suri(bucket_uri)], return_stdout=True)
|
|
self.assertIn(os.path.basename(fpath1), stdout)
|
|
self.assertIn(os.path.basename(fpath2), stdout)
|
|
self.assertNumLines(stdout, 2)
|
|
|
|
_Check1()
|
|
|
|
def test_cross_storage_class_cloud_cp(self):
|
|
bucket1_uri = self.CreateBucket(storage_class='standard')
|
|
bucket2_uri = self.CreateBucket(
|
|
storage_class='durable_reduced_availability')
|
|
key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents=b'foo')
|
|
# Server now allows copy-in-the-cloud across storage classes.
|
|
self.RunGsUtil(['cp', suri(key_uri), suri(bucket2_uri)])
|
|
|
|
@unittest.skipUnless(HAS_S3_CREDS, 'Test requires both S3 and GS credentials')
|
|
def test_cross_provider_cp(self):
|
|
s3_bucket = self.CreateBucket(provider='s3')
|
|
gs_bucket = self.CreateBucket(provider='gs')
|
|
s3_key = self.CreateObject(bucket_uri=s3_bucket, contents=b'foo')
|
|
gs_key = self.CreateObject(bucket_uri=gs_bucket, contents=b'bar')
|
|
self.RunGsUtil(['cp', suri(s3_key), suri(gs_bucket)])
|
|
self.RunGsUtil(['cp', suri(gs_key), suri(s3_bucket)])
|
|
|
|
@unittest.skipUnless(HAS_S3_CREDS, 'Test requires both S3 and GS credentials')
|
|
@unittest.skip('This test performs a large copy but remains here for '
|
|
'debugging purposes.')
|
|
def test_cross_provider_large_cp(self):
|
|
s3_bucket = self.CreateBucket(provider='s3')
|
|
gs_bucket = self.CreateBucket(provider='gs')
|
|
s3_key = self.CreateObject(bucket_uri=s3_bucket,
|
|
contents=b'f' * 1024 * 1024)
|
|
gs_key = self.CreateObject(bucket_uri=gs_bucket,
|
|
contents=b'b' * 1024 * 1024)
|
|
self.RunGsUtil(['cp', suri(s3_key), suri(gs_bucket)])
|
|
self.RunGsUtil(['cp', suri(gs_key), suri(s3_bucket)])
|
|
with SetBotoConfigForTest([('GSUtil', 'resumable_threshold', str(ONE_KIB)),
|
|
('GSUtil', 'json_resumable_chunk_size',
|
|
str(ONE_KIB * 256))]):
|
|
# Ensure copy also works across json upload chunk boundaries.
|
|
self.RunGsUtil(['cp', suri(s3_key), suri(gs_bucket)])
|
|
|
|
@unittest.skipUnless(HAS_S3_CREDS, 'Test requires both S3 and GS credentials')
|
|
def test_gs_to_s3_multipart_cp(self):
|
|
"""Ensure daisy_chain works for an object that is downloaded in 2 parts."""
|
|
s3_bucket = self.CreateBucket(provider='s3')
|
|
gs_bucket = self.CreateBucket(provider='gs', prefer_json_api=True)
|
|
num_bytes = int(_DEFAULT_DOWNLOAD_CHUNK_SIZE * 1.1)
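    # Slightly exceeding _DEFAULT_DOWNLOAD_CHUNK_SIZE forces the daisy-chain
    # download to span two chunked requests, which is the case under test.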
|
|
gs_key = self.CreateObject(bucket_uri=gs_bucket,
|
|
contents=b'b' * num_bytes,
|
|
prefer_json_api=True)
|
|
self.RunGsUtil([
|
|
'-o', 's3:use-sigv4=True', '-o', 's3:host=s3.amazonaws.com', 'cp',
|
|
suri(gs_key),
|
|
suri(s3_bucket)
|
|
])
|
|
|
|
@unittest.skip('This test is slow due to creating many objects, '
|
|
'but remains here for debugging purposes.')
|
|
def test_daisy_chain_cp_file_sizes(self):
|
|
"""Ensure daisy chain cp works with a wide of file sizes."""
|
|
bucket_uri = self.CreateBucket()
|
|
bucket2_uri = self.CreateBucket()
|
|
exponent_cap = 28 # Up to 256 MiB in size.
|
|
for i in range(exponent_cap):
|
|
one_byte_smaller = 2**i - 1
|
|
normal = 2**i
|
|
one_byte_larger = 2**i + 1
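      # Sizes of 2**i - 1, 2**i, and 2**i + 1 probe off-by-one behavior around
      # power-of-two buffer and chunk boundaries.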
|
|
self.CreateObject(bucket_uri=bucket_uri, contents=b'a' * one_byte_smaller)
|
|
self.CreateObject(bucket_uri=bucket_uri, contents=b'b' * normal)
|
|
self.CreateObject(bucket_uri=bucket_uri, contents=b'c' * one_byte_larger)
|
|
|
|
self.AssertNObjectsInBucket(bucket_uri, exponent_cap * 3)
|
|
self.RunGsUtil(
|
|
['-m', 'cp', '-D',
|
|
suri(bucket_uri, '**'),
|
|
suri(bucket2_uri)])
|
|
|
|
self.AssertNObjectsInBucket(bucket2_uri, exponent_cap * 3)
|
|
|
|
def test_daisy_chain_cp(self):
|
|
"""Tests cp with the -D option."""
|
|
bucket1_uri = self.CreateBucket(storage_class='standard')
|
|
bucket2_uri = self.CreateBucket(
|
|
storage_class='durable_reduced_availability')
|
|
key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents=b'foo')
|
|
    # Set some headers on the source object so we can verify that headers are
    # preserved by the daisy-chain copy.
|
|
self.RunGsUtil([
|
|
'setmeta', '-h', 'Cache-Control:public,max-age=12', '-h',
|
|
'Content-Type:image/gif', '-h',
|
|
'x-%s-meta-1:abcd' % self.provider_custom_meta,
|
|
suri(key_uri)
|
|
])
|
|
# Set public-read (non-default) ACL so we can verify that cp -D -p works.
|
|
self.RunGsUtil(['acl', 'set', 'public-read', suri(key_uri)])
|
|
acl_json = self.RunGsUtil(['acl', 'get', suri(key_uri)], return_stdout=True)
|
|
# Perform daisy-chain copy and verify that source object headers and ACL
|
|
# were preserved. Also specify -n option to test that gsutil correctly
|
|
    # removes the x-goog-if-generation-match:0 header that was set at upload
    # time when updating the ACL.
|
|
stderr = self.RunGsUtil(
|
|
['cp', '-Dpn', suri(key_uri),
|
|
suri(bucket2_uri)], return_stderr=True)
|
|
self.assertNotIn('Copy-in-the-cloud disallowed', stderr)
|
|
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check():
|
|
uri = suri(bucket2_uri, key_uri.object_name)
|
|
stdout = self.RunGsUtil(['ls', '-L', uri], return_stdout=True)
|
|
self.assertRegex(stdout, r'Cache-Control:\s+public,max-age=12')
|
|
self.assertRegex(stdout, r'Content-Type:\s+image/gif')
|
|
self.assertRegex(stdout, r'Metadata:\s+1:\s+abcd')
|
|
new_acl_json = self.RunGsUtil(['acl', 'get', uri], return_stdout=True)
|
|
self.assertEqual(acl_json, new_acl_json)
|
|
|
|
_Check()
|
|
|
|
@unittest.skipUnless(
|
|
not HAS_GS_PORT, 'gs_port is defined in config which can cause '
|
|
'problems when uploading and downloading to the same local host port')
|
|
def test_daisy_chain_cp_download_failure(self):
|
|
"""Tests cp with the -D option when the download thread dies."""
|
|
bucket1_uri = self.CreateBucket()
|
|
bucket2_uri = self.CreateBucket()
|
|
key_uri = self.CreateObject(bucket_uri=bucket1_uri,
|
|
contents=b'a' * self.halt_size)
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(False, 5)))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file, '-D',
|
|
suri(key_uri),
|
|
suri(bucket2_uri)
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
# Should have three exception traces; one from the download thread and
|
|
    # two from the upload thread (exception message is repeated in main's
|
|
# _OutputAndExit).
|
|
self.assertEqual(
|
|
stderr.count(
|
|
'ResumableDownloadException: Artifically halting download'), 3)
|
|
|
|
def test_streaming_gzip_upload(self):
|
|
"""Tests error when compression flag is requested on a streaming source."""
|
|
bucket_uri = self.CreateBucket()
|
|
stderr = self.RunGsUtil(
|
|
['cp', '-Z', '-', suri(bucket_uri, 'foo')],
|
|
return_stderr=True,
|
|
expected_status=1,
|
|
stdin='streaming data')
|
|
if self._use_gcloud_storage:
|
|
self.assertIn(
|
|
'Gzip content encoding is not currently supported for streaming '
|
|
'uploads.', stderr)
|
|
else:
|
|
self.assertIn(
|
|
'gzip compression is not currently supported on streaming uploads',
|
|
stderr)
|
|
|
|
def test_seek_ahead_upload_cp(self):
|
|
"""Tests that the seek-ahead iterator estimates total upload work."""
|
|
tmpdir = self.CreateTempDir(test_files=3)
|
|
bucket_uri = self.CreateBucket()
|
|
|
|
with SetBotoConfigForTest([('GSUtil', 'task_estimation_threshold', '1'),
|
|
('GSUtil', 'task_estimation_force', 'True')]):
|
|
stderr = self.RunGsUtil(
|
|
['-m', 'cp', '-r', tmpdir, suri(bucket_uri)], return_stderr=True)
|
|
self.assertIn(
|
|
'Estimated work for this command: objects: 3, total size: 18', stderr)
|
|
|
|
with SetBotoConfigForTest([('GSUtil', 'task_estimation_threshold', '0'),
|
|
('GSUtil', 'task_estimation_force', 'True')]):
|
|
stderr = self.RunGsUtil(
|
|
['-m', 'cp', '-r', tmpdir, suri(bucket_uri)], return_stderr=True)
|
|
self.assertNotIn('Estimated work', stderr)
|
|
|
|
def test_seek_ahead_download_cp(self):
|
|
tmpdir = self.CreateTempDir()
|
|
bucket_uri = self.CreateBucket(test_objects=3)
|
|
self.AssertNObjectsInBucket(bucket_uri, 3)
|
|
|
|
with SetBotoConfigForTest([('GSUtil', 'task_estimation_threshold', '1'),
|
|
('GSUtil', 'task_estimation_force', 'True')]):
|
|
stderr = self.RunGsUtil(
|
|
['-m', 'cp', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
|
|
self.assertIn(
|
|
'Estimated work for this command: objects: 3, total size: 18', stderr)
|
|
|
|
with SetBotoConfigForTest([('GSUtil', 'task_estimation_threshold', '0'),
|
|
('GSUtil', 'task_estimation_force', 'True')]):
|
|
stderr = self.RunGsUtil(
|
|
['-m', 'cp', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
|
|
self.assertNotIn('Estimated work', stderr)
|
|
|
|
def test_canned_acl_cp(self):
|
|
"""Tests copying with a canned ACL."""
|
|
bucket1_uri = self.CreateBucket()
|
|
bucket2_uri = self.CreateBucket()
|
|
key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents=b'foo')
|
|
self.RunGsUtil(
|
|
['cp', '-a', 'public-read',
|
|
suri(key_uri),
|
|
suri(bucket2_uri)])
|
|
# Set public-read on the original key after the copy so we can compare
|
|
# the ACLs.
|
|
self.RunGsUtil(['acl', 'set', 'public-read', suri(key_uri)])
|
|
public_read_acl = self.RunGsUtil(['acl', 'get', suri(key_uri)],
|
|
return_stdout=True)
|
|
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check():
|
|
uri = suri(bucket2_uri, key_uri.object_name)
|
|
new_acl_json = self.RunGsUtil(['acl', 'get', uri], return_stdout=True)
|
|
self.assertEqual(public_read_acl, new_acl_json)
|
|
|
|
_Check()
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_canned_acl_upload(self):
|
|
"""Tests uploading a file with a canned ACL."""
|
|
bucket1_uri = self.CreateBucket()
|
|
key_uri = self.CreateObject(bucket_uri=bucket1_uri, contents=b'foo')
|
|
# Set public-read on the object so we can compare the ACLs.
|
|
self.RunGsUtil(['acl', 'set', 'public-read', suri(key_uri)])
|
|
public_read_acl = self.RunGsUtil(['acl', 'get', suri(key_uri)],
|
|
return_stdout=True)
|
|
|
|
file_name = 'bar'
|
|
fpath = self.CreateTempFile(file_name=file_name, contents=b'foo')
|
|
self.RunGsUtil(['cp', '-a', 'public-read', fpath, suri(bucket1_uri)])
|
|
new_acl_json = self.RunGsUtil(
|
|
['acl', 'get', suri(bucket1_uri, file_name)], return_stdout=True)
|
|
self.assertEqual(public_read_acl, new_acl_json)
|
|
|
|
resumable_size = ONE_KIB
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold',
|
|
str(resumable_size))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
resumable_file_name = 'resumable_bar'
|
|
resumable_contents = os.urandom(resumable_size)
|
|
resumable_fpath = self.CreateTempFile(file_name=resumable_file_name,
|
|
contents=resumable_contents)
|
|
self.RunGsUtil(
|
|
['cp', '-a', 'public-read', resumable_fpath,
|
|
suri(bucket1_uri)])
|
|
new_resumable_acl_json = self.RunGsUtil(
|
|
['acl', 'get', suri(bucket1_uri, resumable_file_name)],
|
|
return_stdout=True)
|
|
self.assertEqual(public_read_acl, new_resumable_acl_json)
|
|
|
|
def test_cp_key_to_local_stream(self):
|
|
bucket_uri = self.CreateBucket()
|
|
contents = b'foo'
|
|
key_uri = self.CreateObject(bucket_uri=bucket_uri, contents=contents)
|
|
stdout = self.RunGsUtil(['cp', suri(key_uri), '-'], return_stdout=True)
|
|
self.assertIn(contents, stdout.encode('ascii'))
|
|
|
|
def test_cp_local_file_to_local_stream(self):
|
|
contents = b'content'
|
|
fpath = self.CreateTempFile(contents=contents)
|
|
stdout = self.RunGsUtil(['cp', fpath, '-'], return_stdout=True)
|
|
self.assertIn(contents, stdout.encode(UTF8))
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_zero_byte_file(self):
|
|
dst_bucket_uri = self.CreateBucket()
|
|
src_dir = self.CreateTempDir()
|
|
fpath = os.path.join(src_dir, 'zero_byte')
|
|
with open(fpath, 'w') as unused_out_file:
|
|
pass # Write a zero byte file
|
|
self.RunGsUtil(['cp', fpath, suri(dst_bucket_uri)])
|
|
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check1():
|
|
stdout = self.RunGsUtil(['ls', suri(dst_bucket_uri)], return_stdout=True)
|
|
self.assertIn(os.path.basename(fpath), stdout)
|
|
|
|
_Check1()
|
|
|
|
download_path = os.path.join(src_dir, 'zero_byte_download')
|
|
self.RunGsUtil(['cp', suri(dst_bucket_uri, 'zero_byte'), download_path])
|
|
self.assertTrue(os.stat(download_path))
|
|
|
|
def test_copy_bucket_to_bucket(self):
|
|
"""Tests recursively copying from bucket to bucket.
|
|
|
|
This should produce identically named objects (and not, in particular,
|
|
destination objects named by the version-specific URI from source objects).
|
|
"""
|
|
src_bucket_uri = self.CreateVersionedBucket()
|
|
dst_bucket_uri = self.CreateVersionedBucket()
|
|
self.CreateObject(bucket_uri=src_bucket_uri,
|
|
object_name='obj0',
|
|
contents=b'abc')
|
|
self.CreateObject(bucket_uri=src_bucket_uri,
|
|
object_name='obj1',
|
|
contents=b'def')
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _CopyAndCheck():
|
|
self.RunGsUtil(['cp', '-R', suri(src_bucket_uri), suri(dst_bucket_uri)])
|
|
stdout = self.RunGsUtil(['ls', '-R', dst_bucket_uri.uri],
|
|
return_stdout=True)
|
|
self.assertIn(
|
|
'%s%s/obj0\n' % (dst_bucket_uri, src_bucket_uri.bucket_name), stdout)
|
|
self.assertIn(
|
|
'%s%s/obj1\n' % (dst_bucket_uri, src_bucket_uri.bucket_name), stdout)
|
|
|
|
_CopyAndCheck()
|
|
|
|
def test_copy_duplicate_nested_object_names_to_new_cloud_dir(self):
|
|
"""Tests copying from bucket to same bucket preserves file structure."""
|
|
bucket_uri = self.CreateBucket()
|
|
self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='dir1/file.txt',
|
|
contents=b'data')
|
|
self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='dir2/file.txt',
|
|
contents=b'data')
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _CopyAndCheck():
|
|
self.RunGsUtil(
|
|
['cp', '-R',
|
|
suri(bucket_uri) + '/*',
|
|
suri(bucket_uri) + '/dst'])
|
|
stdout = self.RunGsUtil(['ls', '-R', bucket_uri.uri], return_stdout=True)
|
|
self.assertIn(suri(bucket_uri) + '/dst/dir1/file.txt', stdout)
|
|
self.assertIn(suri(bucket_uri) + '/dst/dir2/file.txt', stdout)
|
|
|
|
_CopyAndCheck()
|
|
|
|
def test_copy_duplicate_nested_object_names_to_existing_cloud_dir(self):
|
|
"""Tests copying from bucket to same bucket preserves file structure."""
|
|
bucket_uri = self.CreateBucket()
|
|
self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='dir1/file.txt',
|
|
contents=b'data')
|
|
self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='dir2/file.txt',
|
|
contents=b'data')
|
|
self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='dst/existing_file.txt',
|
|
contents=b'data')
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _CopyAndCheck():
|
|
self.RunGsUtil(
|
|
['cp', '-R',
|
|
suri(bucket_uri) + '/*',
|
|
suri(bucket_uri) + '/dst'])
|
|
stdout = self.RunGsUtil(['ls', '-R', bucket_uri.uri], return_stdout=True)
|
|
self.assertIn(suri(bucket_uri) + '/dst/dir1/file.txt', stdout)
|
|
self.assertIn(suri(bucket_uri) + '/dst/dir2/file.txt', stdout)
|
|
self.assertIn(suri(bucket_uri) + '/dst/existing_file.txt', stdout)
|
|
|
|
_CopyAndCheck()
|
|
|
|
@SkipForGS('Only s3 V4 signatures error on location mismatches.')
|
|
def test_copy_bucket_to_bucket_with_location_redirect(self):
|
|
# cp uses a sender function that raises an exception on location mismatches,
|
|
# instead of returning a response. This integration test ensures retries
|
|
# from exceptions work correctly.
|
|
|
|
src_bucket_region = 'ap-east-1'
|
|
dest_bucket_region = 'us-east-2'
|
|
src_bucket_host = 's3.%s.amazonaws.com' % src_bucket_region
|
|
dest_bucket_host = 's3.%s.amazonaws.com' % dest_bucket_region
|
|
client_host = 's3.eu-west-1.amazonaws.com'
|
|
|
|
with SetBotoConfigForTest([('s3', 'host', src_bucket_host)]):
|
|
src_bucket_uri = self.CreateBucket(location=src_bucket_region)
|
|
self.CreateObject(bucket_uri=src_bucket_uri,
|
|
object_name='obj0',
|
|
contents=b'abc')
|
|
self.CreateObject(bucket_uri=src_bucket_uri,
|
|
object_name='obj1',
|
|
contents=b'def')
|
|
|
|
with SetBotoConfigForTest([('s3', 'host', dest_bucket_host)]):
|
|
dst_bucket_uri = self.CreateBucket(location=dest_bucket_region)
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _CopyAndCheck():
|
|
self.RunGsUtil(['cp', '-R', suri(src_bucket_uri), suri(dst_bucket_uri)])
|
|
stdout = self.RunGsUtil(['ls', '-R', dst_bucket_uri.uri],
|
|
return_stdout=True)
|
|
self.assertIn(
|
|
'%s%s/obj0\n' % (dst_bucket_uri, src_bucket_uri.bucket_name), stdout)
|
|
self.assertIn(
|
|
'%s%s/obj1\n' % (dst_bucket_uri, src_bucket_uri.bucket_name), stdout)
|
|
|
|
with SetBotoConfigForTest([('s3', 'host', client_host)]):
|
|
_CopyAndCheck()
|
|
|
|
def test_copy_bucket_to_dir(self):
|
|
"""Tests recursively copying from bucket to a directory.
|
|
|
|
This should produce identically named objects (and not, in particular,
|
|
    destination objects named by the version-specific URI from source objects).
|
|
"""
|
|
src_bucket_uri = self.CreateBucket()
|
|
dst_dir = self.CreateTempDir()
|
|
self.CreateObject(bucket_uri=src_bucket_uri,
|
|
object_name='obj0',
|
|
contents=b'abc')
|
|
self.CreateObject(bucket_uri=src_bucket_uri,
|
|
object_name='obj1',
|
|
contents=b'def')
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _CopyAndCheck():
|
|
"""Copies the bucket recursively and validates the results."""
|
|
self.RunGsUtil(['cp', '-R', suri(src_bucket_uri), dst_dir])
|
|
dir_list = []
|
|
for dirname, _, filenames in os.walk(dst_dir):
|
|
for filename in filenames:
|
|
dir_list.append(os.path.join(dirname, filename))
|
|
dir_list = sorted(dir_list)
|
|
self.assertEqual(len(dir_list), 2)
|
|
self.assertEqual(
|
|
os.path.join(dst_dir, src_bucket_uri.bucket_name, 'obj0'),
|
|
dir_list[0])
|
|
self.assertEqual(
|
|
os.path.join(dst_dir, src_bucket_uri.bucket_name, 'obj1'),
|
|
dir_list[1])
|
|
|
|
_CopyAndCheck()
|
|
|
|
@unittest.skipUnless(HAS_S3_CREDS, 'Test requires both S3 and GS credentials')
|
|
def test_copy_object_to_dir_s3_v4(self):
|
|
"""Tests copying object from s3 to local dir with v4 signature.
|
|
|
|
    Regions like us-east-2 accept only V4 signatures, hence we create the
    bucket in the us-east-2 region to enforce testing with a V4 signature.
|
|
"""
|
|
src_bucket_uri = self.CreateBucket(provider='s3', location='us-east-2')
|
|
dst_dir = self.CreateTempDir()
|
|
self.CreateObject(bucket_uri=src_bucket_uri,
|
|
object_name='obj0',
|
|
contents=b'abc')
|
|
self.CreateObject(bucket_uri=src_bucket_uri,
|
|
object_name='obj1',
|
|
contents=b'def')
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _CopyAndCheck():
|
|
"""Copies the bucket recursively and validates the results."""
|
|
self.RunGsUtil(['cp', '-R', suri(src_bucket_uri), dst_dir])
|
|
dir_list = []
|
|
for dirname, _, filenames in os.walk(dst_dir):
|
|
for filename in filenames:
|
|
dir_list.append(os.path.join(dirname, filename))
|
|
dir_list = sorted(dir_list)
|
|
self.assertEqual(len(dir_list), 2)
|
|
self.assertEqual(
|
|
os.path.join(dst_dir, src_bucket_uri.bucket_name, 'obj0'),
|
|
dir_list[0])
|
|
self.assertEqual(
|
|
os.path.join(dst_dir, src_bucket_uri.bucket_name, 'obj1'),
|
|
dir_list[1])
|
|
|
|
_CopyAndCheck()
|
|
|
|
@SkipForS3('The boto lib used for S3 does not handle objects '
|
|
'starting with slashes if we use V4 signature')
|
|
def test_recursive_download_with_leftover_slash_only_dir_placeholder(self):
|
|
"""Tests that we correctly handle leftover dir placeholders."""
|
|
src_bucket_uri = self.CreateBucket()
|
|
dst_dir = self.CreateTempDir()
|
|
self.CreateObject(bucket_uri=src_bucket_uri,
|
|
object_name='obj0',
|
|
contents=b'abc')
|
|
self.CreateObject(bucket_uri=src_bucket_uri,
|
|
object_name='obj1',
|
|
contents=b'def')
|
|
|
|
# Create a placeholder like what can be left over by web GUI tools.
|
|
key_uri = self.StorageUriCloneReplaceName(src_bucket_uri, '/')
|
|
self.StorageUriSetContentsFromString(key_uri, '')
|
|
self.AssertNObjectsInBucket(src_bucket_uri, 3)
|
|
|
|
self.RunGsUtil(['cp', '-R', suri(src_bucket_uri), dst_dir])
|
|
dir_list = []
|
|
for dirname, _, filenames in os.walk(dst_dir):
|
|
for filename in filenames:
|
|
dir_list.append(os.path.join(dirname, filename))
|
|
dir_list = sorted(dir_list)
|
|
self.assertEqual(len(dir_list), 2)
|
|
self.assertEqual(os.path.join(dst_dir, src_bucket_uri.bucket_name, 'obj0'),
|
|
dir_list[0])
|
|
self.assertEqual(os.path.join(dst_dir, src_bucket_uri.bucket_name, 'obj1'),
|
|
dir_list[1])
|
|
|
|
def test_recursive_download_with_leftover_dir_placeholder(self):
|
|
"""Tests that we correctly handle leftover dir placeholders."""
|
|
src_bucket_uri = self.CreateBucket()
|
|
dst_dir = self.CreateTempDir()
|
|
self.CreateObject(bucket_uri=src_bucket_uri,
|
|
object_name='obj0',
|
|
contents=b'abc')
|
|
self.CreateObject(bucket_uri=src_bucket_uri,
|
|
object_name='obj1',
|
|
contents=b'def')
|
|
|
|
# Create a placeholder like what can be left over by web GUI tools.
|
|
key_uri = self.StorageUriCloneReplaceName(src_bucket_uri, 'foo/')
|
|
self.StorageUriSetContentsFromString(key_uri, '')
|
|
self.AssertNObjectsInBucket(src_bucket_uri, 3)
|
|
|
|
self.RunGsUtil(['cp', '-R', suri(src_bucket_uri), dst_dir])
|
|
dir_list = []
|
|
for dirname, _, filenames in os.walk(dst_dir):
|
|
for filename in filenames:
|
|
dir_list.append(os.path.join(dirname, filename))
|
|
dir_list = sorted(dir_list)
|
|
self.assertEqual(len(dir_list), 2)
|
|
self.assertEqual(os.path.join(dst_dir, src_bucket_uri.bucket_name, 'obj0'),
|
|
dir_list[0])
|
|
self.assertEqual(os.path.join(dst_dir, src_bucket_uri.bucket_name, 'obj1'),
|
|
dir_list[1])
|
|
|
|
def test_copy_quiet(self):
|
|
bucket_uri = self.CreateBucket()
|
|
key_uri = self.CreateObject(bucket_uri=bucket_uri, contents=b'foo')
|
|
stderr = self.RunGsUtil([
|
|
'-q', 'cp',
|
|
suri(key_uri),
|
|
suri(self.StorageUriCloneReplaceName(bucket_uri, 'o2'))
|
|
],
|
|
return_stderr=True)
|
|
self.assertEqual(stderr.count('Copying '), 0)
|
|
|
|
def test_cp_md5_match(self):
|
|
"""Tests that the uploaded object has the expected MD5.
|
|
|
|
Note that while this does perform a file to object upload, MD5's are
|
|
not supported for composite objects so we don't use the decorator in this
|
|
case.
|
|
"""
|
|
bucket_uri = self.CreateBucket()
|
|
fpath = self.CreateTempFile(contents=b'bar')
|
|
with open(fpath, 'rb') as f_in:
|
|
md5 = binascii.unhexlify(CalculateMd5FromContents(f_in))
|
|
try:
|
|
encoded_bytes = base64.encodebytes(md5)
|
|
except AttributeError:
|
|
        # For Python 2 compatibility.
|
|
encoded_bytes = base64.encodestring(md5)
|
|
file_md5 = encoded_bytes.rstrip(b'\n')
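      # file_md5 is now the base64 encoding of the binary MD5 digest, which is
      # the form `ls -L` reports as "Hash (md5)"; equivalent to (illustrative)
      # base64.b64encode(hashlib.md5(file_contents).digest()).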
|
|
self.RunGsUtil(['cp', fpath, suri(bucket_uri)])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _Check1():
|
|
stdout = self.RunGsUtil(['ls', '-L', suri(bucket_uri)],
|
|
return_stdout=True)
|
|
self.assertRegex(
|
|
stdout, r'Hash\s+\(md5\):\s+%s' % re.escape(file_md5.decode('ascii')))
|
|
|
|
_Check1()
|
|
|
|
@unittest.skipIf(IS_WINDOWS,
|
|
'Unicode handling on Windows requires mods to site-packages')
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_manifest_upload_unicode(self):
|
|
return self._ManifestUpload('foo-unicöde'.encode(UTF8),
|
|
'bar-unicöde'.encode(UTF8),
|
|
'manifest-unicöde'.encode(UTF8))
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_manifest_upload(self):
|
|
"""Tests uploading with a mnifest file."""
|
|
return self._ManifestUpload('foo', 'bar', 'manifest')
|
|
|
|
def _ManifestUpload(self, file_name, object_name, manifest_name):
|
|
"""Tests uploading with a manifest file."""
|
|
bucket_uri = self.CreateBucket()
|
|
dsturi = suri(bucket_uri, object_name)
|
|
|
|
fpath = self.CreateTempFile(file_name=file_name, contents=b'bar')
|
|
logpath = self.CreateTempFile(file_name=manifest_name, contents=b'')
|
|
# Ensure the file is empty.
|
|
open(logpath, 'w').close()
|
|
self.RunGsUtil(['cp', '-L', logpath, fpath, dsturi])
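    # The -L manifest is a CSV log with a header row plus one row per copied
    # object, roughly (illustrative, timestamps abridged):
    #   Source,Destination,Start,End,Md5,UploadId,Source Size,Bytes Transferred,Result,Description
    #   file:///tmp/foo,gs://bucket/bar,...Z,...Z,rL0Y20zC+Fzt72VPzMSk2A==,,3,3,OK,
    # The assertions below parse those two rows.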
|
|
|
|
with open(logpath, 'r') as f:
|
|
lines = f.readlines()
|
|
if six.PY2:
|
|
lines = [six.text_type(line, UTF8) for line in lines]
|
|
|
|
self.assertEqual(len(lines), 2)
|
|
|
|
expected_headers = [
|
|
'Source', 'Destination', 'Start', 'End', 'Md5', 'UploadId',
|
|
'Source Size', 'Bytes Transferred', 'Result', 'Description'
|
|
]
|
|
self.assertEqual(expected_headers, lines[0].strip().split(','))
|
|
results = lines[1].strip().split(',')
|
|
|
|
results = dict(zip(expected_headers, results))
|
|
|
|
self.assertEqual(
|
|
results['Source'],
|
|
'file://' + fpath,
|
|
)
|
|
self.assertEqual(
|
|
results['Destination'],
|
|
dsturi,
|
|
)
|
|
|
|
date_format = '%Y-%m-%dT%H:%M:%S.%fZ'
|
|
start_date = datetime.datetime.strptime(results['Start'], date_format)
|
|
end_date = datetime.datetime.strptime(results['End'], date_format)
|
|
self.assertEqual(end_date > start_date, True)
|
|
|
|
if self.RunGsUtil == testcase.GsUtilIntegrationTestCase.RunGsUtil:
|
|
# Check that we didn't do automatic parallel uploads - compose doesn't
|
|
      # calculate the MD5 hash. Since RunGsUtil is overridden in
|
|
# TestCpParallelUploads to force parallel uploads, we can check which
|
|
# method was used.
|
|
self.assertEqual(results['Md5'], 'rL0Y20zC+Fzt72VPzMSk2A==')
|
|
|
|
self.assertEqual(int(results['Source Size']), 3)
|
|
self.assertEqual(int(results['Bytes Transferred']), 3)
|
|
self.assertEqual(results['Result'], 'OK')
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_manifest_download(self):
|
|
"""Tests downloading with a manifest file."""
|
|
key_uri = self.CreateObject(contents=b'foo')
|
|
fpath = self.CreateTempFile(contents=b'')
|
|
logpath = self.CreateTempFile(contents=b'')
|
|
# Ensure the file is empty.
|
|
open(logpath, 'w').close()
|
|
self.RunGsUtil(
|
|
['cp', '-L', logpath, suri(key_uri), fpath], return_stdout=True)
|
|
with open(logpath, 'r') as f:
|
|
lines = f.readlines()
|
|
if six.PY3:
|
|
decode_lines = []
|
|
for line in lines:
|
|
if line.startswith("b'"):
|
|
some_strs = line.split(',')
|
|
line_parts = []
|
|
for some_str in some_strs:
|
|
if some_str.startswith("b'"):
|
|
line_parts.append(ast.literal_eval(some_str).decode(UTF8))
|
|
else:
|
|
line_parts.append(some_str)
|
|
decode_lines.append(','.join(line_parts))
|
|
else:
|
|
decode_lines.append(line)
|
|
lines = decode_lines
|
|
self.assertEqual(len(lines), 2)
|
|
|
|
expected_headers = [
|
|
'Source', 'Destination', 'Start', 'End', 'Md5', 'UploadId',
|
|
'Source Size', 'Bytes Transferred', 'Result', 'Description'
|
|
]
|
|
self.assertEqual(expected_headers, lines[0].strip().split(','))
|
|
|
|
results = lines[1].strip().split(',')
|
|
self.assertEqual(results[0][:5], '%s://' % self.default_provider) # source
|
|
self.assertEqual(results[1][:7], 'file://') # destination
|
|
date_format = '%Y-%m-%dT%H:%M:%S.%fZ'
|
|
start_date = datetime.datetime.strptime(results[2], date_format)
|
|
end_date = datetime.datetime.strptime(results[3], date_format)
|
|
self.assertEqual(end_date > start_date, True)
|
|
self.assertEqual(int(results[6]), 3) # Source Size
|
|
# Bytes transferred might be more than 3 if the file was gzipped, since
|
|
# the minimum gzip header is 10 bytes.
|
|
self.assertGreaterEqual(int(results[7]), 3) # Bytes Transferred
|
|
self.assertEqual(results[8], 'OK') # Result
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_copy_unicode_non_ascii_filename(self):
|
|
key_uri = self.CreateObject()
|
|
# Try with and without resumable upload threshold, to ensure that each
|
|
# scenario works. In particular, resumable uploads have tracker filename
|
|
# logic.
|
|
file_contents = b'x' * START_CALLBACK_PER_BYTES * 2
|
|
fpath = self.CreateTempFile(file_name='Аудиоархив', contents=file_contents)
|
|
with SetBotoConfigForTest([('GSUtil', 'resumable_threshold', '1')]):
|
|
# fpath_bytes = fpath.encode(UTF8)
|
|
self.RunGsUtil(['cp', fpath, suri(key_uri)], return_stderr=True)
|
|
stdout = self.RunGsUtil(['cat', suri(key_uri)],
|
|
return_stdout=True,
|
|
force_gsutil=True)
|
|
self.assertEqual(stdout.encode('ascii'), file_contents)
|
|
with SetBotoConfigForTest([('GSUtil', 'resumable_threshold',
|
|
str(START_CALLBACK_PER_BYTES * 3))]):
|
|
self.RunGsUtil(['cp', fpath, suri(key_uri)], return_stderr=True)
|
|
stdout = self.RunGsUtil(['cat', suri(key_uri)],
|
|
return_stdout=True,
|
|
force_gsutil=True)
|
|
self.assertEqual(stdout.encode('ascii'), file_contents)
|
|
|
|
  # Note: We originally implemented a test
|
|
# (test_copy_invalid_unicode_filename) that invalid unicode filenames were
|
|
# skipped, but it turns out os.walk() on macOS doesn't have problems with
|
|
# such files (so, failed that test). Given that, we decided to remove the
|
|
# test.
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_gzip_upload_and_download(self):
|
|
bucket_uri = self.CreateBucket()
|
|
contents = b'x' * 10000
|
|
tmpdir = self.CreateTempDir()
|
|
self.CreateTempFile(file_name='test.html', tmpdir=tmpdir, contents=contents)
|
|
self.CreateTempFile(file_name='test.js', tmpdir=tmpdir, contents=contents)
|
|
self.CreateTempFile(file_name='test.txt', tmpdir=tmpdir, contents=contents)
|
|
# Test that copying specifying only 2 of the 3 prefixes gzips the correct
|
|
# files, and test that including whitespace in the extension list works.
|
|
self.RunGsUtil([
|
|
'cp', '-z', 'js, html',
|
|
os.path.join(tmpdir, 'test.*'),
|
|
suri(bucket_uri)
|
|
])
|
|
self.AssertNObjectsInBucket(bucket_uri, 3)
|
|
uri1 = suri(bucket_uri, 'test.html')
|
|
uri2 = suri(bucket_uri, 'test.js')
|
|
uri3 = suri(bucket_uri, 'test.txt')
|
|
stdout = self.RunGsUtil(['stat', uri1], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Encoding:\s+gzip')
|
|
stdout = self.RunGsUtil(['stat', uri2], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Encoding:\s+gzip')
|
|
stdout = self.RunGsUtil(['stat', uri3], return_stdout=True)
|
|
self.assertNotRegex(stdout, r'Content-Encoding:\s+gzip')
|
|
fpath4 = self.CreateTempFile()
|
|
for uri in (uri1, uri2, uri3):
|
|
self.RunGsUtil(['cp', uri, suri(fpath4)])
|
|
with open(fpath4, 'rb') as f:
|
|
self.assertEqual(f.read(), contents)
|
|
|
|
@SkipForS3('No compressed transport encoding support for S3.')
|
|
@SkipForXML('No compressed transport encoding support for the XML API.')
|
|
@SequentialAndParallelTransfer
|
|
def test_gzip_transport_encoded_upload_and_download(self):
|
|
"""Test gzip encoded files upload correctly.
|
|
|
|
This checks that files are not tagged with a gzip content encoding and
|
|
that the contents of the files are uncompressed in GCS. This test uses the
|
|
-j flag to target specific extensions.
|
|
"""
|
|
|
|
def _create_test_data(): # pylint: disable=invalid-name
|
|
"""Setup the bucket and local data to test with.
|
|
|
|
Returns:
|
|
Triplet containing the following values:
|
|
bucket_uri: String URI of cloud storage bucket to upload mock data
|
|
to.
|
|
tmpdir: String, path of a temporary directory to write mock data to.
|
|
local_uris: Tuple of three strings; each is the file path to a file
|
|
containing mock data.
|
|
"""
|
|
bucket_uri = self.CreateBucket()
|
|
contents = b'x' * 10000
|
|
tmpdir = self.CreateTempDir()
|
|
|
|
local_uris = []
|
|
for filename in ('test.html', 'test.js', 'test.txt'):
|
|
local_uris.append(
|
|
self.CreateTempFile(file_name=filename,
|
|
tmpdir=tmpdir,
|
|
contents=contents))
|
|
|
|
return (bucket_uri, tmpdir, local_uris)
|
|
|
|
def _upload_test_data(tmpdir, bucket_uri): # pylint: disable=invalid-name
|
|
"""Upload local test data.
|
|
|
|
Args:
|
|
tmpdir: String, path of a temporary directory to write mock data to.
|
|
bucket_uri: String URI of cloud storage bucket to upload mock data to.
|
|
|
|
Returns:
|
|
stderr: String output from running the gsutil command to upload mock
|
|
data.
|
|
"""
|
|
if self._use_gcloud_storage:
|
|
extension_list_string = 'js,html'
|
|
else:
|
|
extension_list_string = 'js, html'
|
|
stderr = self.RunGsUtil([
|
|
'-D', 'cp', '-j', extension_list_string,
|
|
os.path.join(tmpdir, 'test*'),
|
|
suri(bucket_uri)
|
|
],
|
|
return_stderr=True)
|
|
self.AssertNObjectsInBucket(bucket_uri, 3)
|
|
return stderr
|
|
|
|
def _assert_sent_compressed(local_uris, stderr): # pylint: disable=invalid-name
|
|
"""Ensure the correct files were marked for compression.
|
|
|
|
Args:
|
|
local_uris: Tuple of three strings; each is the file path to a file
|
|
containing mock data.
|
|
stderr: String output from running the gsutil command to upload mock
|
|
data.
|
|
"""
|
|
local_uri_html, local_uri_js, local_uri_txt = local_uris
|
|
assert_base_string = 'Using compressed transport encoding for file://{}.'
|
|
self.assertIn(assert_base_string.format(local_uri_html), stderr)
|
|
self.assertIn(assert_base_string.format(local_uri_js), stderr)
|
|
self.assertNotIn(assert_base_string.format(local_uri_txt), stderr)
|
|
|
|
def _assert_stored_uncompressed(bucket_uri, contents=b'x' * 10000): # pylint: disable=invalid-name
|
|
"""Ensure the files are not compressed when they are stored in the bucket.
|
|
|
|
Args:
|
|
bucket_uri: String with URI for bucket containing uploaded test data.
|
|
        contents: Byte string stored in each file in the bucket.
|
|
"""
|
|
local_uri_html = suri(bucket_uri, 'test.html')
|
|
local_uri_js = suri(bucket_uri, 'test.js')
|
|
local_uri_txt = suri(bucket_uri, 'test.txt')
|
|
fpath4 = self.CreateTempFile()
|
|
|
|
for uri in (local_uri_html, local_uri_js, local_uri_txt):
|
|
stdout = self.RunGsUtil(['stat', uri], return_stdout=True)
|
|
self.assertNotRegex(stdout, r'Content-Encoding:\s+gzip')
|
|
self.RunGsUtil(['cp', uri, suri(fpath4)])
|
|
with open(fpath4, 'rb') as f:
|
|
self.assertEqual(f.read(), contents)
|
|
|
|
# Get mock data, run tests
|
|
bucket_uri, tmpdir, local_uris = _create_test_data()
|
|
stderr = _upload_test_data(tmpdir, bucket_uri)
|
|
_assert_sent_compressed(local_uris, stderr)
|
|
_assert_stored_uncompressed(bucket_uri)
|
|
|
|
@SkipForS3('No compressed transport encoding support for S3.')
|
|
@SkipForXML('No compressed transport encoding support for the XML API.')
|
|
@SequentialAndParallelTransfer
|
|
def test_gzip_transport_encoded_parallel_upload_non_resumable(self):
|
|
"""Test non resumable, gzip encoded files upload correctly in parallel.
|
|
|
|
This test generates a small amount of data (e.g. 100 chars) to upload.
|
|
Due to the small size, it will be below the resumable threshold,
|
|
and test the behavior of non-resumable uploads.
|
|
"""
|
|
# Setup the bucket and local data.
|
|
bucket_uri = self.CreateBucket()
|
|
contents = b'x' * 100
|
|
tmpdir = self.CreateTempDir(test_files=10, contents=contents)
|
|
# Upload the data.
|
|
with SetBotoConfigForTest([('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
]):
|
|
stderr = self.RunGsUtil(
|
|
['-D', '-m', 'cp', '-J', '-r', tmpdir,
|
|
suri(bucket_uri)],
|
|
return_stderr=True)
|
|
# Ensure all objects are uploaded.
|
|
self.AssertNObjectsInBucket(bucket_uri, 10)
|
|
if not self._use_gcloud_storage:
|
|
# Ensure the progress logger sees a gzip encoding.
|
|
self.assertIn('send: Using gzip transport encoding for the request.',
|
|
stderr)
|
|
|
|
@SkipForS3('No compressed transport encoding support for S3.')
|
|
@SkipForXML('No compressed transport encoding support for the XML API.')
|
|
@SequentialAndParallelTransfer
|
|
def test_gzip_transport_encoded_parallel_upload_resumable(self):
|
|
"""Test resumable, gzip encoded files upload correctly in parallel.
|
|
|
|
    This test generates a large amount of data (halt_size characters)
|
|
to upload. Due to the large size, it will be above the resumable threshold,
|
|
and test the behavior of resumable uploads.
|
|
"""
|
|
# Setup the bucket and local data.
|
|
bucket_uri = self.CreateBucket()
|
|
contents = get_random_ascii_chars(size=self.halt_size)
|
|
tmpdir = self.CreateTempDir(test_files=10, contents=contents)
|
|
# Upload the data.
|
|
with SetBotoConfigForTest([('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
]):
|
|
stderr = self.RunGsUtil(
|
|
['-D', '-m', 'cp', '-J', '-r', tmpdir,
|
|
suri(bucket_uri)],
|
|
return_stderr=True)
|
|
# Ensure all objects are uploaded.
|
|
self.AssertNObjectsInBucket(bucket_uri, 10)
|
|
if not self._use_gcloud_storage:
|
|
# Ensure the progress logger sees a gzip encoding.
|
|
self.assertIn('send: Using gzip transport encoding for the request.',
|
|
stderr)
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_gzip_all_upload_and_download(self):
|
|
bucket_uri = self.CreateBucket()
|
|
contents = b'x' * 10000
|
|
tmpdir = self.CreateTempDir()
|
|
self.CreateTempFile(file_name='test.html', tmpdir=tmpdir, contents=contents)
|
|
self.CreateTempFile(file_name='test.js', tmpdir=tmpdir, contents=contents)
|
|
self.CreateTempFile(file_name='test.txt', tmpdir=tmpdir, contents=contents)
|
|
self.CreateTempFile(file_name='test', tmpdir=tmpdir, contents=contents)
|
|
# Test that all files are compressed.
|
|
self.RunGsUtil(
|
|
['cp', '-Z',
|
|
os.path.join(tmpdir, 'test*'),
|
|
suri(bucket_uri)])
|
|
self.AssertNObjectsInBucket(bucket_uri, 4)
|
|
uri1 = suri(bucket_uri, 'test.html')
|
|
uri2 = suri(bucket_uri, 'test.js')
|
|
uri3 = suri(bucket_uri, 'test.txt')
|
|
uri4 = suri(bucket_uri, 'test')
|
|
stdout = self.RunGsUtil(['stat', uri1], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Encoding:\s+gzip')
|
|
stdout = self.RunGsUtil(['stat', uri2], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Encoding:\s+gzip')
|
|
stdout = self.RunGsUtil(['stat', uri3], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Encoding:\s+gzip')
|
|
stdout = self.RunGsUtil(['stat', uri4], return_stdout=True)
|
|
self.assertRegex(stdout, r'Content-Encoding:\s+gzip')
|
|
fpath4 = self.CreateTempFile()
|
|
for uri in (uri1, uri2, uri3, uri4):
|
|
self.RunGsUtil(['cp', uri, suri(fpath4)])
|
|
with open(fpath4, 'rb') as f:
|
|
self.assertEqual(f.read(), contents)
|
|
|
|
@SkipForS3('No compressed transport encoding support for S3.')
|
|
@SkipForXML('No compressed transport encoding support for the XML API.')
|
|
@SequentialAndParallelTransfer
|
|
def test_gzip_transport_encoded_all_upload_and_download(self):
|
|
"""Test gzip encoded files upload correctly.
|
|
|
|
This checks that files are not tagged with a gzip content encoding and
|
|
that the contents of the files are uncompressed in GCS. This test uses the
|
|
-J flag to target all files.
|
|
"""
|
|
# Setup the bucket and local data.
|
|
bucket_uri = self.CreateBucket()
|
|
contents = b'x' * 10000
|
|
tmpdir = self.CreateTempDir()
|
|
local_uri1 = self.CreateTempFile(file_name='test.txt',
|
|
tmpdir=tmpdir,
|
|
contents=contents)
|
|
local_uri2 = self.CreateTempFile(file_name='test',
|
|
tmpdir=tmpdir,
|
|
contents=contents)
|
|
# Upload the data.
|
|
stderr = self.RunGsUtil(
|
|
['-D', 'cp', '-J',
|
|
os.path.join(tmpdir, 'test*'),
|
|
suri(bucket_uri)],
|
|
return_stderr=True)
|
|
self.AssertNObjectsInBucket(bucket_uri, 2)
|
|
# Ensure the correct files were marked for compression.
|
|
self.assertIn(
|
|
'Using compressed transport encoding for file://%s.' % (local_uri1),
|
|
stderr)
|
|
self.assertIn(
|
|
'Using compressed transport encoding for file://%s.' % (local_uri2),
|
|
stderr)
|
|
if not self._use_gcloud_storage:
|
|
# Ensure the progress logger sees a gzip encoding.
|
|
self.assertIn('send: Using gzip transport encoding for the request.',
|
|
stderr)
|
|
# Ensure the files do not have a stored encoding of gzip and are stored
|
|
# uncompressed.
|
|
remote_uri1 = suri(bucket_uri, 'test.txt')
|
|
remote_uri2 = suri(bucket_uri, 'test')
|
|
fpath4 = self.CreateTempFile()
|
|
for uri in (remote_uri1, remote_uri2):
|
|
stdout = self.RunGsUtil(['stat', uri], return_stdout=True)
|
|
self.assertNotRegex(stdout, r'Content-Encoding:\s+gzip')
|
|
self.RunGsUtil(['cp', uri, suri(fpath4)])
|
|
with open(fpath4, 'rb') as f:
|
|
self.assertEqual(f.read(), contents)
|
|
|
|
def test_both_gzip_options_error(self):
|
|
"""Test that mixing compression flags error."""
|
|
cases = (
|
|
# Test with -Z and -z
|
|
['cp', '-Z', '-z', 'html, js', 'a.js', 'b.js'],
|
|
# Same test, but with arguments in the opposite order.
|
|
['cp', '-z', 'html, js', '-Z', 'a.js', 'b.js'])
|
|
|
|
if self._use_gcloud_storage:
|
|
expected_status, expected_error_prefix, expected_error_substring = (
|
|
_GCLOUD_STORAGE_GZIP_FLAG_CONFLICT_OUTPUT)
|
|
else:
|
|
expected_status = 1
|
|
expected_error_prefix = 'CommandException'
|
|
expected_error_substring = (
|
|
'Specifying both the -z and -Z options together is invalid.')
|
|
for case in cases:
|
|
stderr = self.RunGsUtil(case,
|
|
return_stderr=True,
|
|
expected_status=expected_status)
|
|
self.assertIn(expected_error_prefix, stderr)
|
|
self.assertIn(expected_error_substring, stderr)
|
|
|
|
def test_both_gzip_transport_encoding_options_error(self):
|
|
"""Test that mixing transport encoding flags error."""
|
|
cases = (
|
|
# Test with -J and -j
|
|
['cp', '-J', '-j', 'html, js', 'a.js', 'b.js'],
|
|
# Same test, but with arguments in the opposite order.
|
|
['cp', '-j', 'html, js', '-J', 'a.js', 'b.js'])
|
|
|
|
if self._use_gcloud_storage:
|
|
expected_status, expected_error_prefix, expected_error_substring = (
|
|
_GCLOUD_STORAGE_GZIP_FLAG_CONFLICT_OUTPUT)
|
|
else:
|
|
expected_status = 1
|
|
expected_error_prefix = 'CommandException'
|
|
expected_error_substring = (
|
|
'Specifying both the -j and -J options together is invalid.')
|
|
|
|
for case in cases:
|
|
stderr = self.RunGsUtil(case,
|
|
return_stderr=True,
|
|
expected_status=expected_status)
|
|
self.assertIn(expected_error_prefix, stderr)
|
|
self.assertIn(expected_error_substring, stderr)
|
|
|
|
def test_combined_gzip_options_error(self):
|
|
"""Test that mixing transport encoding and compression flags error."""
|
|
cases = (['cp', '-Z', '-j', 'html, js', 'a.js',
|
|
'b.js'], ['cp', '-J', '-z', 'html, js', 'a.js',
|
|
'b.js'], ['cp', '-j', 'html, js', '-Z', 'a.js', 'b.js'],
|
|
['cp', '-z', 'html, js', '-J', 'a.js', 'b.js'])
|
|
|
|
if self._use_gcloud_storage:
|
|
expected_status, expected_error_prefix, expected_error_substring = (
|
|
_GCLOUD_STORAGE_GZIP_FLAG_CONFLICT_OUTPUT)
|
|
else:
|
|
expected_status = 1
|
|
expected_error_prefix = 'CommandException'
|
|
expected_error_substring = (
|
|
'Specifying both the -j/-J and -z/-Z options together is invalid.')
|
|
|
|
for case in cases:
|
|
stderr = self.RunGsUtil(case,
|
|
return_stderr=True,
|
|
expected_status=expected_status)
|
|
self.assertIn(expected_error_prefix, stderr)
|
|
self.assertIn(expected_error_substring, stderr)
|
|
|
|
def test_upload_with_subdir_and_unexpanded_wildcard(self):
|
|
fpath1 = self.CreateTempFile(file_name=('tmp', 'x', 'y', 'z'))
|
|
bucket_uri = self.CreateBucket()
|
|
wildcard_uri = '%s*' % fpath1[:-5]
|
|
stderr = self.RunGsUtil(
|
|
['cp', '-R', wildcard_uri, suri(bucket_uri)], return_stderr=True)
|
|
self.assertIn('Copying file:', stderr)
|
|
self.AssertNObjectsInBucket(bucket_uri, 1)
|
|
|
|
def test_upload_does_not_raise_with_content_md5_and_check_hashes_never(self):
|
|
fpath1 = self.CreateTempFile(file_name=('foo'))
|
|
bucket_uri = self.CreateBucket()
|
|
with SetBotoConfigForTest([('GSUtil', 'check_hashes', 'never')]):
|
|
stderr = self.RunGsUtil(
|
|
['-h', 'Content-MD5: invalid-md5', 'cp', fpath1,
|
|
suri(bucket_uri)],
|
|
return_stderr=True)
|
|
self.assertIn('Copying file:', stderr)
|
|
self.AssertNObjectsInBucket(bucket_uri, 1)
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_object_ending_with_slash(self):
|
|
"""Tests that cp works with object names ending with slash."""
|
|
tmpdir = self.CreateTempDir()
|
|
bucket_uri = self.CreateBucket()
|
|
self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='abc/',
|
|
contents=b'dir')
|
|
self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='abc/def',
|
|
contents=b'def')
|
|
self.AssertNObjectsInBucket(bucket_uri, 2)
|
|
self.RunGsUtil(['cp', '-R', suri(bucket_uri), tmpdir])
|
|
# Check that files in the subdir got copied even though subdir object
|
|
# download was skipped.
|
|
with open(os.path.join(tmpdir, bucket_uri.bucket_name, 'abc', 'def')) as f:
|
|
self.assertEqual('def', '\n'.join(f.readlines()))
|
|
|
|
def test_cp_without_read_access(self):
|
|
"""Tests that cp fails without read access to the object."""
|
|
# TODO: With 401's triggering retries in apitools, this test will take
|
|
# a long time. Ideally, make apitools accept a num_retries config for this
|
|
# until we stop retrying the 401's.
|
|
bucket_uri = self.CreateBucket()
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri, contents=b'foo')
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
self.AssertNObjectsInBucket(bucket_uri, 1)
|
|
|
|
if self.default_provider == 's3':
|
|
expected_error_regex = r'AccessDenied'
|
|
else:
|
|
expected_error_regex = r'Anonymous \S+ do(es)? not have'
|
|
|
|
with self.SetAnonymousBotoCreds():
|
|
stderr = self.RunGsUtil(['cp', suri(object_uri), 'foo'],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
self.assertRegex(stderr, expected_error_regex)
|
|
|
|
@unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.')
|
|
def test_cp_minus_r_minus_e(self):
|
|
"""Tests that cp -e -r ignores symlinks when recursing."""
|
|
bucket_uri = self.CreateBucket()
|
|
tmpdir = self.CreateTempDir()
|
|
# Create a valid file, since cp expects to copy at least one source URL
|
|
# successfully.
|
|
self.CreateTempFile(tmpdir=tmpdir, contents=b'foo')
|
|
subdir = os.path.join(tmpdir, 'subdir')
|
|
os.mkdir(subdir)
|
|
os.mkdir(os.path.join(tmpdir, 'missing'))
|
|
    # Create a symlink to the 'missing' directory and then remove that
    # directory, leaving a broken symlink, to ensure that recursive
    # enumeration doesn't fail on a bad symlink.
|
|
os.symlink(os.path.join(tmpdir, 'missing'), os.path.join(subdir, 'missing'))
|
|
os.rmdir(os.path.join(tmpdir, 'missing'))
|
|
self.RunGsUtil(['cp', '-r', '-e', tmpdir, suri(bucket_uri)])
|
|
|
|
@unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.')
|
|
def test_cp_minus_e(self):
|
|
fpath_dir = self.CreateTempDir()
|
|
fpath1 = self.CreateTempFile(tmpdir=fpath_dir)
|
|
fpath2 = os.path.join(fpath_dir, 'cp_minus_e')
|
|
bucket_uri = self.CreateBucket()
|
|
os.symlink(fpath1, fpath2)
|
|
# We also use -c to continue on errors. One of the expanded glob entries
|
|
# should be the symlinked file, which should throw a CommandException since
|
|
# no valid (non-symlinked) files could be found at that path; we don't want
|
|
# the command to terminate if that's the first file we attempt to copy.
|
|
stderr = self.RunGsUtil([
|
|
'-m', 'cp', '-e',
|
|
'%s%s*' % (fpath_dir, os.path.sep),
|
|
suri(bucket_uri, 'files')
|
|
],
|
|
return_stderr=True)
|
|
self.assertIn('Copying file', stderr)
|
|
if self._use_gcloud_storage:
|
|
self.assertIn('Skipping symlink', stderr)
|
|
else:
|
|
self.assertIn('Skipping symbolic link', stderr)
|
|
|
|
# Ensure that top-level arguments are ignored if they are symlinks. The file
|
|
# at fpath1 should be successfully copied, then copying the symlink at
|
|
# fpath2 should fail.
|
|
stderr = self.RunGsUtil(
|
|
['cp', '-e', '-r', fpath1, fpath2,
|
|
suri(bucket_uri, 'files')],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
self.assertIn('Copying file', stderr)
|
|
if self._use_gcloud_storage:
|
|
self.assertIn('Skipping symlink', stderr)
|
|
self.assertIn('URL matched no objects or files: %s' % fpath2, stderr)
|
|
else:
|
|
self.assertIn('Skipping symbolic link', stderr)
|
|
self.assertIn('CommandException: No URLs matched: %s' % fpath2, stderr)
|
|
|
|
def test_cp_multithreaded_wildcard(self):
|
|
"""Tests that cp -m works with a wildcard."""
|
|
num_test_files = 5
|
|
tmp_dir = self.CreateTempDir(test_files=num_test_files)
|
|
bucket_uri = self.CreateBucket()
|
|
wildcard_uri = '%s%s*' % (tmp_dir, os.sep)
|
|
self.RunGsUtil(['-m', 'cp', wildcard_uri, suri(bucket_uri)])
|
|
self.AssertNObjectsInBucket(bucket_uri, num_test_files)
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_duplicate_source_args(self):
|
|
"""Tests that cp -m works when a source argument is provided twice."""
|
|
object_contents = b'edge'
|
|
object_uri = self.CreateObject(object_name='foo', contents=object_contents)
|
|
tmp_dir = self.CreateTempDir()
|
|
self.RunGsUtil(['-m', 'cp', suri(object_uri), suri(object_uri), tmp_dir])
|
|
with open(os.path.join(tmp_dir, 'foo'), 'rb') as in_fp:
|
|
contents = in_fp.read()
|
|
    # Contents should not be duplicated.
|
|
self.assertEqual(contents, object_contents)
|
|
|
|
@SkipForS3('gsutil doesn\'t support S3 customer-supplied encryption keys.')
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_download_encrypted_object(self):
|
|
"""Tests downloading an encrypted object."""
|
|
if self.test_api == ApiSelector.XML:
|
|
return unittest.skip(
|
|
'gsutil does not support encryption with the XML API')
|
|
object_contents = b'bar'
|
|
object_uri = self.CreateObject(object_name='foo',
|
|
contents=object_contents,
|
|
encryption_key=TEST_ENCRYPTION_KEY1)
|
|
fpath = self.CreateTempFile()
|
|
boto_config_for_test = [('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY1)]
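    # With a CSEK (customer-supplied encryption key), the same base64 AES-256
    # key configured as 'encryption_key' must be supplied to read the object
    # back; 'decryption_key1', 'decryption_key2', ... serve as additional
    # candidate keys, which the second case below relies on.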
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
self.RunGsUtil(['cp', suri(object_uri), suri(fpath)])
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), object_contents)
|
|
|
|
# If multiple keys are supplied and one is correct, download should succeed.
|
|
fpath2 = self.CreateTempFile()
|
|
boto_config_for_test2 = [
|
|
('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY3),
|
|
('GSUtil', 'decryption_key1', TEST_ENCRYPTION_KEY2),
|
|
('GSUtil', 'decryption_key2', TEST_ENCRYPTION_KEY1)
|
|
]
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test2):
|
|
self.RunGsUtil(['cp', suri(object_uri), suri(fpath2)])
|
|
with open(fpath2, 'rb') as f:
|
|
self.assertEqual(f.read(), object_contents)
|
|
|
|
@SkipForS3('gsutil doesn\'t support S3 customer-supplied encryption keys.')
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_download_encrypted_object_without_key(self):
|
|
"""Tests downloading an encrypted object without the necessary key."""
|
|
if self.test_api == ApiSelector.XML:
|
|
return unittest.skip(
|
|
'gsutil does not support encryption with the XML API')
|
|
object_contents = b'bar'
|
|
object_uri = self.CreateObject(object_name='foo',
|
|
contents=object_contents,
|
|
encryption_key=TEST_ENCRYPTION_KEY1)
|
|
fpath = self.CreateTempFile()
|
|
|
|
stderr = self.RunGsUtil(
|
|
['cp', suri(object_uri), suri(fpath)],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn(
|
|
'Missing decryption key with SHA256 hash %s' %
|
|
TEST_ENCRYPTION_KEY1_SHA256_B64, stderr)
|
|
|
|
@SkipForS3('gsutil doesn\'t support S3 customer-supplied encryption keys.')
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_upload_encrypted_object(self):
|
|
"""Tests uploading an encrypted object."""
|
|
if self.test_api == ApiSelector.XML:
|
|
return unittest.skip(
|
|
'gsutil does not support encryption with the XML API')
|
|
bucket_uri = self.CreateBucket()
|
|
object_uri = suri(bucket_uri, 'foo')
|
|
file_contents = b'bar'
|
|
fpath = self.CreateTempFile(contents=file_contents, file_name='foo')
|
|
|
|
boto_config_for_test = [('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY1)]
|
|
|
|
# Uploading the object should succeed.
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
self.RunGsUtil(['cp', suri(fpath), suri(bucket_uri)])
|
|
|
|
self.AssertObjectUsesCSEK(object_uri, TEST_ENCRYPTION_KEY1)
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
# Reading the object back should succeed.
|
|
fpath2 = self.CreateTempFile()
|
|
self.RunGsUtil(['cp', suri(bucket_uri, 'foo'), suri(fpath2)])
|
|
with open(fpath2, 'rb') as f:
|
|
self.assertEqual(f.read(), file_contents)
|
|
|
|
@SkipForS3('No resumable upload or encryption support for S3.')
|
|
def test_cp_resumable_upload_encrypted_object_break(self):
|
|
"""Tests that an encrypted upload resumes after a connection break."""
|
|
if self.test_api == ApiSelector.XML:
|
|
return unittest.skip(
|
|
'gsutil does not support encryption with the XML API')
|
|
bucket_uri = self.CreateBucket()
|
|
object_uri_str = suri(bucket_uri, 'foo')
|
|
fpath = self.CreateTempFile(contents=b'a' * self.halt_size)
|
|
boto_config_for_test = [('GSUtil', 'resumable_threshold', str(ONE_KIB)),
|
|
('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY1)]
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(True, 5)))
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file, fpath, object_uri_str
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Artifically halting upload', stderr)
|
|
stderr = self.RunGsUtil(['cp', fpath, object_uri_str], return_stderr=True)
|
|
self.assertIn('Resuming upload', stderr)
|
|
stdout = self.RunGsUtil(['stat', object_uri_str], return_stdout=True)
|
|
with open(fpath, 'rb') as fp:
|
|
self.assertIn(CalculateB64EncodedMd5FromContents(fp), stdout)
|
|
|
|
self.AssertObjectUsesCSEK(object_uri_str, TEST_ENCRYPTION_KEY1)
|
|
|
|
@SkipForS3('No resumable upload or encryption support for S3.')
|
|
def test_cp_resumable_upload_encrypted_object_different_key(self):
|
|
"""Tests that an encrypted upload resume uses original encryption key."""
|
|
if self.test_api == ApiSelector.XML:
|
|
return unittest.skip(
|
|
'gsutil does not support encryption with the XML API')
|
|
bucket_uri = self.CreateBucket()
|
|
object_uri_str = suri(bucket_uri, 'foo')
|
|
file_contents = b'a' * self.halt_size
|
|
fpath = self.CreateTempFile(contents=file_contents)
|
|
boto_config_for_test = [('GSUtil', 'resumable_threshold', str(ONE_KIB)),
|
|
('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY1)]
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(True, 5)))
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file, fpath, object_uri_str
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Artifically halting upload', stderr)
|
|
|
|
# Resume the upload with multiple keys, including the original.
|
|
boto_config_for_test2 = [('GSUtil', 'resumable_threshold', str(ONE_KIB)),
|
|
('GSUtil', 'decryption_key1',
|
|
TEST_ENCRYPTION_KEY2),
|
|
('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY1)]
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test2):
|
|
stderr = self.RunGsUtil(['cp', fpath, object_uri_str], return_stderr=True)
|
|
self.assertIn('Resuming upload', stderr)
|
|
|
|
# Object should have the original key.
|
|
self.AssertObjectUsesCSEK(object_uri_str, TEST_ENCRYPTION_KEY1)
|
|
|
|
@SkipForS3('No resumable upload or encryption support for S3.')
|
|
def test_cp_resumable_upload_encrypted_object_missing_key(self):
|
|
"""Tests that an encrypted upload does not resume without original key."""
|
|
if self.test_api == ApiSelector.XML:
|
|
return unittest.skip(
|
|
'gsutil does not support encryption with the XML API')
|
|
bucket_uri = self.CreateBucket()
|
|
object_uri_str = suri(bucket_uri, 'foo')
|
|
file_contents = b'a' * self.halt_size
|
|
fpath = self.CreateTempFile(contents=file_contents)
|
|
boto_config_for_test = [('GSUtil', 'resumable_threshold', str(ONE_KIB)),
|
|
('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY1)]
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(True, 5)))
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file, fpath, object_uri_str
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Artifically halting upload', stderr)
|
|
|
|
# Resume the upload without the original key.
|
|
boto_config_for_test2 = [('GSUtil', 'resumable_threshold', str(ONE_KIB)),
|
|
('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY2)]
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test2):
|
|
stderr = self.RunGsUtil(['cp', fpath, object_uri_str], return_stderr=True)
|
|
self.assertNotIn('Resuming upload', stderr)
|
|
self.assertIn('does not match current encryption key', stderr)
|
|
self.assertIn('Restarting upload from scratch', stderr)
|
|
|
|
# Object should have the new key.
|
|
self.AssertObjectUsesCSEK(object_uri_str, TEST_ENCRYPTION_KEY2)
|
|
|
|
def _ensure_object_unencrypted(self, object_uri_str):
|
|
"""Strongly consistent check that the object is unencrypted."""
|
|
stdout = self.RunGsUtil(['stat', object_uri_str], return_stdout=True)
|
|
self.assertNotIn('Encryption Key', stdout)
|
|
|
|
@SkipForS3('No resumable upload support for S3.')
|
|
def test_cp_resumable_upload_break(self):
|
|
"""Tests that an upload can be resumed after a connection break."""
|
|
bucket_uri = self.CreateBucket()
|
|
fpath = self.CreateTempFile(contents=b'a' * self.halt_size)
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(True, 5)))
|
|
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file, fpath,
|
|
suri(bucket_uri)
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Artifically halting upload', stderr)
|
|
stderr = self.RunGsUtil(['cp', fpath, suri(bucket_uri)],
|
|
return_stderr=True)
|
|
self.assertIn('Resuming upload', stderr)
|
|
|
|
@SkipForS3('No compressed transport encoding support for S3.')
|
|
@SkipForXML('No compressed transport encoding support for the XML API.')
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_resumable_upload_gzip_encoded_break(self):
|
|
"""Tests that a gzip encoded upload can be resumed."""
|
|
# Setup the bucket and local data. File contents are randomized to prevent
|
|
# them from compressing below the resumable-threshold and failing the test.
|
|
bucket_uri = self.CreateBucket()
|
|
contents = get_random_ascii_chars(size=self.halt_size)
|
|
local_uri = self.CreateTempFile(file_name='test.txt', contents=contents)
|
|
# Configure boto
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(True, 5)))
|
|
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil([
|
|
'-D', 'cp', '-J', '--testcallbackfile', test_callback_file, local_uri,
|
|
suri(bucket_uri)
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
# Ensure the progress logger sees a gzip encoding.
|
|
self.assertIn('send: Using gzip transport encoding for the request.',
|
|
stderr)
|
|
self.assertIn('Artifically halting upload', stderr)
|
|
stderr = self.RunGsUtil(['-D', 'cp', '-J', local_uri,
|
|
suri(bucket_uri)],
|
|
return_stderr=True)
|
|
self.assertIn('Resuming upload', stderr)
|
|
# Ensure the progress logger is still seeing a gzip encoding.
|
|
self.assertIn('send: Using gzip transport encoding for the request.',
|
|
stderr)
|
|
# Ensure the files do not have a stored encoding of gzip and are stored
|
|
# uncompressed.
|
|
temp_uri = self.CreateTempFile()
|
|
remote_uri = suri(bucket_uri, 'test.txt')
|
|
stdout = self.RunGsUtil(['stat', remote_uri], return_stdout=True)
|
|
self.assertNotRegex(stdout, r'Content-Encoding:\s+gzip')
|
|
self.RunGsUtil(['cp', remote_uri, suri(temp_uri)])
|
|
with open(temp_uri, 'rb') as f:
|
|
self.assertEqual(f.read(), contents)
|
|
|
|
@SkipForS3('No resumable upload support for S3.')
|
|
def test_cp_resumable_upload_retry(self):
|
|
"""Tests that a resumable upload completes with one retry."""
|
|
bucket_uri = self.CreateBucket()
|
|
fpath = self.CreateTempFile(contents=b'a' * self.halt_size)
|
|
# TODO: Raising an httplib or socket error blocks bucket teardown
|
|
# in JSON for 60-120s on a multiprocessing lock acquire. Figure out why;
|
|
# until then, raise an apitools retryable exception.
|
|
if self.test_api == ApiSelector.XML:
|
|
test_callback_file = self.CreateTempFile(contents=pickle.dumps(
|
|
_ResumableUploadRetryHandler(5, http_client.BadStatusLine, (
|
|
'unused',))))
|
|
else:
|
|
test_callback_file = self.CreateTempFile(contents=pickle.dumps(
|
|
_ResumableUploadRetryHandler(
|
|
5, apitools_exceptions.BadStatusCodeError, ('unused', 'unused',
|
|
'unused'))))
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil([
|
|
'-D', 'cp', '--testcallbackfile', test_callback_file, fpath,
|
|
suri(bucket_uri)
|
|
],
|
|
return_stderr=1)
|
|
if self.test_api == ApiSelector.XML:
|
|
self.assertIn('Got retryable failure', stderr)
|
|
else:
|
|
self.assertIn('Retrying', stderr)
|
|
|
|
@SkipForS3('No resumable upload support for S3.')
|
|
def test_cp_resumable_streaming_upload_retry(self):
|
|
"""Tests that a streaming resumable upload completes with one retry."""
|
|
if self.test_api == ApiSelector.XML:
|
|
return unittest.skip('XML does not support resumable streaming uploads.')
|
|
bucket_uri = self.CreateBucket()
|
|
|
|
test_callback_file = self.CreateTempFile(contents=pickle.dumps(
|
|
_ResumableUploadRetryHandler(5, apitools_exceptions.BadStatusCodeError,
|
|
('unused', 'unused', 'unused'))))
|
|
# Need to reduce the JSON chunk size since streaming uploads buffer a
|
|
# full chunk.
|
|
boto_configs_for_test = [('GSUtil', 'json_resumable_chunk_size',
|
|
str(256 * ONE_KIB)), ('Boto', 'num_retries', '2')]
|
|
with SetBotoConfigForTest(boto_configs_for_test):
|
|
stderr = self.RunGsUtil([
|
|
'-D', 'cp', '--testcallbackfile', test_callback_file, '-',
|
|
suri(bucket_uri, 'foo')
|
|
],
|
|
stdin='a' * 512 * ONE_KIB,
|
|
return_stderr=1)
|
|
self.assertIn('Retrying', stderr)
|
|
|
|
@SkipForS3('preserve_acl flag not supported for S3.')
|
|
def test_cp_preserve_no_owner(self):
|
|
bucket_uri = self.CreateBucket()
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri, contents=b'foo')
|
|
# Anonymous user can read the object and write to the bucket, but does
|
|
# not own the object.
|
|
self.RunGsUtil(['acl', 'ch', '-u', 'AllUsers:R', suri(object_uri)])
|
|
self.RunGsUtil(['acl', 'ch', '-u', 'AllUsers:W', suri(bucket_uri)])
|
|
with self.SetAnonymousBotoCreds():
|
|
stderr = self.RunGsUtil(
|
|
['cp', '-p', suri(object_uri),
|
|
suri(bucket_uri, 'foo')],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
self.assertIn('OWNER permission is required for preserving ACLs', stderr)
|
|
|
|
@SkipForS3('No resumable upload support for S3.')
|
|
def test_cp_progress_callbacks(self):
|
|
bucket_uri = self.CreateBucket()
|
|
final_size_string = BytesToFixedWidthString(1024**2)
|
|
final_progress_callback = final_size_string + '/' + final_size_string
|
|
fpath = self.CreateTempFile(contents=b'a' * ONE_MIB, file_name='foo')
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil(['cp', fpath, suri(bucket_uri)],
|
|
return_stderr=True)
|
|
self.assertEqual(1, stderr.count(final_progress_callback))
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(2 * ONE_MIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil(['cp', fpath, suri(bucket_uri)],
|
|
return_stderr=True)
|
|
self.assertEqual(1, stderr.count(final_progress_callback))
|
|
stderr = self.RunGsUtil(['cp', suri(bucket_uri, 'foo'), fpath],
|
|
return_stderr=True)
|
|
self.assertEqual(1, stderr.count(final_progress_callback))
|
|
|
|
@SkipForS3('No resumable upload support for S3.')
|
|
def test_cp_resumable_upload(self):
|
|
"""Tests that a basic resumable upload completes successfully."""
|
|
bucket_uri = self.CreateBucket()
|
|
fpath = self.CreateTempFile(contents=b'a' * self.halt_size)
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
self.RunGsUtil(['cp', fpath, suri(bucket_uri)])
|
|
|
|
@SkipForS3('No resumable upload support for S3.')
|
|
def test_resumable_upload_break_leaves_tracker(self):
|
|
"""Tests that a tracker file is created with a resumable upload."""
|
|
bucket_uri = self.CreateBucket()
|
|
fpath = self.CreateTempFile(file_name='foo', contents=b'a' * self.halt_size)
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
tracker_filename = GetTrackerFilePath(
|
|
StorageUrlFromString(suri(bucket_uri, 'foo')), TrackerFileType.UPLOAD,
|
|
self.test_api)
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(True, 5)))
|
|
try:
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file, fpath,
|
|
suri(bucket_uri, 'foo')
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Artifically halting upload', stderr)
|
|
self.assertTrue(os.path.exists(tracker_filename),
|
|
'Tracker file %s not present.' % tracker_filename)
|
|
# Test the permissions
|
|
if os.name == 'posix':
|
|
mode = oct(stat.S_IMODE(os.stat(tracker_filename).st_mode))
|
|
# Assert that only user has read/write permission
|
|
self.assertEqual(oct(0o600), mode)
|
|
finally:
|
|
DeleteTrackerFile(tracker_filename)
|
|
|
|
@SkipForS3('No resumable upload support for S3.')
|
|
def test_cp_resumable_upload_break_file_size_change(self):
|
|
"""Tests a resumable upload where the uploaded file changes size.
|
|
|
|
This should fail when we read the tracker data.
|
|
"""
|
|
bucket_uri = self.CreateBucket()
|
|
tmp_dir = self.CreateTempDir()
|
|
fpath = self.CreateTempFile(file_name='foo',
|
|
tmpdir=tmp_dir,
|
|
contents=b'a' * self.halt_size)
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(True, 5)))
|
|
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file, fpath,
|
|
suri(bucket_uri)
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Artifically halting upload', stderr)
|
|
fpath = self.CreateTempFile(file_name='foo',
|
|
tmpdir=tmp_dir,
|
|
contents=b'a' * self.halt_size * 2)
|
|
stderr = self.RunGsUtil(['cp', fpath, suri(bucket_uri)],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('ResumableUploadAbortException', stderr)
|
|
|
|
@SkipForS3('No resumable upload support for S3.')
|
|
def test_cp_resumable_upload_break_file_content_change(self):
|
|
"""Tests a resumable upload where the uploaded file changes content."""
|
|
if self.test_api == ApiSelector.XML:
|
|
return unittest.skip(
|
|
'XML doesn\'t make separate HTTP calls at fixed-size boundaries for '
|
|
'resumable uploads, so we can\'t guarantee that the server saves a '
|
|
'specific part of the upload.')
|
|
bucket_uri = self.CreateBucket()
|
|
tmp_dir = self.CreateTempDir()
|
|
fpath = self.CreateTempFile(file_name='foo',
|
|
tmpdir=tmp_dir,
|
|
contents=b'a' * ONE_KIB * ONE_KIB)
|
|
test_callback_file = self.CreateTempFile(contents=pickle.dumps(
|
|
HaltingCopyCallbackHandler(True,
|
|
int(ONE_KIB) * 512)))
|
|
resumable_threshold_for_test = ('GSUtil', 'resumable_threshold',
|
|
str(ONE_KIB))
|
|
resumable_chunk_size_for_test = ('GSUtil', 'json_resumable_chunk_size',
|
|
str(ONE_KIB * 256))
|
|
with SetBotoConfigForTest(
|
|
[resumable_threshold_for_test, resumable_chunk_size_for_test]):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file, fpath,
|
|
suri(bucket_uri)
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Artifically halting upload', stderr)
|
|
fpath = self.CreateTempFile(file_name='foo',
|
|
tmpdir=tmp_dir,
|
|
contents=b'b' * ONE_KIB * ONE_KIB)
|
|
stderr = self.RunGsUtil(['cp', fpath, suri(bucket_uri)],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('doesn\'t match cloud-supplied digest', stderr)
|
|
|
|
@SkipForS3('No resumable upload support for S3.')
|
|
def test_cp_resumable_upload_break_file_smaller_size(self):
|
|
"""Tests a resumable upload where the uploaded file changes content.
|
|
|
|
This should fail hash validation.
|
|
"""
|
|
bucket_uri = self.CreateBucket()
|
|
tmp_dir = self.CreateTempDir()
|
|
fpath = self.CreateTempFile(file_name='foo',
|
|
tmpdir=tmp_dir,
|
|
contents=b'a' * ONE_KIB * ONE_KIB)
|
|
test_callback_file = self.CreateTempFile(contents=pickle.dumps(
|
|
HaltingCopyCallbackHandler(True,
|
|
int(ONE_KIB) * 512)))
|
|
resumable_threshold_for_test = ('GSUtil', 'resumable_threshold',
|
|
str(ONE_KIB))
|
|
resumable_chunk_size_for_test = ('GSUtil', 'json_resumable_chunk_size',
|
|
str(ONE_KIB * 256))
|
|
with SetBotoConfigForTest(
|
|
[resumable_threshold_for_test, resumable_chunk_size_for_test]):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file, fpath,
|
|
suri(bucket_uri)
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Artifically halting upload', stderr)
|
|
fpath = self.CreateTempFile(file_name='foo',
|
|
tmpdir=tmp_dir,
|
|
contents=b'a' * ONE_KIB)
|
|
stderr = self.RunGsUtil(['cp', fpath, suri(bucket_uri)],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('ResumableUploadAbortException', stderr)
|
|
|
|
@SkipForS3('No resumable upload support for S3.')
|
|
def test_cp_composite_encrypted_upload_resume(self):
|
|
"""Tests that an encrypted composite upload resumes successfully."""
|
|
if self.test_api == ApiSelector.XML:
|
|
return unittest.skip(
|
|
'gsutil does not support encryption with the XML API')
|
|
bucket_uri = self.CreateBucket()
|
|
dst_url = StorageUrlFromString(suri(bucket_uri, 'foo'))
|
|
|
|
file_contents = b'foobar'
|
|
file_name = 'foobar'
|
|
source_file = self.CreateTempFile(contents=file_contents,
|
|
file_name=file_name)
|
|
src_url = StorageUrlFromString(source_file)
|
|
|
|
# Simulate an upload that had occurred by writing a tracker file
|
|
# that points to a previously uploaded component.
|
|
tracker_file_name = GetTrackerFilePath(dst_url,
|
|
TrackerFileType.PARALLEL_UPLOAD,
|
|
self.test_api, src_url)
|
|
tracker_prefix = '123'
|
|
|
|
# Create component 0 to be used in the resume; it must match the name
|
|
# that will be generated in copy_helper, so we use the same scheme.
|
|
encoded_name = (PARALLEL_UPLOAD_STATIC_SALT + source_file).encode(UTF8)
|
|
content_md5 = GetMd5()
|
|
content_md5.update(encoded_name)
|
|
digest = content_md5.hexdigest()
|
|
component_object_name = (tracker_prefix + PARALLEL_UPLOAD_TEMP_NAMESPACE +
|
|
digest + '_0')
|
|
|
|
component_size = 3
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name=component_object_name,
|
|
contents=file_contents[:component_size],
|
|
encryption_key=TEST_ENCRYPTION_KEY1)
|
|
existing_component = ObjectFromTracker(component_object_name,
|
|
str(object_uri.generation))
|
|
existing_components = [existing_component]
|
|
enc_key_sha256 = TEST_ENCRYPTION_KEY1_SHA256_B64
|
|
|
|
WriteParallelUploadTrackerFile(tracker_file_name,
|
|
tracker_prefix,
|
|
existing_components,
|
|
encryption_key_sha256=enc_key_sha256)
|
|
|
|
try:
|
|
# Now "resume" the upload using the original encryption key.
|
|
with SetBotoConfigForTest([
|
|
('GSUtil', 'parallel_composite_upload_threshold', '1'),
|
|
('GSUtil', 'parallel_composite_upload_component_size',
|
|
str(component_size)),
|
|
('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY1)
|
|
]):
|
|
stderr = self.RunGsUtil(
|
|
['cp', source_file, suri(bucket_uri, 'foo')], return_stderr=True)
|
|
self.assertIn('Found 1 existing temporary components to reuse.', stderr)
|
|
self.assertFalse(
|
|
os.path.exists(tracker_file_name),
|
|
'Tracker file %s should have been deleted.' % tracker_file_name)
|
|
read_contents = self.RunGsUtil(['cat', suri(bucket_uri, 'foo')],
|
|
return_stdout=True)
|
|
self.assertEqual(read_contents.encode('ascii'), file_contents)
|
|
finally:
|
|
# Clean up if something went wrong.
|
|
DeleteTrackerFile(tracker_file_name)
|
|
|
|
@SkipForS3('No resumable upload support for S3.')
|
|
def test_cp_composite_encrypted_upload_restart(self):
|
|
"""Tests that encrypted composite upload restarts given a different key."""
|
|
if self.test_api == ApiSelector.XML:
|
|
return unittest.skip(
|
|
'gsutil does not support encryption with the XML API')
|
|
bucket_uri = self.CreateBucket()
|
|
dst_url = StorageUrlFromString(suri(bucket_uri, 'foo'))
|
|
|
|
file_contents = b'foobar'
|
|
source_file = self.CreateTempFile(contents=file_contents, file_name='foo')
|
|
src_url = StorageUrlFromString(source_file)
|
|
|
|
# Simulate an upload that had occurred by writing a tracker file.
|
|
tracker_file_name = GetTrackerFilePath(dst_url,
|
|
TrackerFileType.PARALLEL_UPLOAD,
|
|
self.test_api, src_url)
|
|
tracker_prefix = '123'
|
|
existing_component_name = 'foo_1'
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo_1',
|
|
contents=b'foo',
|
|
encryption_key=TEST_ENCRYPTION_KEY1)
|
|
existing_component = ObjectFromTracker(existing_component_name,
|
|
str(object_uri.generation))
|
|
existing_components = [existing_component]
|
|
enc_key_sha256 = TEST_ENCRYPTION_KEY1_SHA256_B64
|
|
WriteParallelUploadTrackerFile(tracker_file_name, tracker_prefix,
|
|
existing_components,
|
|
enc_key_sha256.decode('ascii'))
|
|
|
|
try:
|
|
# Now "resume" the upload using the original encryption key.
|
|
with SetBotoConfigForTest([
|
|
('GSUtil', 'parallel_composite_upload_threshold', '1'),
|
|
('GSUtil', 'parallel_composite_upload_component_size', '3'),
|
|
('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY2)
|
|
]):
|
|
stderr = self.RunGsUtil(
|
|
['cp', source_file, suri(bucket_uri, 'foo')], return_stderr=True)
|
|
self.assertIn(
|
|
'does not match current encryption key. '
|
|
'Deleting old components and restarting upload', stderr)
|
|
self.assertNotIn('existing temporary components to reuse.', stderr)
|
|
self.assertFalse(
|
|
os.path.exists(tracker_file_name),
|
|
'Tracker file %s should have been deleted.' % tracker_file_name)
|
|
read_contents = self.RunGsUtil(['cat', suri(bucket_uri, 'foo')],
|
|
return_stdout=True)
|
|
self.assertEqual(read_contents.encode('ascii'), file_contents)
|
|
finally:
|
|
# Clean up if something went wrong.
|
|
DeleteTrackerFile(tracker_file_name)
|
|
|
|
@SkipForS3('Test uses gs-specific KMS encryption')
|
|
def test_kms_key_correctly_applied_to_composite_upload(self):
|
|
bucket_uri = self.CreateBucket()
|
|
fpath = self.CreateTempFile(contents=b'abcd')
|
|
obj_suri = suri(bucket_uri, 'composed')
|
|
key_fqn = AuthorizeProjectToUseTestingKmsKey()
|
|
|
|
with SetBotoConfigForTest([
|
|
('GSUtil', 'encryption_key', key_fqn),
|
|
('GSUtil', 'parallel_composite_upload_threshold', '1'),
|
|
('GSUtil', 'parallel_composite_upload_component_size', '1')
|
|
]):
|
|
self.RunGsUtil(['cp', fpath, obj_suri])
|
|
|
|
with SetBotoConfigForTest([('GSUtil', 'prefer_api', 'json')]):
|
|
self.AssertObjectUsesCMEK(obj_suri, key_fqn)
|
|
|
|
@SkipForS3('No composite upload support for S3.')
|
|
def test_nearline_applied_to_parallel_composite_upload(self):
|
|
bucket_uri = self.CreateBucket(storage_class='standard')
|
|
fpath = self.CreateTempFile(contents=b'abcd')
|
|
obj_suri = suri(bucket_uri, 'composed')
|
|
|
|
with SetBotoConfigForTest([
|
|
('GSUtil', 'parallel_composite_upload_threshold', '1'),
|
|
('GSUtil', 'parallel_composite_upload_component_size', '1')
|
|
]):
|
|
self.RunGsUtil(['cp', '-s', 'nearline', fpath, obj_suri])
|
|
stdout = self.RunGsUtil(['ls', '-L', obj_suri], return_stdout=True)
|
|
self.assertRegexpMatchesWithFlags(stdout,
|
|
r'Storage class: NEARLINE',
|
|
flags=re.IGNORECASE)
|
|
|
|
# This temporarily changes the tracker directory to unwritable which
|
|
# interferes with any parallel running tests that use the tracker directory.
|
|
@NotParallelizable
|
|
@SkipForS3('No resumable upload support for S3.')
|
|
@unittest.skipIf(IS_WINDOWS, 'chmod on dir unsupported on Windows.')
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_unwritable_tracker_file(self):
|
|
"""Tests a resumable upload with an unwritable tracker file."""
|
|
bucket_uri = self.CreateBucket()
|
|
tracker_filename = GetTrackerFilePath(
|
|
StorageUrlFromString(suri(bucket_uri, 'foo')), TrackerFileType.UPLOAD,
|
|
self.test_api)
|
|
tracker_dir = os.path.dirname(tracker_filename)
|
|
fpath = self.CreateTempFile(file_name='foo', contents=b'a' * ONE_KIB)
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
save_mod = os.stat(tracker_dir).st_mode
|
|
|
|
try:
|
|
os.chmod(tracker_dir, 0)
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil(['cp', fpath, suri(bucket_uri)],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Couldn\'t write tracker file', stderr)
|
|
finally:
|
|
os.chmod(tracker_dir, save_mod)
|
|
if os.path.exists(tracker_filename):
|
|
os.unlink(tracker_filename)
|
|
|
|
# This temporarily changes the tracker directory to unwritable which
|
|
# interferes with any parallel running tests that use the tracker directory.
|
|
@NotParallelizable
|
|
@unittest.skipIf(IS_WINDOWS, 'chmod on dir unsupported on Windows.')
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_unwritable_tracker_file_download(self):
|
|
"""Tests downloads with an unwritable tracker file."""
|
|
object_uri = self.CreateObject(contents=b'foo' * ONE_KIB)
|
|
tracker_filename = GetTrackerFilePath(
|
|
StorageUrlFromString(suri(object_uri)), TrackerFileType.DOWNLOAD,
|
|
self.test_api)
|
|
tracker_dir = os.path.dirname(tracker_filename)
|
|
fpath = self.CreateTempFile()
|
|
save_mod = os.stat(tracker_dir).st_mode
|
|
|
|
try:
|
|
os.chmod(tracker_dir, 0)
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(EIGHT_MIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
# Should succeed because we are below the threshold.
|
|
self.RunGsUtil(['cp', suri(object_uri), fpath])
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Couldn\'t write tracker file', stderr)
|
|
finally:
|
|
os.chmod(tracker_dir, save_mod)
|
|
if os.path.exists(tracker_filename):
|
|
os.unlink(tracker_filename)
|
|
|
|
def _test_cp_resumable_download_break_helper(self,
|
|
boto_config,
|
|
encryption_key=None):
|
|
"""Helper function for different modes of resumable download break.
|
|
|
|
Args:
|
|
boto_config: List of boto configuration tuples for use with
|
|
SetBotoConfigForTest.
|
|
encryption_key: Base64 encryption key for object encryption (if any).
|
|
"""
|
|
bucket_uri = self.CreateBucket()
|
|
file_contents = b'a' * self.halt_size
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=file_contents,
|
|
encryption_key=encryption_key)
|
|
fpath = self.CreateTempFile()
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(False, 5)))
|
|
|
|
with SetBotoConfigForTest(boto_config):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file,
|
|
suri(object_uri), fpath
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Artifically halting download.', stderr)
|
|
tracker_filename = GetTrackerFilePath(StorageUrlFromString(fpath),
|
|
TrackerFileType.DOWNLOAD,
|
|
self.test_api)
|
|
self.assertTrue(os.path.isfile(tracker_filename))
|
|
stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
|
|
return_stderr=True)
|
|
self.assertIn('Resuming download', stderr)
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), file_contents, 'File contents differ')
|
|
|
|
def test_cp_resumable_download_break(self):
|
|
"""Tests that a download can be resumed after a connection break."""
|
|
self._test_cp_resumable_download_break_helper([
|
|
('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
])
|
|
|
|
@SkipForS3('gsutil doesn\'t support S3 customer-supplied encryption keys.')
|
|
def test_cp_resumable_encrypted_download_break(self):
|
|
"""Tests that an encrypted download resumes after a connection break."""
|
|
if self.test_api == ApiSelector.XML:
|
|
return unittest.skip(
|
|
'gsutil does not support encryption with the XML API')
|
|
self._test_cp_resumable_download_break_helper(
|
|
[('GSUtil', 'resumable_threshold', str(ONE_KIB)),
|
|
('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY1)],
|
|
encryption_key=TEST_ENCRYPTION_KEY1)
|
|
|
|
@SkipForS3('gsutil doesn\'t support S3 customer-supplied encryption keys.')
|
|
def test_cp_resumable_encrypted_download_key_rotation(self):
|
|
"""Tests that a download restarts with a rotated encryption key."""
|
|
if self.test_api == ApiSelector.XML:
|
|
return unittest.skip(
|
|
'gsutil does not support encryption with the XML API')
|
|
bucket_uri = self.CreateBucket()
|
|
file_contents = b'a' * self.halt_size
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=file_contents,
|
|
encryption_key=TEST_ENCRYPTION_KEY1)
|
|
fpath = self.CreateTempFile()
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(False, 5)))
|
|
|
|
boto_config_for_test = [('GSUtil', 'resumable_threshold', str(ONE_KIB)),
|
|
('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY1)]
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file,
|
|
suri(object_uri), fpath
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Artifically halting download.', stderr)
|
|
tracker_filename = GetTrackerFilePath(StorageUrlFromString(fpath),
|
|
TrackerFileType.DOWNLOAD,
|
|
self.test_api)
|
|
self.assertTrue(os.path.isfile(tracker_filename))
|
|
|
|
# After simulated connection break, rotate the key on the object.
|
|
boto_config_for_test2 = [('GSUtil', 'resumable_threshold', str(ONE_KIB)),
|
|
('GSUtil', 'decryption_key1',
|
|
TEST_ENCRYPTION_KEY1),
|
|
('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY2)]
|
|
with SetBotoConfigForTest(boto_config_for_test2):
|
|
self.RunGsUtil(['rewrite', '-k', suri(object_uri)])
|
|
|
|
# Now resume the download using only the new encryption key. Since its
|
|
# generation changed, we must restart it.
|
|
boto_config_for_test3 = [('GSUtil', 'resumable_threshold', str(ONE_KIB)),
|
|
('GSUtil', 'encryption_key', TEST_ENCRYPTION_KEY2)]
|
|
with SetBotoConfigForTest(boto_config_for_test3):
|
|
stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
|
|
return_stderr=True)
|
|
self.assertIn('Restarting download', stderr)
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), file_contents, 'File contents differ')
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_resumable_download_etag_differs(self):
|
|
"""Tests that download restarts the file when the source object changes.
|
|
|
|
This causes the etag not to match.
|
|
"""
|
|
bucket_uri = self.CreateBucket()
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=b'abc' * self.halt_size)
|
|
fpath = self.CreateTempFile()
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(False, 5)))
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
# This will create a tracker file with an ETag.
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file,
|
|
suri(object_uri), fpath
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Artifically halting download.', stderr)
|
|
# Create a new object with different contents - it should have a
|
|
# different ETag since the content has changed.
|
|
object_uri = self.CreateObject(
|
|
bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=b'b' * self.halt_size,
|
|
gs_idempotent_generation=object_uri.generation)
|
|
stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
|
|
return_stderr=True)
|
|
self.assertNotIn('Resuming download', stderr)
|
|
|
|
# TODO: Enable this test for sequential downloads when their tracker files are
|
|
# modified to contain the source object generation.
|
|
@unittest.skipUnless(UsingCrcmodExtension(),
|
|
'Sliced download requires fast crcmod.')
|
|
@SkipForS3('No sliced download support for S3.')
|
|
def test_cp_resumable_download_generation_differs(self):
|
|
"""Tests that a resumable download restarts if the generation differs."""
|
|
bucket_uri = self.CreateBucket()
|
|
file_contents = b'abcd' * self.halt_size
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=file_contents)
|
|
fpath = self.CreateTempFile()
|
|
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(False, 5)))
|
|
|
|
boto_config_for_test = [
|
|
('GSUtil', 'resumable_threshold', str(self.halt_size)),
|
|
('GSUtil', 'sliced_object_download_threshold', str(self.halt_size)),
|
|
('GSUtil', 'sliced_object_download_max_components', '3')
|
|
]
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file,
|
|
suri(object_uri),
|
|
suri(fpath)
|
|
],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
self.assertIn('Artifically halting download.', stderr)
|
|
|
|
# Overwrite the object with an identical object, increasing
|
|
# the generation but leaving other metadata the same.
|
|
identical_file = self.CreateTempFile(contents=file_contents)
|
|
self.RunGsUtil(['cp', suri(identical_file), suri(object_uri)])
|
|
|
|
stderr = self.RunGsUtil(
|
|
['cp', suri(object_uri), suri(fpath)], return_stderr=True)
|
|
self.assertIn('Restarting download from scratch', stderr)
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), file_contents, 'File contents differ')
|
|
|
|
def test_cp_resumable_download_file_larger(self):
|
|
"""Tests download deletes the tracker file when existing file is larger."""
|
|
bucket_uri = self.CreateBucket()
|
|
fpath = self.CreateTempFile()
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=b'a' * self.halt_size)
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(False, 5)))
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file,
|
|
suri(object_uri), fpath
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Artifically halting download.', stderr)
|
|
with open(fpath + '_.gstmp', 'w') as larger_file:
|
|
for _ in range(self.halt_size * 2):
|
|
larger_file.write('a')
|
|
stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertNotIn('Resuming download', stderr)
|
|
self.assertIn('Deleting tracker file', stderr)
|
|
|
|
def test_cp_resumable_download_content_differs(self):
|
|
"""Tests that we do not re-download when tracker file matches existing file.
|
|
|
|
We only compare size, not contents, so re-download should not occur even
|
|
though the contents are technically different. However, hash validation on
|
|
the file should still occur and we will delete the file then because
|
|
the hashes differ.
|
|
"""
|
|
bucket_uri = self.CreateBucket()
|
|
tmp_dir = self.CreateTempDir()
|
|
fpath = self.CreateTempFile(tmpdir=tmp_dir)
|
|
temp_download_file = fpath + '_.gstmp'
|
|
with open(temp_download_file, 'w') as fp:
|
|
fp.write('abcd' * ONE_KIB)
|
|
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=b'efgh' * ONE_KIB)
|
|
stdout = self.RunGsUtil(['ls', '-L', suri(object_uri)], return_stdout=True)
|
|
etag_match = re.search(r'\s*ETag:\s*(.*)', stdout)
|
|
self.assertIsNotNone(etag_match, 'Could not get object ETag')
|
|
self.assertEqual(len(etag_match.groups()), 1,
|
|
'Did not match expected single ETag')
|
|
etag = etag_match.group(1)
|
|
|
|
tracker_filename = GetTrackerFilePath(StorageUrlFromString(fpath),
|
|
TrackerFileType.DOWNLOAD,
|
|
self.test_api)
|
|
try:
|
|
with open(tracker_filename, 'w') as tracker_fp:
|
|
tracker_fp.write(etag)
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
self.assertIn('Download already complete', stderr)
|
|
self.assertIn('doesn\'t match cloud-supplied digest', stderr)
|
|
# File and tracker file should be deleted.
|
|
self.assertFalse(os.path.isfile(temp_download_file))
|
|
self.assertFalse(os.path.isfile(tracker_filename))
|
|
# Permanent file should not have been created.
|
|
self.assertFalse(os.path.isfile(fpath))
|
|
finally:
|
|
if os.path.exists(tracker_filename):
|
|
os.unlink(tracker_filename)
|
|
|
|
def test_cp_resumable_download_content_matches(self):
|
|
"""Tests download no-ops when tracker file matches existing file."""
|
|
bucket_uri = self.CreateBucket()
|
|
tmp_dir = self.CreateTempDir()
|
|
fpath = self.CreateTempFile(tmpdir=tmp_dir)
|
|
matching_contents = b'abcd' * ONE_KIB
|
|
temp_download_file = fpath + '_.gstmp'
|
|
with open(temp_download_file, 'wb') as fp:
|
|
fp.write(matching_contents)
|
|
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=matching_contents)
|
|
stdout = self.RunGsUtil(['ls', '-L', suri(object_uri)], return_stdout=True)
|
|
etag_match = re.search(r'\s*ETag:\s*(.*)', stdout)
|
|
self.assertIsNotNone(etag_match, 'Could not get object ETag')
|
|
self.assertEqual(len(etag_match.groups()), 1,
|
|
'Did not match expected single ETag')
|
|
etag = etag_match.group(1)
|
|
tracker_filename = GetTrackerFilePath(StorageUrlFromString(fpath),
|
|
TrackerFileType.DOWNLOAD,
|
|
self.test_api)
|
|
with open(tracker_filename, 'w') as tracker_fp:
|
|
tracker_fp.write(etag)
|
|
try:
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
|
|
return_stderr=True)
|
|
self.assertIn('Download already complete', stderr)
|
|
# Tracker file should be removed after successful hash validation.
|
|
self.assertFalse(os.path.isfile(tracker_filename))
|
|
finally:
|
|
if os.path.exists(tracker_filename):
|
|
os.unlink(tracker_filename)
|
|
|
|
def test_cp_resumable_download_tracker_file_not_matches(self):
|
|
"""Tests that download overwrites when tracker file etag does not match."""
|
|
bucket_uri = self.CreateBucket()
|
|
tmp_dir = self.CreateTempDir()
|
|
fpath = self.CreateTempFile(tmpdir=tmp_dir, contents=b'abcd' * ONE_KIB)
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=b'efgh' * ONE_KIB)
|
|
stdout = self.RunGsUtil(['ls', '-L', suri(object_uri)], return_stdout=True)
|
|
etag_match = re.search(r'\s*ETag:\s*(.*)', stdout)
|
|
self.assertIsNotNone(etag_match, 'Could not get object ETag')
|
|
self.assertEqual(len(etag_match.groups()), 1,
|
|
'Did not match regex for exactly one object ETag')
|
|
etag = etag_match.group(1)
|
|
etag += 'nonmatching'
|
|
tracker_filename = GetTrackerFilePath(StorageUrlFromString(fpath),
|
|
TrackerFileType.DOWNLOAD,
|
|
self.test_api)
|
|
with open(tracker_filename, 'w') as tracker_fp:
|
|
tracker_fp.write(etag)
|
|
try:
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
|
|
return_stderr=True)
|
|
self.assertNotIn('Resuming download', stderr)
|
|
# Ensure the file was overwritten.
|
|
with open(fpath, 'r') as in_fp:
|
|
contents = in_fp.read()
|
|
self.assertEqual(
|
|
contents, 'efgh' * ONE_KIB,
|
|
'File not overwritten when it should have been '
|
|
'due to a non-matching tracker file.')
|
|
self.assertFalse(os.path.isfile(tracker_filename))
|
|
finally:
|
|
if os.path.exists(tracker_filename):
|
|
os.unlink(tracker_filename)
|
|
|
|
def test_cp_double_gzip(self):
|
|
"""Tests that upload and download of a doubly-gzipped file succeeds."""
|
|
bucket_uri = self.CreateBucket()
|
|
fpath = self.CreateTempFile(file_name='looks-zipped.gz', contents=b'foo')
|
|
self.RunGsUtil([
|
|
'-h', 'content-type:application/gzip', 'cp', '-Z',
|
|
suri(fpath),
|
|
suri(bucket_uri, 'foo')
|
|
])
|
|
self.RunGsUtil(['cp', suri(bucket_uri, 'foo'), fpath])
|
|
|
|
@SkipForS3('No compressed transport encoding support for S3.')
|
|
@SkipForXML('No compressed transport encoding support for the XML API.')
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_double_gzip_transport_encoded(self):
|
|
"""Tests that upload and download of a doubly-gzipped file succeeds."""
|
|
bucket_uri = self.CreateBucket()
|
|
fpath = self.CreateTempFile(file_name='looks-zipped.gz', contents=b'foo')
|
|
stderr = self.RunGsUtil([
|
|
'-DD', '-h', 'content-type:application/gzip', 'cp', '-J',
|
|
suri(fpath),
|
|
suri(bucket_uri, 'foo')
|
|
],
|
|
return_stderr=True)
|
|
if self._use_gcloud_storage:
|
|
self.assertIn("b\'Content-Encoding\': b\'gzip\'", stderr)
|
|
self.assertIn('"contentType": "application/gzip"', stderr)
|
|
else:
|
|
self.assertIn("\'Content-Encoding\': \'gzip\'", stderr)
|
|
self.assertIn('contentType: \'application/gzip\'', stderr)
|
|
self.RunGsUtil(['cp', suri(bucket_uri, 'foo'), fpath])
|
|
|
|
@unittest.skipIf(IS_WINDOWS, 'TODO(b/293885158) Timeout on Windows.')
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_resumable_download_gzip(self):
|
|
"""Tests that download can be resumed successfully with a gzipped file."""
|
|
# Generate some reasonably incompressible data. This compresses to a bit
|
|
# around 128K in practice, but we assert specifically below that it is
|
|
# larger than self.halt_size to guarantee that we can halt the download
|
|
# partway through.
|
|
object_uri = self.CreateObject()
|
|
random.seed(0)
|
|
contents = str([
|
|
random.choice(string.ascii_letters) for _ in range(self.halt_size)
|
|
]).encode('ascii')
|
|
random.seed() # Reset the seed for any other tests.
|
|
fpath1 = self.CreateTempFile(file_name='unzipped.txt', contents=contents)
|
|
self.RunGsUtil(['cp', '-z', 'txt', suri(fpath1), suri(object_uri)])
|
|
|
|
# Use @Retry as hedge against bucket listing eventual consistency.
|
|
@Retry(AssertionError, tries=3, timeout_secs=1)
|
|
def _GetObjectSize():
|
|
stdout = self.RunGsUtil(['du', suri(object_uri)], return_stdout=True)
|
|
size_match = re.search(r'(\d+)\s+.*', stdout)
|
|
self.assertIsNotNone(size_match, 'Could not get object size')
|
|
self.assertEqual(len(size_match.groups()), 1,
|
|
'Did not match regex for exactly one object size.')
|
|
return long(size_match.group(1))
|
|
|
|
object_size = _GetObjectSize()
|
|
self.assertGreaterEqual(
|
|
object_size, self.halt_size,
|
|
'Compresed object size was not large enough to '
|
|
'allow for a halted download, so the test results '
|
|
'would be invalid. Please increase the compressed '
|
|
'object size in the test.')
|
|
fpath2 = self.CreateTempFile()
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(False, 5)))
|
|
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file,
|
|
suri(object_uri),
|
|
suri(fpath2)
|
|
],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
self.assertIn('Artifically halting download.', stderr)
|
|
self.assertIn('Downloading to temp gzip filename', stderr)
|
|
|
|
# Tracker files will have different names depending on if we are
|
|
# downloading sequentially or in parallel.
|
|
sliced_download_threshold = HumanReadableToBytes(
|
|
boto.config.get('GSUtil', 'sliced_object_download_threshold',
|
|
DEFAULT_SLICED_OBJECT_DOWNLOAD_THRESHOLD))
|
|
sliced_download = (len(contents) > sliced_download_threshold and
|
|
sliced_download_threshold > 0 and
|
|
UsingCrcmodExtension())
|
|
if sliced_download:
|
|
trackerfile_type = TrackerFileType.SLICED_DOWNLOAD
|
|
else:
|
|
trackerfile_type = TrackerFileType.DOWNLOAD
|
|
tracker_filename = GetTrackerFilePath(StorageUrlFromString(fpath2),
|
|
trackerfile_type, self.test_api)
|
|
|
|
# We should have a temporary gzipped file, a tracker file, and no
|
|
# final file yet.
|
|
self.assertTrue(os.path.isfile(tracker_filename))
|
|
self.assertTrue(os.path.isfile('%s_.gztmp' % fpath2))
|
|
stderr = self.RunGsUtil(
|
|
['cp', suri(object_uri), suri(fpath2)], return_stderr=True)
|
|
self.assertIn('Resuming download', stderr)
|
|
with open(fpath2, 'rb') as f:
|
|
self.assertEqual(f.read(), contents, 'File contents did not match.')
|
|
self.assertFalse(os.path.isfile(tracker_filename))
|
|
self.assertFalse(os.path.isfile('%s_.gztmp' % fpath2))
|
|
|
|
def _GetFaviconFile(self):
|
|
# Make a temp file from favicon.ico.gz. Finding the location of our test
|
|
# data varies depending on how/where gsutil was installed, so we get the
|
|
# data via pkgutil and use this workaround.
|
|
if not hasattr(self, 'test_data_favicon_file'):
|
|
contents = pkgutil.get_data('gslib', 'tests/test_data/favicon.ico.gz')
|
|
self.test_data_favicon_file = self.CreateTempFile(contents=contents)
|
|
return self.test_data_favicon_file
|
|
|
|
def test_cp_download_transfer_encoded(self):
|
|
"""Tests chunked transfer encoded download handling.
|
|
|
|
Tests that download works correctly with a gzipped chunked transfer-encoded
|
|
object (which therefore lacks Content-Length) of a size that gets fetched
|
|
in a single chunk (exercising downloading of objects lacking a length
|
|
response header).
|
|
"""
|
|
# Upload a file / content-encoding / content-type that triggers this flow.
|
|
# Note: We need to use the file with pre-zipped format and manually set the
|
|
# content-encoding and content-type because the Python gzip module (used by
|
|
# gsutil cp -Z) won't reproduce the bytes that trigger this problem.
|
|
bucket_uri = self.CreateBucket()
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri, object_name='foo')
|
|
input_filename = self._GetFaviconFile()
|
|
self.RunGsUtil([
|
|
'-h', 'Content-Encoding:gzip', '-h', 'Content-Type:image/x-icon', 'cp',
|
|
suri(input_filename),
|
|
suri(object_uri)
|
|
])
|
|
# Compute the MD5 of the uncompressed bytes.
|
|
with gzip.open(input_filename) as fp:
|
|
hash_dict = {'md5': GetMd5()}
|
|
hashing_helper.CalculateHashesFromContents(fp, hash_dict)
|
|
in_file_md5 = hash_dict['md5'].digest()
|
|
|
|
# Downloading this file triggers the flow.
|
|
fpath2 = self.CreateTempFile()
|
|
self.RunGsUtil(['cp', suri(object_uri), suri(fpath2)])
|
|
# Compute MD5 of the downloaded (uncompressed) file, and validate it.
|
|
with open(fpath2, 'rb') as fp:
|
|
hash_dict = {'md5': GetMd5()}
|
|
hashing_helper.CalculateHashesFromContents(fp, hash_dict)
|
|
out_file_md5 = hash_dict['md5'].digest()
|
|
self.assertEqual(in_file_md5, out_file_md5)
|
|
|
|
@SequentialAndParallelTransfer
|
|
def test_cp_resumable_download_check_hashes_never(self):
|
|
"""Tests that resumble downloads work with check_hashes = never."""
|
|
bucket_uri = self.CreateBucket()
|
|
contents = b'abcd' * self.halt_size
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=contents)
|
|
fpath = self.CreateTempFile()
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(False, 5)))
|
|
|
|
boto_config_for_test = [('GSUtil', 'resumable_threshold', str(ONE_KIB)),
|
|
('GSUtil', 'check_hashes', 'never')]
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file,
|
|
suri(object_uri), fpath
|
|
],
|
|
expected_status=1,
|
|
return_stderr=True)
|
|
self.assertIn('Artifically halting download.', stderr)
|
|
stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
|
|
return_stderr=True)
|
|
self.assertIn('Resuming download', stderr)
|
|
self.assertIn('Found no hashes to validate object downloaded', stderr)
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), contents, 'File contents did not match.')
|
|
|
|
@SkipForS3('No resumable upload support for S3.')
|
|
def test_cp_resumable_upload_bucket_deleted(self):
|
|
"""Tests that a not found exception is raised if bucket no longer exists."""
|
|
bucket_uri = self.CreateBucket()
|
|
fpath = self.CreateTempFile(contents=b'a' * 2 * ONE_KIB)
|
|
boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
|
|
test_callback_file = self.CreateTempFile(contents=pickle.dumps(
|
|
_DeleteBucketThenStartOverCopyCallbackHandler(5, bucket_uri)))
|
|
|
|
with SetBotoConfigForTest([boto_config_for_test]):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file, fpath,
|
|
suri(bucket_uri)
|
|
],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
self.assertIn('Deleting bucket', stderr)
|
|
self.assertIn('bucket does not exist', stderr)
|
|
|
|
@SkipForS3('No sliced download support for S3.')
|
|
def test_cp_sliced_download(self):
|
|
"""Tests that sliced object download works in the general case."""
|
|
bucket_uri = self.CreateBucket()
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=b'abc' * ONE_KIB)
|
|
fpath = self.CreateTempFile()
|
|
|
|
# Force fast crcmod to return True to test the basic sliced download
|
|
# scenario, ensuring that if the user installs crcmod, it will work.
|
|
boto_config_for_test = [
|
|
('GSUtil', 'resumable_threshold', str(ONE_KIB)),
|
|
('GSUtil', 'test_assume_fast_crcmod', 'True'),
|
|
('GSUtil', 'sliced_object_download_threshold', str(ONE_KIB)),
|
|
('GSUtil', 'sliced_object_download_max_components', '3')
|
|
]
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
self.RunGsUtil(['cp', suri(object_uri), fpath])
|
|
|
|
# Each tracker file should have been deleted.
|
|
tracker_filenames = GetSlicedDownloadTrackerFilePaths(
|
|
StorageUrlFromString(fpath), self.test_api)
|
|
for tracker_filename in tracker_filenames:
|
|
self.assertFalse(os.path.isfile(tracker_filename))
|
|
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), b'abc' * ONE_KIB, 'File contents differ')
|
|
|
|
@unittest.skipUnless(UsingCrcmodExtension(),
|
|
'Sliced download requires fast crcmod.')
|
|
@SkipForS3('No sliced download support for S3.')
|
|
def test_cp_unresumable_sliced_download(self):
|
|
"""Tests sliced download works when resumability is disabled."""
|
|
bucket_uri = self.CreateBucket()
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=b'abcd' * self.halt_size)
|
|
fpath = self.CreateTempFile()
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(False, 5)))
|
|
|
|
boto_config_for_test = [
|
|
('GSUtil', 'resumable_threshold', str(self.halt_size * 5)),
|
|
('GSUtil', 'sliced_object_download_threshold', str(self.halt_size)),
|
|
('GSUtil', 'sliced_object_download_max_components', '4')
|
|
]
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file,
|
|
suri(object_uri),
|
|
suri(fpath)
|
|
],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
self.assertIn('not downloaded successfully', stderr)
|
|
# Temporary download file should exist.
|
|
self.assertTrue(os.path.isfile(fpath + '_.gstmp'))
|
|
|
|
# No tracker files should exist.
|
|
tracker_filenames = GetSlicedDownloadTrackerFilePaths(
|
|
StorageUrlFromString(fpath), self.test_api)
|
|
for tracker_filename in tracker_filenames:
|
|
self.assertFalse(os.path.isfile(tracker_filename))
|
|
|
|
# Perform the entire download, without resuming.
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
stderr = self.RunGsUtil(
|
|
['cp', suri(object_uri), suri(fpath)], return_stderr=True)
|
|
self.assertNotIn('Resuming download', stderr)
|
|
# Temporary download file should have been deleted.
|
|
self.assertFalse(os.path.isfile(fpath + '_.gstmp'))
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), b'abcd' * self.halt_size,
|
|
'File contents differ')
|
|
|
|
@unittest.skipUnless(UsingCrcmodExtension(),
|
|
'Sliced download requires fast crcmod.')
|
|
@SkipForS3('No sliced download support for S3.')
|
|
def test_cp_sliced_download_resume(self):
|
|
"""Tests that sliced object download is resumable."""
|
|
bucket_uri = self.CreateBucket()
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=b'abc' * self.halt_size)
|
|
fpath = self.CreateTempFile()
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltingCopyCallbackHandler(False, 5)))
|
|
|
|
boto_config_for_test = [
|
|
('GSUtil', 'resumable_threshold', str(self.halt_size)),
|
|
('GSUtil', 'sliced_object_download_threshold', str(self.halt_size)),
|
|
('GSUtil', 'sliced_object_download_max_components', '3')
|
|
]
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file,
|
|
suri(object_uri),
|
|
suri(fpath)
|
|
],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
self.assertIn('not downloaded successfully', stderr)
|
|
|
|
# Each tracker file should exist.
|
|
tracker_filenames = GetSlicedDownloadTrackerFilePaths(
|
|
StorageUrlFromString(fpath), self.test_api)
|
|
for tracker_filename in tracker_filenames:
|
|
self.assertTrue(os.path.isfile(tracker_filename))
|
|
|
|
stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
|
|
return_stderr=True)
|
|
self.assertIn('Resuming download', stderr)
|
|
|
|
# Each tracker file should have been deleted.
|
|
tracker_filenames = GetSlicedDownloadTrackerFilePaths(
|
|
StorageUrlFromString(fpath), self.test_api)
|
|
for tracker_filename in tracker_filenames:
|
|
self.assertFalse(os.path.isfile(tracker_filename))
|
|
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), b'abc' * self.halt_size,
|
|
'File contents differ')
|
|
|
|
@unittest.skipUnless(UsingCrcmodExtension(),
|
|
'Sliced download requires fast crcmod.')
|
|
@SkipForS3('No sliced download support for S3.')
|
|
def test_cp_sliced_download_partial_resume(self):
|
|
"""Test sliced download resumability when some components are finished."""
|
|
bucket_uri = self.CreateBucket()
|
|
object_uri = self.CreateObject(bucket_uri=bucket_uri,
|
|
object_name='foo',
|
|
contents=b'abc' * self.halt_size)
|
|
fpath = self.CreateTempFile()
|
|
test_callback_file = self.CreateTempFile(
|
|
contents=pickle.dumps(HaltOneComponentCopyCallbackHandler(5)))
|
|
|
|
boto_config_for_test = [
|
|
('GSUtil', 'resumable_threshold', str(self.halt_size)),
|
|
('GSUtil', 'sliced_object_download_threshold', str(self.halt_size)),
|
|
('GSUtil', 'sliced_object_download_max_components', '3')
|
|
]
|
|
|
|
with SetBotoConfigForTest(boto_config_for_test):
|
|
stderr = self.RunGsUtil([
|
|
'cp', '--testcallbackfile', test_callback_file,
|
|
suri(object_uri),
|
|
suri(fpath)
|
|
],
|
|
return_stderr=True,
|
|
expected_status=1)
|
|
self.assertIn('not downloaded successfully', stderr)
|
|
|
|
# Each tracker file should exist.
|
|
tracker_filenames = GetSlicedDownloadTrackerFilePaths(
|
|
StorageUrlFromString(fpath), self.test_api)
|
|
for tracker_filename in tracker_filenames:
|
|
self.assertTrue(os.path.isfile(tracker_filename))
|
|
|
|
stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
|
|
return_stderr=True)
|
|
self.assertIn('Resuming download', stderr)
|
|
self.assertIn('Download already complete', stderr)
|
|
|
|
# Each tracker file should have been deleted.
|
|
tracker_filenames = GetSlicedDownloadTrackerFilePaths(
|
|
StorageUrlFromString(fpath), self.test_api)
|
|
for tracker_filename in tracker_filenames:
|
|
self.assertFalse(os.path.isfile(tracker_filename))
|
|
|
|
with open(fpath, 'rb') as f:
|
|
self.assertEqual(f.read(), b'abc' * self.halt_size,
|
|
'File contents differ')
|
|
|
|
  @unittest.skipUnless(UsingCrcmodExtension(),
                       'Sliced download requires fast crcmod.')
  @SkipForS3('No sliced download support for S3.')
  def test_cp_sliced_download_resume_content_differs(self):
    """Tests differing file contents are detected by sliced downloads."""
    bucket_uri = self.CreateBucket()
    object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                   object_name='foo',
                                   contents=b'abc' * self.halt_size)
    fpath = self.CreateTempFile(contents=b'')
    test_callback_file = self.CreateTempFile(
        contents=pickle.dumps(HaltingCopyCallbackHandler(False, 5)))

    boto_config_for_test = [
        ('GSUtil', 'resumable_threshold', str(self.halt_size)),
        ('GSUtil', 'sliced_object_download_threshold', str(self.halt_size)),
        ('GSUtil', 'sliced_object_download_max_components', '3')
    ]

    with SetBotoConfigForTest(boto_config_for_test):
      stderr = self.RunGsUtil([
          'cp', '--testcallbackfile', test_callback_file,
          suri(object_uri),
          suri(fpath)
      ],
                              return_stderr=True,
                              expected_status=1)
      self.assertIn('not downloaded successfully', stderr)

      # Temporary download file should exist.
      self.assertTrue(os.path.isfile(fpath + '_.gstmp'))

      # Each tracker file should exist.
      tracker_filenames = GetSlicedDownloadTrackerFilePaths(
          StorageUrlFromString(fpath), self.test_api)
      for tracker_filename in tracker_filenames:
        self.assertTrue(os.path.isfile(tracker_filename))

      with open(fpath + '_.gstmp', 'r+b') as f:
        f.write(b'altered file contents')

      stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
                              return_stderr=True,
                              expected_status=1)
      self.assertIn('Resuming download', stderr)
      self.assertIn('doesn\'t match cloud-supplied digest', stderr)
      self.assertIn('HashMismatchException: crc32c', stderr)

      # Each tracker file should have been deleted.
      tracker_filenames = GetSlicedDownloadTrackerFilePaths(
          StorageUrlFromString(fpath), self.test_api)
      for tracker_filename in tracker_filenames:
        self.assertFalse(os.path.isfile(tracker_filename))

      # Temporary file should have been deleted due to hash mismatch.
      self.assertFalse(os.path.isfile(fpath + '_.gstmp'))
      # Final file should not exist.
      self.assertFalse(os.path.isfile(fpath))

  @unittest.skipUnless(UsingCrcmodExtension(),
                       'Sliced download requires fast crcmod.')
  @SkipForS3('No sliced download support for S3.')
  def test_cp_sliced_download_component_size_changed(self):
    """Tests sliced download doesn't break when the boto config changes.

    If the number of components used changes cross-process, the download should
    be restarted.
    """
    bucket_uri = self.CreateBucket()
    object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                   object_name='foo',
                                   contents=b'abcd' * self.halt_size)
    fpath = self.CreateTempFile()
    test_callback_file = self.CreateTempFile(
        contents=pickle.dumps(HaltingCopyCallbackHandler(False, 5)))

    boto_config_for_test = [
        ('GSUtil', 'resumable_threshold', str(self.halt_size)),
        ('GSUtil', 'sliced_object_download_threshold', str(self.halt_size)),
        ('GSUtil', 'sliced_object_download_component_size',
         str(self.halt_size // 4)),
        ('GSUtil', 'sliced_object_download_max_components', '4')
    ]

    with SetBotoConfigForTest(boto_config_for_test):
      stderr = self.RunGsUtil([
          'cp', '--testcallbackfile', test_callback_file,
          suri(object_uri),
          suri(fpath)
      ],
                              return_stderr=True,
                              expected_status=1)
      self.assertIn('not downloaded successfully', stderr)

    boto_config_for_test = [
        ('GSUtil', 'resumable_threshold', str(self.halt_size)),
        ('GSUtil', 'sliced_object_download_threshold', str(self.halt_size)),
        ('GSUtil', 'sliced_object_download_component_size',
         str(self.halt_size // 2)),
        ('GSUtil', 'sliced_object_download_max_components', '2')
    ]

    with SetBotoConfigForTest(boto_config_for_test):
      stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
                              return_stderr=True)
      self.assertIn('Sliced download tracker file doesn\'t match ', stderr)
      self.assertIn('Restarting download from scratch', stderr)
      self.assertNotIn('Resuming download', stderr)

  @unittest.skipUnless(UsingCrcmodExtension(),
                       'Sliced download requires fast crcmod.')
  @SkipForS3('No sliced download support for S3.')
  def test_cp_sliced_download_disabled_cross_process(self):
    """Tests temporary files are not orphaned if sliced download is disabled.

    Specifically, temporary files should be deleted when the corresponding
    non-sliced download is completed.
    """
    bucket_uri = self.CreateBucket()
    object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                   object_name='foo',
                                   contents=b'abcd' * self.halt_size)
    fpath = self.CreateTempFile()
    test_callback_file = self.CreateTempFile(
        contents=pickle.dumps(HaltingCopyCallbackHandler(False, 5)))

    boto_config_for_test = [
        ('GSUtil', 'resumable_threshold', str(self.halt_size)),
        ('GSUtil', 'sliced_object_download_threshold', str(self.halt_size)),
        ('GSUtil', 'sliced_object_download_max_components', '4')
    ]

    with SetBotoConfigForTest(boto_config_for_test):
      stderr = self.RunGsUtil([
          'cp', '--testcallbackfile', test_callback_file,
          suri(object_uri),
          suri(fpath)
      ],
                              return_stderr=True,
                              expected_status=1)
      self.assertIn('not downloaded successfully', stderr)
      # Temporary download file should exist.
      self.assertTrue(os.path.isfile(fpath + '_.gstmp'))

      # Each tracker file should exist.
      tracker_filenames = GetSlicedDownloadTrackerFilePaths(
          StorageUrlFromString(fpath), self.test_api)
      for tracker_filename in tracker_filenames:
        self.assertTrue(os.path.isfile(tracker_filename))

    # Disable sliced downloads by increasing the threshold
    boto_config_for_test = [
        ('GSUtil', 'resumable_threshold', str(self.halt_size)),
        ('GSUtil', 'sliced_object_download_threshold', str(self.halt_size * 5)),
        ('GSUtil', 'sliced_object_download_max_components', '4')
    ]

    with SetBotoConfigForTest(boto_config_for_test):
      stderr = self.RunGsUtil(['cp', suri(object_uri), fpath],
                              return_stderr=True)
      self.assertNotIn('Resuming download', stderr)
      # Temporary download file should have been deleted.
      self.assertFalse(os.path.isfile(fpath + '_.gstmp'))

      # Each tracker file should have been deleted.
      for tracker_filename in tracker_filenames:
        self.assertFalse(os.path.isfile(tracker_filename))
      with open(fpath, 'rb') as f:
        self.assertEqual(f.read(), b'abcd' * self.halt_size)

  @SkipForS3('No resumable upload support for S3.')
  def test_cp_resumable_upload_start_over_http_error(self):
    for start_over_error in (
        403,  # If user doesn't have storage.buckets.get access to dest bucket.
        404,  # If the dest bucket exists, but the dest object does not.
        410):  # If the service tells us to restart the upload from scratch.
      self.start_over_error_test_helper(start_over_error)

  def start_over_error_test_helper(self, http_error_num):
    bucket_uri = self.CreateBucket()
    # The object contents need to be fairly large to avoid the race condition
    # where the contents finish uploading before we artificially halt the copy.
    rand_chars = get_random_ascii_chars(size=(ONE_MIB * 4))
    fpath = self.CreateTempFile(contents=rand_chars)
    boto_config_for_test = ('GSUtil', 'resumable_threshold', str(ONE_KIB))
    if self.test_api == ApiSelector.JSON:
      test_callback_file = self.CreateTempFile(
          contents=pickle.dumps(_JSONForceHTTPErrorCopyCallbackHandler(5, 404)))
    elif self.test_api == ApiSelector.XML:
      test_callback_file = self.CreateTempFile(contents=pickle.dumps(
          _XMLResumableUploadStartOverCopyCallbackHandler(5)))

    with SetBotoConfigForTest([boto_config_for_test]):
      stderr = self.RunGsUtil([
          'cp', '--testcallbackfile', test_callback_file, fpath,
          suri(bucket_uri)
      ],
                              return_stderr=True)
      self.assertIn('Restarting upload of', stderr)

  def test_cp_minus_c(self):
    bucket_uri = self.CreateBucket()
    object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                   object_name='foo',
                                   contents=b'foo')

    cp_command = [
        'cp',
        '-c',
        suri(bucket_uri) + '/foo2',
        suri(object_uri),
        suri(bucket_uri) + '/dir/',
    ]
    self.RunGsUtil(cp_command, expected_status=1)
    self.RunGsUtil(['stat', '%s/dir/foo' % suri(bucket_uri)])

  def test_rewrite_cp(self):
    """Tests the JSON Rewrite API."""
    if self.test_api == ApiSelector.XML:
      return unittest.skip('Rewrite API is only supported in JSON.')
    bucket_uri = self.CreateBucket()
    object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                   object_name='foo',
                                   contents=b'bar')
    gsutil_api = GcsJsonApi(BucketStorageUri, logging.getLogger(),
                            DiscardMessagesQueue(), self.default_provider)
    key = object_uri.get_key()
    src_obj_metadata = apitools_messages.Object(name=key.name,
                                                bucket=key.bucket.name,
                                                contentType=key.content_type)
    dst_obj_metadata = apitools_messages.Object(
        bucket=src_obj_metadata.bucket,
        name=self.MakeTempName('object'),
        contentType=src_obj_metadata.contentType)
    gsutil_api.CopyObject(src_obj_metadata, dst_obj_metadata)
    self.assertEqual(
        gsutil_api.GetObjectMetadata(src_obj_metadata.bucket,
                                     src_obj_metadata.name,
                                     fields=['customerEncryption',
                                             'md5Hash']).md5Hash,
        gsutil_api.GetObjectMetadata(dst_obj_metadata.bucket,
                                     dst_obj_metadata.name,
                                     fields=['customerEncryption',
                                             'md5Hash']).md5Hash,
        'Error: Rewritten object\'s hash doesn\'t match source object.')

  def test_rewrite_cp_resume(self):
    """Tests the JSON Rewrite API, breaking and resuming via a tracker file."""
    if self.test_api == ApiSelector.XML:
      return unittest.skip('Rewrite API is only supported in JSON.')
    bucket_uri = self.CreateBucket()
    # Second bucket needs to be a different storage class so the service
    # actually rewrites the bytes.
    bucket_uri2 = self.CreateBucket(
        storage_class='durable_reduced_availability')
    # maxBytesPerCall must be >= 1 MiB, so create an object > 2 MiB because we
    # need 2 responses from the service: 1 success, 1 failure prior to
    # completion.
    object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                   object_name='foo',
                                   contents=(b'12' * ONE_MIB) + b'bar',
                                   prefer_json_api=True)
    gsutil_api = GcsJsonApi(BucketStorageUri, logging.getLogger(),
                            DiscardMessagesQueue(), self.default_provider)
    key = object_uri.get_key()
    src_obj_metadata = apitools_messages.Object(name=key.name,
                                                bucket=key.bucket.name,
                                                contentType=key.content_type,
                                                etag=key.etag.strip('"\''))
    dst_obj_name = self.MakeTempName('object')
    dst_obj_metadata = apitools_messages.Object(
        bucket=bucket_uri2.bucket_name,
        name=dst_obj_name,
        contentType=src_obj_metadata.contentType)
    tracker_file_name = GetRewriteTrackerFilePath(src_obj_metadata.bucket,
                                                  src_obj_metadata.name,
                                                  dst_obj_metadata.bucket,
                                                  dst_obj_metadata.name,
                                                  self.test_api)
    try:
      try:
        gsutil_api.CopyObject(src_obj_metadata,
                              dst_obj_metadata,
                              progress_callback=HaltingRewriteCallbackHandler(
                                  ONE_MIB * 2).call,
                              max_bytes_per_call=ONE_MIB)
        self.fail('Expected RewriteHaltException.')
      except RewriteHaltException:
        pass

      # Tracker file should be left over.
      self.assertTrue(os.path.exists(tracker_file_name))

      # Now resume. Callback ensures we didn't start over.
      gsutil_api.CopyObject(
          src_obj_metadata,
          dst_obj_metadata,
          progress_callback=EnsureRewriteResumeCallbackHandler(ONE_MIB *
                                                               2).call,
          max_bytes_per_call=ONE_MIB)

      # Copy completed; tracker file should be deleted.
      self.assertFalse(os.path.exists(tracker_file_name))

      self.assertEqual(
          gsutil_api.GetObjectMetadata(src_obj_metadata.bucket,
                                       src_obj_metadata.name,
                                       fields=['customerEncryption',
                                               'md5Hash']).md5Hash,
          gsutil_api.GetObjectMetadata(dst_obj_metadata.bucket,
                                       dst_obj_metadata.name,
                                       fields=['customerEncryption',
                                               'md5Hash']).md5Hash,
          'Error: Rewritten object\'s hash doesn\'t match source object.')
    finally:
      # Clean up if something went wrong.
      DeleteTrackerFile(tracker_file_name)

  def test_rewrite_cp_resume_source_changed(self):
    """Tests that Rewrite starts over when the source object has changed."""
    if self.test_api == ApiSelector.XML:
      return unittest.skip('Rewrite API is only supported in JSON.')
    bucket_uri = self.CreateBucket()
    # Second bucket needs to be a different storage class so the service
    # actually rewrites the bytes.
    bucket_uri2 = self.CreateBucket(
        storage_class='durable_reduced_availability')
    # maxBytesPerCall must be >= 1 MiB, so create an object > 2 MiB because we
    # need 2 responses from the service: 1 success, 1 failure prior to
    # completion.
    object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                   object_name='foo',
                                   contents=(b'12' * ONE_MIB) + b'bar',
                                   prefer_json_api=True)
    gsutil_api = GcsJsonApi(BucketStorageUri, logging.getLogger(),
                            DiscardMessagesQueue(), self.default_provider)
    key = object_uri.get_key()
    src_obj_metadata = apitools_messages.Object(name=key.name,
                                                bucket=key.bucket.name,
                                                contentType=key.content_type,
                                                etag=key.etag.strip('"\''))
    dst_obj_name = self.MakeTempName('object')
    dst_obj_metadata = apitools_messages.Object(
        bucket=bucket_uri2.bucket_name,
        name=dst_obj_name,
        contentType=src_obj_metadata.contentType)
    tracker_file_name = GetRewriteTrackerFilePath(src_obj_metadata.bucket,
                                                  src_obj_metadata.name,
                                                  dst_obj_metadata.bucket,
                                                  dst_obj_metadata.name,
                                                  self.test_api)
    try:
      try:
        gsutil_api.CopyObject(src_obj_metadata,
                              dst_obj_metadata,
                              progress_callback=HaltingRewriteCallbackHandler(
                                  ONE_MIB * 2).call,
                              max_bytes_per_call=ONE_MIB)
        self.fail('Expected RewriteHaltException.')
      except RewriteHaltException:
        pass
      # Overwrite the original object.
      object_uri2 = self.CreateObject(bucket_uri=bucket_uri,
                                      object_name='foo',
                                      contents=b'bar',
                                      prefer_json_api=True)
      key2 = object_uri2.get_key()
      src_obj_metadata2 = apitools_messages.Object(
          name=key2.name,
          bucket=key2.bucket.name,
          contentType=key2.content_type,
          etag=key2.etag.strip('"\''))

      # Tracker file for original object should still exist.
      self.assertTrue(os.path.exists(tracker_file_name))

      # Copy the new object.
      gsutil_api.CopyObject(src_obj_metadata2,
                            dst_obj_metadata,
                            max_bytes_per_call=ONE_MIB)

      # Copy completed; original tracker file should be deleted.
      self.assertFalse(os.path.exists(tracker_file_name))

      self.assertEqual(
          gsutil_api.GetObjectMetadata(src_obj_metadata2.bucket,
                                       src_obj_metadata2.name,
                                       fields=['customerEncryption',
                                               'md5Hash']).md5Hash,
          gsutil_api.GetObjectMetadata(dst_obj_metadata.bucket,
                                       dst_obj_metadata.name,
                                       fields=['customerEncryption',
                                               'md5Hash']).md5Hash,
          'Error: Rewritten object\'s hash doesn\'t match source object.')
    finally:
      # Clean up if something went wrong.
      DeleteTrackerFile(tracker_file_name)

  def test_rewrite_cp_resume_command_changed(self):
    """Tests that Rewrite starts over when the arguments changed."""
    if self.test_api == ApiSelector.XML:
      return unittest.skip('Rewrite API is only supported in JSON.')
    bucket_uri = self.CreateBucket()
    # Second bucket needs to be a different storage class so the service
    # actually rewrites the bytes.
    bucket_uri2 = self.CreateBucket(
        storage_class='durable_reduced_availability')
    # maxBytesPerCall must be >= 1 MiB, so create an object > 2 MiB because we
    # need 2 responses from the service: 1 success, 1 failure prior to
    # completion.
    object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                   object_name='foo',
                                   contents=(b'12' * ONE_MIB) + b'bar',
                                   prefer_json_api=True)
    gsutil_api = GcsJsonApi(BucketStorageUri, logging.getLogger(),
                            DiscardMessagesQueue(), self.default_provider)
    key = object_uri.get_key()
    src_obj_metadata = apitools_messages.Object(name=key.name,
                                                bucket=key.bucket.name,
                                                contentType=key.content_type,
                                                etag=key.etag.strip('"\''))
    dst_obj_name = self.MakeTempName('object')
    dst_obj_metadata = apitools_messages.Object(
        bucket=bucket_uri2.bucket_name,
        name=dst_obj_name,
        contentType=src_obj_metadata.contentType)
    tracker_file_name = GetRewriteTrackerFilePath(src_obj_metadata.bucket,
                                                  src_obj_metadata.name,
                                                  dst_obj_metadata.bucket,
                                                  dst_obj_metadata.name,
                                                  self.test_api)
    try:
      try:
        gsutil_api.CopyObject(src_obj_metadata,
                              dst_obj_metadata,
                              canned_acl='private',
                              progress_callback=HaltingRewriteCallbackHandler(
                                  ONE_MIB * 2).call,
                              max_bytes_per_call=ONE_MIB)
        self.fail('Expected RewriteHaltException.')
      except RewriteHaltException:
        pass

      # Tracker file for original object should still exist.
      self.assertTrue(os.path.exists(tracker_file_name))

      # Copy the same object but with different call parameters.
      gsutil_api.CopyObject(src_obj_metadata,
                            dst_obj_metadata,
                            canned_acl='public-read',
                            max_bytes_per_call=ONE_MIB)

      # Copy completed; original tracker file should be deleted.
      self.assertFalse(os.path.exists(tracker_file_name))

      new_obj_metadata = gsutil_api.GetObjectMetadata(
          dst_obj_metadata.bucket,
          dst_obj_metadata.name,
          fields=['acl', 'customerEncryption', 'md5Hash'])
      self.assertEqual(
          gsutil_api.GetObjectMetadata(src_obj_metadata.bucket,
                                       src_obj_metadata.name,
                                       fields=['customerEncryption',
                                               'md5Hash']).md5Hash,
          new_obj_metadata.md5Hash,
          'Error: Rewritten object\'s hash doesn\'t match source object.')
      # New object should have a public-read ACL from the second command.
      found_public_acl = False
      for acl_entry in new_obj_metadata.acl:
        if acl_entry.entity == 'allUsers':
          found_public_acl = True
      self.assertTrue(found_public_acl,
                      'New object was not written with a public ACL.')
    finally:
      # Clean up if something went wrong.
      DeleteTrackerFile(tracker_file_name)

  @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
  @unittest.skipUnless(UsingCrcmodExtension(), 'Test requires fast crcmod.')
  def test_cp_preserve_posix_bucket_to_dir_no_errors(self):
    """Tests use of the -P flag with cp from a bucket to a local dir.

    Specifically tests combinations of POSIX attributes in metadata that will
    pass validation.
    """
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    TestCpMvPOSIXBucketToLocalNoErrors(self, bucket_uri, tmpdir, is_cp=True)

  @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
  def test_cp_preserve_posix_bucket_to_dir_errors(self):
    """Tests use of the -P flag with cp from a bucket to a local dir.

    Specifically, combinations of POSIX attributes in metadata that will fail
    validation.
    """
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()

    obj = self.CreateObject(bucket_uri=bucket_uri,
                            object_name='obj',
                            contents=b'obj')
    TestCpMvPOSIXBucketToLocalErrors(self, bucket_uri, obj, tmpdir, is_cp=True)

  @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
  def test_cp_preseve_posix_dir_to_bucket_no_errors(self):
    """Tests use of the -P flag with cp from a local dir to a bucket."""
    bucket_uri = self.CreateBucket()
    TestCpMvPOSIXLocalToBucketNoErrors(self, bucket_uri, is_cp=True)

  def test_cp_minus_s_to_non_cloud_dest_fails(self):
    """Test that cp -s operations to a non-cloud destination are prevented."""
    local_file = self.CreateTempFile(contents=b'foo')
    dest_dir = self.CreateTempDir()
    stderr = self.RunGsUtil(['cp', '-s', 'standard', local_file, dest_dir],
                            expected_status=1,
                            return_stderr=True)
    self.assertIn('Cannot specify storage class for a non-cloud destination:',
                  stderr)

  # TODO: Remove @skip annotation from this test once we upgrade to the Boto
  # version that parses the storage class header for HEAD Object responses.
  @SkipForXML('Need Boto version > 2.46.1')
  def test_cp_specify_nondefault_storage_class(self):
    bucket_uri = self.CreateBucket()
    object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                   object_name='foo',
                                   contents=b'foo')
    object2_suri = suri(object_uri) + 'bar'
    # Specify storage class name as mixed case here to ensure that it
    # gets normalized to uppercase (S3 would return an error otherwise), and
    # that using the normalized case is accepted by each API.
    nondefault_storage_class = {
        's3': 'Standard_iA',
        'gs': 'durable_REDUCED_availability'
    }
    storage_class = nondefault_storage_class[self.default_provider]
    self.RunGsUtil(['cp', '-s', storage_class, suri(object_uri), object2_suri])
    stdout = self.RunGsUtil(['stat', object2_suri], return_stdout=True)
    self.assertRegexpMatchesWithFlags(stdout,
                                      r'Storage class:\s+%s' % storage_class,
                                      flags=re.IGNORECASE)

  @SkipForS3('Test uses gs-specific storage classes.')
  def test_cp_sets_correct_dest_storage_class(self):
    """Tests that object storage class is set correctly with and without -s."""
    # Use a non-default storage class as the default for the bucket.
    bucket_uri = self.CreateBucket(storage_class='nearline')
    # Ensure storage class is set correctly for a local-to-cloud copy.
    local_fname = 'foo-orig'
    local_fpath = self.CreateTempFile(contents=b'foo', file_name=local_fname)
    foo_cloud_suri = suri(bucket_uri) + '/' + local_fname
    self.RunGsUtil(['cp', '-s', 'standard', local_fpath, foo_cloud_suri])
    with SetBotoConfigForTest([('GSUtil', 'prefer_api', 'json')]):
      stdout = self.RunGsUtil(['stat', foo_cloud_suri], return_stdout=True)
      self.assertRegexpMatchesWithFlags(stdout,
                                        r'Storage class:\s+STANDARD',
                                        flags=re.IGNORECASE)

    # Ensure storage class is set correctly for a cloud-to-cloud copy when no
    # destination storage class is specified.
    foo_nl_suri = suri(bucket_uri) + '/foo-nl'
    self.RunGsUtil(['cp', foo_cloud_suri, foo_nl_suri])
    # TODO: Remove with-clause after adding storage class parsing in Boto.
    with SetBotoConfigForTest([('GSUtil', 'prefer_api', 'json')]):
      stdout = self.RunGsUtil(['stat', foo_nl_suri], return_stdout=True)
      self.assertRegexpMatchesWithFlags(stdout,
                                        r'Storage class:\s+NEARLINE',
                                        flags=re.IGNORECASE)

    # Ensure storage class is set correctly for a cloud-to-cloud copy when a
    # non-bucket-default storage class is specified.
    foo_std_suri = suri(bucket_uri) + '/foo-std'
    self.RunGsUtil(['cp', '-s', 'standard', foo_nl_suri, foo_std_suri])
    # TODO: Remove with-clause after adding storage class parsing in Boto.
    with SetBotoConfigForTest([('GSUtil', 'prefer_api', 'json')]):
      stdout = self.RunGsUtil(['stat', foo_std_suri], return_stdout=True)
      self.assertRegexpMatchesWithFlags(stdout,
                                        r'Storage class:\s+STANDARD',
                                        flags=re.IGNORECASE)

  @SkipForS3('Test uses gs-specific KMS encryption')
  def test_kms_key_correctly_applied_to_dst_obj_from_src_with_no_key(self):
    bucket_uri = self.CreateBucket()
    obj1_name = 'foo'
    obj2_name = 'bar'
    key_fqn = AuthorizeProjectToUseTestingKmsKey()

    # Create the unencrypted object, then copy it, specifying a KMS key for the
    # new object.
    obj_uri = self.CreateObject(bucket_uri=bucket_uri,
                                object_name=obj1_name,
                                contents=b'foo')
    with SetBotoConfigForTest([('GSUtil', 'encryption_key', key_fqn)]):
      self.RunGsUtil(
          ['cp', suri(obj_uri),
           '%s/%s' % (suri(bucket_uri), obj2_name)])

    # Make sure the new object is encrypted with the specified KMS key.
    with SetBotoConfigForTest([('GSUtil', 'prefer_api', 'json')]):
      self.AssertObjectUsesCMEK('%s/%s' % (suri(bucket_uri), obj2_name),
                                key_fqn)

  @SkipForS3('Test uses gs-specific KMS encryption')
  def test_kms_key_correctly_applied_to_dst_obj_from_local_file(self):
    bucket_uri = self.CreateBucket()
    fpath = self.CreateTempFile(contents=b'abcd')
    obj_name = 'foo'
    obj_suri = suri(bucket_uri) + '/' + obj_name
    key_fqn = AuthorizeProjectToUseTestingKmsKey()

    with SetBotoConfigForTest([('GSUtil', 'encryption_key', key_fqn)]):
      self.RunGsUtil(['cp', fpath, obj_suri])

    with SetBotoConfigForTest([('GSUtil', 'prefer_api', 'json')]):
      self.AssertObjectUsesCMEK(obj_suri, key_fqn)

  @SkipForS3('Test uses gs-specific KMS encryption')
  def test_kms_key_works_with_resumable_upload(self):
    resumable_threshold = 1024 * 1024  # 1M
    bucket_uri = self.CreateBucket()
    fpath = self.CreateTempFile(contents=b'a' * resumable_threshold)
    obj_name = 'foo'
    obj_suri = suri(bucket_uri) + '/' + obj_name
    key_fqn = AuthorizeProjectToUseTestingKmsKey()

    with SetBotoConfigForTest([('GSUtil', 'encryption_key', key_fqn),
                               ('GSUtil', 'resumable_threshold',
                                str(resumable_threshold))]):
      self.RunGsUtil(['cp', fpath, obj_suri])

    with SetBotoConfigForTest([('GSUtil', 'prefer_api', 'json')]):
      self.AssertObjectUsesCMEK(obj_suri, key_fqn)

  @SkipForS3('Test uses gs-specific KMS encryption')
  def test_kms_key_correctly_applied_to_dst_obj_from_src_with_diff_key(self):
    bucket_uri = self.CreateBucket()
    obj1_name = 'foo'
    obj2_name = 'bar'
    key1_fqn = AuthorizeProjectToUseTestingKmsKey()
    key2_fqn = AuthorizeProjectToUseTestingKmsKey(
        key_name=KmsTestingResources.CONSTANT_KEY_NAME2)
    obj1_suri = suri(
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name=obj1_name,
                          contents=b'foo',
                          kms_key_name=key1_fqn))

    # Copy the object to the same bucket, specifying a different key to be used.
    obj2_suri = '%s/%s' % (suri(bucket_uri), obj2_name)
    with SetBotoConfigForTest([('GSUtil', 'encryption_key', key2_fqn)]):
      self.RunGsUtil(['cp', obj1_suri, obj2_suri])

    # Ensure the new object has the different key.
    with SetBotoConfigForTest([('GSUtil', 'prefer_api', 'json')]):
      self.AssertObjectUsesCMEK(obj2_suri, key2_fqn)

  @SkipForS3('Test uses gs-specific KMS encryption')
  @SkipForXML('Copying KMS-encrypted objects prohibited with XML API')
  def test_kms_key_not_applied_to_nonkms_dst_obj_from_src_with_kms_key(self):
    bucket_uri = self.CreateBucket()
    obj1_name = 'foo'
    obj2_name = 'bar'
    key1_fqn = AuthorizeProjectToUseTestingKmsKey()
    obj1_suri = suri(
        self.CreateObject(bucket_uri=bucket_uri,
                          object_name=obj1_name,
                          contents=b'foo',
                          kms_key_name=key1_fqn))

    # Copy the object to the same bucket, not specifying any KMS key.
    obj2_suri = '%s/%s' % (suri(bucket_uri), obj2_name)
    self.RunGsUtil(['cp', obj1_suri, obj2_suri])

    # Ensure the new object has no KMS key.
    with SetBotoConfigForTest([('GSUtil', 'prefer_api', 'json')]):
      self.AssertObjectUnencrypted(obj2_suri)

  @unittest.skipUnless(
      IS_WINDOWS,
      'Only Windows paths need to be normalized to use backslashes instead of '
      'forward slashes.')
  def test_windows_path_with_back_and_forward_slash_is_normalized(self):
    # Prior to this test and its corresponding fix, running
    # `gsutil cp dir/./file gs://bucket` would result in an object whose name
    # was "dir/./file", rather than just "file", as Windows tried to split on
    # the path component separator "\" instead of "/".
    tmp_dir = self.CreateTempDir()
    self.CreateTempFile(tmpdir=tmp_dir, file_name='obj1', contents=b'foo')
    bucket_uri = self.CreateBucket()
    self.RunGsUtil(['cp', '%s\\./obj1' % tmp_dir, suri(bucket_uri)])
    # If the destination path was not created correctly, this stat call should
    # fail with a non-zero exit code because the specified object won't exist.
    self.RunGsUtil(['stat', '%s/obj1' % suri(bucket_uri)])

  def test_cp_minus_m_streaming_upload(self):
    """Tests that cp -m - anything is disallowed."""
    stderr = self.RunGsUtil(['-m', 'cp', '-', 'file'],
                            return_stderr=True,
                            expected_status=1)
    if self._use_gcloud_storage:
      self.assertIn(
          'WARNING: Using sequential instead of parallel task execution to'
          ' transfer from stdin', stderr)
    else:
      self.assertIn(
          'CommandException: Cannot upload from a stream when using gsutil -m',
          stderr)

  @SequentialAndParallelTransfer
  def test_cp_overwrites_existing_destination(self):
    key_uri = self.CreateObject(contents=b'foo')
    fpath = self.CreateTempFile(contents=b'bar')
    stderr = self.RunGsUtil(['cp', suri(key_uri), fpath], return_stderr=True)
    with open(fpath, 'rb') as f:
      self.assertEqual(f.read(), b'foo')

  @SequentialAndParallelTransfer
  def test_downloads_are_reliable_with_more_than_one_gsutil_instance(self):
    test_file_count = 10
    temporary_directory = self.CreateTempDir()
    bucket_uri = self.CreateBucket(test_objects=test_file_count)

    cp_args = ['cp', suri(bucket_uri, '*'), temporary_directory]
    threads = []
    for _ in range(2):
      thread = threading.Thread(target=self.RunGsUtil, args=[cp_args])
      thread.start()
      threads.append(thread)
    [t.join() for t in threads]

    self.assertEqual(len(os.listdir(temporary_directory)), test_file_count)

class TestCpUnitTests(testcase.GsUtilUnitTestCase):
  """Unit tests for gsutil cp."""

  def testDownloadWithNoHashAvailable(self):
    """Tests a download with no valid server-supplied hash."""
    # S3 should have a special message for non-MD5 etags.
    bucket_uri = self.CreateBucket(provider='s3')
    object_uri = self.CreateObject(bucket_uri=bucket_uri, contents=b'foo')
    object_uri.get_key().etag = '12345'  # Not an MD5
    dst_dir = self.CreateTempDir()

    log_handler = self.RunCommand('cp', [suri(object_uri), dst_dir],
                                  return_log_handler=True)
    warning_messages = log_handler.messages['warning']
    self.assertEqual(2, len(warning_messages))
    self.assertRegex(
        warning_messages[0], r'Non-MD5 etag \(12345\) present for key .*, '
        r'data integrity checks are not possible')
    self.assertIn('Integrity cannot be assured', warning_messages[1])

  def testDownloadWithDestinationEndingWithDelimiterRaisesError(self):
    """Tests that a destination ending with a delimiter raises an error."""
    bucket_uri = self.CreateBucket(provider='s3')
    object_uri = self.CreateObject(bucket_uri=bucket_uri, contents=b'foo')
    destination_path = 'random_dir' + os.path.sep

    with self.assertRaises(InvalidUrlError) as error:
      self.RunCommand('cp', [suri(object_uri), destination_path])
      self.assertEqual(str(error), 'Invalid destination path: random_dir/')

  def test_object_and_prefix_same_name(self):
    bucket_uri = self.CreateBucket()
    object_uri = self.CreateObject(bucket_uri=bucket_uri,
                                   object_name='foo',
                                   contents=b'foo')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='foo/bar',
                      contents=b'bar')
    fpath = self.CreateTempFile()
    # MockKey doesn't support hash_algs, so the MD5 will not match.
    with SetBotoConfigForTest([('GSUtil', 'check_hashes', 'never')]):
      self.RunCommand('cp', [suri(object_uri), fpath])
    with open(fpath, 'rb') as f:
      self.assertEqual(f.read(), b'foo')

  def test_cp_upload_respects_no_hashes(self):
    bucket_uri = self.CreateBucket()
    fpath = self.CreateTempFile(contents=b'abcd')
    with SetBotoConfigForTest([('GSUtil', 'check_hashes', 'never')]):
      log_handler = self.RunCommand('cp', [fpath, suri(bucket_uri)],
                                    return_log_handler=True)
    warning_messages = log_handler.messages['warning']
    self.assertEqual(1, len(warning_messages))
    self.assertIn('Found no hashes to validate object upload',
                  warning_messages[0])

  @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
  @mock.patch('os.geteuid', new=mock.Mock(return_value=0))
  @mock.patch.object(os, 'chown', autospec=True)
  def test_posix_runs_chown_as_super_user(self, mock_chown):
    fpath = self.CreateTempFile(contents=b'abcd')
    obj = apitools_messages.Object()
    obj.metadata = CreateCustomMetadata(entries={UID_ATTR: USER_ID})
    ParseAndSetPOSIXAttributes(fpath, obj, False, True)
    mock_chown.assert_called_once_with(fpath, USER_ID, -1)

  @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
  @mock.patch('os.geteuid', new=mock.Mock(return_value=1))
  @mock.patch.object(os, 'chown', autospec=True)
  def test_posix_skips_chown_when_not_super_user(self, mock_chown):
    fpath = self.CreateTempFile(contents=b'abcd')
    obj = apitools_messages.Object()
    obj.metadata = CreateCustomMetadata(entries={UID_ATTR: USER_ID})
    ParseAndSetPOSIXAttributes(fpath, obj, False, True)
    mock_chown.assert_not_called()

  @mock.patch(
      'gslib.utils.copy_helper.TriggerReauthForDestinationProviderIfNecessary')
  @mock.patch('gslib.command.Command._GetProcessAndThreadCount')
  @mock.patch('gslib.command.Command.Apply',
              new=mock.Mock(spec=command.Command.Apply))
  def test_cp_triggers_reauth(self, mock_get_process_and_thread_count,
                              mock_trigger_reauth):
    path = self.CreateTempFile(file_name=('foo'))
    bucket_uri = self.CreateBucket()
    mock_get_process_and_thread_count.return_value = 2, 3

    self.RunCommand('cp', [path, suri(bucket_uri)])

    mock_trigger_reauth.assert_called_once_with(
        StorageUrlFromString(suri(bucket_uri)),
        mock.ANY,  # Gsutil API.
        6,  # Worker count.
    )

    mock_get_process_and_thread_count.assert_called_once_with(
        process_count=None,
        thread_count=None,
        parallel_operations_override=None,
        print_macos_warning=False,
    )

  def test_translates_predefined_acl_sub_opts(self):
    sub_opts = [('--flag-key', 'flag-value'), ('-a', 'public-read'),
                ('-a', 'does-not-exist')]
    ShimTranslatePredefinedAclSubOptForCopy(sub_opts)
    self.assertEqual(sub_opts, [('--flag-key', 'flag-value'),
                                ('-a', 'publicRead'), ('-a', 'does-not-exist')])

class TestCpShimUnitTests(testcase.ShimUnitTestBase):
  """Unit tests for shimming cp flags"""

  def test_shim_translates_flags(self):
    bucket_uri = self.CreateBucket()
    fpath = self.CreateTempFile(contents=b'abcd')
    with SetBotoConfigForTest([('GSUtil', 'use_gcloud_storage', 'True'),
                               ('GSUtil', 'hidden_shim_mode', 'dry_run')]):
      with SetEnvironmentForTest({
          'CLOUDSDK_CORE_PASS_CREDENTIALS_TO_GSUTIL': 'True',
          'CLOUDSDK_ROOT_DIR': 'fake_dir',
      }):
        mock_log_handler = self.RunCommand('cp', [
            '-e', '-n', '-r', '-R', '-s', 'some-class', '-v', '-U', '-a',
            'public-read', fpath,
            suri(bucket_uri)
        ],
                                           return_log_handler=True)
        info_lines = '\n'.join(mock_log_handler.messages['info'])
        self.assertIn(
            'Gcloud Storage Command: {} storage cp'
            ' --ignore-symlinks --no-clobber -r -r --storage-class some-class'
            ' --print-created-message --skip-unsupported --predefined-acl publicRead {} {}'.format(
                shim_util._get_gcloud_binary_path('fake_dir'), fpath,
                suri(bucket_uri)), info_lines)
        warn_lines = '\n'.join(mock_log_handler.messages['warning'])
        self.assertIn('Use the -m flag to enable parallelism', warn_lines)