# -*- coding: utf-8 -*-
# Copyright 2014 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Integration tests for rsync command."""

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import os
import re
from unittest import mock

import six

from gslib import command
from gslib.commands import rsync
from gslib.project_id import PopulateProjectId
from gslib.storage_url import StorageUrlFromString
import gslib.tests.testcase as testcase
from gslib.tests.testcase.integration_testcase import SkipForGS
from gslib.tests.testcase.integration_testcase import SkipForS3
from gslib.tests.testcase.integration_testcase import SkipForXML
from gslib.tests.util import AuthorizeProjectToUseTestingKmsKey
from gslib.tests.util import TEST_ENCRYPTION_KEY_S3
from gslib.tests.util import TEST_ENCRYPTION_KEY_S3_MD5
from gslib.tests.util import BuildErrorRegex
from gslib.tests.util import ObjectToURI as suri
from gslib.tests.util import ORPHANED_FILE
from gslib.tests.util import POSIX_GID_ERROR
from gslib.tests.util import POSIX_INSUFFICIENT_ACCESS_ERROR
from gslib.tests.util import POSIX_MODE_ERROR
from gslib.tests.util import POSIX_UID_ERROR
from gslib.tests.util import SequentialAndParallelTransfer
from gslib.tests.util import SetBotoConfigForTest
from gslib.tests.util import SetEnvironmentForTest
from gslib.tests.util import TailSet
from gslib.tests.util import unittest
from gslib.utils.boto_util import UsingCrcmodExtension
from gslib.utils.hashing_helper import SLOW_CRCMOD_RSYNC_WARNING
from gslib.utils.posix_util import ConvertDatetimeToPOSIX
from gslib.utils.posix_util import GID_ATTR
from gslib.utils.posix_util import MODE_ATTR
from gslib.utils.posix_util import MTIME_ATTR
from gslib.utils.posix_util import NA_TIME
from gslib.utils.posix_util import UID_ATTR
from gslib.utils.retry_util import Retry
from gslib.utils.system_util import IS_OSX
from gslib.utils.system_util import IS_WINDOWS
from gslib.utils import shim_util

# These POSIX-specific variables aren't defined for Windows.
# pylint: disable=g-import-not-at-top
if not IS_WINDOWS:
  from gslib.tests import util
  from gslib.tests.util import DEFAULT_MODE
  from gslib.tests.util import INVALID_GID
  from gslib.tests.util import INVALID_UID
  from gslib.tests.util import USER_ID
# pylint: enable=g-import-not-at-top

if six.PY3:
  long = int

MACOS_WARNING = (
    'If you experience problems with multiprocessing on MacOS, they might be '
    'related to https://bugs.python.org/issue33725. You can disable '
    'multiprocessing by editing your .boto config or by adding the following '
    'flag to your command: `-o "GSUtil:parallel_process_count=1"`. Note that '
    'multithreading is still available even if you disable multiprocessing.\n\n'
)

if IS_OSX:
  NO_CHANGES = ('Building synchronization state...\n' + MACOS_WARNING +
                'Starting synchronization...\n')
else:
  NO_CHANGES = (
      'Building synchronization state...\nStarting synchronization...\n')

if not UsingCrcmodExtension():
  NO_CHANGES = SLOW_CRCMOD_RSYNC_WARNING + '\n' + NO_CHANGES
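
# Tests below compare rsync's stderr against NO_CHANGES verbatim, so the
# slow-crcmod warning has to be folded into the expected output here.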
|
|
|
|
class TestRsyncUnit(testcase.GsUtilUnitTestCase):
  """Unit tests for methods in the commands.rsync module."""

  def testUrlEncodeAndDecode(self):
    # Test special URL chars as well as unicode:
    decoded_url = 'gs://bkt/space fslash/plus+tilde~unicodeeè'
    encoded_url = (
        'gs%3A%2F%2Fbkt%2Fspace+fslash%2Fplus%2Btilde~unicodee%C3%A8')
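    # Spaces encode to '+' and reserved or non-ASCII bytes to UTF-8 percent
    # escapes (quote_plus-style escaping).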
|
|
    # Encode accepts unicode, returns language-appropriate string type.
    self.assertEqual(rsync._EncodeUrl(six.ensure_text(decoded_url)),
                     six.ensure_str(encoded_url))
    # Decode accepts language-appropriate string type, returns unicode.
    self.assertEqual(rsync._DecodeUrl(six.ensure_str(encoded_url)),
                     six.ensure_text(decoded_url))

  @mock.patch(
      'gslib.utils.copy_helper.TriggerReauthForDestinationProviderIfNecessary')
  @mock.patch('gslib.command.Command._GetProcessAndThreadCount')
  @mock.patch('gslib.command.Command.Apply',
              new=mock.Mock(spec=command.Command.Apply))
  def testRsyncTriggersReauth(self, mock_get_process_and_thread_count,
                              mock_trigger_reauth):
    path = self.CreateTempDir()
    bucket_uri = self.CreateBucket()
    mock_get_process_and_thread_count.return_value = 2, 3
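    # _GetProcessAndThreadCount is mocked to return (2, 3), so rsync should
    # report a worker count of 2 * 3 = 6 to the reauth check below.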
|
|
    self.RunCommand('rsync', [path, suri(bucket_uri)])

    mock_trigger_reauth.assert_called_once_with(
        StorageUrlFromString(suri(bucket_uri)),
        mock.ANY,  # Gsutil API.
        worker_count=6,
    )

    mock_get_process_and_thread_count.assert_called_once_with(
        process_count=None,
        thread_count=None,
        parallel_operations_override=command.Command.ParallelOverrideReason.
        SPEED,
        print_macos_warning=False,
    )


class TestRsyncUnitWithShim(testcase.ShimUnitTestBase):

  def testShimTranslatesFlags(self):
    bucket_uri = self.CreateBucket()
    fpath = self.CreateTempDir()
    with SetBotoConfigForTest([('GSUtil', 'use_gcloud_storage', 'True'),
                               ('GSUtil', 'hidden_shim_mode', 'dry_run')]):
      with SetEnvironmentForTest({
          'CLOUDSDK_CORE_PASS_CREDENTIALS_TO_GSUTIL': 'True',
          'CLOUDSDK_ROOT_DIR': 'fake_dir',
      }):
        mock_log_handler = self.RunCommand(
            'rsync', ['-a', 'public-read', '-P',
                      suri(bucket_uri), fpath],
            return_log_handler=True)
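        # Expected translation: '-a public-read' -> '--predefined-acl
        # publicRead' and '-P' -> '--preserve-posix'; '--no-ignore-symlinks'
        # is added because gsutil copies symlinked files by default.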
        info_lines = '\n'.join(mock_log_handler.messages['info'])
        self.assertIn(
            'Gcloud Storage Command: {} storage rsync'
            ' --no-ignore-symlinks --predefined-acl publicRead'
            ' --preserve-posix {} {}'.format(
                shim_util._get_gcloud_binary_path('fake_dir'), suri(bucket_uri),
                fpath), info_lines)
        warn_lines = '\n'.join(mock_log_handler.messages['warning'])
        self.assertIn('By default, gsutil copies file symlinks', warn_lines)
        self.assertIn('For preserving POSIX with rsync downloads', warn_lines)


# TODO: Add inspection to the retry wrappers in this test suite where the state
# at the end of a retry block is depended upon by subsequent tests (since
# listing content can vary depending on which backend server is reached until
# eventual consistency is reached).
# TODO: Remove retry wrappers and AssertNObjectsInBucket calls if GCS ever
# supports strong listing consistency.
class TestRsync(testcase.GsUtilIntegrationTestCase):
  """Integration tests for rsync command."""

  def _GetMetadataAttribute(self, bucket_name, object_name, attr_name):
    """Retrieves and returns an attribute from an object's metadata.

    Args:
      bucket_name: The name of the bucket the object is in.
      object_name: The name of the object itself.
      attr_name: The name of the custom metadata attribute.

    Returns:
      The value at the specified attribute name in the metadata. If not
      present, returns None.
    """
    gsutil_api = (self.json_api
                  if self.default_provider == 'gs' else self.xml_api)
    metadata = gsutil_api.GetObjectMetadata(bucket_name,
                                            object_name,
                                            provider=self.default_provider,
                                            fields=[attr_name])
    return getattr(metadata, attr_name, None)
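
  # gcloud storage prints per-operation 'Completed' progress lines, while
  # gsutil prints the fixed NO_CHANGES summary, so a no-op sync is detected
  # differently for each binary.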
|
|
  def _VerifyNoChanges(self, stderr):
    if self._use_gcloud_storage:
      self.assertNotIn('Completed', stderr)
    else:
      self.assertEqual(NO_CHANGES, stderr)

  def _VerifyObjectMtime(self,
                         bucket_name,
                         object_name,
                         expected_mtime,
                         expected_present=True):
    """Retrieves and verifies the object's mtime.

    Args:
      bucket_name: The name of the bucket the object is in.
      object_name: The name of the object itself.
      expected_mtime: The expected retrieved mtime.
      expected_present: True if the mtime must be present in the
          object metadata, False if it must not be present.

    Returns:
      None
    """
    self.VerifyObjectCustomAttribute(bucket_name,
                                     object_name,
                                     MTIME_ATTR,
                                     expected_mtime,
                                     expected_present=expected_present)

  def test_invalid_args(self):
    """Tests various invalid argument cases."""
    bucket_uri = self.CreateBucket()
    obj1 = self.CreateObject(bucket_uri=bucket_uri,
                             object_name='obj1',
                             contents=b'obj1')
    tmpdir = self.CreateTempDir()
    # rsync object to bucket.
    self.RunGsUtil(['rsync', suri(obj1), suri(bucket_uri)], expected_status=1)
    # rsync bucket to object.
    self.RunGsUtil(['rsync', suri(bucket_uri), suri(obj1)], expected_status=1)
    # rsync bucket to non-existent bucket.
    self.RunGsUtil(
        ['rsync',
         suri(bucket_uri), 'gs://' + self.nonexistent_bucket_name],
        expected_status=1)
    # rsync object to dir.
    self.RunGsUtil(['rsync', suri(obj1), tmpdir], expected_status=1)
    if not self._use_gcloud_storage:
      # rsync dir to object.
      # Gsutil asks you to add a slash after the object name, while gcloud
      # automatically adds the slash and creates the prefix or directory.
      self.RunGsUtil(['rsync', tmpdir, suri(obj1)], expected_status=1)
    # rsync dir to non-existent bucket.
    self.RunGsUtil(
        ['rsync', tmpdir,
         suri(obj1), 'gs://' + self.nonexistent_bucket_name],
        expected_status=1)

  # Note: The tests below exercise the cases
  # {src_dir, src_bucket} X {dst_dir, dst_bucket}. We use gsutil rsync -d for
  # all the cases but then have just one test without -d (test_bucket_to_bucket)
  # as representative of handling without the -d option. This provides
  # reasonable test coverage because the -d handling is src/dest URI-type
  # independent, and keeps the test case combinations more manageable.

  def test_invalid_src_mtime(self):
    """Tests that warnings are logged if mtime cannot be cast as a long."""
    # Create a source bucket containing objects whose mtimes are, in turn, a
    # string of non-numeric characters, a valid number, a far-future value,
    # and negative values.
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj1',
                      contents=b'obj1',
                      mtime='xyz')
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj2',
                      contents=b'obj2',
                      mtime=123)
    # This creates an object that has an mtime sometime on 41091-11-25 UTC. It
    # is used to verify that a warning is thrown for objects set at least a day
    # in the future. If this test is not updated before that date, this test
    # will fail because of the hardcoded timestamp.
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj3',
                      contents=b'obj3',
                      mtime=long(1234567891011))
    # Create objects with a negative mtime.
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj4',
                      contents=b'obj4',
                      mtime=-100)
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj5',
                      contents=b'obj5',
                      mtime=-1)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      stderr = self.RunGsUtil(
          ['rsync', suri(bucket1_uri),
           suri(bucket2_uri)], return_stderr=True)
      if self._use_gcloud_storage:
        self.assertRegex(
            stderr, r'obj1#\d+ metadata did not contain a numeric value for'
            r' goog-reserved-file-mtime')
        self.assertNotRegex(
            stderr, r'obj2#\d+ metadata did not contain a numeric value for'
            r' goog-reserved-file-mtime')
        self.assertRegex(
            stderr,
            r'obj3#\d+ metadata that is more than one day in the future from'
            r' the system time')
        self.assertRegex(stderr, r'Found negative time value in gs://.*/obj4')
        self.assertRegex(stderr, r'Found negative time value in gs://.*/obj5')
      else:
        self.assertIn('obj1 has an invalid mtime in its metadata', stderr)
        self.assertNotIn('obj2 has an invalid mtime in its metadata', stderr)
        self.assertIn(
            'obj3 has an mtime more than 1 day from current system '
            'time', stderr)
        self.assertIn('obj4 has a negative mtime in its metadata', stderr)
        self.assertIn('obj5 has a negative mtime in its metadata', stderr)

    _Check1()

  @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
  @unittest.skipUnless(UsingCrcmodExtension(), 'Test requires fast crcmod.')
  def test_bucket_to_bucket_preserve_posix(self):
    """Tests that rsync -P works with bucket to bucket."""
    # Note that, unlike in the bucket-to-dir tests, POSIX attributes cannot be
    # verified beyond basic validation because cloud buckets have no notion of
    # UID or GID, and those values can come from systems with vastly different
    # mappings. The mode is likewise not verified.
    src_bucket = self.CreateBucket()
    dst_bucket = self.CreateBucket()
    primary_gid = os.getgid()
    non_primary_gid = util.GetNonPrimaryGid()
    # Create source objects.
    self.CreateObject(bucket_uri=src_bucket,
                      object_name='obj1',
                      contents=b'obj1',
                      mode='444')
    self.CreateObject(bucket_uri=src_bucket,
                      object_name='obj2',
                      contents=b'obj2',
                      gid=primary_gid)
    self.CreateObject(bucket_uri=src_bucket,
                      object_name='obj3',
                      contents=b'obj3',
                      gid=non_primary_gid)
    self.CreateObject(bucket_uri=src_bucket,
                      object_name='obj4',
                      contents=b'obj3',
                      uid=INVALID_UID(),
                      gid=INVALID_GID(),
                      mode='222')
    self.CreateObject(bucket_uri=src_bucket,
                      object_name='obj5',
                      contents=b'obj5',
                      uid=USER_ID,
                      gid=primary_gid,
                      mode=str(DEFAULT_MODE))
    # Create destination objects.
    # obj5 at the source and destination have the same content, so only the
    # destination metadata will be patched instead of copying the entire
    # object.
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='obj5',
                      contents=b'obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Test bucket to bucket rsync with -P flag and verify attributes."""
      stderr = self.RunGsUtil(
          ['rsync', '-P', suri(src_bucket),
           suri(dst_bucket)],
          return_stderr=True)
      listing1 = TailSet(suri(src_bucket), self.FlatListBucket(src_bucket))
      listing2 = TailSet(suri(dst_bucket), self.FlatListBucket(dst_bucket))
      # First bucket should have un-altered content.
      self.assertEqual(listing1,
                       set(['/obj1', '/obj2', '/obj3', '/obj4', '/obj5']))
      # dst_bucket should have new content from src_bucket.
      self.assertEqual(listing2,
                       set(['/obj1', '/obj2', '/obj3', '/obj4', '/obj5']))
      if self._use_gcloud_storage:
        self.assertIn('Patching', stderr)
      else:
        self.assertIn('Copying POSIX attributes from src to dst for', stderr)

    _Check1()

    self.VerifyObjectCustomAttribute(dst_bucket.bucket_name, 'obj1', MODE_ATTR,
                                     '444')
    self.VerifyObjectCustomAttribute(dst_bucket.bucket_name, 'obj2', GID_ATTR,
                                     str(primary_gid))
    self.VerifyObjectCustomAttribute(dst_bucket.bucket_name, 'obj3', GID_ATTR,
                                     str(non_primary_gid))
    # Verify all of the attributes for obj4. Even though these are all
    # 'invalid' values, the file was copied to the destination because bucket
    # to bucket with preserve POSIX enabled will blindly copy the object
    # metadata.
    self.VerifyObjectCustomAttribute(dst_bucket.bucket_name, 'obj4', GID_ATTR,
                                     str(INVALID_GID()))
    self.VerifyObjectCustomAttribute(dst_bucket.bucket_name, 'obj4', UID_ATTR,
                                     str(INVALID_UID()))
    self.VerifyObjectCustomAttribute(dst_bucket.bucket_name, 'obj4', MODE_ATTR,
                                     '222')
    # Verify obj5 attributes were copied.
    self.VerifyObjectCustomAttribute(dst_bucket.bucket_name, 'obj5', UID_ATTR,
                                     str(USER_ID))
    self.VerifyObjectCustomAttribute(dst_bucket.bucket_name, 'obj5', GID_ATTR,
                                     str(primary_gid))
    self.VerifyObjectCustomAttribute(dst_bucket.bucket_name, 'obj5', MODE_ATTR,
                                     str(DEFAULT_MODE))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      """Check that we are not patching destination metadata a second time."""
      stderr = self.RunGsUtil(
          ['rsync', '-P', suri(src_bucket),
           suri(dst_bucket)],
          return_stderr=True)
      if self._use_gcloud_storage:
        self.assertNotIn('Patching', stderr)
      else:
        self.assertNotIn('Copying POSIX attributes from src to dst for', stderr)

    _Check2()

  def test_bucket_to_bucket_same_objects_src_mtime(self):
    """Tests bucket to bucket with mtime.

    Each has the same items, but only the source has mtime stored in its
    metadata. Ensure that the destination ends up with the mtime of the files
    in its metadata as well.
    """
    # Create 2 buckets where the source and destination have 2 objects each
    # with the same name and content, where mtime is only set on src_bucket.
    src_bucket = self.CreateBucket()
    dst_bucket = self.CreateBucket()
    self.CreateObject(bucket_uri=src_bucket,
                      object_name='obj1',
                      contents=b'obj1',
                      mtime=0)
    self.CreateObject(bucket_uri=src_bucket,
                      object_name='subdir/obj2',
                      contents=b'subdir/obj2',
                      mtime=1)
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='obj1',
                      contents=b'obj1')
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='subdir/obj2',
                      contents=b'subdir/obj2')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-r', suri(src_bucket), suri(dst_bucket)])
      listing1 = TailSet(suri(src_bucket), self.FlatListBucket(src_bucket))
      # First bucket should have un-altered content.
      self.assertEqual(listing1, set(['/obj1', '/subdir/obj2']))

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      stderr = self.RunGsUtil(
          ['rsync', suri(src_bucket),
           suri(dst_bucket)], return_stderr=True)
      # Check that re-running the same rsync command causes no more changes.
      self._VerifyNoChanges(stderr)

    _Check2()

    if self._use_gcloud_storage:
      # Gcloud skips patching mtimes for cloud-to-cloud transfers to save API
      # calls, because rsync uses hash comparisons for cloud-to-cloud anyway.
      self._VerifyObjectMtime(dst_bucket.bucket_name,
                              'obj1',
                              NA_TIME,
                              expected_present=False)
      self._VerifyObjectMtime(dst_bucket.bucket_name,
                              'subdir/obj2',
                              NA_TIME,
                              expected_present=False)
    else:
      # Verify objects' mtime in dst_bucket.
      self._VerifyObjectMtime(dst_bucket.bucket_name, 'obj1', '0')
      self._VerifyObjectMtime(dst_bucket.bucket_name, 'subdir/obj2', '1')

  def test_bucket_to_bucket_src_mtime(self):
    """Tests bucket to bucket where source has mtime in files."""
    # Create 2 buckets where the source has 2 objects, one at root level and
    # the other in a subdirectory. The other bucket will be empty.
    src_bucket = self.CreateBucket()
    dst_bucket = self.CreateBucket()
    obj1 = self.CreateObject(bucket_uri=src_bucket,
                             object_name='obj1',
                             contents=b'obj1',
                             mtime=0)
    obj2 = self.CreateObject(bucket_uri=src_bucket,
                             object_name='subdir/obj2',
                             contents=b'subdir/obj2',
                             mtime=1)
    # Verify objects' mtime in the buckets.
    self._VerifyObjectMtime(obj1.bucket_name, obj1.object_name, '0')
    self._VerifyObjectMtime(obj2.bucket_name, obj2.object_name, '1')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-r', suri(src_bucket), suri(dst_bucket)])
      listing1 = TailSet(suri(src_bucket), self.FlatListBucket(src_bucket))
      listing2 = TailSet(suri(dst_bucket), self.FlatListBucket(dst_bucket))
      # First bucket should have un-altered content.
      self.assertEqual(listing1, set(['/obj1', '/subdir/obj2']))
      # Second bucket should have new objects added from source bucket.
      self.assertEqual(listing2, set(['/obj1', '/subdir/obj2']))

    _Check1()

    # Get and verify the metadata for the 2 objects at the destination.
    self._VerifyObjectMtime(dst_bucket.bucket_name, 'obj1', '0')
    self._VerifyObjectMtime(dst_bucket.bucket_name, 'subdir/obj2', '1')

  def test_bucket_to_bucket_dst_mtime(self):
    """Tests bucket to bucket where destination has mtime in objects."""
    # Create 2 buckets where the source has files without mtime and the
    # destination has it present in its object's metadata. For obj1 and obj6
    # this tests the behavior where the file names are the same but the content
    # is different. obj1 has no mtime at source, but obj6 has mtime at src that
    # matches dst.
    src_bucket = self.CreateBucket()
    dst_bucket = self.CreateBucket()
    self.CreateObject(bucket_uri=src_bucket,
                      object_name='obj1',
                      contents=b'OBJ1')
    self.CreateObject(bucket_uri=src_bucket,
                      object_name='subdir/obj2',
                      contents=b'subdir/obj2')
    self.CreateObject(bucket_uri=src_bucket,
                      object_name='.obj3',
                      contents=b'.obj3')
    self.CreateObject(bucket_uri=src_bucket,
                      object_name='subdir/obj4',
                      contents=b'subdir/obj4')
    self.CreateObject(bucket_uri=src_bucket,
                      object_name='obj6',
                      contents=b'OBJ6',
                      mtime=100)
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='obj1',
                      contents=b'obj1',
                      mtime=10)
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='subdir/obj2',
                      contents=b'subdir/obj2',
                      mtime=10)
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='.obj3',
                      contents=b'.OBJ3',
                      mtime=long(1000000000000))
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='subdir/obj5',
                      contents=b'subdir/obj5',
                      mtime=10)
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='obj6',
                      contents=b'obj6',
                      mtime=100)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-r', '-d',
                      suri(src_bucket),
                      suri(dst_bucket)],
                     return_stderr=True)

      listing1 = TailSet(suri(src_bucket), self.FlatListBucket(src_bucket))
      listing2 = TailSet(suri(dst_bucket), self.FlatListBucket(dst_bucket))
      # First bucket should have un-altered content.
      self.assertEqual(
          listing1,
          set(['/obj1', '/subdir/obj2', '/.obj3', '/subdir/obj4', '/obj6']))
      # Second bucket should have new objects added from source bucket.
      self.assertEqual(
          listing2,
          set(['/obj1', '/subdir/obj2', '/.obj3', '/subdir/obj4', '/obj6']))

    _Check1()

    # Get and verify the metadata for the objects at the destination.
    self._VerifyObjectMtime(dst_bucket.bucket_name, 'subdir/obj2', '10')
    if self._use_gcloud_storage:
      # Gcloud uses creation time if mtime is missing. Since source and
      # destination objects have different hashes, it will trigger a copy.
      source_o1_time_created = self.GetObjectMetadataWithFields(
          src_bucket.bucket_name, 'obj1', ['timeCreated']).timeCreated
      source_o1_posix_time_created = str(
          ConvertDatetimeToPOSIX(source_o1_time_created))
      self._VerifyObjectMtime(dst_bucket.bucket_name, 'obj1',
                              source_o1_posix_time_created)
      source_o4_time_created = self.GetObjectMetadataWithFields(
          src_bucket.bucket_name, 'subdir/obj4', ['timeCreated']).timeCreated
      source_o4_posix_time_created = str(
          ConvertDatetimeToPOSIX(source_o4_time_created))
      self._VerifyObjectMtime(dst_bucket.bucket_name, 'subdir/obj4',
                              source_o4_posix_time_created)
    else:
      self._VerifyObjectMtime(dst_bucket.bucket_name,
                              'obj1',
                              NA_TIME,
                              expected_present=False)
      self._VerifyObjectMtime(dst_bucket.bucket_name,
                              'subdir/obj4',
                              NA_TIME,
                              expected_present=False)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', suri(src_bucket),
           suri(dst_bucket)], return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that obj1 changed because mtime was not available; hashes were
      # used to compare the objects.
      self.assertEqual(
          'OBJ1',
          self.RunGsUtil(['cat', suri(dst_bucket, 'obj1')], return_stdout=True))
      # Ensure that .obj3 was updated even though its modification time comes
      # after the creation time of .obj3 at the source.
      self.assertEqual(
          '.obj3',
          self.RunGsUtil(['cat', suri(dst_bucket, '.obj3')],
                         return_stdout=True))
      # Check that obj6 was updated even though the mtimes match. In this case
      # bucket to bucket sync will compare hashes.
      self.assertEqual(
          'OBJ6',
          self.RunGsUtil(['cat', suri(dst_bucket, 'obj6')], return_stdout=True))

    _Check4()

    # Now rerun the rsync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check5():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(
          ['rsync', '-r', '-d', '-c',
           suri(src_bucket),
           suri(dst_bucket)])
      listing1 = TailSet(suri(src_bucket), self.FlatListBucket(src_bucket))
      listing2 = TailSet(suri(dst_bucket), self.FlatListBucket(dst_bucket))
      # First bucket should have un-altered content.
      self.assertEqual(
          listing1,
          set(['/obj1', '/subdir/obj2', '/.obj3', '/subdir/obj4', '/obj6']))
      # Second bucket should have new objects added from source bucket.
      self.assertEqual(
          listing2,
          set(['/obj1', '/subdir/obj2', '/.obj3', '/subdir/obj4', '/obj6']))
      # Assert that the contents of obj6 have now changed because the -c flag
      # was used to force checksums.
      self.assertEqual(
          'OBJ6',
          self.RunGsUtil(['cat', suri(dst_bucket, 'obj6')], return_stdout=True))
      # Verify the mtime for obj6 is correct.
      self._VerifyObjectMtime(dst_bucket.bucket_name, 'obj6', '100')

    _Check5()

  def test_bucket_to_bucket(self):
    """Tests that flat and recursive rsync between 2 buckets works correctly."""
    # Create 2 buckets with 1 overlapping object, 1 extra object at root level
    # in each, and 1 extra object 1 level down in each, where one of the
    # objects starts with "." to test that we don't skip those objects. Make
    # the overlapping objects named the same but with different content, to
    # test that we detect and properly copy in that case.
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj1',
                      contents=b'obj1')
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='.obj2',
                      contents=b'.obj2',
                      mtime=10)
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='subdir/obj3',
                      contents=b'subdir/obj3')
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj6',
                      contents=b'obj6_',
                      mtime=100)
    # .obj2 will be replaced and have mtime of 10.
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='.obj2',
                      contents=b'.OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='obj4',
                      contents=b'obj4')
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='subdir/obj5',
                      contents=b'subdir/obj5')
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='obj6',
                      contents=b'obj6',
                      mtime=100)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = TailSet(suri(bucket1_uri), self.FlatListBucket(bucket1_uri))
      listing2 = TailSet(suri(bucket2_uri), self.FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEqual(listing1,
                       set(['/obj1', '/.obj2', '/subdir/obj3', '/obj6']))
      # Second bucket should have new objects added from source bucket (without
      # removing extraneous objects found in the dest bucket), and without the
      # subdir objects synchronized.
      self.assertEqual(
          listing2, set(['/obj1', '/.obj2', '/obj4', '/subdir/obj5', '/obj6']))
      # Assert that the src/dest objects that had same length but different
      # content were correctly synchronized (bucket to bucket rsync uses
      # checksums).
      self.assertEqual(
          '.obj2',
          self.RunGsUtil(['cat', suri(bucket1_uri, '.obj2')],
                         return_stdout=True))
      self.assertEqual(
          '.obj2',
          self.RunGsUtil(['cat', suri(bucket2_uri, '.obj2')],
                         return_stdout=True))
      self.assertEqual(
          'obj6_',
          self.RunGsUtil(['cat', suri(bucket2_uri, 'obj6')],
                         return_stdout=True))
      # Verify that .obj2 had its mtime updated at the destination.
      self._VerifyObjectMtime(bucket2_uri.bucket_name, '.obj2', '10')

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', suri(bucket1_uri),
           suri(bucket2_uri)], return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check2()

    # Now add, overwrite, and remove some objects in each bucket and test
    # rsync -r.
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj6',
                      contents=b'obj6')
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='obj7',
                      contents=b'obj7')
    self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
    self.RunGsUtil(['rm', suri(bucket2_uri, '.obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      self.RunGsUtil(['rsync', '-r', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = TailSet(suri(bucket1_uri), self.FlatListBucket(bucket1_uri))
      listing2 = TailSet(suri(bucket2_uri), self.FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEqual(listing1, set(['/.obj2', '/obj6', '/subdir/obj3']))
      # Second bucket should have the objects that were newly added to the
      # first bucket (without removing extraneous dest bucket objects), plus
      # the subdir objects synchronized by -r.
      self.assertEqual(
          listing2,
          set([
              '/obj1', '/.obj2', '/obj4', '/obj6', '/obj7', '/subdir/obj3',
              '/subdir/obj5'
          ]))

    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-r', suri(bucket1_uri),
           suri(bucket2_uri)],
          return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check4()

  def test_bucket_to_bucket_minus_d(self):
    """Tests that flat and recursive rsync -d between 2 buckets works correctly."""
    # Create 2 buckets with 1 overlapping object, 1 extra object at root level
    # in each, and 1 extra object 1 level down in each, where one of the
    # objects starts with "." to test that we don't skip those objects. Make
    # the overlapping objects named the same but with different content, to
    # test that we detect and properly copy in that case.
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj1',
                      contents=b'obj1')
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='.obj2',
                      contents=b'.obj2')
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='subdir/obj3',
                      contents=b'subdir/obj3')
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='.obj2',
                      contents=b'.OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='obj4',
                      contents=b'obj4')
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='subdir/obj5',
                      contents=b'subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = TailSet(suri(bucket1_uri), self.FlatListBucket(bucket1_uri))
      listing2 = TailSet(suri(bucket2_uri), self.FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEqual(listing1, set(['/obj1', '/.obj2', '/subdir/obj3']))
      # Second bucket should have content like first bucket but without the
      # subdir objects synchronized.
      self.assertEqual(listing2, set(['/obj1', '/.obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were correctly synchronized (bucket to bucket rsync uses
      # checksums).
      self.assertEqual(
          '.obj2',
          self.RunGsUtil(['cat', suri(bucket1_uri, '.obj2')],
                         return_stdout=True))
      self.assertEqual(
          '.obj2',
          self.RunGsUtil(['cat', suri(bucket2_uri, '.obj2')],
                         return_stdout=True))

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri),
           suri(bucket2_uri)],
          return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check2()

    # Now add and remove some objects in each bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj6',
                      contents=b'obj6')
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='obj7',
                      contents=b'obj7')
    self.RunGsUtil(['rm', suri(bucket1_uri, 'obj1')])
    self.RunGsUtil(['rm', suri(bucket2_uri, '.obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      self.RunGsUtil(
          ['rsync', '-d', '-r',
           suri(bucket1_uri),
           suri(bucket2_uri)])
      listing1 = TailSet(suri(bucket1_uri), self.FlatListBucket(bucket1_uri))
      listing2 = TailSet(suri(bucket2_uri), self.FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEqual(listing1, set(['/.obj2', '/obj6', '/subdir/obj3']))
      # Second bucket should now have content identical to the first bucket,
      # with the subdir objects synchronized by -r.
      self.assertEqual(listing2, set(['/.obj2', '/obj6', '/subdir/obj3']))

    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-d', '-r',
           suri(bucket1_uri),
           suri(bucket2_uri)],
          return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check4()

  # Test sequential upload as well as parallel composite upload case.
  @SequentialAndParallelTransfer
  @unittest.skipUnless(UsingCrcmodExtension(), 'Test requires fast crcmod.')
  def test_dir_to_bucket_mtime(self):
    """Tests dir to bucket with mtime.

    Each has the same items; the source has mtime for all files, whereas the
    destination only has mtime for obj5 and obj6, to test, respectively, a
    later mtime at src and the same mtime at src and dst. Ensure that the
    destination ends up with the mtime of the files in its metadata.
    """
    # Create directory and bucket, where the directory has different
    # combinations of mtime and sub-directories.
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj1',
                        contents=b'obj1',
                        mtime=10)
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='.obj2',
                        contents=b'.obj2',
                        mtime=10)
    self.CreateTempFile(tmpdir=subdir,
                        file_name='obj3',
                        contents=b'subdir/obj3',
                        mtime=10)
    self.CreateTempFile(tmpdir=subdir,
                        file_name='obj5',
                        contents=b'subdir/obj5',
                        mtime=15)
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj6',
                        contents=b'obj6',
                        mtime=100)
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj7',
                        contents=b'obj7_',
                        mtime=100)
    bucket_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj1',
                      contents=b'OBJ1')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='.obj2',
                      contents=b'.obj2')
    self._SetObjectCustomMetadataAttribute(self.default_provider,
                                           bucket_uri.bucket_name, '.obj2',
                                           'test', 'test')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj4',
                      contents=b'obj4')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='subdir/obj5',
                      contents=b'subdir/obj5',
                      mtime=10)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj6',
                      contents=b'OBJ6',
                      mtime=100)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj7',
                      contents=b'obj7',
                      mtime=100)

    cumulative_stderr = set()
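    # stderr is accumulated across @Retry attempts because the S3
    # owner-permission notice checked below may be printed on any attempt.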

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      stderr = self.RunGsUtil(['rsync', '-r', '-d', tmpdir,
                               suri(bucket_uri)],
                              return_stderr=True)
      cumulative_stderr.update([s for s in stderr.splitlines() if s])
      listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      listing2 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEqual(
          listing1,
          set([
              '/obj1', '/.obj2', '/subdir/obj3', '/subdir/obj5', '/obj6',
              '/obj7'
          ]))
      # Bucket should have content like dir.
      self.assertEqual(
          listing2,
          set([
              '/obj1', '/.obj2', '/subdir/obj3', '/subdir/obj5', '/obj6',
              '/obj7'
          ]))
      # Check that obj6 didn't change even though the contents did. This is
      # because the file and object have the same mtime.
      self.assertEqual(
          'OBJ6',
          self.RunGsUtil(['cat', suri(bucket_uri, 'obj6')], return_stdout=True))
      # Check that obj7 changed because the size was different.
      self.assertEqual(
          'obj7_',
          self.RunGsUtil(['cat', suri(bucket_uri, 'obj7')], return_stdout=True))

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(['rsync', '-r', '-d', tmpdir,
                               suri(bucket_uri)],
                              return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check2()

    # Verify objects' mtime in the bucket.
    self._VerifyObjectMtime(bucket_uri.bucket_name, 'obj1', '10')
    self._VerifyObjectMtime(bucket_uri.bucket_name, '.obj2', '10')
    self._VerifyObjectMtime(bucket_uri.bucket_name, 'subdir/obj3', '10')
    self._VerifyObjectMtime(bucket_uri.bucket_name, 'subdir/obj5', '15')
    self._VerifyObjectMtime(bucket_uri.bucket_name, 'obj6', '100')
    # Rsync against S3 without owner permission on an object copies the whole
    # object instead of patching its metadata.
    copied_over_object_notice = (
        'Copying whole file/object for %s instead of patching because you '
        'don\'t have owner permission on the object.' %
        suri(bucket_uri, '.obj2'))
    if (copied_over_object_notice not in cumulative_stderr and
        not self._use_gcloud_storage):
      # Gsutil: Make sure test attribute wasn't removed when mtime was updated.
      # Gcloud: Does not fall back on copies to perform patches.
      self.VerifyObjectCustomAttribute(bucket_uri.bucket_name, '.obj2', 'test',
                                       'test')

    # Now rerun the rsync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-r', '-d', '-c', tmpdir, suri(bucket_uri)])
      listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      listing2 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEqual(
          listing1,
          set([
              '/obj1', '/.obj2', '/subdir/obj3', '/subdir/obj5', '/obj6',
              '/obj7'
          ]))
      # Bucket should have content like dir with the subdirectories synced.
      self.assertEqual(
          listing2,
          set([
              '/obj1', '/.obj2', '/subdir/obj3', '/subdir/obj5', '/obj6',
              '/obj7'
          ]))
      # Assert that the contents of obj6 have now changed because the -c flag
      # was used to force checksums.
      self.assertEqual(
          'obj6',
          self.RunGsUtil(['cat', suri(bucket_uri, 'obj6')], return_stdout=True))
      self._VerifyObjectMtime(bucket_uri.bucket_name, 'obj6', '100')

    _Check4()

  # Test sequential upload as well as parallel composite upload case.
  @SequentialAndParallelTransfer
  @unittest.skipUnless(UsingCrcmodExtension(), 'Test requires fast crcmod.')
  def test_dir_to_bucket_seek_ahead(self):
    """Tests that rsync seek-ahead iterator works correctly."""
    # Unfortunately, we have to retry the entire operation in the case of
    # eventual consistency because the estimated values will differ.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Test estimating an rsync upload operation."""
      tmpdir = self.CreateTempDir()
      subdir = os.path.join(tmpdir, 'subdir')
      os.mkdir(subdir)
      self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents=b'obj1')
      self.CreateTempFile(tmpdir=tmpdir, file_name='.obj2', contents=b'.obj2')
      self.CreateTempFile(tmpdir=subdir,
                          file_name='obj3',
                          contents=b'subdir/obj3')
      bucket_uri = self.CreateBucket()
      self.CreateObject(bucket_uri=bucket_uri,
                        object_name='.obj2',
                        contents=b'.OBJ2')
      self.CreateObject(bucket_uri=bucket_uri,
                        object_name='obj4',
                        contents=b'obj4')
      self.CreateObject(bucket_uri=bucket_uri,
                        object_name='subdir/obj5',
                        contents=b'subdir/obj5')
      # Need to make sure the bucket listing is caught-up, otherwise the
      # first rsync may not see .obj2 and overwrite it.
      self.AssertNObjectsInBucket(bucket_uri, 3)
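
      # A task_estimation_threshold of 1 plus task_estimation_force makes the
      # seek-ahead estimator run and print its estimate even for this tiny
      # workload; a later block sets the threshold to 0 to disable it.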
|
|
      with SetBotoConfigForTest([('GSUtil', 'task_estimation_threshold', '1'),
                                 ('GSUtil', 'task_estimation_force', 'True')]):
        stderr = self.RunGsUtil(
            ['-m', 'rsync', '-d', '-r', tmpdir,
             suri(bucket_uri)],
            return_stderr=True)
        # Objects: 5 (2 removed, 3 added).
        # Bytes: 20 (the added objects are 4, 5, and 11 bytes, respectively).
        self.assertIn(
            'Estimated work for this command: objects: 5, total size: 20',
            stderr)

      self.AssertNObjectsInBucket(bucket_uri, 3)
      # Re-running should produce no estimate, because there is no work to do.
      stderr = self.RunGsUtil(
          ['-m', 'rsync', '-d', '-r', tmpdir,
           suri(bucket_uri)],
          return_stderr=True)
      self.assertNotIn('Estimated work', stderr)

    _Check1()

    tmpdir = self.CreateTempDir(test_files=1)
    bucket_uri = self.CreateBucket()
    # Running with task estimation turned off (and work to perform) should not
    # produce an estimate.
    with SetBotoConfigForTest([('GSUtil', 'task_estimation_threshold', '0'),
                               ('GSUtil', 'task_estimation_force', 'True')]):
      stderr = self.RunGsUtil(
          ['-m', 'rsync', '-d', '-r', tmpdir,
           suri(bucket_uri)],
          return_stderr=True)
      self.assertNotIn('Estimated work', stderr)

  # Test sequential upload as well as parallel composite upload case.
  @SequentialAndParallelTransfer
  @unittest.skipUnless(UsingCrcmodExtension(), 'Test requires fast crcmod.')
  def test_dir_to_bucket_minus_d(self):
    """Tests that flat and recursive rsync dir to bucket works correctly."""
    # Create dir and bucket with 1 overlapping object, 1 extra object at root
    # level in each, and 1 extra object 1 level down in each, where one of the
    # objects starts with "." to test that we don't skip those objects. Make
    # the overlapping objects named the same but with different content, to
    # test that we detect and properly copy in that case.
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_uri = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents=b'obj1')
    o2_path = self.CreateTempFile(tmpdir=tmpdir,
                                  file_name='.obj2',
                                  contents=b'.obj2')
    self.CreateTempFile(tmpdir=subdir,
                        file_name='obj3',
                        contents=b'subdir/obj3')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='.obj2',
                      contents=b'.OBJ2')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj4',
                      contents=b'obj4')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='subdir/obj5',
                      contents=b'subdir/obj5')

    # Need to make sure the bucket listing is caught-up, otherwise the
    # first rsync may not see .obj2 and overwrite it.
    self.AssertNObjectsInBucket(bucket_uri, 3)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
      listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      listing2 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEqual(listing1, set(['/obj1', '/.obj2', '/subdir/obj3']))
      # Bucket should have content like dir but without the subdir objects
      # synchronized.
      self.assertEqual(listing2, set(['/obj1', '/.obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (dir to bucket rsync uses checksums as a
      # backup to mtime unless you specify -c to make hashes the priority).
      with open(os.path.join(tmpdir, '.obj2')) as f:
        self.assertEqual('.obj2', '\n'.join(f.readlines()))
      cloud_obj2_content = self.RunGsUtil(
          ['cat', suri(bucket_uri, '.obj2')], return_stdout=True)
      if self._use_gcloud_storage:
        # Gcloud uses ctime if mtime is missing, so if the creation of the
        # cloud .obj2 is close enough to the local creation, the times may
        # match. This means no fallback to checksums, and then no copy occurs.
        local_obj2_mtime = int(os.path.getmtime(o2_path))
        cloud_obj2_ctime = ConvertDatetimeToPOSIX(
            self._GetMetadataAttribute(bucket_uri.bucket_name, '.obj2',
                                       'timeCreated'))
        self.assertTrue(cloud_obj2_content == '.obj2' or
                        local_obj2_mtime == cloud_obj2_ctime)
      else:
        self.assertEqual('.obj2', cloud_obj2_content)

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check2()

    # Now rerun the rsync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-d', '-c', tmpdir, suri(bucket_uri)])
      listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      listing2 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEqual(listing1, set(['/obj1', '/.obj2', '/subdir/obj3']))
      # Bucket should have content like dir but without the subdir objects
      # synchronized.
      self.assertEqual(listing2, set(['/obj1', '/.obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (dir to bucket rsync with -c uses checksums).
      with open(os.path.join(tmpdir, '.obj2')) as f:
        self.assertEqual('.obj2', '\n'.join(f.readlines()))
      self.assertEqual(
          '.obj2',
          self.RunGsUtil(['cat', suri(bucket_uri, '.obj2')],
                         return_stdout=True))

    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(['rsync', '-d', '-c', tmpdir,
                               suri(bucket_uri)],
                              return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check4()

    # Now add and remove some objects in dir and bucket and test rsync -r.
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj6', contents=b'obj6')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj7',
                      contents=b'obj7')
    os.unlink(os.path.join(tmpdir, 'obj1'))
    self.RunGsUtil(['rm', suri(bucket_uri, '.obj2')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check5():
      self.RunGsUtil(['rsync', '-d', '-r', tmpdir, suri(bucket_uri)])
      listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      listing2 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEqual(listing1, set(['/.obj2', '/obj6', '/subdir/obj3']))
      # Bucket should now have content like dir, with the subdir objects
      # synchronized by -r.
      self.assertEqual(listing2, set(['/.obj2', '/obj6', '/subdir/obj3']))

    _Check5()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check6():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(['rsync', '-d', '-r', tmpdir,
                               suri(bucket_uri)],
                              return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check6()

  @unittest.skipUnless(UsingCrcmodExtension(), 'Test requires fast crcmod.')
  def test_dir_to_dir_mtime(self):
    """Tests that flat and recursive rsync dir to dir works correctly."""
    # Create 2 dirs with 1 overlapping file, 1 extra file at root
    # level in each, and 1 extra file 1 level down in each, where one of the
    # files starts with "." to test that we don't skip those files. Make the
    # overlapping files named the same but with different content, to test
    # that we detect and properly copy in that case.
    tmpdir1 = self.CreateTempDir()
    tmpdir2 = self.CreateTempDir()
    subdir1 = os.path.join(tmpdir1, 'subdir1')
    subdir2 = os.path.join(tmpdir2, 'subdir2')
    os.mkdir(subdir1)
    os.mkdir(subdir2)
    self.CreateTempFile(tmpdir=tmpdir1,
                        file_name='obj1',
                        contents=b'obj1',
                        mtime=10)
    self.CreateTempFile(tmpdir=tmpdir1,
                        file_name='.obj2',
                        contents=b'.obj2',
                        mtime=10)
    self.CreateTempFile(tmpdir=subdir1,
                        file_name='obj3',
                        contents=b'subdir1/obj3',
                        mtime=10)
    self.CreateTempFile(tmpdir=tmpdir1,
                        file_name='obj6',
                        contents=b'obj6',
                        mtime=100)
    self.CreateTempFile(tmpdir=tmpdir1,
                        file_name='obj7',
                        contents=b'obj7_',
                        mtime=100)
    self.CreateTempFile(tmpdir=tmpdir2,
                        file_name='.obj2',
                        contents=b'.OBJ2',
                        mtime=1000)
    self.CreateTempFile(tmpdir=tmpdir2,
                        file_name='obj4',
                        contents=b'obj4',
                        mtime=10)
    self.CreateTempFile(tmpdir=subdir2,
                        file_name='obj5',
                        contents=b'subdir2/obj5',
                        mtime=10)
    self.CreateTempFile(tmpdir=tmpdir2,
                        file_name='obj6',
                        contents=b'OBJ6',
                        mtime=100)
    self.CreateTempFile(tmpdir=tmpdir2,
                        file_name='obj7',
                        contents=b'obj7',
                        mtime=100)

    self.RunGsUtil(['rsync', '-r', '-d', tmpdir1, tmpdir2])
    listing1 = TailSet(tmpdir1, self.FlatListDir(tmpdir1))
    listing2 = TailSet(tmpdir2, self.FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEqual(
        listing1, set(['/obj1', '/.obj2', '/subdir1/obj3', '/obj6', '/obj7']))
    # dir2 should now have content like dir1.
    self.assertEqual(
        listing2, set(['/obj1', '/.obj2', '/subdir1/obj3', '/obj6', '/obj7']))
    # Assert that the src/dest objects that had same length but different
    # checksums were synchronized properly according to mtime.
    with open(os.path.join(tmpdir2, '.obj2')) as f:
      self.assertEqual('.obj2', '\n'.join(f.readlines()))
    with open(os.path.join(tmpdir2, 'obj6')) as f:
      self.assertEqual('OBJ6', '\n'.join(f.readlines()))
    with open(os.path.join(tmpdir2, 'obj7')) as f:
      self.assertEqual('obj7_', '\n'.join(f.readlines()))

    def _Check1():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEqual(
          NO_CHANGES,
          self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))

    _Check1()

    # Now rerun the rsync with the -c option.
    self.RunGsUtil(['rsync', '-r', '-d', '-c', tmpdir1, tmpdir2])
    listing1 = TailSet(tmpdir1, self.FlatListDir(tmpdir1))
    listing2 = TailSet(tmpdir2, self.FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEqual(
        listing1, set(['/obj1', '/.obj2', '/subdir1/obj3', '/obj6', '/obj7']))
    # dir2 should now have content like dir1.
    self.assertEqual(
        listing2, set(['/obj1', '/.obj2', '/subdir1/obj3', '/obj6', '/obj7']))
    # Assert that the src/dst objects that had same length and mtime but
    # different content were synchronized (dir to dir rsync with -c uses
    # checksums).
    with open(os.path.join(tmpdir1, '.obj2')) as f:
      self.assertEqual('.obj2', '\n'.join(f.readlines()))
    with open(os.path.join(tmpdir2, '.obj2')) as f:
      self.assertEqual('.obj2', '\n'.join(f.readlines()))
    with open(os.path.join(tmpdir2, 'obj6')) as f:
      self.assertEqual('obj6', '\n'.join(f.readlines()))

    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEqual(
          NO_CHANGES,
          self.RunGsUtil(['rsync', '-d', '-c', tmpdir1, tmpdir2],
                         return_stderr=True))

    _Check2()

    # Now add and remove some objects in both dirs and test rsync -r.
    os.unlink(os.path.join(tmpdir1, 'obj7'))
    os.unlink(os.path.join(tmpdir2, 'obj7'))
    self.CreateTempFile(tmpdir=tmpdir1,
                        file_name='obj6',
                        contents=b'obj6',
                        mtime=10)
    self.CreateTempFile(tmpdir=tmpdir2,
                        file_name='obj7',
                        contents=b'obj7',
                        mtime=100)
    os.unlink(os.path.join(tmpdir1, 'obj1'))
    os.unlink(os.path.join(tmpdir2, '.obj2'))

    self.RunGsUtil(['rsync', '-d', '-r', tmpdir1, tmpdir2])
    listing1 = TailSet(tmpdir1, self.FlatListDir(tmpdir1))
    listing2 = TailSet(tmpdir2, self.FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEqual(listing1, set(['/.obj2', '/obj6', '/subdir1/obj3']))
    # dir2 should have content like dir1.
    self.assertEqual(listing2, set(['/.obj2', '/obj6', '/subdir1/obj3']))

    def _Check3():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEqual(
          NO_CHANGES,
          self.RunGsUtil(['rsync', '-d', '-r', tmpdir1, tmpdir2],
                         return_stderr=True))

    _Check3()
@unittest.skipUnless(UsingCrcmodExtension(), 'Test requires fast crcmod.')
|
|
def test_dir_to_dir_minus_d(self):
|
|
"""Tests that flat and recursive rsync dir to dir works correctly."""
|
|
# Create 2 dirs with 1 overlapping file, 1 extra file at root
|
|
# level in each, and 1 extra file 1 level down in each, where one of the
|
|
# objects starts with "." to test that we don't skip those objects. Make the
|
|
# overlapping files named the same but with different content, to test
|
|
# that we detect and properly copy in that case.
|
|
tmpdir1 = self.CreateTempDir()
|
|
tmpdir2 = self.CreateTempDir()
|
|
subdir1 = os.path.join(tmpdir1, 'subdir1')
|
|
subdir2 = os.path.join(tmpdir2, 'subdir2')
|
|
os.mkdir(subdir1)
|
|
os.mkdir(subdir2)
|
|
self.CreateTempFile(tmpdir=tmpdir1, file_name='obj1', contents=b'obj1')
|
|
self.CreateTempFile(tmpdir=tmpdir1, file_name='.obj2', contents=b'.obj2')
|
|
self.CreateTempFile(tmpdir=subdir1,
|
|
file_name='obj3',
|
|
contents=b'subdir1/obj3')
|
|
self.CreateTempFile(tmpdir=tmpdir2, file_name='.obj2', contents=b'.OBJ2')
|
|
self.CreateTempFile(tmpdir=tmpdir2, file_name='obj4', contents=b'obj4')
|
|
self.CreateTempFile(tmpdir=subdir2,
|
|
file_name='obj5',
|
|
contents=b'subdir2/obj5')
|
|
|
|
self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
|
|
listing1 = TailSet(tmpdir1, self.FlatListDir(tmpdir1))
|
|
listing2 = TailSet(tmpdir2, self.FlatListDir(tmpdir2))
|
|
# dir1 should have un-altered content.
|
|
self.assertEqual(listing1, set(['/obj1', '/.obj2', '/subdir1/obj3']))
|
|
# dir2 should have content like dir1 but without the subdir1 objects
|
|
# synchronized.
|
|
self.assertEqual(listing2, set(['/obj1', '/.obj2', '/subdir2/obj5']))
|
|
# Assert that the src/dest objects that had same length but different
|
|
# checksums were not synchronized (dir to dir rsync doesn't use checksums
|
|
# unless you specify -c).
|
|
with open(os.path.join(tmpdir1, '.obj2')) as f:
|
|
self.assertEqual('.obj2', '\n'.join(f.readlines()))
|
|
with open(os.path.join(tmpdir2, '.obj2')) as f:
|
|
self.assertEqual('.OBJ2', '\n'.join(f.readlines()))
|
|
|
|
# Don't use @Retry since this is a dir-to-dir test, thus we don't need to
|
|
# worry about eventual consistency of bucket listings. This also allows us
|
|
# to make sure that we don't miss any unintended behavior when a first
|
|
# attempt behaves incorrectly and a subsequent retry behaves correctly.
|
|
def _Check1():
|
|
# Check that re-running the same rsync command causes no more changes.
|
|
self.assertEqual(
|
|
NO_CHANGES,
|
|
self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))
|
|
|
|
_Check1()

    # Now rerun the rsync with the -c option.
    self.RunGsUtil(['rsync', '-d', '-c', tmpdir1, tmpdir2])
    listing1 = TailSet(tmpdir1, self.FlatListDir(tmpdir1))
    listing2 = TailSet(tmpdir2, self.FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEqual(listing1, set(['/obj1', '/.obj2', '/subdir1/obj3']))
    # dir2 should have content like dir1 but without the subdir objects
    # synchronized.
    self.assertEqual(listing2, set(['/obj1', '/.obj2', '/subdir2/obj5']))
    # Assert that the src/dest objects that had same length but different
    # content were synchronized (dir to dir rsync with -c uses checksums).
    with open(os.path.join(tmpdir1, '.obj2')) as f:
      self.assertEqual('.obj2', '\n'.join(f.readlines()))
    with open(os.path.join(tmpdir2, '.obj2')) as f:
      self.assertEqual('.obj2', '\n'.join(f.readlines()))

    # Don't use @Retry since this is a dir-to-dir test, thus we don't need to
    # worry about eventual consistency of bucket listings.
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEqual(
          NO_CHANGES,
          self.RunGsUtil(['rsync', '-d', '-c', tmpdir1, tmpdir2],
                         return_stderr=True))

    _Check2()

    # Now add and remove some objects in both dirs and test rsync -r.
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj6', contents=b'obj6')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj7', contents=b'obj7')
    os.unlink(os.path.join(tmpdir1, 'obj1'))
    os.unlink(os.path.join(tmpdir2, '.obj2'))

    self.RunGsUtil(['rsync', '-d', '-r', tmpdir1, tmpdir2])
    listing1 = TailSet(tmpdir1, self.FlatListDir(tmpdir1))
    listing2 = TailSet(tmpdir2, self.FlatListDir(tmpdir2))
    # dir1 should have un-altered content.
    self.assertEqual(listing1, set(['/.obj2', '/obj6', '/subdir1/obj3']))
    # dir2 should now have content like dir1, this time with the subdir
    # objects synchronized (because of -r).
    self.assertEqual(listing2, set(['/.obj2', '/obj6', '/subdir1/obj3']))

    # Don't use @Retry since this is a dir-to-dir test, thus we don't need to
    # worry about eventual consistency of bucket listings.
    def _Check3():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEqual(
          NO_CHANGES,
          self.RunGsUtil(['rsync', '-d', '-r', tmpdir1, tmpdir2],
                         return_stderr=True))

    _Check3()

    # Create 2 dirs and add a file to the first. Then create another file and
    # add it to both dirs, making sure its filename evaluates to greater than
    # the previous filename. Make sure both files are present in the second
    # dir after issuing an rsync -d command.
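    # (A note on why ordering matters here: rsync compares sorted listings of
    # the source and destination, advancing whichever iterator is behind, so
    # this scenario exercises the iterator bookkeeping when a source name
    # sorts after every name already present in the destination.)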
    tmpdir1 = self.CreateTempDir()
    tmpdir2 = self.CreateTempDir()
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj1', contents=b'obj1')
    self.CreateTempFile(tmpdir=tmpdir1, file_name='obj2', contents=b'obj2')
    self.CreateTempFile(tmpdir=tmpdir2, file_name='obj2', contents=b'obj2')

    self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
    listing1 = TailSet(tmpdir1, self.FlatListDir(tmpdir1))
    listing2 = TailSet(tmpdir2, self.FlatListDir(tmpdir2))
    # First dir should have un-altered content.
    self.assertEqual(listing1, set(['/obj1', '/obj2']))
    # Second dir should have same content as first.
    self.assertEqual(listing2, set(['/obj1', '/obj2']))

    # Don't use @Retry since this is a dir-to-dir test, thus we don't need to
    # worry about eventual consistency of bucket listings.
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEqual(
          NO_CHANGES,
          self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))

    _Check4()

  def test_dir_to_dir_minus_d_more_files_than_bufsize(self):
    """Tests concurrently building listing from multiple tmp file ranges."""
    # Create 2 dirs, where each dir has 1000 objects and differing names.
    tmpdir1 = self.CreateTempDir()
    tmpdir2 = self.CreateTempDir()
    for i in range(0, 1000):
      self.CreateTempFile(tmpdir=tmpdir1,
                          file_name='d1-%s' % i,
                          contents=b'x',
                          mtime=(i + 1))
      self.CreateTempFile(tmpdir=tmpdir2,
                          file_name='d2-%s' % i,
                          contents=b'y',
                          mtime=i)

    # We open a new temp file each time we reach rsync_buffer_lines of
    # listing output. On Windows, this will result in a 'too many open file
    # handles' error, so choose a larger value so as not to open so many files.
    rsync_buffer_config = [('GSUtil', 'rsync_buffer_lines',
                            '50' if IS_WINDOWS else '2')]
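    # For reference, the same setting can be made persistent in a .boto config
    # file; a minimal sketch (the value below is illustrative, not a
    # recommendation):
    #
    #   [GSUtil]
    #   rsync_buffer_lines = 32000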
    # Run gsutil with config option to make buffer size << # files.
    with SetBotoConfigForTest(rsync_buffer_config):
      self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2])
    listing1 = TailSet(tmpdir1, self.FlatListDir(tmpdir1))
    listing2 = TailSet(tmpdir2, self.FlatListDir(tmpdir2))
    self.assertEqual(listing1, listing2)
    for i in range(0, 1000):
      self.assertEqual(
          i + 1, long(os.path.getmtime(os.path.join(tmpdir2, 'd1-%s' % i))))
      with open(os.path.join(tmpdir2, 'd1-%s' % i)) as f:
        self.assertEqual('x', '\n'.join(f.readlines()))

    # Don't use @Retry since this is a dir-to-dir test, thus we don't need to
    # worry about eventual consistency of bucket listings.
    def _Check():
      # Check that re-running the same rsync command causes no more changes.
      self.assertEqual(
          NO_CHANGES,
          self.RunGsUtil(['rsync', '-d', tmpdir1, tmpdir2], return_stderr=True))

    _Check()

  @unittest.skipUnless(UsingCrcmodExtension(), 'Test requires fast crcmod.')
  def test_bucket_to_dir_compressed_encoding(self):
    temp_file = self.CreateTempFile(contents=b'foo', file_name='bar')
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    self.RunGsUtil(['cp', '-Z', temp_file, suri(bucket_uri)])
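    # cp -Z gzip-compresses the upload and stores it with
    # Content-Encoding: gzip, so the object's bytes in the bucket differ from
    # the local file even though the logical content is the same.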
    stderr = self.RunGsUtil(['rsync', suri(bucket_uri), tmpdir],
                            return_stderr=True)
    # rsync should decompress the destination file.
    with open(os.path.join(tmpdir, 'bar'), 'rb') as fp:
      self.assertEqual(b'foo', fp.read())
    self.assertIn('bar has a compressed content-encoding', stderr)

  @SequentialAndParallelTransfer
  @unittest.skipUnless(UsingCrcmodExtension(), 'Test requires fast crcmod.')
  def test_bucket_to_dir_mtime(self):
    """Tests bucket to dir with mtime at the source."""
    # Create bucket and dir with overlapping content and other combinations of
    # mtime.
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj1',
                      contents=b'obj1',
                      mtime=5)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='.obj2',
                      contents=b'.obj2',
                      mtime=5)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='subdir/obj3',
                      contents=b'subdir/obj3')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj4',
                      contents=b'OBJ4')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj6',
                      contents=b'obj6',
                      mtime=50)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj7',
                      contents=b'obj7',
                      mtime=5)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj8',
                      contents=b'obj8',
                      mtime=100)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj9',
                      contents=b'obj9',
                      mtime=25)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj10',
                      contents=b'obj10')
    time_created = ConvertDatetimeToPOSIX(
        self._GetMetadataAttribute(bucket_uri.bucket_name, 'obj10',
                                   'timeCreated'))
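    # When a cloud object carries no mtime in its custom metadata, rsync can
    # fall back to the object's creation time for time-based comparison; obj10
    # below exercises that path by giving the local copy an mtime equal to the
    # cloud object's timeCreated.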
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj11',
                      contents=b'obj11_',
                      mtime=75)
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='.obj2',
                        contents=b'.OBJ2',
                        mtime=10)
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj4',
                        contents=b'obj4',
                        mtime=100)
    self.CreateTempFile(tmpdir=subdir,
                        file_name='obj5',
                        contents=b'subdir/obj5',
                        mtime=10)
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj6',
                        contents=b'obj6',
                        mtime=50)
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj7',
                        contents=b'OBJ7',
                        mtime=50)
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj8',
                        contents=b'obj8',
                        mtime=10)
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj9',
                        contents=b'OBJ9',
                        mtime=25)
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj10',
                        contents=b'OBJ10',
                        mtime=time_created)
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj11',
                        contents=b'obj11',
                        mtime=75)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', suri(bucket_uri), tmpdir])
      listing1 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      listing2 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEqual(
          listing1,
          set([
              '/obj1', '/.obj2', '/subdir/obj3', '/obj4', '/obj6', '/obj7',
              '/obj8', '/obj9', '/obj10', '/obj11'
          ]))
      # Dir should have content like bucket except without sub-directories
      # synced.
      self.assertEqual(
          listing2,
          set([
              '/obj1', '/.obj2', '/obj4', '/subdir/obj5', '/obj6', '/obj7',
              '/obj8', '/obj9', '/obj10', '/obj11'
          ]))
      # Assert that .obj2 was synchronized from the source: the copies had the
      # same length but different mtimes (5 at the source vs. 10 at the
      # destination), so the source copy replaced the destination copy.
      with open(os.path.join(tmpdir, '.obj2')) as f:
        self.assertEqual('.obj2', '\n'.join(f.readlines()))
      # Assert that obj4 was synchronized to dst because mtime cannot be used,
      # and the hashes are different.
      with open(os.path.join(tmpdir, 'obj4')) as f:
        self.assertEqual('OBJ4', '\n'.join(f.readlines()))
      # Verify obj9 and obj10 content didn't change because mtimes from src
      # and dst were equal. obj9 had mtimes that matched while the obj10 mtime
      # was equal to the creation time of the corresponding object.
      with open(os.path.join(tmpdir, 'obj9')) as f:
        self.assertEqual('OBJ9', '\n'.join(f.readlines()))
      # Also verifies that obj10 used time created to determine if a copy was
      # necessary.
      with open(os.path.join(tmpdir, 'obj10')) as f:
        self.assertEqual('OBJ10', '\n'.join(f.readlines()))
      with open(os.path.join(tmpdir, 'obj11')) as f:
        self.assertEqual('obj11_', '\n'.join(f.readlines()))

    _Check1()

    def _Check2():
      """Verify mtime was set for objects at destination."""
      self.assertEqual(long(os.path.getmtime(os.path.join(tmpdir, 'obj1'))), 5)
      self.assertEqual(long(os.path.getmtime(os.path.join(tmpdir, '.obj2'))), 5)
      self.assertEqual(long(os.path.getmtime(os.path.join(tmpdir, 'obj6'))), 50)
      self.assertEqual(long(os.path.getmtime(os.path.join(tmpdir, 'obj8'))),
                       100)
      self.assertEqual(long(os.path.getmtime(os.path.join(tmpdir, 'obj9'))), 25)

    _Check2()

    # Now rerun the rsync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-r', '-d', '-c', suri(bucket_uri), tmpdir])
      listing1 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      listing2 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEqual(
          listing1,
          set([
              '/obj1', '/.obj2', '/subdir/obj3', '/obj4', '/obj6', '/obj7',
              '/obj8', '/obj9', '/obj10', '/obj11'
          ]))
      # Dir should have content like bucket, this time with subdirectories
      # synced.
      self.assertEqual(
          listing2,
          set([
              '/obj1', '/.obj2', '/subdir/obj3', '/obj4', '/obj6', '/obj7',
              '/obj8', '/obj9', '/obj10', '/obj11'
          ]))
      # Assert that the contents of obj7 have now changed because the -c flag
      # was used to force checksums.
      self.assertEqual(
          'obj7',
          self.RunGsUtil(['cat', suri(bucket_uri, 'obj7')], return_stdout=True))
      self._VerifyObjectMtime(bucket_uri.bucket_name, 'obj7', '5')
      # Check the mtime of obj7 in the destination to see that it changed.
      self.assertEqual(long(os.path.getmtime(os.path.join(tmpdir, 'obj7'))), 5)
      # Verify obj9 and obj10 content has changed because hashes were used in
      # comparisons.
      with open(os.path.join(tmpdir, 'obj9')) as f:
        self.assertEqual('obj9', '\n'.join(f.readlines()))
      with open(os.path.join(tmpdir, 'obj10')) as f:
        self.assertEqual('obj10', '\n'.join(f.readlines()))

    _Check3()

  @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
  def test_bucket_to_dir_preserve_posix_errors(self):
    """Tests that rsync -P works properly with files that would be orphaned."""
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    primary_gid = os.stat(tmpdir).st_gid
    non_primary_gid = util.GetNonPrimaryGid()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    # obj1 - Invalid Mode
    obj1 = self.CreateObject(bucket_uri=bucket_uri,
                             object_name='obj1',
                             contents=b'obj1',
                             mode='222',
                             uid=os.getuid())
    # obj2 - Invalid GID
    obj2 = self.CreateObject(bucket_uri=bucket_uri,
                             object_name='.obj2',
                             contents=b'.obj2',
                             gid=INVALID_GID(),
                             mode='540')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='subdir/obj3',
                      contents=b'subdir/obj3')
    # obj6 - Invalid GID
    obj6 = self.CreateObject(bucket_uri=bucket_uri,
                             object_name='obj6',
                             contents=b'obj6',
                             gid=INVALID_GID(),
                             mode='440')
    # obj7 - Invalid Mode
    obj7 = self.CreateObject(bucket_uri=bucket_uri,
                             object_name='obj7',
                             contents=b'obj7',
                             gid=non_primary_gid,
                             mode='333')
    # obj8 - Invalid UID
    obj8 = self.CreateObject(bucket_uri=bucket_uri,
                             object_name='obj8',
                             contents=b'obj8',
                             uid=INVALID_UID())
    # obj9 - Invalid UID w/ mode 777
    obj9 = self.CreateObject(bucket_uri=bucket_uri,
                             object_name='obj9',
                             contents=b'obj9',
                             uid=INVALID_UID(),
                             mode='777')
    # obj10 - Invalid UID & GID
    obj10 = self.CreateObject(bucket_uri=bucket_uri,
                              object_name='obj10',
                              contents=b'obj10',
                              gid=INVALID_GID(),
                              uid=INVALID_UID())
    # obj11 - Invalid UID & GID with mode 544
    obj11 = self.CreateObject(bucket_uri=bucket_uri,
                              object_name='obj11',
                              contents=b'obj11',
                              gid=INVALID_GID(),
                              uid=INVALID_UID(),
                              mode='544')
    # obj12 - Invalid UID, good GID
    obj12 = self.CreateObject(bucket_uri=bucket_uri,
                              object_name='obj12',
                              contents=b'obj12',
                              uid=INVALID_UID(),
                              gid=USER_ID)
    # obj13 - Invalid UID, good GID, mode 644
    obj13 = self.CreateObject(bucket_uri=bucket_uri,
                              object_name='obj13',
                              contents=b'obj13',
                              uid=INVALID_UID(),
                              gid=primary_gid,
                              mode='644')
    # obj14 - Good UID, Invalid GID, no mode specified
    obj14 = self.CreateObject(bucket_uri=bucket_uri,
                              object_name='obj14',
                              contents=b'obj14',
                              uid=USER_ID,
                              gid=INVALID_GID())
    # obj15 - Good UID, Invalid GID, mode 655
    obj15 = self.CreateObject(bucket_uri=bucket_uri,
                              object_name='obj15',
                              contents=b'obj15',
                              uid=USER_ID,
                              gid=INVALID_GID(),
                              mode='655')
    # obj16 - Insufficient access permissions - mode 244
    obj16 = self.CreateObject(bucket_uri=bucket_uri,
                              object_name='obj16',
                              contents=b'obj16',
                              uid=USER_ID,
                              mode='244')
    # obj17 - Invalid Mode
    obj17 = self.CreateObject(bucket_uri=bucket_uri,
                              object_name='obj17',
                              contents=b'obj17',
                              uid=USER_ID,
                              gid=primary_gid,
                              mode='222')
    # obj18 - Invalid GID, mode 333
    obj18 = self.CreateObject(bucket_uri=bucket_uri,
                              object_name='obj18',
                              contents=b'obj18',
                              uid=USER_ID,
                              gid=non_primary_gid,
                              mode='333')
    # obj19 - No UID/GID, mode 222
    obj19 = self.CreateObject(bucket_uri=bucket_uri,
                              object_name='obj19',
                              contents=b'obj19',
                              mode='222')
    self.CreateTempFile(tmpdir=tmpdir, file_name='.obj2', contents=b'.OBJ2')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj4', contents=b'obj4')
    self.CreateTempFile(tmpdir=subdir,
                        file_name='obj5',
                        contents=b'subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests that an exception is thrown because files will be orphaned."""
      stderr = self.RunGsUtil(
          ['rsync', '-P', '-r', suri(bucket_uri), tmpdir],
          expected_status=1,
          return_stderr=True)
      if self._use_gcloud_storage:
        gcloud_preserve_posix_warning = (
            r'For preserving POSIX with rsync downloads, gsutil aborts if a single'
            r' download will result in invalid destination POSIX. However, this'
        )
        self.assertRegex(stderr, gcloud_preserve_posix_warning)

        read_regex = r"{}#\d+\. User \d+ owns file, but owner does not have read"
        gid_regex = r"{}#\d+ metadata doesn't exist on current system\. GID"
        uid_regex = r"{}#\d+ metadata doesn't exist on current system\. UID"
        self.assertRegex(stderr, read_regex.format('obj1'))
        self.assertRegex(stderr, gid_regex.format('obj2'))
        self.assertRegex(stderr, gid_regex.format('obj6'))
        self.assertRegex(stderr, read_regex.format('obj7'))
        self.assertRegex(stderr, uid_regex.format('obj8'))
        self.assertRegex(stderr, uid_regex.format('obj9'))
        self.assertRegex(stderr, uid_regex.format('obj10'))
        self.assertRegex(stderr, uid_regex.format('obj11'))
        self.assertRegex(stderr, uid_regex.format('obj12'))
        self.assertRegex(stderr, uid_regex.format('obj13'))
        self.assertRegex(stderr, gid_regex.format('obj14'))
        self.assertRegex(stderr, gid_regex.format('obj15'))
        self.assertRegex(stderr, read_regex.format('obj16'))
        self.assertRegex(stderr, read_regex.format('obj17'))
        self.assertRegex(stderr, read_regex.format('obj18'))
        self.assertRegex(stderr, read_regex.format('obj19'))
      else:
        self.assertIn(ORPHANED_FILE, stderr)
        self.assertRegex(stderr, BuildErrorRegex(obj1, POSIX_MODE_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj2, POSIX_GID_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj6, POSIX_GID_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj7, POSIX_MODE_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj8, POSIX_UID_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj9, POSIX_UID_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj10, POSIX_UID_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj11, POSIX_UID_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj12, POSIX_UID_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj13, POSIX_UID_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj14, POSIX_GID_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj15, POSIX_GID_ERROR))
        self.assertRegex(
            stderr, BuildErrorRegex(obj16, POSIX_INSUFFICIENT_ACCESS_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj17, POSIX_MODE_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj18, POSIX_MODE_ERROR))
        self.assertRegex(stderr, BuildErrorRegex(obj19, POSIX_MODE_ERROR))
      listing1 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      listing2 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEqual(
          listing1,
          set([
              '/obj1', '/.obj2', '/subdir/obj3', '/obj6', '/obj7', '/obj8',
              '/obj9', '/obj10', '/obj11', '/obj12', '/obj13', '/obj14',
              '/obj15', '/obj16', '/obj17', '/obj18', '/obj19'
          ]))
      if self._use_gcloud_storage:
        # Gcloud still performs valid copies whereas gsutil aborts if one
        # will result in invalid POSIX.
        self.assertEqual(
            listing2, set(['/.obj2', '/obj4', '/subdir/obj3', '/subdir/obj5']))
      else:
        # Dir should have un-altered content.
        self.assertEqual(listing2, set(['/.obj2', '/obj4', '/subdir/obj5']))

    _Check1()

    # Set a valid mode for another object.
    self._SetObjectCustomMetadataAttribute(self.default_provider,
                                           bucket_uri.bucket_name, '.obj2',
                                           MODE_ATTR, '640')
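    # POSIX attributes ride along as custom object metadata (keys like mode,
    # uid, gid, mtime). A rough CLI equivalent of the call above -- assuming
    # the default gs provider, where custom metadata surfaces under the
    # x-goog-meta-* namespace -- would be:
    #
    #   gsutil setmeta -h "x-goog-meta-mode:640" gs://<bucket>/.obj2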

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      """Tests that, even with one file's mode now valid, nothing changed."""
      # Test that even though a file now has a valid mode, no files were
      # copied.
      stderr = self.RunGsUtil(
          ['rsync', '-P', '-r', suri(bucket_uri), tmpdir],
          expected_status=1,
          return_stderr=True)
      if self._use_gcloud_storage:
        self.assertIn("doesn't exist on current system. GID:", stderr)
      else:
        self.assertIn(ORPHANED_FILE, stderr)
      listing1 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      listing2 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEqual(
          listing1,
          set([
              '/obj1', '/.obj2', '/subdir/obj3', '/obj6', '/obj7', '/obj8',
              '/obj9', '/obj10', '/obj11', '/obj12', '/obj13', '/obj14',
              '/obj15', '/obj16', '/obj17', '/obj18', '/obj19'
          ]))
      if self._use_gcloud_storage:
        # See note from _Check1.
        self.assertEqual(
            listing2, set(['/.obj2', '/obj4', '/subdir/obj3', '/subdir/obj5']))
      else:
        self.assertEqual(listing2, set(['/.obj2', '/obj4', '/subdir/obj5']))

    _Check2()

  @SequentialAndParallelTransfer
  @unittest.skipIf(IS_WINDOWS, 'POSIX attributes not available on Windows.')
  @unittest.skipUnless(UsingCrcmodExtension(), 'Test requires fast crcmod.')
  def test_bucket_to_dir_preserve_posix_no_errors(self):
    """Tests that rsync -P works properly with default file attributes."""
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    primary_gid = os.stat(tmpdir).st_gid
    non_primary_gid = util.GetNonPrimaryGid()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj1',
                      contents=b'obj1',
                      mode='444')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='.obj2',
                      contents=b'.obj2',
                      gid=primary_gid)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='subdir/obj3',
                      contents=b'subdir/obj3',
                      gid=non_primary_gid)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj6',
                      contents=b'obj6',
                      gid=primary_gid,
                      mode='555')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj7',
                      contents=b'obj7',
                      gid=non_primary_gid,
                      mode='444')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj8',
                      contents=b'obj8',
                      uid=USER_ID)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj9',
                      contents=b'obj9',
                      uid=USER_ID,
                      mode='422')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj10',
                      contents=b'obj10',
                      uid=USER_ID,
                      gid=primary_gid)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj11',
                      contents=b'obj11',
                      uid=USER_ID,
                      gid=non_primary_gid)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj12',
                      contents=b'obj12',
                      uid=USER_ID,
                      gid=primary_gid,
                      mode='400')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj13',
                      contents=b'obj13',
                      uid=USER_ID,
                      gid=non_primary_gid,
                      mode='533')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj14',
                      contents=b'obj14',
                      uid=USER_ID,
                      mode='444')
    self.CreateTempFile(tmpdir=tmpdir, file_name='.obj2', contents=b'.OBJ2')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj4', contents=b'obj4')
    self.CreateTempFile(tmpdir=subdir,
                        file_name='obj5',
                        contents=b'subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Verifies that all attributes were copied correctly when -P is used."""
      self.RunGsUtil(['rsync', '-P', '-r', suri(bucket_uri), tmpdir])
      listing1 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      listing2 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEqual(
          listing1,
          set([
              '/obj1', '/.obj2', '/subdir/obj3', '/obj6', '/obj7', '/obj8',
              '/obj9', '/obj10', '/obj11', '/obj12', '/obj13', '/obj14'
          ]))
      # Dir should have any new content from bucket.
      self.assertEqual(
          listing2,
          set([
              '/obj1', '/.obj2', '/subdir/obj3', '/obj4', '/subdir/obj5',
              '/obj6', '/obj7', '/obj8', '/obj9', '/obj10', '/obj11', '/obj12',
              '/obj13', '/obj14'
          ]))

    _Check1()

    self.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj1'),
                                     uid=os.getuid(),
                                     mode=0o444)
    self.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, '.obj2'),
                                     gid=primary_gid,
                                     uid=os.getuid(),
                                     mode=DEFAULT_MODE)
    self.VerifyLocalPOSIXPermissions(os.path.join(subdir, 'obj3'),
                                     gid=non_primary_gid,
                                     mode=DEFAULT_MODE)
    self.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj6'),
                                     gid=primary_gid,
                                     mode=0o555)
    self.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj7'),
                                     gid=non_primary_gid,
                                     mode=0o444)
    self.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj8'),
                                     gid=primary_gid,
                                     mode=DEFAULT_MODE)
    self.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj9'),
                                     uid=USER_ID,
                                     mode=0o422)
    self.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj10'),
                                     uid=USER_ID,
                                     gid=primary_gid,
                                     mode=DEFAULT_MODE)
    self.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj11'),
                                     uid=USER_ID,
                                     gid=non_primary_gid,
                                     mode=DEFAULT_MODE)
    self.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj12'),
                                     uid=USER_ID,
                                     gid=primary_gid,
                                     mode=0o400)
    self.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj13'),
                                     uid=USER_ID,
                                     gid=non_primary_gid,
                                     mode=0o533)
    self.VerifyLocalPOSIXPermissions(os.path.join(tmpdir, 'obj14'),
                                     uid=USER_ID,
                                     mode=0o444)

  @unittest.skipUnless(UsingCrcmodExtension(), 'Test requires fast crcmod.')
  def test_bucket_to_dir_minus_d(self):
    """Tests that flat and recursive rsync bucket to dir works correctly."""
    # Create bucket and dir with 1 overlapping object, 1 extra object at root
    # level in each, and 1 extra object 1 level down in each, where one of the
    # objects starts with "." to test that we don't skip those objects. Make
    # the overlapping objects named the same but with different content, to
    # test that we detect and properly copy in that case.
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj1',
                      contents=b'obj1')
    # Set the mtime for obj2 because obj2 in the cloud and obj2 on the local
    # file system have the potential to be created during the same second.
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='.obj2',
                      contents=b'.obj2',
                      mtime=0)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='subdir/obj3',
                      contents=b'subdir/obj3')
    self.CreateTempFile(tmpdir=tmpdir, file_name='.obj2', contents=b'.OBJ2')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj4', contents=b'obj4')
    self.CreateTempFile(tmpdir=subdir,
                        file_name='obj5',
                        contents=b'subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', suri(bucket_uri), tmpdir])
      listing1 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      listing2 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEqual(listing1, set(['/obj1', '/.obj2', '/subdir/obj3']))
      # Dir should have content like bucket but without the subdir objects
      # synchronized.
      self.assertEqual(listing2, set(['/obj1', '/.obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (bucket to dir rsync uses checksums as a
      # backup to mtime unless you specify -c).
      self.assertEqual(
          '.obj2',
          self.RunGsUtil(['cat', suri(bucket_uri, '.obj2')],
                         return_stdout=True))
      with open(os.path.join(tmpdir, '.obj2')) as f:
        self.assertEqual('.obj2', '\n'.join(f.readlines()))

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-d', suri(bucket_uri), tmpdir], return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check2()

    # Now rerun the rsync with the -c option.
    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      """Tests rsync -c works as expected."""
      self.RunGsUtil(['rsync', '-d', '-c', suri(bucket_uri), tmpdir])
      listing1 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      listing2 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEqual(listing1, set(['/obj1', '/.obj2', '/subdir/obj3']))
      # Dir should have content like bucket but without the subdir objects
      # synchronized.
      self.assertEqual(listing2, set(['/obj1', '/.obj2', '/subdir/obj5']))
      # Assert that the src/dest objects that had same length but different
      # content were synchronized (bucket to dir rsync with -c uses checksums).
      self.assertEqual(
          '.obj2',
          self.RunGsUtil(['cat', suri(bucket_uri, '.obj2')],
                         return_stdout=True))
      with open(os.path.join(tmpdir, '.obj2')) as f:
        self.assertEqual('.obj2', '\n'.join(f.readlines()))

    _Check3()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check4():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-d', '-c', suri(bucket_uri), tmpdir], return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check4()

    # Now add and remove some objects in dir and bucket and test rsync -r.
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj6',
                      contents=b'obj6')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj7', contents=b'obj7')
    self.RunGsUtil(['rm', suri(bucket_uri, 'obj1')])
    os.unlink(os.path.join(tmpdir, '.obj2'))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check5():
      self.RunGsUtil(['rsync', '-d', '-r', suri(bucket_uri), tmpdir])
      listing1 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      listing2 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEqual(listing1, set(['/.obj2', '/obj6', '/subdir/obj3']))
      # Dir should now have content like bucket, this time with the subdir
      # objects synchronized (because of -r).
      self.assertEqual(listing2, set(['/.obj2', '/obj6', '/subdir/obj3']))

    _Check5()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check6():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check6()

  @unittest.skipUnless(UsingCrcmodExtension(), 'Test requires fast crcmod.')
  def test_bucket_to_dir_minus_d_with_fname_case_change(self):
    """Tests that name case changes work correctly.

    Example:

    Windows filenames are case-preserving in what you wrote, but
    case-insensitive when compared. If you synchronize from FS to cloud and
    then change case-naming in local files, you could end up with this
    situation:

    Cloud copy is called .../TiVo/...
    FS copy is called .../Tivo/...

    Then, if you rsync from cloud to FS, if rsync doesn't recognize that on
    Windows these names are identical, each rsync run will cause both a copy
    and a delete to be executed.
    """
    # Create bucket and dir with same objects, but dir copy has different name
    # case.
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj1',
                      contents=b'obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='Obj1', contents=b'obj1')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      output = self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
      # Nothing should be copied or removed under Windows.
      if IS_WINDOWS:
        self.assertEqual(NO_CHANGES, output)
      else:
        self.assertNotEqual(NO_CHANGES, output)

    _Check1()

  @unittest.skipUnless(UsingCrcmodExtension(), 'Test requires fast crcmod.')
  @SkipForS3('The boto lib used for S3 does not handle objects'
             ' starting with slashes if we use V4 signature.')
  def test_bucket_to_dir_minus_d_with_leftover_dir_placeholder(self):
    """Tests that we correctly handle leftover dir placeholders.

    See comments in gslib.commands.rsync._FieldedListingIterator for details.
    """
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj1',
                      contents=b'obj1')
    # Create a placeholder like what can be left over by web GUI tools.
    key_uri = self.StorageUriCloneReplaceName(bucket_uri, '/')
    self.StorageUriSetContentsFromString(key_uri, '')
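    # The placeholder is a zero-byte object whose name is just '/'; it has no
    # valid local filename equivalent, so rsync should skip it rather than
    # try to create a file for it in the destination directory.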

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(
          ['rsync', '-d', '-r', suri(bucket_uri), tmpdir], return_stderr=True)
      listing1 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      listing2 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      # Bucket should have un-altered content.
      self.assertEqual(listing1, set(['/obj1', '//']))
      # Dir should not have the placeholder object.
      self.assertEqual(listing2, set(['/obj1']))

    _Check1()

  @unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.')
  def test_rsync_minus_r_minus_e(self):
    """Tests that rsync -e -r ignores symlinks when recursing."""
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    os.mkdir(os.path.join(tmpdir, 'missing'))
    # Create a symlink to a directory, then remove the directory so the
    # symlink dangles; this ensures we don't fail recursive enumeration on a
    # broken symlink.
    os.symlink(os.path.join(tmpdir, 'missing'), os.path.join(subdir, 'missing'))
    os.rmdir(os.path.join(tmpdir, 'missing'))
    self.RunGsUtil(['rsync', '-r', '-e', tmpdir, suri(bucket_uri)])

  @unittest.skipIf(IS_WINDOWS, 'os.symlink() is not available on Windows.')
  def test_rsync_minus_d_minus_e(self):
    """Tests that rsync -e ignores symlinks."""
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_uri = self.CreateBucket()
    fpath1 = self.CreateTempFile(tmpdir=tmpdir,
                                 file_name='obj1',
                                 contents=b'obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='.obj2', contents=b'.obj2')
    self.CreateTempFile(tmpdir=subdir,
                        file_name='obj3',
                        contents=b'subdir/obj3')
    good_symlink_path = os.path.join(tmpdir, 'symlink1')
    os.symlink(fpath1, good_symlink_path)
    # Make a symlink that points to a non-existent path to test that -e also
    # handles that case.
    bad_symlink_path = os.path.join(tmpdir, 'symlink2')
    os.symlink(os.path.join('/', 'non-existent'), bad_symlink_path)
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='.obj2',
                      contents=b'.OBJ2')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj4',
                      contents=b'obj4')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='subdir/obj5',
                      contents=b'subdir/obj5')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Ensure listings match the commented expectations."""
      self.RunGsUtil(['rsync', '-d', '-e', tmpdir, suri(bucket_uri)])
      listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      listing2 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEqual(
          listing1,
          set(['/obj1', '/.obj2', '/subdir/obj3', '/symlink1', '/symlink2']))
      # Bucket should have content like dir but without the symlinks, and
      # without subdir objects synchronized.
      self.assertEqual(listing2, set(['/obj1', '/.obj2', '/subdir/obj5']))

    _Check1()

    # Now remove the invalid symlink and run without -e, and see that the
    # symlink gets copied (as the file to which it points). Use @Retry as
    # hedge against bucket listing eventual consistency.
    os.unlink(bad_symlink_path)

    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', tmpdir, suri(bucket_uri)])
      listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      listing2 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEqual(listing1,
                       set(['/obj1', '/.obj2', '/subdir/obj3', '/symlink1']))
      # Bucket should have content like dir, now including the symlink (copied
      # as a regular object), but without subdir objects synchronized.
      self.assertEqual(listing2,
                       set(['/obj1', '/.obj2', '/subdir/obj5', '/symlink1']))
      self.assertEqual(
          'obj1',
          self.RunGsUtil(['cat', suri(bucket_uri, 'symlink1')],
                         return_stdout=True))

    _Check2()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check3():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-d', tmpdir, suri(bucket_uri)], return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check3()

  @SkipForS3('S3 does not support composite objects')
  def test_bucket_to_bucket_minus_d_with_composites(self):
    """Tests that rsync works with composite objects (which don't have MD5s)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj1',
                      contents=b'obj1')
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='.obj2',
                      contents=b'.obj2')
    self.RunGsUtil([
        'compose',
        suri(bucket1_uri, 'obj1'),
        suri(bucket1_uri, '.obj2'),
        suri(bucket1_uri, 'obj3')
    ])
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='.obj2',
                      contents=b'.OBJ2')
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='obj4',
                      contents=b'obj4')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = TailSet(suri(bucket1_uri), self.FlatListBucket(bucket1_uri))
      listing2 = TailSet(suri(bucket2_uri), self.FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEqual(listing1, set(['/obj1', '/.obj2', '/obj3']))
      # Second bucket should now have the same content as the first bucket.
      self.assertEqual(listing2, set(['/obj1', '/.obj2', '/obj3']))

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri),
           suri(bucket2_uri)],
          return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check2()

  def test_bucket_to_bucket_minus_d_empty_dest(self):
    """Tests working with empty dest bucket (iter runs out before src iter)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj1',
                      contents=b'obj1')
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='.obj2',
                      contents=b'.obj2')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = TailSet(suri(bucket1_uri), self.FlatListBucket(bucket1_uri))
      listing2 = TailSet(suri(bucket2_uri), self.FlatListBucket(bucket2_uri))
      self.assertEqual(listing1, set(['/obj1', '/.obj2']))
      self.assertEqual(listing2, set(['/obj1', '/.obj2']))

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri),
           suri(bucket2_uri)],
          return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check2()

  def test_bucket_to_bucket_minus_d_empty_src(self):
    """Tests working with empty src bucket (iter runs out before dst iter)."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='obj1',
                      contents=b'obj1')
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='.obj2',
                      contents=b'.obj2')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      self.RunGsUtil(['rsync', '-d', suri(bucket1_uri), suri(bucket2_uri)])
      stderr = self.RunGsUtil(['ls', suri(bucket1_uri, '**')],
                              expected_status=1,
                              return_stderr=True)
      self.assertIn('One or more URLs matched no objects', stderr)
      stderr = self.RunGsUtil(['ls', suri(bucket2_uri, '**')],
                              expected_status=1,
                              return_stderr=True)
      self.assertIn('One or more URLs matched no objects', stderr)

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri),
           suri(bucket2_uri)],
          return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check2()

  def test_rsync_minus_d_minus_p(self):
    """Tests that rsync -p preserves ACLs."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj1',
                      contents=b'obj1')
    # Set public-read (non-default) ACL so we can verify that rsync -p works.
    self.RunGsUtil(['acl', 'set', 'public-read', suri(bucket1_uri, 'obj1')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync -p works as expected."""
      self.RunGsUtil(
          ['rsync', '-d', '-p',
           suri(bucket1_uri),
           suri(bucket2_uri)])
      listing1 = TailSet(suri(bucket1_uri), self.FlatListBucket(bucket1_uri))
      listing2 = TailSet(suri(bucket2_uri), self.FlatListBucket(bucket2_uri))
      self.assertEqual(listing1, set(['/obj1']))
      self.assertEqual(listing2, set(['/obj1']))
      acl1_json = self.RunGsUtil(
          ['acl', 'get', suri(bucket1_uri, 'obj1')], return_stdout=True)
      acl2_json = self.RunGsUtil(
          ['acl', 'get', suri(bucket2_uri, 'obj1')], return_stdout=True)
      self.assertEqual(acl1_json, acl2_json)

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-d', '-p',
           suri(bucket1_uri),
           suri(bucket2_uri)],
          return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check2()

  def test_rsync_canned_acl(self):
    """Tests that rsync -a applies ACLs."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj1',
                      contents=b'obj1')
    self.RunGsUtil(['acl', 'get', suri(bucket1_uri, 'obj1')])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check():
      """Tests rsync -a works as expected."""
      self.RunGsUtil([
          'rsync', '-d', '-a', 'public-read',
          suri(bucket1_uri),
          suri(bucket2_uri)
      ])
      listing1 = TailSet(suri(bucket1_uri), self.FlatListBucket(bucket1_uri))
      listing2 = TailSet(suri(bucket2_uri), self.FlatListBucket(bucket2_uri))
      self.assertEqual(listing1, set(['/obj1']))
      self.assertEqual(listing2, set(['/obj1']))
      # Set public-read on the original key after the rsync so we can compare
      # the ACLs.
      self.RunGsUtil(['acl', 'set', 'public-read', suri(bucket1_uri, 'obj1')])
      acl1_json = self.RunGsUtil(
          ['acl', 'get', suri(bucket1_uri, 'obj1')], return_stdout=True)
      acl2_json = self.RunGsUtil(
          ['acl', 'get', suri(bucket2_uri, 'obj1')], return_stdout=True)
      self.assertEqual(acl1_json, acl2_json)

    _Check()

  def test_rsync_to_nonexistent_bucket_subdir(self):
    """Tests that rsync to non-existent bucket subdir works."""
    # Create dir with some objects and empty bucket.
    tmpdir = self.CreateTempDir()
    subdir = os.path.join(tmpdir, 'subdir')
    os.mkdir(subdir)
    bucket_url = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents=b'obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='.obj2', contents=b'.obj2')
    self.CreateTempFile(tmpdir=subdir,
                        file_name='obj3',
                        contents=b'subdir/obj3')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-r', tmpdir, suri(bucket_url, 'subdir')])
      listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      listing2 = TailSet(
          suri(bucket_url, 'subdir'),
          self.FlatListBucket(
              self.StorageUriCloneReplaceName(bucket_url, 'subdir')))
      # Dir should have un-altered content.
      self.assertEqual(listing1, set(['/obj1', '/.obj2', '/subdir/obj3']))
      # Bucket subdir should have content like dir.
      self.assertEqual(listing2, set(['/obj1', '/.obj2', '/subdir/obj3']))

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-r', tmpdir,
           suri(bucket_url, 'subdir')],
          return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check2()

  def test_rsync_to_nonexistent_bucket_subdir_prefix_of_existing_obj(self):
    """Tests that rsync with destination url as a prefix of existing obj works.

    Test to make sure that a dir/subdir gets created if it does not exist,
    even when the new dir is a prefix of an existing dir.
    E.g., if gs://some_bucket/foobar exists, and we run the command

      rsync some_dir gs://some_bucket/foo

    this should create the subdir foo.
    """
    # Create dir with some objects and empty bucket.
    tmpdir = self.CreateTempDir()
    bucket_url = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket_url,
                      object_name='foobar',
                      contents=b'obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents=b'obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='.obj2', contents=b'.obj2')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-r', tmpdir, suri(bucket_url, 'foo')])
      listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      listing2 = TailSet(
          suri(bucket_url, 'foo'),
          self.FlatListBucket(self.StorageUriCloneReplaceName(
              bucket_url, 'foo')))
      # Dir should have un-altered content.
      self.assertEqual(listing1, set(['/obj1', '/.obj2']))
      # Bucket subdir should have content like dir.
      self.assertEqual(listing2, set(['/obj1', '/.obj2']))

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(['rsync', '-r', tmpdir,
                               suri(bucket_url, 'foo')],
                              return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check2()

  def test_rsync_from_nonexistent_bucket(self):
    """Tests that rsync from a non-existent bucket fails gracefully."""
    tmpdir = self.CreateTempDir()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents=b'obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='.obj2', contents=b'.obj2')
    bucket_url_str = '%s://%s' % (self.default_provider,
                                  self.nonexistent_bucket_name)

    # TODO(b/135780661): Remove retry after bug resolved
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check():
      stderr = self.RunGsUtil(['rsync', '-d', bucket_url_str, tmpdir],
                              expected_status=1,
                              return_stderr=True)
      if self._use_gcloud_storage:
        self.assertIn('not found: 404', stderr)
      else:
        self.assertIn('Caught non-retryable exception', stderr)
      listing = TailSet(tmpdir, self.FlatListDir(tmpdir))
      # Dir should have un-altered content.
      self.assertEqual(listing, set(['/obj1', '/.obj2']))

    _Check()

  def test_rsync_to_nonexistent_bucket(self):
    """Tests that rsync to a non-existent bucket fails gracefully."""
    tmpdir = self.CreateTempDir()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents=b'obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='.obj2', contents=b'.obj2')
    bucket_url_str = '%s://%s' % (self.default_provider,
                                  self.nonexistent_bucket_name)

    # TODO(b/135780661): Remove retry after bug resolved
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check():
      stderr = self.RunGsUtil(['rsync', '-d', tmpdir, bucket_url_str],
                              expected_status=1,
                              return_stderr=True)
      if self._use_gcloud_storage:
        self.assertIn('not found: 404', stderr)
      else:
        self.assertIn('Caught non-retryable exception', stderr)
      listing = TailSet(tmpdir, self.FlatListDir(tmpdir))
      # Dir should have un-altered content.
      self.assertEqual(listing, set(['/obj1', '/.obj2']))

    _Check()

  def test_bucket_to_bucket_minus_d_with_overwrite_and_punc_chars(self):
    """Tests that punc chars in filenames don't confuse sort order."""
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    # Create 2 objects in each bucket, with one overwritten with a name that's
    # less than the next name in destination bucket when encoded, but not when
    # compared without encoding.
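    # Concretely: '/' is 0x2F and '-' is 0x2D in ASCII, so the raw names sort
    # as 'e-1/...' < 'e/...', while a listing that encodes or delimits on '/'
    # could order them the other way. A sketch of the raw byte comparison
    # only:
    #
    #   >>> 'e/obj1' < 'e-1/.obj2'
    #   False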
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='e/obj1',
                      contents=b'obj1')
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='e-1/.obj2',
                      contents=b'.obj2')
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='e/obj1',
                      contents=b'OBJ1')
    self.CreateObject(bucket_uri=bucket2_uri,
                      object_name='e-1/.obj2',
                      contents=b'.obj2')
    # Need to make sure the bucket listings are caught-up, otherwise the
    # rsync may not see all objects and fail to synchronize correctly.
    self.AssertNObjectsInBucket(bucket1_uri, 2)
    self.AssertNObjectsInBucket(bucket2_uri, 2)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-rd', suri(bucket1_uri), suri(bucket2_uri)])
      listing1 = TailSet(suri(bucket1_uri), self.FlatListBucket(bucket1_uri))
      listing2 = TailSet(suri(bucket2_uri), self.FlatListBucket(bucket2_uri))
      # First bucket should have un-altered content.
      self.assertEqual(listing1, set(['/e/obj1', '/e-1/.obj2']))
      self.assertEqual(listing2, set(['/e/obj1', '/e-1/.obj2']))
      # Assert correct contents.
      self.assertEqual(
          'obj1',
          self.RunGsUtil(['cat', suri(bucket2_uri, 'e/obj1')],
                         return_stdout=True))
      self.assertEqual(
          '.obj2',
          self.RunGsUtil(['cat', suri(bucket2_uri, 'e-1/.obj2')],
                         return_stdout=True))

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-d', suri(bucket1_uri),
           suri(bucket2_uri)],
          return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check2()
|
|
|
|
  def _test_dir_to_bucket_regex_parameterized(self, flag):
    """Tests that rsync regex exclusions work correctly."""
    # Create dir and bucket with 1 overlapping and 2 extra objects in each.
    tmpdir = self.CreateTempDir()
    bucket_uri = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents=b'obj1')
    self.CreateTempFile(tmpdir=tmpdir, file_name='.obj2', contents=b'.obj2')
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj3', contents=b'obj3')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='.obj2',
                      contents=b'.obj2')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj4',
                      contents=b'obj4')
    self.CreateObject(bucket_uri=bucket_uri,
                      object_name='obj5',
                      contents=b'obj5')

    # Need to make sure the bucket listing is caught up, otherwise the
    # first rsync may not see .obj2 and overwrite it.
    self.AssertNObjectsInBucket(bucket_uri, 3)

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check1():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-d', flag, 'obj[34]', tmpdir, suri(bucket_uri)])
      listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      listing2 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEqual(listing1, set(['/obj1', '/.obj2', '/obj3']))
      # Bucket should have content like dir but ignoring obj3 from dir and not
      # deleting obj4 from bucket (per exclude regex).
      self.assertEqual(listing2, set(['/obj1', '/.obj2', '/obj4']))

    _Check1()

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check2():
      # Check that re-running the same rsync command causes no more changes.
      stderr = self.RunGsUtil(
          ['rsync', '-d', flag, 'obj[34]', tmpdir,
           suri(bucket_uri)],
          return_stderr=True)
      self._VerifyNoChanges(stderr)

    _Check2()

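  # Note (summarized from `gsutil help rsync`): -x and -y both take a Python
  # regex, and files/objects whose relative path matches are neither copied
  # nor deleted. -y additionally skips iterating inside matching directories,
  # which is faster but, as the negative lookahead tests further below show,
  # can change behavior for patterns that rely on lookaheads.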
  def test_dir_to_bucket_minus_x(self):
    """Tests that rsync regex exclusions work correctly for -x."""
    self._test_dir_to_bucket_regex_parameterized('-x')

  def test_dir_to_bucket_minus_y(self):
    """Tests that rsync regex exclusions work correctly for -y."""
    self._test_dir_to_bucket_regex_parameterized('-y')

  def _test_dir_to_bucket_regex_negative_lookahead(self, flag, includes):
    """Tests whether files kept only via a negative lookahead get copied."""
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir(test_files=[
        'a', 'b', 'c', ('data1', 'a.txt'), ('data1', 'ok'),
        ('data2', 'b.txt'), ('data3', 'data4', 'c.txt')
    ])
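
    # Illustration (not executed by this test): the exclude pattern used
    # below matches any path that does NOT end in '.txt', so with -x only
    # the .txt files survive the exclusion and get copied:
    #   >>> import re
    #   >>> bool(re.match(r'^(?!.*\.txt$).*', 'data1/a.txt'))
    #   False
    #   >>> bool(re.match(r'^(?!.*\.txt$).*', 'data1/ok'))
    #   True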
    self.RunGsUtil(
        ['rsync', '-r', flag, r'^(?!.*\.txt$).*', tmpdir,
         suri(bucket_uri)],
        return_stderr=True)
    listing = TailSet(tmpdir, self.FlatListDir(tmpdir))
    self.assertEqual(
        listing,
        set([
            '/a', '/b', '/c', '/data1/a.txt', '/data1/ok', '/data2/b.txt',
            '/data3/data4/c.txt'
        ]))
    if includes:
      actual = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      self.assertEqual(
          actual, set(['/data1/a.txt', '/data2/b.txt', '/data3/data4/c.txt']))
    else:
      stderr = self.RunGsUtil(['ls', suri(bucket_uri, '**')],
                              expected_status=1,
                              return_stderr=True)
      self.assertIn('One or more URLs matched no objects', stderr)

  def test_dir_to_bucket_negative_lookahead_works_in_minus_x(self):
    """Test that rsync -x negative lookahead includes objects/files."""
    self._test_dir_to_bucket_regex_negative_lookahead('-x', includes=True)

  def test_dir_to_bucket_negative_lookahead_breaks_in_minus_y(self):
    """Test that rsync -y negative lookahead does not include objects/files."""
    self._test_dir_to_bucket_regex_negative_lookahead('-y', includes=False)

  def _test_dir_to_bucket_relative_regex_parameterized(self, flag, skip_dirs):
    """Test that rsync regex options work with a relative regex per the docs."""
    tmpdir = self.CreateTempDir(test_files=[
        'a', 'b', 'c', ('data1', 'a.txt'), ('data1', 'ok'),
        ('data2', 'b.txt'), ('data3', 'data4', 'c.txt')
    ])

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _check_exclude_regex(exclude_regex, expected):
      """Tests rsync skips the excluded pattern."""
      bucket_uri = self.CreateBucket()
      stderr = ''
      # Add a trailing slash to the source directory to ensure it's removed.
      local = tmpdir + ('\\' if IS_WINDOWS else '/')
      stderr += self.RunGsUtil(
          ['rsync', '-r', flag, exclude_regex, local,
           suri(bucket_uri)],
          return_stderr=True)

      listing = TailSet(tmpdir, self.FlatListDir(tmpdir))
      self.assertEqual(
          listing,
          set([
              '/a', '/b', '/c', '/data1/a.txt', '/data1/ok', '/data2/b.txt',
              '/data3/data4/c.txt'
          ]))
      actual = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      self.assertEqual(actual, expected)
      return stderr

    def _Check1():
      """Ensure the example exclude pattern from the docs works as expected."""
      _check_exclude_regex('data.[/\\\\].*\\.txt$',
                           set(['/a', '/b', '/c', '/data1/ok']))

    _Check1()

    def _Check2():
      """Tests that a regex with a pipe works as expected."""
      if self._use_gcloud_storage and IS_WINDOWS:
        # The pipe character must be escaped for the Windows cmd shell. A
        # single ^ does not escape | as expected here, but ^^^ does, so the
        # input is adjusted for the Windows shim-mode gsutil test.
        _check_exclude_regex('^data^^^|[bc]$', set(['/a']))
      else:
        _check_exclude_regex('^data|[bc]$', set(['/a']))

    _Check2()

    def _Check3():
      """Tests that directories are skipped from iteration as expected."""
      stderr = _check_exclude_regex(
          'data3',
          set(['/a', '/b', '/c', '/data1/ok', '/data1/a.txt', '/data2/b.txt']))
      self.assertIn(
          'Skipping excluded directory {}...'.format(
              os.path.join(tmpdir, 'data3')), stderr)
      self.assertNotIn(
          'Skipping excluded directory {}...'.format(
              os.path.join(tmpdir, 'data3', 'data4')), stderr)

    if skip_dirs:
      _Check3()

  def test_dir_to_bucket_relative_minus_x(self):
    """Test that rsync -x option works with a relative regex per the docs."""
    self._test_dir_to_bucket_relative_regex_parameterized('-x', skip_dirs=False)

  def test_dir_to_bucket_relative_minus_y(self):
    """Test that rsync -y option works with a relative regex per the docs."""
    self._test_dir_to_bucket_relative_regex_parameterized('-y', skip_dirs=True)

  @unittest.skipIf(IS_WINDOWS,
                   "os.chmod() won't make file unreadable on Windows.")
  def test_dir_to_bucket_minus_C(self):
    """Tests that rsync -C option works correctly."""
    # Create dir with 3 objects, the middle of which is unreadable.
    tmpdir = self.CreateTempDir()
    bucket_uri = self.CreateBucket()
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents=b'obj1')
    path = self.CreateTempFile(tmpdir=tmpdir,
                               file_name='obj2',
                               contents=b'obj2')
    os.chmod(path, 0)
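    # Mode 0 clears every permission bit (the equivalent of `chmod 000`), so
    # opening obj2 for upload fails with a permission error on POSIX systems;
    # with -C, rsync should log that failure and continue with the rest.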
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj3', contents=b'obj3')

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check():
      """Tests rsync works as expected."""
      stderr = self.RunGsUtil(
          ['rsync', '-C', tmpdir, suri(bucket_uri)],
          expected_status=1,
          return_stderr=True)
      if self._use_gcloud_storage:
        self.assertIn("Permission denied: '{}'".format(path), stderr)
      else:
        self.assertIn('1 files/objects could not be copied/removed.', stderr)
      listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      listing2 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      # Dir should have un-altered content.
      self.assertEqual(listing1, set(['/obj1', '/obj2', '/obj3']))
      # Bucket should have obj1 and obj3 even though obj2 was unreadable.
      self.assertEqual(listing2, set(['/obj1', '/obj3']))

    _Check()

  @unittest.skipIf(IS_WINDOWS,
                   'Windows Unicode support is problematic in Python 2.x.')
  def test_dir_to_bucket_with_unicode_chars(self):
    """Tests that rsync -r works correctly with unicode filenames."""

    tmpdir = self.CreateTempDir()
    bucket_uri = self.CreateBucket()
    # macOS displays the "Ì" (I\xcc\x80) unicode character the same
    # as "Ì" (\xc3\x8c) in both the first and second filenames. Despite being
    # different characters, the second filename will not be created since the
    # OS detects them as the same name.
    file_list = [
        'morales_sueños.jpg',  # 'morales_suenI\xcc\x80\xc6\x92os.jpg'
        'morales_sueños.jpg',  # 'morales_suen\xc3\x8c\xc6\x92os.jpg'
        'fooꝾoo',  # 'foo\xea\x9d\xbeoo'
    ]
    for filename in file_list:
      self.CreateTempFile(tmpdir=tmpdir, file_name=filename)

    expected_list_results = (frozenset(
        ['/morales_sueños.jpg', '/fooꝾoo']) if IS_OSX else frozenset(
            ['/morales_sueños.jpg', '/morales_sueños.jpg', '/fooꝾoo']))

    # Use @Retry as hedge against bucket listing eventual consistency.
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check():
      """Tests rsync works as expected."""
      self.RunGsUtil(['rsync', '-r', tmpdir, suri(bucket_uri)])
      listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
      listing2 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
      self.assertEqual(set(listing1), expected_list_results)
      self.assertEqual(set(listing2), expected_list_results)

    _Check()

  def test_dir_to_bucket_minus_u(self):
    """Tests that rsync -u works correctly."""
    tmpdir = self.CreateTempDir()
    dst_bucket = self.CreateBucket()
    ORIG_MTIME = 10

    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='obj1',
                      contents=b'obj1-1',
                      mtime=ORIG_MTIME)
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='obj2',
                      contents=b'obj2-1',
                      mtime=ORIG_MTIME)
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='obj3',
                      contents=b'obj3-1',
                      mtime=ORIG_MTIME)
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='obj4',
                      contents=b'obj4-1',
                      mtime=ORIG_MTIME)
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='obj5',
                      contents=b'obj5-1',
                      mtime=ORIG_MTIME)
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='obj6',
                      contents=b'obj6-1',
                      mtime=ORIG_MTIME)

    # Source files with older mtimes should NOT be copied:
    # Same size, different contents.
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj1',
                        contents=b'obj1-2',
                        mtime=ORIG_MTIME - 1)
    # Same size, same contents.
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj2',
                        contents=b'obj2-1',
                        mtime=ORIG_MTIME - 1)
    # Different size and contents.
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj3',
                        contents=b'obj3-newer',
                        mtime=ORIG_MTIME - 1)

    # Source files with equal mtimes should fall back to other checks:
    # Same size and mtime without specifying `-c` => should NOT be copied even
    # if the contents differ.
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj4',
                        contents=b'obj4-2',
                        mtime=ORIG_MTIME)
    # Different file size => SHOULD be copied.
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj5',
                        contents=b'obj5-bigger',
                        mtime=ORIG_MTIME)

    # Source files with newer mtimes should be copied:
    # Same size and contents => SHOULD still be copied.
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj6',
                        contents=b'obj6-1',
                        mtime=ORIG_MTIME + 1)
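
    # In short (matching the cases above): with -u, a destination object is
    # preserved when its mtime is newer than the source's, or when mtimes and
    # sizes are both equal; only a newer source mtime or a size mismatch
    # triggers a copy (adding -c would also compare checksums).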
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check():
      self.RunGsUtil(['rsync', '-u', tmpdir, suri(dst_bucket)])
      # Objects 1-4 should NOT have been overwritten:
      self.assertEqual(
          'obj1-1',
          self.RunGsUtil(['cat', suri(dst_bucket, 'obj1')], return_stdout=True))
      self.assertEqual(
          'obj2-1',
          self.RunGsUtil(['cat', suri(dst_bucket, 'obj2')], return_stdout=True))
      self.assertEqual(
          'obj3-1',
          self.RunGsUtil(['cat', suri(dst_bucket, 'obj3')], return_stdout=True))
      self.assertEqual(
          'obj4-1',
          self.RunGsUtil(['cat', suri(dst_bucket, 'obj4')], return_stdout=True))
      # Objects 5 and 6 SHOULD have been overwritten:
      self.assertEqual(
          'obj5-bigger',
          self.RunGsUtil(['cat', suri(dst_bucket, 'obj5')], return_stdout=True))
      # Contents were equal, so we verify via mtime that the source was copied.
      self._VerifyObjectMtime(dst_bucket.bucket_name, 'obj6',
                              str(ORIG_MTIME + 1))

    _Check()

  def test_dir_to_bucket_minus_i(self):
    """Tests that rsync -i works correctly."""
    tmpdir = self.CreateTempDir()
    dst_bucket = self.CreateBucket()
    ORIG_MTIME = 10

    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='obj1',
                      contents=b'obj1-1')
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='obj2',
                      contents=b'obj2-1')
    self.CreateObject(bucket_uri=dst_bucket,
                      object_name='obj3',
                      contents=b'obj3-1',
                      mtime=ORIG_MTIME)

    # Source files with the same name should NOT be copied:
    # Same size, different contents.
    self.CreateTempFile(tmpdir=tmpdir, file_name='obj1', contents=b'obj1-2')
    # Same size, same contents.
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj2',
                        contents=b'obj2-1',
                        mtime=ORIG_MTIME)
    # Different size and contents.
    self.CreateTempFile(tmpdir=tmpdir,
                        file_name='obj3',
                        contents=b'obj3-newer',
                        mtime=ORIG_MTIME - 1)
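
    # Unlike -u, -i skips a source file whenever an object with the same name
    # already exists at the destination, regardless of mtime, size, or
    # contents, so none of the three files above should be copied.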
    @Retry(AssertionError, tries=3, timeout_secs=1)
    def _Check():
      self.RunGsUtil(['rsync', '-i', tmpdir, suri(dst_bucket)])
      # Objects 1-3 should NOT have been overwritten:
      self.assertEqual(
          'obj1-1',
          self.RunGsUtil(['cat', suri(dst_bucket, 'obj1')], return_stdout=True))
      self.assertEqual(
          'obj2-1',
          self.RunGsUtil(['cat', suri(dst_bucket, 'obj2')], return_stdout=True))
      self.assertEqual(
          'obj3-1',
          self.RunGsUtil(['cat', suri(dst_bucket, 'obj3')], return_stdout=True))

    _Check()

  def test_rsync_files_with_whitespace(self):
    """Tests that filenames containing whitespace can be rsynced."""
    filename = 'foo bar baz.txt'
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    contents = 'File from rsync test: test_rsync_files_with_whitespace'
    self.CreateTempFile(tmpdir, contents, filename)

    expected_list_results = frozenset(['/foo bar baz.txt'])

    # Tests rsync works as expected.
    self.RunGsUtil(['rsync', '-r', tmpdir, suri(bucket_uri)])
    listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
    listing2 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
    self.assertEqual(set(listing1), expected_list_results)
    self.assertEqual(set(listing2), expected_list_results)

  def test_rsync_files_with_special_characters(self):
    """Tests that filenames containing special characters can be rsynced."""
    filename = 'Æ.txt'
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    contents = 'File from rsync test: test_rsync_files_with_special_characters'
    self.CreateTempFile(tmpdir, contents, filename)

    expected_list_results = frozenset(['/Æ.txt'])

    # Tests rsync works as expected.
    self.RunGsUtil(['rsync', '-r', tmpdir, suri(bucket_uri)])
    listing1 = TailSet(tmpdir, self.FlatListDir(tmpdir))
    listing2 = TailSet(suri(bucket_uri), self.FlatListBucket(bucket_uri))
    self.assertEqual(set(listing1), expected_list_results)
    self.assertEqual(set(listing2), expected_list_results)

  @SkipForS3('No compressed transport encoding support for S3.')
  @SkipForXML('No compressed transport encoding support for the XML API.')
  @SequentialAndParallelTransfer
  def test_gzip_transport_encoded_all_upload(self):
    """Test that all files upload with gzip transport encoding under -J."""
    # Setup the bucket and local data.
    file_names = ('test', 'test.txt', 'test.xml')
    local_uris = []
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    contents = b'x' * 10000
    # Create local files.
    for file_name in file_names:
      local_uris.append(self.CreateTempFile(tmpdir, contents, file_name))
    # Upload the data.
    stderr = self.RunGsUtil(
        ['-D', 'rsync', '-J', '-r', tmpdir,
         suri(bucket_uri)],
        return_stderr=True)
    self.AssertNObjectsInBucket(bucket_uri, len(local_uris))
    # Ensure the correct files were marked for compression.
    for local_uri in local_uris:
      self.assertIn(
          'Using compressed transport encoding for file://%s.' % (local_uri),
          stderr)
    if not self._use_gcloud_storage:
      # Ensure the progress logger sees a gzip encoding.
      self.assertIn('send: Using gzip transport encoding for the request.',
                    stderr)

  @SkipForS3('No compressed transport encoding support for S3.')
  @SkipForXML('No compressed transport encoding support for the XML API.')
  @SequentialAndParallelTransfer
  def test_gzip_transport_encoded_filtered_upload(self):
    """Test that only files matching the -j extensions are gzip encoded."""
    # Setup the bucket and local data.
    file_names_valid = ('test.txt', 'photo.txt')
    file_names_invalid = ('file', 'test.png', 'test.xml')
    local_uris_valid = []
    local_uris_invalid = []
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    contents = b'x' * 10000
    # Create local files.
    for file_name in file_names_valid:
      local_uris_valid.append(self.CreateTempFile(tmpdir, contents, file_name))
    for file_name in file_names_invalid:
      local_uris_invalid.append(self.CreateTempFile(tmpdir, contents,
                                                    file_name))
    # Upload the data.
    stderr = self.RunGsUtil(
        ['-D', 'rsync', '-j', 'txt', '-r', tmpdir,
         suri(bucket_uri)],
        return_stderr=True)
    self.AssertNObjectsInBucket(bucket_uri,
                                len(file_names_valid) + len(file_names_invalid))
    # Ensure the correct files were marked for compression.
    for local_uri in local_uris_valid:
      self.assertIn(
          'Using compressed transport encoding for file://%s.' % (local_uri),
          stderr)
    for local_uri in local_uris_invalid:
      self.assertNotIn(
          'Using compressed transport encoding for file://%s.' % (local_uri),
          stderr)
    if not self._use_gcloud_storage:
      # Ensure the progress logger sees a gzip encoding.
      self.assertIn('send: Using gzip transport encoding for the request.',
                    stderr)

  @SkipForS3('No compressed transport encoding support for S3.')
  @SkipForXML('No compressed transport encoding support for the XML API.')
  @SequentialAndParallelTransfer
  def test_gzip_transport_encoded_all_upload_parallel(self):
    """Test gzip encoded files upload correctly with parallelism (-m)."""
    # Setup the bucket and local data.
    file_names = ('test', 'test.txt', 'test.xml')
    local_uris = []
    bucket_uri = self.CreateBucket()
    tmpdir = self.CreateTempDir()
    contents = b'x' * 10000
    for file_name in file_names:
      local_uris.append(self.CreateTempFile(tmpdir, contents, file_name))
    # Upload the data.
    stderr = self.RunGsUtil(
        ['-D', '-m', 'rsync', '-J', '-r', tmpdir,
         suri(bucket_uri)],
        return_stderr=True)
    self.AssertNObjectsInBucket(bucket_uri, len(local_uris))
    # Ensure the correct files were marked for compression.
    for local_uri in local_uris:
      self.assertIn(
          'Using compressed transport encoding for file://%s.' % (local_uri),
          stderr)
    if not self._use_gcloud_storage:
      # Ensure the progress logger sees a gzip encoding.
      self.assertIn('send: Using gzip transport encoding for the request.',
                    stderr)

  @SkipForS3('Test uses gs-specific KMS encryption')
  def test_kms_key_applied_to_dest_objects(self):
    bucket_uri = self.CreateBucket()
    cloud_container_suri = suri(bucket_uri) + '/foo'
    obj_name = 'bar'
    obj_contents = b'bar'
    tmp_dir = self.CreateTempDir()
    self.CreateTempFile(tmpdir=tmp_dir,
                        file_name=obj_name,
                        contents=obj_contents)
    key_fqn = AuthorizeProjectToUseTestingKmsKey()
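
    # Background (summarized from `gsutil help encryption`): setting the boto
    # option GSUtil:encryption_key to a fully-qualified Cloud KMS key name
    # makes gsutil request that key as the customer-managed encryption key
    # (CMEK) for each newly written object.
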
    # Rsync the object from our tmpdir to a GCS bucket, specifying a KMS key.
    with SetBotoConfigForTest([('GSUtil', 'encryption_key', key_fqn)]):
      self.RunGsUtil(['rsync', tmp_dir, cloud_container_suri])

    # Make sure the new object is encrypted with the specified KMS key.
    with SetBotoConfigForTest([('GSUtil', 'prefer_api', 'json')]):
      stdout = self.RunGsUtil(
          ['ls', '-L', '%s/%s' % (cloud_container_suri, obj_name)],
          return_stdout=True)
      self.assertRegex(stdout, r'KMS key:\s+%s' % key_fqn)

  @SkipForGS('Tests that gs-specific encryption settings are skipped for s3.')
  def test_kms_key_specified_will_not_prevent_non_kms_copy_to_s3(self):
    tmp_dir = self.CreateTempDir()
    self.CreateTempFile(tmpdir=tmp_dir, contents=b'foo')
    bucket_uri = self.CreateBucket()
    dummy_key = ('projects/myproject/locations/global/keyRings/mykeyring/'
                 'cryptoKeys/mykey')

    # Would throw an exception if the command failed because of invalid
    # formatting (i.e. specifying a KMS key in a request to S3's API).
    with SetBotoConfigForTest([('GSUtil', 'encryption_key', dummy_key),
                               ('GSUtil', 'prefer_api', 'json')]):
      self.RunGsUtil(['rsync', tmp_dir, suri(bucket_uri)])

  def test_bucket_to_bucket_includes_arbitrary_headers(self):
    bucket1_uri = self.CreateBucket()
    bucket2_uri = self.CreateBucket()
    self.CreateObject(bucket_uri=bucket1_uri,
                      object_name='obj1',
                      contents=b'obj1')
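
    # The global -h flag adds the given header to every request the command
    # issues, and -DD logs each raw request, so the assertions below can
    # check that the header shows up once per logged request.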
    stderr = self.RunGsUtil([
        '-DD', '-h', 'arbitrary:header', 'rsync',
        suri(bucket1_uri),
        suri(bucket2_uri)
    ],
                            return_stderr=True)
    if self._use_gcloud_storage:
      request_count = len(re.findall(r'= request start =', stderr))
      target_header_count = len(re.findall(r"b'arbitrary': b'header'", stderr))
      self.assertEqual(request_count, target_header_count)
    else:
      headers_for_all_requests = re.findall(r"Headers: \{([\s\S]*?)\}", stderr)
      self.assertTrue(headers_for_all_requests)
      for headers in headers_for_all_requests:
        self.assertIn("'arbitrary': 'header'", headers)

  @SkipForGS('Tests that S3 SSE-C is handled.')
  def test_s3_sse_is_handled_with_arbitrary_headers(self):
    tmp_dir = self.CreateTempDir()
    tmp_file = self.CreateTempFile(tmpdir=tmp_dir, contents=b'foo')
    bucket_uri1 = self.CreateBucket()
    bucket_uri2 = self.CreateBucket()

    header_flags = [
        '-h',
        '"x-amz-server-side-encryption-customer-algorithm:AES256"',
        '-h',
        '"x-amz-server-side-encryption-customer-key:{}"'.format(
            TEST_ENCRYPTION_KEY_S3),
        '-h',
        '"x-amz-server-side-encryption-customer-key-md5:{}"'.format(
            TEST_ENCRYPTION_KEY_S3_MD5),
    ]
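
    # These x-amz-server-side-encryption-customer-* headers drive S3's SSE-C
    # mode: the caller supplies the AES-256 key (plus its MD5 digest) on
    # every write and on every subsequent read of the object. With SSE-C the
    # ETag S3 returns is not the MD5 of the plaintext, which is presumably
    # why hash checking is disabled below.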
    with SetBotoConfigForTest([('GSUtil', 'check_hashes', 'never')]):
      self.RunGsUtil(header_flags + ['cp', tmp_file, suri(bucket_uri1, 'test')])
      self.RunGsUtil(header_flags +
                     ['rsync', suri(bucket_uri1),
                      suri(bucket_uri2)])
      contents = self.RunGsUtil(header_flags +
                                ['cat', suri(bucket_uri2, 'test')],
                                return_stdout=True)

    self.assertEqual(contents, 'foo')