# -*- coding: utf-8 -*-
|
|
# Copyright 2011 Google Inc. All Rights Reserved.
|
|
# Copyright 2011, Nexenta Systems Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Implementation of Unix-like cp command for cloud storage providers."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import print_function
|
|
from __future__ import division
|
|
from __future__ import unicode_literals
|
|
|
|
import errno
|
|
import itertools
|
|
import logging
|
|
import os
|
|
import time
|
|
import traceback
|
|
|
|
from apitools.base.py import encoding
|
|
from gslib import gcs_json_api
|
|
from gslib.command import Command
|
|
from gslib.command_argument import CommandArgument
|
|
from gslib.cs_api_map import ApiSelector
|
|
from gslib.exception import CommandException
|
|
from gslib.metrics import LogPerformanceSummaryParams
|
|
from gslib.name_expansion import CopyObjectsIterator
|
|
from gslib.name_expansion import DestinationInfo
|
|
from gslib.name_expansion import NameExpansionIterator
|
|
from gslib.name_expansion import NameExpansionIteratorDestinationTuple
|
|
from gslib.name_expansion import SeekAheadNameExpansionIterator
|
|
from gslib.storage_url import ContainsWildcard
|
|
from gslib.storage_url import IsCloudSubdirPlaceholder
|
|
from gslib.storage_url import StorageUrlFromString
|
|
from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
|
|
from gslib.utils import cat_helper
|
|
from gslib.utils import copy_helper
|
|
from gslib.utils import parallelism_framework_util
|
|
from gslib.utils.cloud_api_helper import GetCloudApiInstance
|
|
from gslib.utils.constants import DEBUGLEVEL_DUMP_REQUESTS
|
|
from gslib.utils.constants import NO_MAX
|
|
from gslib.utils.copy_helper import CreateCopyHelperOpts
|
|
from gslib.utils.copy_helper import GetSourceFieldsNeededForCopy
|
|
from gslib.utils.copy_helper import GZIP_ALL_FILES
|
|
from gslib.utils.copy_helper import ItemExistsError
|
|
from gslib.utils.copy_helper import Manifest
|
|
from gslib.utils.copy_helper import SkipUnsupportedObjectError
|
|
from gslib.utils.posix_util import ConvertModeToBase8
|
|
from gslib.utils.posix_util import DeserializeFileAttributesFromObjectMetadata
|
|
from gslib.utils.posix_util import InitializePreservePosixData
|
|
from gslib.utils.posix_util import POSIXAttributes
|
|
from gslib.utils.posix_util import SerializeFileAttributesToObjectMetadata
|
|
from gslib.utils.posix_util import ValidateFilePermissionAccess
|
|
from gslib.utils.shim_util import GcloudStorageFlag
|
|
from gslib.utils.shim_util import GcloudStorageMap
|
|
from gslib.utils.system_util import GetStreamFromFileUrl
|
|
from gslib.utils.system_util import StdinIterator
|
|
from gslib.utils.system_util import StdinIteratorCls
|
|
from gslib.utils.text_util import NormalizeStorageClass
|
|
from gslib.utils.text_util import RemoveCRLFFromString
|
|
from gslib.utils.unit_util import CalculateThroughput
|
|
from gslib.utils.unit_util import MakeHumanReadable
|
|
|
|
# Usage synopsis shown both in the command spec and at the top of the detailed
# help page. The three forms are: single source, multiple sources, and
# sources read from stdin (-I).
_SYNOPSIS = """
  gsutil cp [OPTION]... src_url dst_url
  gsutil cp [OPTION]... src_url... dst_url
  gsutil cp [OPTION]... -I dst_url
"""

# Synopsis wrapped with the <B> header markup used by the help formatter.
_SYNOPSIS_TEXT = """
<B>SYNOPSIS</B>
""" + _SYNOPSIS
|
|
|
|
_DESCRIPTION_TEXT = """
|
|
<B>DESCRIPTION</B>
|
|
The ``gsutil cp`` command allows you to copy data between your local file
|
|
system and the cloud, within the cloud, and between
|
|
cloud storage providers. For example, to upload all text files from the
|
|
local directory to a bucket, you can run:
|
|
|
|
gsutil cp *.txt gs://my-bucket
|
|
|
|
You can also download data from a bucket. The following command downloads
|
|
all text files from the top-level of a bucket to your current directory:
|
|
|
|
gsutil cp gs://my-bucket/*.txt .
|
|
|
|
You can use the ``-n`` option to prevent overwriting the content of
|
|
existing files. The following example downloads text files from a bucket
|
|
without clobbering the data in your directory:
|
|
|
|
gsutil cp -n gs://my-bucket/*.txt .
|
|
|
|
Use the ``-r`` option to copy an entire directory tree.
|
|
For example, to upload the directory tree ``dir``:
|
|
|
|
gsutil cp -r dir gs://my-bucket
|
|
|
|
If you have a large number of files to transfer, you can perform a parallel
|
|
multi-threaded/multi-processing copy using the
|
|
top-level gsutil ``-m`` option (see "gsutil help options"):
|
|
|
|
gsutil -m cp -r dir gs://my-bucket
|
|
|
|
You can use the ``-I`` option with ``stdin`` to specify a list of URLs to
|
|
copy, one per line. This allows you to use gsutil
|
|
in a pipeline to upload or download objects as generated by a program:
|
|
|
|
cat filelist | gsutil -m cp -I gs://my-bucket
|
|
|
|
or:
|
|
|
|
cat filelist | gsutil -m cp -I ./download_dir
|
|
|
|
where the output of ``cat filelist`` is a list of files, cloud URLs, and
|
|
wildcards of files and cloud URLs.
|
|
|
|
NOTE: Shells like ``bash`` and ``zsh`` sometimes attempt to expand
|
|
wildcards in ways that can be surprising. You may also encounter issues when
|
|
attempting to copy files whose names contain wildcard characters. For more
|
|
details about these issues, see `Wildcard behavior considerations
|
|
<https://cloud.google.com/storage/docs/wildcards#surprising-behavior>`_.
|
|
"""
|
|
|
|
_NAME_CONSTRUCTION_TEXT = """
|
|
<B>HOW NAMES ARE CONSTRUCTED</B>
|
|
The ``gsutil cp`` command attempts to name objects in ways that are consistent with the
|
|
Linux ``cp`` command. This means that names are constructed depending
|
|
on whether you're performing a recursive directory copy or copying
|
|
individually-named objects, or whether you're copying to an existing or
|
|
non-existent directory.
|
|
|
|
When you perform recursive directory copies, object names are constructed to
|
|
mirror the source directory structure starting at the point of recursive
|
|
processing. For example, if ``dir1/dir2`` contains the file ``a/b/c``, then the
|
|
following command creates the object ``gs://my-bucket/dir2/a/b/c``:
|
|
|
|
gsutil cp -r dir1/dir2 gs://my-bucket
|
|
|
|
In contrast, copying individually-named files results in objects named by
|
|
the final path component of the source files. For example, assuming again that
|
|
``dir1/dir2`` contains ``a/b/c``, the following command creates the object
|
|
``gs://my-bucket/c``:
|
|
|
|
gsutil cp dir1/dir2/** gs://my-bucket
|
|
|
|
  Note that in the above example, the ``**`` wildcard matches all names
  anywhere under ``dir1/dir2``. The ``*`` wildcard matches names just one level deep. For
|
|
more details, see `URI wildcards
|
|
<https://cloud.google.com/storage/docs/wildcards#surprising-behavior>`_.
|
|
|
|
The same rules apply for uploads and downloads: recursive copies of buckets and
|
|
  bucket subdirectories produce a mirrored filename structure, while copying
  individually or wildcard-named objects produces flatly-named files.
|
|
|
|
In addition, the resulting names depend on whether the destination subdirectory
|
|
exists. For example, if ``gs://my-bucket/subdir`` exists as a subdirectory,
|
|
the following command creates the object ``gs://my-bucket/subdir/dir2/a/b/c``:
|
|
|
|
gsutil cp -r dir1/dir2 gs://my-bucket/subdir
|
|
|
|
In contrast, if ``gs://my-bucket/subdir`` does not exist, this same ``gsutil cp``
|
|
command creates the object ``gs://my-bucket/subdir/a/b/c``.
|
|
|
|
NOTE: The
|
|
`Google Cloud Platform Console <https://console.cloud.google.com>`_
|
|
creates folders by creating "placeholder" objects that end
|
|
with a "/" character. gsutil skips these objects when downloading from the
|
|
cloud to the local file system, because creating a file that
|
|
ends with a "/" is not allowed on Linux and macOS. We
|
|
recommend that you only create objects that end with "/" if you don't
|
|
intend to download such objects using gsutil.
|
|
"""
|
|
|
|
_SUBDIRECTORIES_TEXT = """
|
|
<B>COPYING TO/FROM SUBDIRECTORIES; DISTRIBUTING TRANSFERS ACROSS MACHINES</B>
|
|
You can use gsutil to copy to and from subdirectories by using a command
|
|
like this:
|
|
|
|
gsutil cp -r dir gs://my-bucket/data
|
|
|
|
This causes ``dir`` and all of its files and nested subdirectories to be
|
|
copied under the specified destination, resulting in objects with names like
|
|
``gs://my-bucket/data/dir/a/b/c``. Similarly, you can download from bucket
|
|
subdirectories using the following command:
|
|
|
|
gsutil cp -r gs://my-bucket/data dir
|
|
|
|
This causes everything nested under ``gs://my-bucket/data`` to be downloaded
|
|
into ``dir``, resulting in files with names like ``dir/data/a/b/c``.
|
|
|
|
Copying subdirectories is useful if you want to add data to an existing
|
|
bucket directory structure over time. It's also useful if you want
|
|
to parallelize uploads and downloads across multiple machines (potentially
|
|
reducing overall transfer time compared with running ``gsutil -m
|
|
cp`` on one machine). For example, if your bucket contains this structure:
|
|
|
|
gs://my-bucket/data/result_set_01/
|
|
gs://my-bucket/data/result_set_02/
|
|
...
|
|
gs://my-bucket/data/result_set_99/
|
|
|
|
you can perform concurrent downloads across 3 machines by running these
|
|
commands on each machine, respectively:
|
|
|
|
gsutil -m cp -r gs://my-bucket/data/result_set_[0-3]* dir
|
|
gsutil -m cp -r gs://my-bucket/data/result_set_[4-6]* dir
|
|
gsutil -m cp -r gs://my-bucket/data/result_set_[7-9]* dir
|
|
|
|
Note that ``dir`` could be a local directory on each machine, or a
|
|
directory mounted off of a shared file server. The performance of the latter
|
|
depends on several factors, so we recommend experimenting
|
|
to find out what works best for your computing environment.
|
|
"""
|
|
|
|
_COPY_IN_CLOUD_TEXT = """
|
|
<B>COPYING IN THE CLOUD AND METADATA PRESERVATION</B>
|
|
If both the source and destination URL are cloud URLs from the same
|
|
provider, gsutil copies data "in the cloud" (without downloading
|
|
to and uploading from the machine where you run gsutil). In addition to
|
|
the performance and cost advantages of doing this, copying in the cloud
|
|
preserves metadata such as ``Content-Type`` and ``Cache-Control``. In contrast,
|
|
when you download data from the cloud, it ends up in a file with
|
|
no associated metadata, unless you have some way to keep
|
|
or re-create that metadata.
|
|
|
|
Copies spanning locations and/or storage classes cause data to be rewritten
|
|
in the cloud, which may take some time (but is still faster than
|
|
downloading and re-uploading). Such operations can be resumed with the same
|
|
command if they are interrupted, so long as the command parameters are
|
|
identical.
|
|
|
|
Note that by default, the gsutil ``cp`` command does not copy the object
|
|
ACL to the new object, and instead uses the default bucket ACL (see
|
|
"gsutil help defacl"). You can override this behavior with the ``-p``
|
|
option.
|
|
|
|
When copying in the cloud, if the destination bucket has Object Versioning
|
|
enabled, by default ``gsutil cp`` copies only live versions of the
|
|
source object. For example, the following command causes only the single live
|
|
version of ``gs://bucket1/obj`` to be copied to ``gs://bucket2``, even if there
|
|
are noncurrent versions of ``gs://bucket1/obj``:
|
|
|
|
gsutil cp gs://bucket1/obj gs://bucket2
|
|
|
|
To also copy noncurrent versions, use the ``-A`` flag:
|
|
|
|
gsutil cp -A gs://bucket1/obj gs://bucket2
|
|
|
|
The top-level gsutil ``-m`` flag is not allowed when using the ``cp -A`` flag.
|
|
"""
|
|
|
|
_CHECKSUM_VALIDATION_TEXT = """
|
|
|
|
|
|
<B>CHECKSUM VALIDATION</B>
|
|
gsutil automatically performs checksum validation for copies to and from Cloud
|
|
Storage. For more information, see `Hashes and ETags
|
|
<https://cloud.google.com/storage/docs/hashes-etags#cli>`_.
|
|
"""
|
|
|
|
_RETRY_HANDLING_TEXT = """
|
|
<B>RETRY HANDLING</B>
|
|
The ``cp`` command retries when failures occur, but if enough failures happen
|
|
during a particular copy or delete operation, or if a failure isn't retryable,
|
|
the ``cp`` command skips that object and moves on. If any failures were not
|
|
successfully retried by the end of the copy run, the ``cp`` command reports the
|
|
number of failures and exits with a non-zero status.
|
|
|
|
For details about gsutil's overall retry handling, see `Retry strategy
|
|
<https://cloud.google.com/storage/docs/retry-strategy#tools>`_.
|
|
"""
|
|
|
|
_RESUMABLE_TRANSFERS_TEXT = """
|
|
<B>RESUMABLE TRANSFERS</B>
|
|
gsutil automatically resumes interrupted downloads and interrupted `resumable
|
|
uploads <https://cloud.google.com/storage/docs/resumable-uploads#gsutil>`_,
|
|
except when performing streaming transfers. In the case of an interrupted
|
|
download, a partially downloaded temporary file is visible in the destination
|
|
directory with the suffix ``_.gstmp`` in its name. Upon completion, the
|
|
original file is deleted and replaced with the downloaded contents.
|
|
|
|
Resumable transfers store state information in files under
|
|
~/.gsutil, named by the destination object or file.
|
|
|
|
See "gsutil help prod" for details on using resumable transfers
|
|
in production.
|
|
"""
|
|
|
|
_STREAMING_TRANSFERS_TEXT = """
|
|
<B>STREAMING TRANSFERS</B>
|
|
Use '-' in place of src_url or dst_url to perform a `streaming transfer
|
|
<https://cloud.google.com/storage/docs/streaming>`_.
|
|
|
|
Streaming uploads using the `JSON API
|
|
<https://cloud.google.com/storage/docs/request-endpoints#gsutil>`_ are buffered
|
|
in memory part-way back into the file and can thus sometimes resume in the event
|
|
of network or service problems.
|
|
|
|
gsutil does not support resuming streaming uploads using the XML API or
|
|
resuming streaming downloads for either JSON or XML. If you have a large amount
|
|
of data to transfer in these cases, we recommend that you write the data to a
|
|
local file and copy that file rather than streaming it.
|
|
"""
|
|
|
|
_SLICED_OBJECT_DOWNLOADS_TEXT = """
|
|
<B>SLICED OBJECT DOWNLOADS</B>
|
|
gsutil can automatically use ranged ``GET`` requests to perform downloads in
|
|
parallel for large files being downloaded from Cloud Storage. See `sliced object
|
|
download documentation
|
|
<https://cloud.google.com/storage/docs/sliced-object-downloads>`_
|
|
for a complete discussion.
|
|
"""
|
|
|
|
_PARALLEL_COMPOSITE_UPLOADS_TEXT = """
|
|
<B>PARALLEL COMPOSITE UPLOADS</B>
|
|
gsutil can automatically use
|
|
`object composition <https://cloud.google.com/storage/docs/composite-objects>`_
|
|
to perform uploads in parallel for large, local files being uploaded to
|
|
Cloud Storage. See the `parallel composite uploads documentation
|
|
<https://cloud.google.com/storage/docs/parallel-composite-uploads>`_ for a
|
|
complete discussion.
|
|
"""
|
|
|
|
_CHANGING_TEMP_DIRECTORIES_TEXT = """
|
|
<B>CHANGING TEMP DIRECTORIES</B>
|
|
gsutil writes data to a temporary directory in several cases:
|
|
|
|
- when compressing data to be uploaded (see the ``-z`` and ``-Z`` options)
|
|
- when decompressing data being downloaded (for example, when the data has
|
|
``Content-Encoding:gzip`` as a result of being uploaded
|
|
using gsutil cp -z or gsutil cp -Z)
|
|
- when running integration tests using the gsutil test command
|
|
|
|
In these cases, it's possible the temporary file location on your system that
|
|
gsutil selects by default may not have enough space. If gsutil runs out of
|
|
space during one of these operations (for example, raising
|
|
"CommandException: Inadequate temp space available to compress <your file>"
|
|
during a ``gsutil cp -z`` operation), you can change where it writes these
|
|
temp files by setting the TMPDIR environment variable. On Linux and macOS,
|
|
you can set the variable as follows:
|
|
|
|
TMPDIR=/some/directory gsutil cp ...
|
|
|
|
You can also add this line to your ~/.bashrc file and restart the shell
|
|
before running gsutil:
|
|
|
|
export TMPDIR=/some/directory
|
|
|
|
On Windows 7, you can change the TMPDIR environment variable from Start ->
|
|
Computer -> System -> Advanced System Settings -> Environment Variables.
|
|
You need to reboot after making this change for it to take effect. Rebooting
|
|
is not necessary after running the export command on Linux and macOS.
|
|
"""
|
|
|
|
_COPYING_SPECIAL_FILES_TEXT = """
|
|
<B>SYNCHRONIZING OVER OS-SPECIFIC FILE TYPES (SUCH AS SYMLINKS AND DEVICES)</B>
|
|
|
|
Please see the section about OS-specific file types in "gsutil help rsync".
|
|
While that section refers to the ``rsync`` command, analogous
|
|
points apply to the ``cp`` command.
|
|
"""
|
|
|
|
_OPTIONS_TEXT = """
|
|
<B>OPTIONS</B>
|
|
-a predef_acl Applies the specific predefined ACL to uploaded objects. See
|
|
"gsutil help acls" for further details.
|
|
|
|
-A Copy all source versions from a source bucket or folder.
|
|
If not set, only the live version of each source object is
|
|
copied.
|
|
|
|
NOTE: This option is only useful when the destination
|
|
bucket has Object Versioning enabled. Additionally, the generation
|
|
numbers of copied versions do not necessarily match the order of the
|
|
original generation numbers.
|
|
|
|
-c If an error occurs, continue attempting to copy the remaining
|
|
files. If any copies are unsuccessful, gsutil's exit status
|
|
is non-zero, even if this flag is set. This option is
|
|
implicitly set when running ``gsutil -m cp...``.
|
|
|
|
NOTE: ``-c`` only applies to the actual copying operation. If an
|
|
error, such as ``invalid Unicode file name``, occurs while iterating
|
|
over the files in the local directory, gsutil prints an error
|
|
message and aborts.
|
|
|
|
-D Copy in "daisy chain" mode, which means copying between two buckets
|
|
by first downloading to the machine where gsutil is run, then
|
|
uploading to the destination bucket. The default mode is a
|
|
"copy in the cloud," where data is copied between two buckets without
|
|
uploading or downloading.
|
|
|
|
During a "copy in the cloud," a source composite object remains composite
|
|
at its destination. However, you can use "daisy chain" mode to change a
|
|
composite object into a non-composite object. For example:
|
|
|
|
gsutil cp -D gs://bucket/obj gs://bucket/obj_tmp
|
|
gsutil mv gs://bucket/obj_tmp gs://bucket/obj
|
|
|
|
NOTE: "Daisy chain" mode is automatically used when copying
|
|
between providers: for example, when copying data from Cloud Storage
|
|
to another provider.
|
|
|
|
-e Exclude symlinks. When specified, symbolic links are not copied.
|
|
|
|
-I Use ``stdin`` to specify a list of files or objects to copy. You can use
|
|
gsutil in a pipeline to upload or download objects as generated by a program.
|
|
For example:
|
|
|
|
cat filelist | gsutil -m cp -I gs://my-bucket
|
|
|
|
where the output of ``cat filelist`` is a one-per-line list of
|
|
files, cloud URLs, and wildcards of files and cloud URLs.
|
|
|
|
-j <ext,...> Applies gzip transport encoding to any file upload whose
|
|
extension matches the ``-j`` extension list. This is useful when
|
|
uploading files with compressible content such as .js, .css,
|
|
or .html files. This also saves network bandwidth while
|
|
leaving the data uncompressed in Cloud Storage.
|
|
|
|
When you specify the ``-j`` option, files being uploaded are
|
|
compressed in-memory and on-the-wire only. Both the local
|
|
files and Cloud Storage objects remain uncompressed. The
|
|
uploaded objects retain the ``Content-Type`` and name of the
|
|
original files.
|
|
|
|
Note that if you want to use the ``-m`` `top-level option
|
|
<https://cloud.google.com/storage/docs/gsutil/addlhelp/GlobalCommandLineOptions>`_
|
|
to parallelize copies along with the ``-j/-J`` options, your
|
|
performance may be bottlenecked by the
|
|
"max_upload_compression_buffer_size" boto config option,
|
|
which is set to 2 GiB by default. You can change this
|
|
compression buffer size to a higher limit. For example:
|
|
|
|
gsutil -o "GSUtil:max_upload_compression_buffer_size=8G" \\
|
|
-m cp -j html,txt -r /local/source/dir gs://bucket/path
|
|
|
|
-J Applies gzip transport encoding to file uploads. This option
|
|
works like the ``-j`` option described above, but it applies to
|
|
all uploaded files, regardless of extension.
|
|
|
|
CAUTION: If some of the source files don't compress well, such
|
|
as binary data, using this option may result in longer uploads.
|
|
|
|
-L <file> Outputs a manifest log file with detailed information about
|
|
each item that was copied. This manifest contains the following
|
|
information for each item:
|
|
|
|
- Source path.
|
|
- Destination path.
|
|
- Source size.
|
|
- Bytes transferred.
|
|
- MD5 hash.
|
|
- Transfer start time and date in UTC and ISO 8601 format.
|
|
- Transfer completion time and date in UTC and ISO 8601 format.
|
|
- Upload id, if a resumable upload was performed.
|
|
- Final result of the attempted transfer, either success or failure.
|
|
- Failure details, if any.
|
|
|
|
If the log file already exists, gsutil uses the file as an
|
|
input to the copy process, and appends log items to
|
|
the existing file. Objects that are marked in the
|
|
existing log file as having been successfully copied or
|
|
skipped are ignored. Objects without entries are
|
|
copied and ones previously marked as unsuccessful are
|
|
retried. This option can be used in conjunction with the ``-c`` option to
|
|
build a script that copies a large number of objects reliably,
|
|
using a bash script like the following:
|
|
|
|
until gsutil cp -c -L cp.log -r ./dir gs://bucket; do
|
|
sleep 1
|
|
done
|
|
|
|
The -c option enables copying to continue after failures
|
|
occur, and the -L option allows gsutil to pick up where it
|
|
left off without duplicating work. The loop continues
|
|
running as long as gsutil exits with a non-zero status. A non-zero
|
|
status indicates there was at least one failure during the copy
|
|
operation.
|
|
|
|
NOTE: If you are synchronizing the contents of a
|
|
directory and a bucket, or the contents of two buckets, see
|
|
"gsutil help rsync".
|
|
|
|
-n No-clobber. When specified, existing files or objects at the
|
|
destination are not replaced. Any items that are skipped
|
|
by this option are reported as skipped. gsutil
|
|
performs an additional GET request to check if an item
|
|
exists before attempting to upload the data. This saves gsutil
|
|
from retransmitting data, but the additional HTTP requests may make
|
|
small object transfers slower and more expensive.
|
|
|
|
-p Preserves ACLs when copying in the cloud. Note
|
|
that this option has performance and cost implications only when
|
|
using the XML API, as the XML API requires separate HTTP calls for
|
|
interacting with ACLs. You can mitigate this
|
|
performance issue using ``gsutil -m cp`` to perform parallel
|
|
copying. Note that this option only works if you have OWNER access
|
|
to all objects that are copied. If you want all objects in the
|
|
destination bucket to end up with the same ACL, you can avoid these
|
|
performance issues by setting a default object ACL on that bucket
|
|
instead of using ``cp -p``. See "gsutil help defacl".
|
|
|
|
Note that it's not valid to specify both the ``-a`` and ``-p`` options
|
|
together.
|
|
|
|
-P Enables POSIX attributes to be preserved when objects are
|
|
copied. ``gsutil cp`` copies fields provided by ``stat``. These fields
|
|
are the user ID of the owner, the group
|
|
ID of the owning group, the mode or permissions of the file, and
|
|
the access and modification time of the file. For downloads, these
|
|
attributes are only set if the source objects were uploaded
|
|
with this flag enabled.
|
|
|
|
On Windows, this flag only sets and restores access time and
|
|
modification time. This is because Windows doesn't support
|
|
POSIX uid/gid/mode.
|
|
|
|
-R, -r The ``-R`` and ``-r`` options are synonymous. They enable directories,
|
|
buckets, and bucket subdirectories to be copied recursively.
|
|
If you don't use this option for an upload, gsutil copies objects
|
|
it finds and skips directories. Similarly, if you don't
|
|
specify this option for a download, gsutil copies
|
|
objects at the current bucket directory level and skips subdirectories.
|
|
|
|
-s <class> Specifies the storage class of the destination object. If not
|
|
specified, the default storage class of the destination bucket
|
|
is used. This option is not valid for copying to non-cloud destinations.
|
|
|
|
-U Skips objects with unsupported object types instead of failing.
|
|
Unsupported object types include Amazon S3 objects in the GLACIER
|
|
storage class.
|
|
|
|
-v Prints the version-specific URL for each uploaded object. You can
|
|
use these URLs to safely make concurrent upload requests, because
|
|
Cloud Storage refuses to perform an update if the current
|
|
object version doesn't match the version-specific URL. See
|
|
`generation numbers
|
|
<https://cloud.google.com/storage/docs/metadata#generation-number>`_
|
|
for more details.
|
|
|
|
-z <ext,...> Applies gzip content-encoding to any file upload whose
|
|
extension matches the ``-z`` extension list. This is useful when
|
|
uploading files with compressible content such as .js, .css,
|
|
or .html files, because it reduces network bandwidth and storage
|
|
sizes. This can both improve performance and reduce costs.
|
|
|
|
When you specify the ``-z`` option, the data from your files is
|
|
compressed before it is uploaded, but your actual files are
|
|
left uncompressed on the local disk. The uploaded objects
|
|
retain the ``Content-Type`` and name of the original files, but
|
|
have their ``Content-Encoding`` metadata set to ``gzip`` to
|
|
indicate that the object data stored are compressed on the
|
|
Cloud Storage servers and have their ``Cache-Control`` metadata
|
|
set to ``no-transform``.
|
|
|
|
For example, the following command:
|
|
|
|
gsutil cp -z html \\
|
|
cattypes.html tabby.jpeg gs://mycats
|
|
|
|
does the following:
|
|
|
|
- The ``cp`` command uploads the files ``cattypes.html`` and
|
|
``tabby.jpeg`` to the bucket ``gs://mycats``.
|
|
- Based on the file extensions, gsutil sets the ``Content-Type``
|
|
of ``cattypes.html`` to ``text/html`` and ``tabby.jpeg`` to
|
|
``image/jpeg``.
|
|
- The ``-z`` option compresses the data in the file ``cattypes.html``.
|
|
- The ``-z`` option also sets the ``Content-Encoding`` for
|
|
``cattypes.html`` to ``gzip`` and the ``Cache-Control`` for
|
|
``cattypes.html`` to ``no-transform``.
|
|
|
|
Because the ``-z/-Z`` options compress data prior to upload, they
|
|
are not subject to the same compression buffer bottleneck that
|
|
can affect the ``-j/-J`` options.
|
|
|
|
Note that if you download an object with ``Content-Encoding:gzip``,
|
|
gsutil decompresses the content before writing the local file.
|
|
|
|
-Z Applies gzip content-encoding to file uploads. This option
|
|
works like the ``-z`` option described above, but it applies to
|
|
all uploaded files, regardless of extension.
|
|
|
|
CAUTION: If some of the source files don't compress well, such
|
|
as binary data, using this option may result in files taking up
|
|
more space in the cloud than they would if left uncompressed.
|
|
|
|
--stet If the STET binary can be found in boto or PATH, cp will
|
|
use the split-trust encryption tool for end-to-end encryption.
|
|
"""
|
|
|
|
# Full help page assembled once at import time. Section ordering here
# determines the order sections appear in "gsutil help cp".
_DETAILED_HELP_TEXT = '\n\n'.join([
    _SYNOPSIS_TEXT,
    _DESCRIPTION_TEXT,
    _NAME_CONSTRUCTION_TEXT,
    _SUBDIRECTORIES_TEXT,
    _COPY_IN_CLOUD_TEXT,
    _CHECKSUM_VALIDATION_TEXT,
    _RETRY_HANDLING_TEXT,
    _RESUMABLE_TRANSFERS_TEXT,
    _STREAMING_TRANSFERS_TEXT,
    _SLICED_OBJECT_DOWNLOADS_TEXT,
    _PARALLEL_COMPOSITE_UPLOADS_TEXT,
    _CHANGING_TEMP_DIRECTORIES_TEXT,
    _COPYING_SPECIAL_FILES_TEXT,
    _OPTIONS_TEXT,
])
|
|
|
|
# getopt-style sub-argument string for cp. Includes the hidden -M (mv mode),
# -N, and deprecated -t flags, which are accepted but not documented above.
CP_SUB_ARGS = 'a:AcDeIL:MNnpPrRs:tUvz:Zj:J'

# Maps gsutil flags to their "gcloud storage" equivalents for the shim.
# May be used by cp or mv.
CP_AND_MV_SHIM_FLAG_MAP = {
    '-A': GcloudStorageFlag('--all-versions'),
    '-a': GcloudStorageFlag('--predefined-acl'),
    '-c': GcloudStorageFlag('--continue-on-error'),
    '-D': GcloudStorageFlag('--daisy-chain'),
    '-e': GcloudStorageFlag('--ignore-symlinks'),
    '-I': GcloudStorageFlag('--read-paths-from-stdin'),
    '-J': GcloudStorageFlag('--gzip-in-flight-all'),
    '-j': GcloudStorageFlag('--gzip-in-flight'),
    '-L': GcloudStorageFlag('--manifest-path'),
    '-n': GcloudStorageFlag('--no-clobber'),
    '-P': GcloudStorageFlag('--preserve-posix'),
    '-p': GcloudStorageFlag('--preserve-acl'),
    '-s': GcloudStorageFlag('--storage-class'),
    '-v': GcloudStorageFlag('--print-created-message'),
    '-Z': GcloudStorageFlag('--gzip-local-all'),
    '-z': GcloudStorageFlag('--gzip-local'),
    '-U': GcloudStorageFlag('--skip-unsupported'),
}

# cp additionally supports recursion; -R and -r are synonyms, and both map to
# gcloud storage's -r. Built as a copy of the shared map plus the recursion
# flags (clearer than the previous dict-comprehension over concatenated
# item lists, and behaviorally identical since no keys collide).
CP_SHIM_FLAG_MAP = dict(CP_AND_MV_SHIM_FLAG_MAP)
CP_SHIM_FLAG_MAP.update({
    '-r': GcloudStorageFlag('-r'),
    '-R': GcloudStorageFlag('-r'),
})
|
|
|
|
|
|
def ShimTranslatePredefinedAclSubOptForCopy(sub_opts):
  """Gcloud uses camel-case predefined/canned ACLs, and gsutil uses snake-case.

  The camel-case-snake-case difference is related to gcloud primarily using
  JSON API rather than the XML API.

  Predefined ACLs are also called "canned ACLs".

  Args:
    sub_opts: List of pairs representing flag keys and values, e.g.
      [('a', 'public-read')]. Mutated in place: the value of the first '-a'
      flag, if present, is replaced with its JSON (camel-case) form.
  """
  # Do the substitution inside the loop rather than relying on the loop
  # variable leaking out of the for statement (the previous version read
  # sub_opts[i] after the loop, which was fragile under refactoring).
  for i, (flag, value) in enumerate(sub_opts):
    if flag == '-a':
      # Fall back to the original value when no translation is known, which
      # preserves already-camel-case or unrecognized ACL names.
      sub_opts[i] = (
          flag,
          gcs_json_api.FULL_PREDEFINED_ACL_XML_TO_JSON_TRANSLATION.get(
              value, value))
      break
|
|
|
|
|
|
def _CopyFuncWrapper(cls, args, thread_state=None):
|
|
cls.CopyFunc(args,
|
|
thread_state=thread_state,
|
|
preserve_posix=cls.preserve_posix_attrs)
|
|
|
|
|
|
def _CopyExceptionHandler(cls, e):
|
|
"""Simple exception handler to allow post-completion status."""
|
|
cls.logger.error(str(e))
|
|
cls.op_failure_count += 1
|
|
cls.logger.debug('\n\nEncountered exception while copying:\n%s\n',
|
|
traceback.format_exc())
|
|
|
|
|
|
def _RmExceptionHandler(cls, e):
|
|
"""Simple exception handler to allow post-completion status."""
|
|
cls.logger.error(str(e))
|
|
|
|
|
|
class CpCommand(Command):
|
|
"""Implementation of gsutil cp command.
|
|
|
|
Note that CpCommand is run for both gsutil cp and gsutil mv. The latter
|
|
happens by MvCommand calling CpCommand and passing the hidden (undocumented)
|
|
-M option. This allows the copy and remove needed for each mv to run
|
|
together (rather than first running all the cp's and then all the rm's, as
|
|
we originally had implemented), which in turn avoids the following problem
|
|
with removing the wrong objects: starting with a bucket containing only
|
|
the object gs://bucket/obj, say the user does:
|
|
gsutil mv gs://bucket/* gs://bucket/d.txt
|
|
If we ran all the cp's and then all the rm's and we didn't expand the wildcard
|
|
first, the cp command would first copy gs://bucket/obj to gs://bucket/d.txt,
|
|
and the rm command would then remove that object. In the implementation
|
|
prior to gsutil release 3.12 we avoided this by building a list of objects
|
|
to process and then running the copies and then the removes; but building
|
|
the list up front limits scalability (compared with the current approach
|
|
of processing the bucket listing iterator on the fly).
|
|
"""
|
|
|
|
  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'cp',
      command_name_aliases=['copy'],
      usage_synopsis=_SYNOPSIS,
      # min_args is 1 (not 2) because with -I only the destination URL is
      # given on the command line; sources arrive via stdin (see RunCommand).
      min_args=1,
      max_args=NO_MAX,
      # -t is deprecated but leave intact for now to avoid breakage.
      supported_sub_args=CP_SUB_ARGS,
      file_url_ok=True,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      # Unfortunately, "private" args are the only way to support non-single
      # character flags.
      # 'testcallbackfile=' takes a value (trailing '='); it is used by tests
      # to simulate transfer interruption/resume (see _ParseOpts).
      supported_private_args=['stet', 'testcallbackfile='],
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudOrFileURLsArgument(),
      ],
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='cp',
      help_name_aliases=['copy'],
      help_type='command_help',
      help_one_line_summary='Copy files and objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )
|
|
|
|
def get_gcloud_storage_args(self):
|
|
self.logger.warn(
|
|
"Unlike pure gsutil, this shim won't run composite uploads and sliced"
|
|
' downloads in parallel by default. Use the -m flag to enable'
|
|
' parallelism (i.e. "gsutil -m cp ...").')
|
|
ShimTranslatePredefinedAclSubOptForCopy(self.sub_opts)
|
|
gcloud_storage_map = GcloudStorageMap(
|
|
gcloud_command=['storage', 'cp'],
|
|
flag_map=CP_SHIM_FLAG_MAP,
|
|
)
|
|
return super().get_gcloud_storage_args(gcloud_storage_map)
|
|
|
|
  # pylint: disable=too-many-statements
  def CopyFunc(self, copy_object_info, thread_state=None, preserve_posix=False):
    """Worker function for performing the actual copy (and rm, for mv).

    Args:
      copy_object_info: Expanded-source record providing source_storage_url,
        expanded_storage_url, names_container, is_multi_source_request,
        is_multi_top_level_source_request, exp_dst_url,
        have_existing_dst_container, and expanded_result (JSON object
        metadata, if any).
      thread_state: Optional per-thread state passed to GetCloudApiInstance
        to obtain the cloud API instance for this worker.
      preserve_posix: If True, carry POSIX attributes (atime/mtime/uid/gid/
        mode) between local files and object metadata.

    Raises:
      CommandException: For invalid source/destination combinations, or on
        copy errors when continue_on_error is not set.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    copy_helper_opts = copy_helper.GetCopyHelperOpts()
    # When invoked via mv (hidden -M option), report errors as 'mv'.
    if copy_helper_opts.perform_mv:
      cmd_name = 'mv'
    else:
      cmd_name = self.command_name
    src_url = copy_object_info.source_storage_url
    exp_src_url = copy_object_info.expanded_storage_url
    src_url_names_container = copy_object_info.names_container
    have_multiple_srcs = copy_object_info.is_multi_source_request

    # Up-front validation of unsupported source/option combinations.
    if src_url.IsCloudUrl() and src_url.IsProvider():
      raise CommandException(
          'The %s command does not allow provider-only source URLs (%s)' %
          (cmd_name, src_url))
    if preserve_posix and src_url.IsFileUrl() and src_url.IsStream():
      raise CommandException('Cannot preserve POSIX attributes with a stream.')
    if self.parallel_operations and src_url.IsFileUrl() and src_url.IsStream():
      raise CommandException(
          'Cannot upload from a stream when using gsutil -m option.')
    if have_multiple_srcs:
      copy_helper.InsistDstUrlNamesContainer(
          copy_object_info.exp_dst_url,
          copy_object_info.have_existing_dst_container, cmd_name)

    # Various GUI tools (like the GCS web console) create placeholder objects
    # ending with '/' when the user creates an empty directory. Normally these
    # tools should delete those placeholders once objects have been written
    # "under" the directory, but sometimes the placeholders are left around. We
    # need to filter them out here, otherwise if the user tries to rsync from
    # GCS to a local directory it will result in a directory/file conflict
    # (e.g., trying to download an object called "mydata/" where the local
    # directory "mydata" exists).
    if IsCloudSubdirPlaceholder(exp_src_url):
      # We used to output the message 'Skipping cloud sub-directory placeholder
      # object...' but we no longer do so because it caused customer confusion.
      return

    # With -L, skip sources already recorded as successful in the manifest
    # (supports resuming an interrupted batch).
    if copy_helper_opts.use_manifest and self.manifest.WasSuccessful(
        exp_src_url.url_string):
      return

    if copy_helper_opts.perform_mv and copy_object_info.names_container:
      # Use recursion_requested when performing name expansion for the
      # directory mv case so we can determine if any of the source URLs are
      # directories (and then use cp -r and rm -r to perform the move, to
      # match the behavior of Linux mv (which when moving a directory moves
      # all the contained files).
      self.recursion_requested = True

    # Multiple sources into a non-existent local destination: create the
    # destination directory so sources land inside it.
    if (copy_object_info.exp_dst_url.IsFileUrl() and
        not os.path.exists(copy_object_info.exp_dst_url.object_name) and
        have_multiple_srcs):

      try:
        os.makedirs(copy_object_info.exp_dst_url.object_name)
      except OSError as e:
        # Another worker may have created the directory concurrently; only
        # EEXIST is benign.
        if e.errno != errno.EEXIST:
          raise

    dst_url = copy_helper.ConstructDstUrl(
        src_url,
        exp_src_url,
        src_url_names_container,
        have_multiple_srcs,
        copy_object_info.is_multi_top_level_source_request,
        copy_object_info.exp_dst_url,
        copy_object_info.have_existing_dst_container,
        self.recursion_requested,
        preserve_posix=preserve_posix)
    dst_url = copy_helper.FixWindowsNaming(src_url, dst_url)

    # Destination validation now that the concrete dst_url is known.
    copy_helper.CheckForDirFileConflict(exp_src_url, dst_url)
    if copy_helper.SrcDstSame(exp_src_url, dst_url):
      raise CommandException('%s: "%s" and "%s" are the same file - '
                             'abort.' % (cmd_name, exp_src_url, dst_url))

    if dst_url.IsCloudUrl() and dst_url.HasGeneration():
      raise CommandException('%s: a version-specific URL\n(%s)\ncannot be '
                             'the destination for gsutil cp - abort.' %
                             (cmd_name, dst_url))

    if not dst_url.IsCloudUrl() and copy_helper_opts.dest_storage_class:
      raise CommandException('Cannot specify storage class for a non-cloud '
                             'destination: %s' % dst_url)

    # Reconstitute source object metadata from the JSON produced during name
    # expansion, if present.
    src_obj_metadata = None
    if copy_object_info.expanded_result:
      src_obj_metadata = encoding.JsonToMessage(
          apitools_messages.Object, copy_object_info.expanded_result)

    # Uploading from a local file with -P: stat the file and serialize its
    # POSIX attributes into the object's custom metadata.
    if src_url.IsFileUrl() and preserve_posix:
      if not src_obj_metadata:
        src_obj_metadata = apitools_messages.Object()
      mode, _, _, _, uid, gid, _, atime, mtime, _ = os.stat(
          exp_src_url.object_name)
      mode = ConvertModeToBase8(mode)
      posix_attrs = POSIXAttributes(atime=atime,
                                    mtime=mtime,
                                    uid=uid,
                                    gid=gid,
                                    mode=mode)
      custom_metadata = apitools_messages.Object.MetadataValue(
          additionalProperties=[])
      SerializeFileAttributesToObjectMetadata(posix_attrs,
                                              custom_metadata,
                                              preserve_posix=preserve_posix)
      src_obj_metadata.metadata = custom_metadata

    # Downloading to a local file: verify we could actually apply the stored
    # POSIX attributes before transferring anything.
    if src_obj_metadata and dst_url.IsFileUrl():
      posix_attrs = DeserializeFileAttributesFromObjectMetadata(
          src_obj_metadata, src_url.url_string)
      mode = posix_attrs.mode.permissions
      valid, err = ValidateFilePermissionAccess(src_url.url_string,
                                                uid=posix_attrs.uid,
                                                gid=posix_attrs.gid,
                                                mode=mode)
      if preserve_posix and not valid:
        logging.getLogger().critical(err)
        raise CommandException('This sync will orphan file(s), please fix their'
                               ' permissions before trying again.')

    bytes_transferred = 0
    try:
      if copy_helper_opts.use_manifest:
        self.manifest.Initialize(exp_src_url.url_string, dst_url.url_string)

      if (self.recursion_requested and
          copy_object_info.exp_dst_url.object_name and dst_url.IsFileUrl()):

        # exp_dst_url is the wildcard-expanded path passed by the user:
        # exp_dst_url => ~/dir
        # container => /usr/name/dir
        container = os.path.abspath(copy_object_info.exp_dst_url.object_name)

        # dst_url holds the complete path of the object's destination:
        # dst_url => /usr/name/dir/../file.txt
        # abspath => /usr/name/file.txt
        #
        # Taking the common path of this and container yields: /usr/name,
        # which does not start with container when the inclusion of '..' strings
        # results in a copy outside of the container.
        if not os.path.commonpath([
            container, os.path.abspath(dst_url.object_name)
        ]).startswith(container):
          self.logger.warn(
              'Skipping copy of source URL %s because it would be copied '
              'outside the expected destination directory: %s.' %
              (exp_src_url, container))
          if copy_helper_opts.use_manifest:
            self.manifest.SetResult(
                exp_src_url.url_string, 0, 'skip',
                'Would have copied outside the destination directory.')
          return

      _, bytes_transferred, result_url, md5 = copy_helper.PerformCopy(
          self.logger,
          exp_src_url,
          dst_url,
          gsutil_api,
          self,
          _CopyExceptionHandler,
          src_obj_metadata=src_obj_metadata,
          allow_splitting=True,
          headers=self.headers,
          manifest=self.manifest,
          gzip_encoded=self.gzip_encoded,
          gzip_exts=self.gzip_exts,
          preserve_posix=preserve_posix,
          use_stet=self.use_stet)
      if copy_helper_opts.use_manifest:
        if md5:
          self.manifest.Set(exp_src_url.url_string, 'md5', md5)
        self.manifest.SetResult(exp_src_url.url_string, bytes_transferred, 'OK')
      if copy_helper_opts.print_ver:
        # Some cases don't return a version-specific URL (e.g., if destination
        # is a file).
        self.logger.info('Created: %s', result_url)
    except ItemExistsError:
      # Raised under -n (no-clobber) when the destination already exists.
      message = 'Skipping existing item: %s' % dst_url
      self.logger.info(message)
      if copy_helper_opts.use_manifest:
        self.manifest.SetResult(exp_src_url.url_string, 0, 'skip', message)
    except SkipUnsupportedObjectError as e:
      message = ('Skipping item %s with unsupported object type %s' %
                 (exp_src_url.url_string, e.unsupported_type))
      self.logger.info(message)
      if copy_helper_opts.use_manifest:
        self.manifest.SetResult(exp_src_url.url_string, 0, 'skip', message)
    except copy_helper.FileConcurrencySkipError as e:
      self.logger.warn(
          'Skipping copy of source URL %s because destination URL '
          '%s is already being copied by another gsutil process '
          'or thread (did you specify the same source URL twice?) ' %
          (src_url, dst_url))
    except Exception as e:  # pylint: disable=broad-except
      if (copy_helper_opts.no_clobber and
          copy_helper.IsNoClobberServerException(e)):
        message = 'Rejected (noclobber): %s' % dst_url
        self.logger.info(message)
        if copy_helper_opts.use_manifest:
          self.manifest.SetResult(exp_src_url.url_string, 0, 'skip', message)
      elif self.continue_on_error:
        message = 'Error copying %s: %s' % (src_url, str(e))
        self.op_failure_count += 1
        self.logger.error(message)
        if copy_helper_opts.use_manifest:
          self.manifest.SetResult(exp_src_url.url_string, 0, 'error',
                                  RemoveCRLFFromString(message))
      else:
        if copy_helper_opts.use_manifest:
          self.manifest.SetResult(exp_src_url.url_string, 0, 'error', str(e))
        raise
    else:
      # Copy succeeded; for mv, remove the source now so each copy+remove
      # pair completes together (see class docstring for why).
      if copy_helper_opts.perform_mv:
        self.logger.info('Removing %s...', exp_src_url)
        if exp_src_url.IsCloudUrl():
          gsutil_api.DeleteObject(exp_src_url.bucket_name,
                                  exp_src_url.object_name,
                                  generation=exp_src_url.generation,
                                  provider=exp_src_url.scheme)
        else:
          os.unlink(exp_src_url.object_name)

    with self.stats_lock:
      # TODO: Remove stats_lock; we should be able to calculate bytes
      # transferred from StatusMessages posted by operations within PerformCopy.
      self.total_bytes_transferred += bytes_transferred
|
|
|
|
  def _ConstructNameExpansionIteratorDstTupleIterator(self, src_url_strs_iter,
                                                      dst_url_strs):
    """Yields a NameExpansionIteratorDestinationTuple per (sources, dest) pair.

    As a side effect, accumulates state used later for analytics and
    seek-ahead: self.has_file_dst, self.has_cloud_dst, self.provider_types,
    and self.combined_src_urls.

    Args:
      src_url_strs_iter: Iterable of collections of source URL strings,
        parallel to dst_url_strs.
      dst_url_strs: Collection of destination URL strings.

    Yields:
      NameExpansionIteratorDestinationTuple pairing a NameExpansionIterator
      over one collection of sources with the DestinationInfo for its
      expanded destination.
    """
    copy_helper_opts = copy_helper.GetCopyHelperOpts()
    for src_url_str, dst_url_str in zip(src_url_strs_iter, dst_url_strs):
      # Getting the destination information for each (sources, destination)
      # tuple. This assumes that the same destination is never provided in
      # multiple tuples, and doing so may result in an inconsistent behavior
      # especially when using the -m multi-threading option.
      #
      # Example for the inconsistent behavior, the following commands will
      # behave differently:
      #
      # gsutil cp -r dir1 dir2 gs://bucket/non-existent-dir
      # gsutil cp -r [
      #     (dir1, gs://bucket/non-existent-dir),
      #     (dir2, gs://bucket/non-existent-dir)
      # ]
      #
      # When multiple threads execute on a non existing destination directory.
      # These threads might encounter different states of the destination
      # directory. The first thread to execute the command finds that the
      # destination directory does not exist, it will create the destination
      # directory and copies the files inside the source directories to the
      # destination directory. The following threads find that the destination
      # directory already exists and copy the source directories in the
      # destination directory. In another scenario, all the threads might find
      # that the destination directory does not exist and copy the source
      # directories to the destination directory.
      exp_dst_url, have_existing_dst_container = (
          copy_helper.ExpandUrlToSingleBlr(dst_url_str,
                                           self.gsutil_api,
                                           self.project_id,
                                           logger=self.logger))
      name_expansion_iterator_dst_tuple = NameExpansionIteratorDestinationTuple(
          NameExpansionIterator(
              self.command_name,
              self.debug,
              self.logger,
              self.gsutil_api,
              src_url_str,
              # mv implies recursive expansion (directory-move semantics).
              self.recursion_requested or copy_helper_opts.perform_mv,
              project_id=self.project_id,
              all_versions=self.all_versions,
              ignore_symlinks=self.exclude_symlinks,
              continue_on_error=(self.continue_on_error or
                                 self.parallel_operations),
              bucket_listing_fields=GetSourceFieldsNeededForCopy(
                  exp_dst_url.IsCloudUrl(),
                  copy_helper_opts.skip_unsupported_objects,
                  copy_helper_opts.preserve_acl,
                  preserve_posix=self.preserve_posix_attrs,
                  delete_source=copy_helper_opts.perform_mv,
                  file_size_will_change=self.use_stet)),
          DestinationInfo(exp_dst_url, have_existing_dst_container))

      # Record destination/provider facts for the performance summary, and
      # extend the combined source list consumed by the seek-ahead iterator.
      self.has_file_dst = self.has_file_dst or exp_dst_url.IsFileUrl()
      self.has_cloud_dst = self.has_cloud_dst or exp_dst_url.IsCloudUrl()
      self.provider_types.add(exp_dst_url.scheme)
      self.combined_src_urls = itertools.chain(self.combined_src_urls,
                                               src_url_str)

      yield name_expansion_iterator_dst_tuple
|
|
|
|
  # Command entry point.
  def RunCommand(self):
    """Runs the cp (or mv) command; returns 0 on success.

    Raises:
      CommandException: On invalid argument combinations, or if any
        file/object failed to transfer.
    """
    copy_helper_opts = self._ParseOpts()

    self.total_bytes_transferred = 0

    dst_url = StorageUrlFromString(self.args[-1])
    # Destination of '-' (stdout) or a FIFO is handled by streaming the
    # sources with cat rather than by the normal copy machinery.
    if dst_url.IsFileUrl() and (dst_url.object_name == '-' or dst_url.IsFifo()):
      if self.preserve_posix_attrs:
        raise CommandException('Cannot preserve POSIX attributes with a '
                               'stream or a named pipe.')
      cat_out_fd = (GetStreamFromFileUrl(dst_url, mode='wb')
                    if dst_url.IsFifo() else None)
      return cat_helper.CatHelper(self).CatUrlStrings(self.args[:-1],
                                                      cat_out_fd=cat_out_fd)

    if copy_helper_opts.read_args_from_stdin:
      # With -I the only positional argument is the destination.
      if len(self.args) != 1:
        raise CommandException('Source URLs cannot be specified with -I option')
      # Use StdinIteratorCls instead of StdinIterator here to avoid Python 3
      # generator pickling errors when multiprocessing a command.
      src_url_strs = [StdinIteratorCls()]
    else:
      if len(self.args) < 2:
        raise CommandException('Wrong number of arguments for "cp" command.')
      src_url_strs = [self.args[:-1]]

    dst_url_strs = [self.args[-1]]

    # State accumulated by _ConstructNameExpansionIteratorDstTupleIterator
    # for analytics and seek-ahead.
    self.combined_src_urls = []
    self.has_file_dst = False
    self.has_cloud_dst = False
    self.provider_types = set()
    # Because cp may have multiple source URLs and multiple destinations, we
    # wrap the name expansion iterator in order to collect analytics.
    name_expansion_iterator = CopyObjectsIterator(
        self._ConstructNameExpansionIteratorDstTupleIterator(
            src_url_strs, dst_url_strs),
        copy_helper_opts.daisy_chain,
    )

    process_count, thread_count = self._GetProcessAndThreadCount(
        process_count=None,
        thread_count=None,
        parallel_operations_override=None,
        print_macos_warning=False)
    copy_helper.TriggerReauthForDestinationProviderIfNecessary(
        dst_url, self.gsutil_api, process_count * thread_count)

    seek_ahead_iterator = None
    # Cannot seek ahead with stdin args, since we can only iterate them
    # once without buffering in memory.
    if not copy_helper_opts.read_args_from_stdin:
      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name,
          self.debug,
          self.GetSeekAheadGsutilApi(),
          self.combined_src_urls,
          self.recursion_requested or copy_helper_opts.perform_mv,
          all_versions=self.all_versions,
          project_id=self.project_id,
          ignore_symlinks=self.exclude_symlinks,
          file_size_will_change=self.use_stet)

    # Use a lock to ensure accurate statistics in the face of
    # multi-threading/multi-processing.
    self.stats_lock = parallelism_framework_util.CreateLock()

    # Tracks if any copies failed.
    self.op_failure_count = 0

    # Start the clock.
    start_time = time.time()

    # Tuple of attributes to share/manage across multiple processes in
    # parallel (-m) mode.
    shared_attrs = ('op_failure_count', 'total_bytes_transferred')

    # Perform copy requests in parallel (-m) mode, if requested, using
    # configured number of parallel processes and threads. Otherwise,
    # perform requests with sequential function calls in current process.
    self.Apply(_CopyFuncWrapper,
               name_expansion_iterator,
               _CopyExceptionHandler,
               shared_attrs,
               fail_on_error=(not self.continue_on_error),
               seek_ahead_iterator=seek_ahead_iterator)
    self.logger.debug('total_bytes_transferred: %d',
                      self.total_bytes_transferred)

    end_time = time.time()
    self.total_elapsed_time = end_time - start_time
    self.total_bytes_per_second = CalculateThroughput(
        self.total_bytes_transferred, self.total_elapsed_time)
    LogPerformanceSummaryParams(
        has_file_dst=self.has_file_dst,
        has_cloud_dst=self.has_cloud_dst,
        avg_throughput=self.total_bytes_per_second,
        total_bytes_transferred=self.total_bytes_transferred,
        total_elapsed_time=self.total_elapsed_time,
        uses_fan=self.parallel_operations,
        is_daisy_chain=copy_helper_opts.daisy_chain,
        provider_types=list(self.provider_types))

    if self.debug >= DEBUGLEVEL_DUMP_REQUESTS:
      # Note that this only counts the actual GET and PUT bytes for the copy
      # - not any transfers for doing wildcard expansion, the initial
      # HEAD/GET request performed to get the object metadata, etc.
      if self.total_bytes_transferred != 0:
        self.logger.info(
            'Total bytes copied=%d, total elapsed time=%5.3f secs (%sps)',
            self.total_bytes_transferred, self.total_elapsed_time,
            MakeHumanReadable(self.total_bytes_per_second))
    if self.op_failure_count:
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException('{count} file{pl}/object{pl} could '
                             'not be transferred.'.format(
                                 count=self.op_failure_count, pl=plural_str))

    return 0
|
|
|
|
def _ParseOpts(self):
|
|
# TODO: Arrange variables initialized here in alphabetical order.
|
|
perform_mv = False
|
|
# exclude_symlinks is handled by Command parent class, so save in Command
|
|
# state rather than CopyHelperOpts.
|
|
self.exclude_symlinks = False
|
|
no_clobber = False
|
|
# continue_on_error is handled by Command parent class, so save in Command
|
|
# state rather than CopyHelperOpts.
|
|
self.continue_on_error = False
|
|
daisy_chain = False
|
|
read_args_from_stdin = False
|
|
print_ver = False
|
|
use_manifest = False
|
|
preserve_acl = False
|
|
self.preserve_posix_attrs = False
|
|
canned_acl = None
|
|
# canned_acl is handled by a helper function in parent
|
|
# Command class, so save in Command state rather than CopyHelperOpts.
|
|
self.canned = None
|
|
|
|
self.all_versions = False
|
|
|
|
self.skip_unsupported_objects = False
|
|
|
|
# Files matching these extensions should be compressed.
|
|
# The gzip_encoded flag marks if the files should be compressed during
|
|
# the upload. The gzip_local flag marks if the files should be compressed
|
|
# before uploading. Files compressed prior to uploaded are stored
|
|
# compressed, while files compressed during the upload are stored
|
|
# uncompressed. These flags cannot be mixed.
|
|
gzip_encoded = False
|
|
gzip_local = False
|
|
gzip_arg_exts = None
|
|
gzip_arg_all = None
|
|
|
|
test_callback_file = None
|
|
dest_storage_class = None
|
|
self.use_stet = False
|
|
|
|
# self.recursion_requested initialized in command.py (so can be checked
|
|
# in parent class for all commands).
|
|
self.manifest = None
|
|
if self.sub_opts:
|
|
for o, a in self.sub_opts:
|
|
if o == '-a':
|
|
canned_acl = a
|
|
self.canned = True
|
|
if o == '-A':
|
|
self.all_versions = True
|
|
if o == '-c':
|
|
self.continue_on_error = True
|
|
elif o == '-D':
|
|
daisy_chain = True
|
|
elif o == '-e':
|
|
self.exclude_symlinks = True
|
|
elif o == '--testcallbackfile':
|
|
# File path of a pickled class that implements ProgressCallback.call.
|
|
# Used for testing transfer interruptions and resumes.
|
|
test_callback_file = a
|
|
elif o == '-I':
|
|
read_args_from_stdin = True
|
|
elif o == '-j':
|
|
gzip_encoded = True
|
|
gzip_arg_exts = [x.strip() for x in a.split(',')]
|
|
elif o == '-J':
|
|
gzip_encoded = True
|
|
gzip_arg_all = GZIP_ALL_FILES
|
|
elif o == '-L':
|
|
use_manifest = True
|
|
self.manifest = Manifest(a)
|
|
elif o == '-M':
|
|
# Note that we signal to the cp command to perform a move (copy
|
|
# followed by remove) and use directory-move naming rules by passing
|
|
# the undocumented (for internal use) -M option when running the cp
|
|
# command from mv.py.
|
|
perform_mv = True
|
|
elif o == '-n':
|
|
no_clobber = True
|
|
elif o == '-p':
|
|
preserve_acl = True
|
|
elif o == '-P':
|
|
self.preserve_posix_attrs = True
|
|
InitializePreservePosixData()
|
|
elif o == '-r' or o == '-R':
|
|
self.recursion_requested = True
|
|
elif o == '-s':
|
|
dest_storage_class = NormalizeStorageClass(a)
|
|
elif o == '-U':
|
|
self.skip_unsupported_objects = True
|
|
elif o == '-v':
|
|
print_ver = True
|
|
elif o == '-z':
|
|
gzip_local = True
|
|
gzip_arg_exts = [x.strip() for x in a.split(',')]
|
|
elif o == '-Z':
|
|
gzip_local = True
|
|
gzip_arg_all = GZIP_ALL_FILES
|
|
elif o == '--stet':
|
|
self.use_stet = True
|
|
|
|
if preserve_acl and canned_acl:
|
|
raise CommandException(
|
|
'Specifying both the -p and -a options together is invalid.')
|
|
|
|
if self.all_versions and self.parallel_operations:
|
|
raise CommandException(
|
|
'The gsutil -m option is not supported with the cp -A flag, to '
|
|
'ensure that object version ordering is preserved. Please re-run '
|
|
'the command without the -m option.')
|
|
if gzip_encoded and gzip_local:
|
|
raise CommandException(
|
|
'Specifying both the -j/-J and -z/-Z options together is invalid.')
|
|
if gzip_arg_exts and gzip_arg_all:
|
|
if gzip_encoded:
|
|
raise CommandException(
|
|
'Specifying both the -j and -J options together is invalid.')
|
|
else:
|
|
raise CommandException(
|
|
'Specifying both the -z and -Z options together is invalid.')
|
|
self.gzip_exts = gzip_arg_exts or gzip_arg_all
|
|
self.gzip_encoded = gzip_encoded
|
|
|
|
return CreateCopyHelperOpts(
|
|
perform_mv=perform_mv,
|
|
no_clobber=no_clobber,
|
|
daisy_chain=daisy_chain,
|
|
read_args_from_stdin=read_args_from_stdin,
|
|
print_ver=print_ver,
|
|
use_manifest=use_manifest,
|
|
preserve_acl=preserve_acl,
|
|
canned_acl=canned_acl,
|
|
skip_unsupported_objects=self.skip_unsupported_objects,
|
|
test_callback_file=test_callback_file,
|
|
dest_storage_class=dest_storage_class)
|