# -*- coding: utf-8 -*-
# Copyright 2010 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base class for gsutil commands.
|
|
|
|
In addition to base class code, this file contains helpers that depend on base
|
|
class state (such as GetAndPrintAcl) In general, functions that depend on
|
|
class state and that are used by multiple commands belong in this file.
|
|
Functions that don't depend on class state belong in util.py, and non-shared
|
|
helpers belong in individual subclasses.
|
|
"""
|
|
|
|
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

import codecs
from collections import namedtuple
import copy
import getopt
import json
import logging
import os
import signal
import sys
import textwrap
import threading
import time
import traceback

import boto
from boto.storage_uri import StorageUri
import gslib
from gslib.cloud_api import AccessDeniedException
from gslib.cloud_api import ArgumentException
from gslib.cloud_api import ServiceException
from gslib.cloud_api_delegator import CloudApiDelegator
from gslib.cs_api_map import ApiSelector
from gslib.cs_api_map import GsutilApiMapFactory
from gslib.exception import CommandException
from gslib.help_provider import HelpProvider
from gslib.metrics import CaptureThreadStatException
from gslib.metrics import LogPerformanceSummaryParams
from gslib.name_expansion import CopyObjectInfo
from gslib.name_expansion import CopyObjectsIterator
from gslib.name_expansion import NameExpansionIterator
from gslib.name_expansion import NameExpansionResult
from gslib.name_expansion import SeekAheadNameExpansionIterator
from gslib.plurality_checkable_iterator import PluralityCheckableIterator
from gslib.seek_ahead_thread import SeekAheadThread
from gslib.sig_handling import ChildProcessSignalHandler
from gslib.sig_handling import GetCaughtSignals
from gslib.sig_handling import KillProcess
from gslib.sig_handling import MultithreadedMainSignalHandler
from gslib.sig_handling import RegisterSignalHandler
from gslib.storage_url import HaveFileUrls
from gslib.storage_url import HaveProviderUrls
from gslib.storage_url import StorageUrlFromString
from gslib.storage_url import UrlsAreForSingleProvider
from gslib.storage_url import UrlsAreMixOfBucketsAndObjects
from gslib.third_party.storage_apitools import storage_v1_messages as apitools_messages
from gslib.thread_message import FinalMessage
from gslib.thread_message import MetadataMessage
from gslib.thread_message import PerformanceSummaryMessage
from gslib.thread_message import ProducerThreadMessage
from gslib.ui_controller import MainThreadUIQueue
from gslib.ui_controller import UIController
from gslib.ui_controller import UIThread
from gslib.utils.boto_util import GetFriendlyConfigFilePaths
from gslib.utils.boto_util import GetMaxConcurrentCompressedUploads
from gslib.utils.constants import NO_MAX
from gslib.utils.constants import UTF8
import gslib.utils.parallelism_framework_util
from gslib.utils.parallelism_framework_util import AtomicDict
from gslib.utils.parallelism_framework_util import CheckMultiprocessingAvailableAndInit
from gslib.utils.parallelism_framework_util import multiprocessing_context
from gslib.utils.parallelism_framework_util import ProcessAndThreadSafeInt
from gslib.utils.parallelism_framework_util import PutToQueueWithTimeout
from gslib.utils.parallelism_framework_util import SEEK_AHEAD_JOIN_TIMEOUT
from gslib.utils.parallelism_framework_util import ShouldProhibitMultiprocessing
from gslib.utils.parallelism_framework_util import UI_THREAD_JOIN_TIMEOUT
from gslib.utils.parallelism_framework_util import ZERO_TASKS_TO_DO_ARGUMENT
from gslib.utils.rsync_util import RsyncDiffToApply
from gslib.utils.shim_util import GcloudStorageCommandMixin
from gslib.utils.system_util import GetTermLines
from gslib.utils.system_util import IS_WINDOWS
from gslib.utils.translation_helper import AclTranslation
from gslib.utils.translation_helper import GetNonMetadataHeaders
from gslib.utils.translation_helper import PRIVATE_DEFAULT_OBJ_ACL
from gslib.wildcard_iterator import CreateWildcardIterator
from six.moves import queue as Queue

# pylint: disable=g-import-not-at-top
try:
  from Crypto import Random as CryptoRandom
except ImportError:
  CryptoRandom = None
# pylint: enable=g-import-not-at-top

OFFER_GSUTIL_M_SUGGESTION_THRESHOLD = 5
OFFER_GSUTIL_M_SUGGESTION_FREQUENCY = 1000


def CreateOrGetGsutilLogger(command_name):
  """Fetches a logger with the given name that resembles 'print' output.

  Initial Logger Configuration:

  The logger abides by gsutil -d/-D/-DD/-q options. If none of those options
  were specified at invocation, the returned logger will display all messages
  logged with level INFO or above. Log propagation is disabled.

  If a logger with the specified name has already been created and configured,
  it is not reconfigured, e.g.:

    foo = CreateOrGetGsutilLogger('foo')  # Creates and configures Logger "foo".
    foo.setLevel(logging.DEBUG)  # Change level from INFO to DEBUG.
    foo = CreateOrGetGsutilLogger('foo')  # Does not reset level to INFO.

  Args:
    command_name: (str) Command name to create logger for.

  Returns:
    A logging.Logger object.
  """
  log = logging.getLogger(command_name)
  # There are some scenarios (e.g. unit tests, commands like `mv` that call
  # other commands) in which we call this function multiple times. To avoid
  # adding duplicate handlers or overwriting logger attributes set elsewhere,
  # we only configure the logger if it's one we haven't configured before (i.e.
  # one that doesn't have a handler set yet).
  if not log.handlers:
    log.propagate = False
    log.setLevel(logging.root.level)
    log_handler = logging.StreamHandler()
    log_handler.setFormatter(logging.Formatter('%(message)s'))
    log.addHandler(log_handler)
  return log


def _DefaultExceptionHandler(cls, e):
  cls.logger.exception(e)


def _UrlArgChecker(command_instance, url):
  if not command_instance.exclude_symlinks:
    return True
  exp_src_url = url.expanded_storage_url
  if exp_src_url.IsFileUrl() and os.path.islink(exp_src_url.object_name):
    command_instance.logger.info('Skipping symbolic link %s...', exp_src_url)
    return False
  return True


def DummyArgChecker(*unused_args):
  return True


def SetAclFuncWrapper(cls, name_expansion_result, thread_state=None):
  return cls.SetAclFunc(name_expansion_result, thread_state=thread_state)


def SetAclExceptionHandler(cls, e):
  """Exception handler that maintains state about post-completion status."""
  cls.logger.error(str(e))
  cls.everything_set_okay = False


# We will keep this list of all thread- or process-safe queues (except the
# global status queue) ever created by the main thread so that we can
# forcefully kill them upon shutdown. Otherwise, we encounter a Python bug in
# which empty queues block forever on join (which is called as part of the
# Python exit function cleanup) under the impression that they are non-empty.
# However, this also lets us shut down somewhat more cleanly when interrupted.
queues = []


def _CryptoRandomAtFork():
  if CryptoRandom and getattr(CryptoRandom, 'atfork', None):
    # Fixes https://github.com/GoogleCloudPlatform/gsutil/issues/390. The
    # oauth2client module uses Python's Crypto library when pyOpenSSL isn't
    # present; that module requires calling atfork() in both the parent and
    # child process after a new process is forked.
    CryptoRandom.atfork()


def _NewMultiprocessingQueue():
  new_queue = multiprocessing_context.Queue(MAX_QUEUE_SIZE)
  queues.append(new_queue)
  return new_queue


def _NewThreadsafeQueue():
  new_queue = Queue.Queue(MAX_QUEUE_SIZE)
  queues.append(new_queue)
  return new_queue


# The maximum size of a process- or thread-safe queue. Imposing this limit
# prevents us from needing to hold an arbitrary amount of data in memory.
# However, setting this number too high (e.g., >= 32768 on OS X) can cause
# problems on some operating systems.
MAX_QUEUE_SIZE = 32500

# Related to the max queue size above, once we cross this threshold of
# iterated tasks added to the queue, kick off the SeekAheadThread that will
# estimate the total work necessary for the command.
DEFAULT_TASK_ESTIMATION_THRESHOLD = 30000


def _GetTaskEstimationThreshold():
  return boto.config.getint('GSUtil', 'task_estimation_threshold',
                            DEFAULT_TASK_ESTIMATION_THRESHOLD)


# The maximum depth of the tree of recursive calls to command.Apply. This is
# an arbitrary limit put in place to prevent developers from accidentally
# causing problems with infinite recursion, and it can be increased if needed.
MAX_RECURSIVE_DEPTH = 5

# Map from deprecated aliases to the current command and subcommands that
# provide the same behavior.
# TODO: Remove this map and deprecate old commands on 9/9/14.
OLD_ALIAS_MAP = {
    'chacl': ['acl', 'ch'],
    'getacl': ['acl', 'get'],
    'setacl': ['acl', 'set'],
    'getcors': ['cors', 'get'],
    'setcors': ['cors', 'set'],
    'chdefacl': ['defacl', 'ch'],
    'getdefacl': ['defacl', 'get'],
    'setdefacl': ['defacl', 'set'],
    'disablelogging': ['logging', 'set', 'off'],
    'enablelogging': ['logging', 'set', 'on'],
    'getlogging': ['logging', 'get'],
    'getversioning': ['versioning', 'get'],
    'setversioning': ['versioning', 'set'],
    'getwebcfg': ['web', 'get'],
    'setwebcfg': ['web', 'set']
}
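
# Example of the mapping (illustrative): under this table, the legacy
# invocation `gsutil getacl gs://bucket` behaves like `gsutil acl get
# gs://bucket`; _TranslateDeprecatedAliases below prepends the subcommand
# ('get') to the args and warns that the alias is deprecated.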

# Declare all of the module level variables - see
# InitializeMultiprocessingVariables for an explanation of why this is
# necessary.
# pylint: disable=global-at-module-level
global manager, consumer_pools, task_queues, caller_id_lock, caller_id_counter
global total_tasks, call_completed_map, global_return_values_map
global need_pool_or_done_cond, caller_id_finished_count, new_pool_needed
global current_max_recursive_level, shared_vars_map, shared_vars_list_map
global class_map, worker_checking_level_lock, failure_count, thread_stats
global glob_status_queue, ui_controller, concurrent_compressed_upload_lock


def InitializeMultiprocessingVariables():
  """Initializes module-level variables that will be inherited by subprocesses.

  On Windows, a multiprocessing.Manager object should only
  be created within an "if __name__ == '__main__':" block. This function
  must be called; otherwise, every command that calls Command.Apply will fail.

  While multiprocessing variables are initialized at the beginning of
  gsutil execution, new processes and threads are created only by calls
  to Command.Apply. When multiple processes and threads are used,
  the flow of startup/teardown looks like this:

  1. __main__: initializes multiprocessing variables, including any necessary
     Manager processes (here and in gslib.utils.parallelism_framework_util).
  2. __main__: Registers signal handlers for terminating signals responsible
     for cleaning up multiprocessing variables and manager processes upon exit.
  3. Command.Apply registers signal handlers for the main process to kill
     itself after the cleanup handlers registered by __main__ have executed.
  4. If worker processes have not been created for the current level of
     recursive calls, Command.Apply creates those processes.

  ---- Parallel operations start here, so steps are no longer numbered. ----
  - Command.Apply in the main thread starts the ProducerThread.
    - The Producer thread adds task arguments to the global task queue.
    - It optionally starts the SeekAheadThread which estimates total
      work for the Apply call.

  - Command.Apply in the main thread starts the UIThread, which will consume
    messages from the global status queue, process them, and display them to
    the user.

  - Each worker process creates a thread pool to perform work.
    - The worker process registers signal handlers to kill itself in
      response to a terminating signal.
    - The main thread of the worker process moves items from the global
      task queue to the process-local task queue.
    - Worker threads retrieve items from the process-local task queue,
      perform the work, and post messages to the global status queue.
    - Worker threads may themselves call Command.Apply.
      - This creates a new pool of worker subprocesses with the same size
        as the main pool. This pool is shared amongst all Command.Apply calls
        at the given recursion depth.
      - This reuses the global UIThread, global status queue, and global task
        queue.
      - This starts a new ProducerThread.
      - A SeekAheadThread is not started at this level; only one such thread
        exists at the top level, and it provides estimates for top-level work
        only.

  - The ProducerThread runs out of tasks, or the user signals cancellation.
    - The ProducerThread cancels the SeekAheadThread (if it is running) via
      an event.
    - The ProducerThread enqueues special terminating messages on the
      global task queue and global status queue, signaling the UI Thread to
      shut down and the main thread to continue operation.
    - In the termination case, existing processes exit in response to
      terminating signals from the main process.

  ---- Parallel operations end here. ----
  5. Further top-level calls to Command.Apply can be made, which will repeat
     all of the steps made in #4, except that worker processes will be
     reused.
  """
  # This list of global variables must exactly match the above list of
  # declarations.
  # pylint: disable=global-variable-undefined
  global manager, consumer_pools, task_queues, caller_id_lock, caller_id_counter
  global total_tasks, call_completed_map, global_return_values_map, thread_stats
  global need_pool_or_done_cond, caller_id_finished_count, new_pool_needed
  global current_max_recursive_level, shared_vars_map, shared_vars_list_map
  global class_map, worker_checking_level_lock, failure_count, glob_status_queue
  global concurrent_compressed_upload_lock

  manager = multiprocessing_context.Manager()

  # List of ConsumerPools - used during shutdown to clean up child processes.
  consumer_pools = []

  # List of all existing task queues - used by all pools to find the queue
  # that's appropriate for the given recursive_apply_level.
  task_queues = []

  # Used to assign a globally unique caller ID to each Apply call.
  caller_id_lock = manager.Lock()
  caller_id_counter = ProcessAndThreadSafeInt(True)

  # Map from caller_id to total number of tasks to be completed for that ID.
  total_tasks = AtomicDict(manager=manager)

  # Map from caller_id to a boolean which is True iff all its tasks are
  # finished.
  call_completed_map = AtomicDict(manager=manager)

  # Used to keep track of the set of return values for each caller ID.
  global_return_values_map = AtomicDict(manager=manager)

  # Condition used to notify any waiting threads that a task has finished or
  # that a call to Apply needs a new set of consumer processes.
  need_pool_or_done_cond = manager.Condition()

  # Lock used to prevent multiple worker processes from asking the main thread
  # to create a new consumer pool for the same level.
  worker_checking_level_lock = manager.Lock()

  # Map from caller_id to the current number of completed tasks for that ID.
  caller_id_finished_count = AtomicDict(manager=manager)

  # Used as a way for the main thread to distinguish between being woken up
  # by another call finishing and being woken up by a call that needs a new set
  # of consumer processes.
  new_pool_needed = ProcessAndThreadSafeInt(True)

  current_max_recursive_level = ProcessAndThreadSafeInt(True)

  # Map from (caller_id, name) to the value of that shared variable.
  shared_vars_map = AtomicDict(manager=manager)
  shared_vars_list_map = AtomicDict(manager=manager)

  # Map from (process id, thread id) to a _ThreadStat object (see WorkerThread).
  # Used to keep track of thread idle time and execution time.
  thread_stats = AtomicDict(manager=manager)

  # Map from caller_id to calling class.
  class_map = manager.dict()

  # Number of tasks that resulted in an exception in calls to Apply().
  failure_count = ProcessAndThreadSafeInt(True)

  # Central queue for status reporting across multiple processes and threads.
  # It's possible that if many processes and threads are executing small file
  # writes or metadata changes quickly, performance may be bounded by lock
  # contention on the queue. Initial testing conducted with
  # 12 processes * 5 threads per process showed little difference. If this
  # becomes a performance bottleneck in the future, consider creating a queue
  # per-process and having the UI thread poll all of the queues; that approach
  # would need to address:
  # - Queue fairness if one queue grows to be disproportionately large
  # - Reasonable time correlation with events as they occur
  #
  # This queue must be torn down after worker processes/threads and the
  # UI thread have been torn down. Otherwise, these threads may have
  # undefined behavior when trying to interact with a non-existent queue.
  glob_status_queue = manager.Queue(MAX_QUEUE_SIZE)

  # Semaphore lock used to prevent resource exhaustion when running many
  # compressed uploads in parallel.
  concurrent_compressed_upload_lock = manager.BoundedSemaphore(
      GetMaxConcurrentCompressedUploads())


def TeardownMultiprocessingProcesses():
  """Should be called by signal handlers prior to shut down."""
  # Shut down all processes in consumer pools in preparation for exiting.
  ShutDownGsutil()
  # Shut down command and util's multiprocessing.Manager().
  # pylint: disable=global-variable-not-assigned,global-variable-undefined
  global manager
  # pylint: enable=global-variable-not-assigned,global-variable-undefined
  manager.shutdown()
  gslib.utils.parallelism_framework_util.top_level_manager.shutdown()


def InitializeThreadingVariables():
  """Initializes module-level variables used when running multi-threaded.

  When multiprocessing is not available (or on Windows where only 1 process
  is used), thread-safe analogs to the multiprocessing global variables
  must be initialized. This function is the thread-safe analog to
  InitializeMultiprocessingVariables.
  """
  # pylint: disable=global-variable-undefined
  global global_return_values_map, shared_vars_map, failure_count
  global caller_id_finished_count, shared_vars_list_map, total_tasks
  global need_pool_or_done_cond, call_completed_map, class_map, thread_stats
  global task_queues, caller_id_lock, caller_id_counter, glob_status_queue
  global worker_checking_level_lock, current_max_recursive_level
  global concurrent_compressed_upload_lock
  caller_id_counter = ProcessAndThreadSafeInt(False)
  caller_id_finished_count = AtomicDict()
  caller_id_lock = threading.Lock()
  call_completed_map = AtomicDict()
  class_map = AtomicDict()
  current_max_recursive_level = ProcessAndThreadSafeInt(False)
  failure_count = ProcessAndThreadSafeInt(False)
  glob_status_queue = Queue.Queue(MAX_QUEUE_SIZE)
  global_return_values_map = AtomicDict()
  need_pool_or_done_cond = threading.Condition()
  shared_vars_list_map = AtomicDict()
  shared_vars_map = AtomicDict()
  thread_stats = AtomicDict()
  task_queues = []
  total_tasks = AtomicDict()
  worker_checking_level_lock = threading.Lock()
  concurrent_compressed_upload_lock = threading.BoundedSemaphore(
      GetMaxConcurrentCompressedUploads())


# Each subclass of Command must define a property named 'command_spec' that is
# an instance of the following class.
CommandSpec = namedtuple(
    'CommandSpec',
    [
        # Name of command.
        'command_name',
        # Usage synopsis.
        'usage_synopsis',
        # List of command name aliases.
        'command_name_aliases',
        # Min number of args required by this command.
        'min_args',
        # Max number of args required by this command, or NO_MAX.
        'max_args',
        # Getopt-style string specifying acceptable sub args.
        'supported_sub_args',
        # True if file URLs are acceptable for this command.
        'file_url_ok',
        # True if provider-only URLs are acceptable for this command.
        'provider_url_ok',
        # Index in args of first URL arg.
        'urls_start_arg',
        # List of supported APIs
        'gs_api_support',
        # Default API to use for this command
        'gs_default_api',
        # Private arguments (for internal testing)
        'supported_private_args',
        'argparse_arguments',
    ])
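
# Illustrative example (hypothetical values; no real command defines exactly
# this spec): a command named 'foo' accepting one or more URLs and a -r flag
# could declare
#
#   command_spec = CommandSpec(
#       command_name='foo',
#       usage_synopsis='foo [-r] url...',
#       command_name_aliases=[],
#       min_args=1,
#       max_args=NO_MAX,
#       supported_sub_args='r',
#       file_url_ok=False,
#       provider_url_ok=False,
#       urls_start_arg=0,
#       gs_api_support=[ApiSelector.XML],
#       gs_default_api=ApiSelector.XML,
#       supported_private_args=None,
#       argparse_arguments=[])
#
# In practice, subclasses build this via Command.CreateCommandSpec (defined
# below), which fills in these defaults.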


class Command(HelpProvider, GcloudStorageCommandMixin):
  """Base class for all gsutil commands."""

  # Each subclass must override this with an instance of CommandSpec.
  command_spec = None

  _commands_with_subcommands_and_subopts = ('acl', 'defacl', 'iam', 'kms',
                                            'label', 'logging', 'notification',
                                            'retention', 'web')

  # This keeps track of the recursive depth of the current call to Apply.
  recursive_apply_level = 0

  # If the multiprocessing module isn't available, we'll use this to keep track
  # of the caller_id.
  sequential_caller_id = -1

  @staticmethod
  def CreateCommandSpec(command_name,
                        usage_synopsis=None,
                        command_name_aliases=None,
                        min_args=0,
                        max_args=NO_MAX,
                        supported_sub_args='',
                        file_url_ok=False,
                        provider_url_ok=False,
                        urls_start_arg=0,
                        gs_api_support=None,
                        gs_default_api=None,
                        supported_private_args=None,
                        argparse_arguments=None):
    """Creates an instance of CommandSpec, with defaults."""
    return CommandSpec(command_name=command_name,
                       usage_synopsis=usage_synopsis,
                       command_name_aliases=command_name_aliases or [],
                       min_args=min_args,
                       max_args=max_args,
                       supported_sub_args=supported_sub_args,
                       file_url_ok=file_url_ok,
                       provider_url_ok=provider_url_ok,
                       urls_start_arg=urls_start_arg,
                       gs_api_support=gs_api_support or [ApiSelector.XML],
                       gs_default_api=gs_default_api or ApiSelector.XML,
                       supported_private_args=supported_private_args,
                       argparse_arguments=argparse_arguments or [])

  # Define a convenience property for command name, since it's used many places.
  def _GetDefaultCommandName(self):
    return self.command_spec.command_name

  command_name = property(_GetDefaultCommandName)

  def _CalculateUrlsStartArg(self):
    """Calculate the index in args of the first URL arg.

    Returns:
      Index of the first URL arg (according to the command spec).
    """
    return self.command_spec.urls_start_arg

  def _TranslateDeprecatedAliases(self, args):
    """Map deprecated aliases to the corresponding new command, and warn."""
    new_command_args = OLD_ALIAS_MAP.get(self.command_alias_used, None)
    if new_command_args:
      # Prepend any subcommands for the new command. The command name itself
      # is not part of the args, so leave it out.
      args = new_command_args[1:] + args
      self.logger.warn('\n'.join(
          textwrap.wrap(
              ('You are using a deprecated alias, "%(used_alias)s", for the '
               '"%(command_name)s" command. This will stop working on 9/9/2014. '
               'Please use "%(command_name)s" with the appropriate sub-command in '
               'the future. See "gsutil help %(command_name)s" for details.') %
              {
                  'used_alias': self.command_alias_used,
                  'command_name': self.command_name
              })))
    return args

  def __init__(self,
               command_runner,
               args,
               headers,
               debug,
               trace_token,
               parallel_operations,
               bucket_storage_uri_class,
               gsutil_api_class_map_factory,
               logging_filters=None,
               command_alias_used=None,
               perf_trace_token=None,
               user_project=None):
    """Instantiates a Command.

    Args:
      command_runner: CommandRunner (for commands built atop other commands).
      args: Command-line args (arg0 = actual arg, not command name ala bash).
      headers: Dictionary containing optional HTTP headers to pass to boto.
      debug: Debug level to pass in to boto connection (range 0..3).
      trace_token: Trace token to pass to the API implementation.
      parallel_operations: Should command operations be executed in parallel?
      bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
                                Settable for testing/mocking.
      gsutil_api_class_map_factory: Creates map of cloud storage interfaces.
                                    Settable for testing/mocking.
      logging_filters: Optional list of logging.Filters to apply to this
                       command's logger.
      command_alias_used: The alias that was actually used when running this
                          command (as opposed to the "official" command name,
                          which will always correspond to the file name).
      perf_trace_token: Performance measurement trace token to use when making
                        API calls.
      user_project: Project to be billed for this request.

    Implementation note: subclasses shouldn't need to define an __init__
    method, and instead depend on the shared initialization that happens
    here. If you do define an __init__ method in a subclass you'll need to
    explicitly call super().__init__(). But you're encouraged not to do this,
    because it will make changing the __init__ interface more painful.
    """
    # Save class values from constructor params.
    super().__init__()
    self.command_runner = command_runner
    self.unparsed_args = args
    self.headers = headers
    self.debug = debug
    self.trace_token = trace_token
    self.perf_trace_token = perf_trace_token
    self.parallel_operations = parallel_operations
    self.user_project = user_project
    self.bucket_storage_uri_class = bucket_storage_uri_class
    self.gsutil_api_class_map_factory = gsutil_api_class_map_factory
    self.exclude_symlinks = False
    self.recursion_requested = False
    self.all_versions = False
    self.command_alias_used = command_alias_used
    self.seek_ahead_gsutil_api = None
    # pylint: disable=global-variable-not-assigned
    # pylint: disable=global-variable-undefined
    global ui_controller
    # pylint: enable=global-variable-undefined
    # pylint: enable=global-variable-not-assigned
    # Global instance of a threaded logger object.
    self.logger = CreateOrGetGsutilLogger(self.command_name)
    if logging_filters:
      for log_filter in logging_filters:
        self.logger.addFilter(log_filter)

    if self.headers is not None:
      self.non_metadata_headers = GetNonMetadataHeaders(self.headers)
    else:
      self.non_metadata_headers = None

    if self.command_spec is None:
      raise CommandException('"%s" command implementation is missing a '
                             'command_spec definition.' % self.command_name)

    self.quiet_mode = not self.logger.isEnabledFor(logging.INFO)
    ui_controller = UIController(quiet_mode=self.quiet_mode,
                                 dump_status_messages_file=boto.config.get(
                                     'GSUtil', 'dump_status_messages_file',
                                     None))

    # Parse and validate args.
    self.args = self._TranslateDeprecatedAliases(args)
    self.ParseSubOpts()

    # Named tuple public functions start with _
    # pylint: disable=protected-access
    self.command_spec = self.command_spec._replace(
        urls_start_arg=self._CalculateUrlsStartArg())

    if (len(self.args) < self.command_spec.min_args or
        len(self.args) > self.command_spec.max_args):
      self.RaiseWrongNumberOfArgumentsException()

    if self.command_name not in self._commands_with_subcommands_and_subopts:
      self.CheckArguments()

    # Build the support and default maps from the command spec.
    support_map = {
        'gs': self.command_spec.gs_api_support,
        's3': [ApiSelector.XML]
    }
    default_map = {
        'gs': self.command_spec.gs_default_api,
        's3': ApiSelector.XML
    }
    self.gsutil_api_map = GsutilApiMapFactory.GetApiMap(
        self.gsutil_api_class_map_factory, support_map, default_map)

    self.project_id = None
    self.gsutil_api = CloudApiDelegator(self.bucket_storage_uri_class,
                                        self.gsutil_api_map,
                                        self.logger,
                                        MainThreadUIQueue(
                                            sys.stderr, ui_controller),
                                        debug=self.debug,
                                        http_headers=self.non_metadata_headers,
                                        trace_token=self.trace_token,
                                        perf_trace_token=self.perf_trace_token,
                                        user_project=self.user_project)
    # Cross-platform path to run gsutil binary.
    self.gsutil_cmd = ''
    # If running on Windows, invoke python interpreter explicitly.
    if IS_WINDOWS:
      self.gsutil_cmd += 'python '
    # Add full path to gsutil to make sure we test the correct version.
    self.gsutil_path = gslib.GSUTIL_PATH
    self.gsutil_cmd += self.gsutil_path

    # We're treating recursion_requested like it's used by all commands, but
    # only some of the commands accept the -R option.
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-r' or o == '-R':
          self.recursion_requested = True
          break

    self.multiprocessing_is_available = (
        CheckMultiprocessingAvailableAndInit().is_available)

  def RaiseWrongNumberOfArgumentsException(self):
    """Raises exception for wrong number of arguments supplied to command."""
    if len(self.args) < self.command_spec.min_args:
      tail_str = 's' if self.command_spec.min_args > 1 else ''
      message = ('The %s command requires at least %d argument%s.' %
                 (self.command_name, self.command_spec.min_args, tail_str))
    else:
      message = ('The %s command accepts at most %d arguments.' %
                 (self.command_name, self.command_spec.max_args))
    message += ' Usage:\n%s\nFor additional help run:\n  gsutil help %s' % (
        self.command_spec.usage_synopsis, self.command_name)
    raise CommandException(message)

  def RaiseInvalidArgumentException(self):
    """Raises exception for specifying an invalid argument to command."""
    message = ('Incorrect option(s) specified. Usage:\n%s\n'
               'For additional help run:\n  gsutil help %s' %
               (self.command_spec.usage_synopsis, self.command_name))
    raise CommandException(message)

  def ParseSubOpts(self,
                   check_args=False,
                   args=None,
                   should_update_sub_opts_and_args=True):
    """Parses sub-opt args.

    Args:
      check_args: True to have CheckArguments() called after parsing.
      args: List of args. If None, self.args will be used.
      should_update_sub_opts_and_args: True if self.sub_opts and self.args
        should be updated with the values returned after parsing. Else return a
        tuple of sub_opts, args returned by getopt.getopt. This is done
        to allow this method to be called from get_gcloud_storage_args, in
        which case we do not want to update self.sub_opts and self.args.

    Raises:
      RaiseInvalidArgumentException: Invalid args specified.
    """
    if args is None:
      unparsed_args = self.args
    else:
      unparsed_args = args
    try:
      parsed_sub_opts, parsed_args = getopt.getopt(
          unparsed_args, self.command_spec.supported_sub_args,
          self.command_spec.supported_private_args or [])
    except getopt.GetoptError:
      self.RaiseInvalidArgumentException()
    if should_update_sub_opts_and_args:
      self.sub_opts, self.args = parsed_sub_opts, parsed_args
      if check_args:
        self.CheckArguments()
    else:
      if check_args:
        # This is just a sanity check. Only get_gcloud_storage_args will
        # call this method with should_update_sub_opts_and_args=False, and it
        # does not set check_args to True.
        raise TypeError('Requested to check arguments'
                        ' but sub_opts and args have not been updated.')
      return parsed_sub_opts, parsed_args
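
  # Example of the getopt convention used above (hypothetical option string):
  # supported_sub_args='rp:' declares -r as a boolean flag and -p as taking a
  # value, so for args=['-r', '-p', 'proj', 'url'], getopt.getopt returns
  # ([('-r', ''), ('-p', 'proj')], ['url']).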

  def CheckArguments(self):
    """Checks that command line arguments match the command_spec.

    Any commands in self._commands_with_subcommands_and_subopts are responsible
    for calling this method after handling initial parsing of their arguments.
    This prevents commands with sub-commands as well as options from breaking
    the parsing of getopt.

    TODO: Provide a function to parse commands and sub-commands more
    intelligently once we stop allowing the deprecated command versions.

    Raises:
      CommandException if the arguments don't match.
    """

    if (not self.command_spec.file_url_ok and
        HaveFileUrls(self.args[self.command_spec.urls_start_arg:])):
      raise CommandException('"%s" command does not support "file://" URLs. '
                             'Did you mean to use a gs:// URL?' %
                             self.command_name)
    if (not self.command_spec.provider_url_ok and
        HaveProviderUrls(self.args[self.command_spec.urls_start_arg:])):
      raise CommandException('"%s" command does not support provider-only '
                             'URLs.' % self.command_name)

  def WildcardIterator(self, url_string, all_versions=False):
    """Helper to instantiate gslib.WildcardIterator.

    Args are same as gslib.WildcardIterator interface, but this method fills in
    most of the values from instance state.

    Args:
      url_string: URL string naming wildcard objects to iterate.
      all_versions: If true, the iterator yields all versions of objects
                    matching the wildcard. If false, yields just the live
                    object version.

    Returns:
      WildcardIterator for use by caller.
    """
    return CreateWildcardIterator(url_string,
                                  self.gsutil_api,
                                  all_versions=all_versions,
                                  project_id=self.project_id,
                                  logger=self.logger)
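
  # Illustrative usage (sketch): inside a subclass's RunCommand, matching
  # objects can be iterated with, e.g.,
  #   for blr in self.WildcardIterator('gs://bucket/**').IterObjects():
  #     self.logger.info('%s', blr.url_string)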

  def GetSeekAheadGsutilApi(self):
    """Helper to instantiate a Cloud API instance for a seek-ahead iterator.

    This must be separate from the core command.gsutil_api instance for
    thread-safety, since other iterators typically use that instance and the
    SeekAheadIterator operates in parallel.

    Returns:
      Cloud API instance for use by the seek-ahead iterator.
    """
    # This is initialized in Initialize(Multiprocessing|Threading)Variables
    # pylint: disable=global-variable-not-assigned
    # pylint: disable=global-variable-undefined
    global glob_status_queue
    # pylint: enable=global-variable-not-assigned
    # pylint: enable=global-variable-undefined
    if not self.seek_ahead_gsutil_api:
      self.seek_ahead_gsutil_api = CloudApiDelegator(
          self.bucket_storage_uri_class,
          self.gsutil_api_map,
          logging.getLogger('dummy'),
          glob_status_queue,
          debug=self.debug,
          http_headers=self.non_metadata_headers,
          trace_token=self.trace_token,
          perf_trace_token=self.perf_trace_token,
          user_project=self.user_project)
    return self.seek_ahead_gsutil_api

  def RunCommand(self):
    """Abstract function in base class. Subclasses must implement this.

    The return value of this function will be used as the exit status of the
    process, so subclass commands should return an integer exit code (0 for
    success, a value in [1,255] for failure).
    """
    raise CommandException('Command %s is missing its RunCommand() '
                           'implementation' % self.command_name)
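
  # Minimal sketch of a subclass (hypothetical 'foo' command, shown only to
  # illustrate the contract; a real command also provides a help_spec):
  #
  #   class FooCommand(Command):
  #     command_spec = Command.CreateCommandSpec(
  #         'foo', usage_synopsis='foo url...', min_args=1)
  #
  #     def RunCommand(self):
  #       for url_str in self.args:
  #         self.logger.info('Processing %s...', url_str)
  #       return 0  # Exit status 0 = success.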

  ############################################################
  # Shared helper functions that depend on base class state. #
  ############################################################

  # TODO: Refactor ACL functions to a different module and pass the
  # command object as state, as opposed to defining them as member functions
  # of the command class.
  def ApplyAclFunc(self,
                   acl_func,
                   acl_excep_handler,
                   url_strs,
                   object_fields=None):
    """Sets the standard or default object ACL depending on self.command_name.

    Args:
      acl_func: ACL function to be passed to Apply.
      acl_excep_handler: ACL exception handler to be passed to Apply.
      url_strs: URL strings on which to set ACL.
      object_fields: If present, list of object metadata fields to retrieve;
                     if None, default name expansion iterator fields will be
                     used.

    Raises:
      CommandException if an ACL could not be set.
    """
    multi_threaded_url_args = []

    urls = list(map(StorageUrlFromString, url_strs))

    if (UrlsAreMixOfBucketsAndObjects(urls) and not self.recursion_requested):
      raise CommandException('Cannot operate on a mix of buckets and objects.')

    # Handle bucket ACL setting operations single-threaded, because
    # our threading machinery currently assumes it's working with objects
    # (name_expansion_iterator), and normally we wouldn't expect users to need
    # to set ACLs on huge numbers of buckets at once anyway.
    for url in urls:
      if url.IsCloudUrl() and url.IsBucket():
        if self.recursion_requested:
          # If user specified -R option, convert any bucket args to bucket
          # wildcards (e.g., gs://bucket/*), to prevent the operation from
          # being applied to the buckets themselves.
          url.object_name = '*'
          multi_threaded_url_args.append(url.url_string)
        else:
          # Convert to a NameExpansionResult so we can re-use the threaded
          # function for the single-threaded implementation. RefType is unused.
          for blr in self.WildcardIterator(
              url.url_string).IterBuckets(bucket_fields=['id']):
            name_expansion_for_url = NameExpansionResult(
                source_storage_url=url,
                is_multi_source_request=False,
                is_multi_top_level_source_request=False,
                names_container=False,
                expanded_storage_url=blr.storage_url,
                expanded_result=None)
            acl_func(self, name_expansion_for_url)
      else:
        multi_threaded_url_args.append(url.url_string)

    if len(multi_threaded_url_args) >= 1:
      name_expansion_iterator = NameExpansionIterator(
          self.command_name,
          self.debug,
          self.logger,
          self.gsutil_api,
          multi_threaded_url_args,
          self.recursion_requested,
          all_versions=self.all_versions,
          continue_on_error=self.continue_on_error or self.parallel_operations,
          bucket_listing_fields=object_fields)

      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name,
          self.debug,
          self.GetSeekAheadGsutilApi(),
          multi_threaded_url_args,
          self.recursion_requested,
          all_versions=self.all_versions)

      # Perform requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(acl_func,
                 name_expansion_iterator,
                 acl_excep_handler,
                 fail_on_error=not self.continue_on_error,
                 seek_ahead_iterator=seek_ahead_iterator)

      if not self.everything_set_okay and not self.continue_on_error:
        raise CommandException('ACLs for some objects could not be set.')

  def SetAclFunc(self, name_expansion_result, thread_state=None):
    """Sets the object ACL for the name_expansion_result provided.

    Args:
      name_expansion_result: NameExpansionResult describing the target object.
      thread_state: If present, use this gsutil Cloud API instance for the set.
    """
    if thread_state:
      assert not self.def_acl
      gsutil_api = thread_state
    else:
      gsutil_api = self.gsutil_api
    op_string = 'default object ACL' if self.def_acl else 'ACL'
    url = name_expansion_result.expanded_storage_url
    self.logger.info('Setting %s on %s...', op_string, url)
    if (gsutil_api.GetApiSelector(url.scheme) == ApiSelector.XML and
        url.scheme != 'gs'):
      # If we are called with a non-google ACL model, we need to use the XML
      # passthrough. acl_arg should either be a canned ACL or an XML ACL.
      self._SetAclXmlPassthrough(url, gsutil_api)
    else:
      # Normal Cloud API path. acl_arg is a JSON ACL or a canned ACL.
      self._SetAclGsutilApi(url, gsutil_api)
    PutToQueueWithTimeout(gsutil_api.status_queue,
                          MetadataMessage(message_time=time.time()))

  def _SetAclXmlPassthrough(self, url, gsutil_api):
    """Sets the ACL for the URL provided using the XML passthrough functions.

    This function assumes that self.def_acl, self.canned,
    and self.continue_on_error are initialized, and that self.acl_arg is
    either an XML string or a canned ACL string.

    Args:
      url: CloudURL to set the ACL on.
      gsutil_api: gsutil Cloud API to use for the ACL set. Must support XML
                  passthrough functions.
    """
    orig_prefer_api = gsutil_api.prefer_api
    try:
      gsutil_api.prefer_api = ApiSelector.XML
      gsutil_api.XmlPassThroughSetAcl(self.acl_arg,
                                      url,
                                      canned=self.canned,
                                      def_obj_acl=self.def_acl,
                                      provider=url.scheme)
    except ServiceException as e:
      if self.continue_on_error:
        self.everything_set_okay = False
        self.logger.error(e)
      else:
        raise
    finally:
      gsutil_api.prefer_api = orig_prefer_api

  def _SetAclGsutilApi(self, url, gsutil_api):
    """Sets the ACL for the URL provided using the gsutil Cloud API.

    This function assumes that self.def_acl, self.canned,
    and self.continue_on_error are initialized, and that self.acl_arg is
    either a JSON string or a canned ACL string.

    Args:
      url: CloudURL to set the ACL on.
      gsutil_api: gsutil Cloud API to use for the ACL set.
    """
    try:
      if url.IsBucket():
        if self.def_acl:
          if self.canned:
            gsutil_api.PatchBucket(url.bucket_name,
                                   apitools_messages.Bucket(),
                                   canned_def_acl=self.acl_arg,
                                   provider=url.scheme,
                                   fields=['id'])
          else:
            def_obj_acl = AclTranslation.JsonToMessage(
                self.acl_arg, apitools_messages.ObjectAccessControl)
            if not def_obj_acl:
              # Use a sentinel value to indicate a private (no entries) default
              # object ACL.
              def_obj_acl.append(PRIVATE_DEFAULT_OBJ_ACL)
            bucket_metadata = apitools_messages.Bucket(
                defaultObjectAcl=def_obj_acl)
            gsutil_api.PatchBucket(url.bucket_name,
                                   bucket_metadata,
                                   provider=url.scheme,
                                   fields=['id'])
        else:
          if self.canned:
            gsutil_api.PatchBucket(url.bucket_name,
                                   apitools_messages.Bucket(),
                                   canned_acl=self.acl_arg,
                                   provider=url.scheme,
                                   fields=['id'])
          else:
            bucket_acl = AclTranslation.JsonToMessage(
                self.acl_arg, apitools_messages.BucketAccessControl)
            bucket_metadata = apitools_messages.Bucket(acl=bucket_acl)
            gsutil_api.PatchBucket(url.bucket_name,
                                   bucket_metadata,
                                   provider=url.scheme,
                                   fields=['id'])
      else:  # url.IsObject()
        if self.canned:
          gsutil_api.PatchObjectMetadata(url.bucket_name,
                                         url.object_name,
                                         apitools_messages.Object(),
                                         provider=url.scheme,
                                         generation=url.generation,
                                         canned_acl=self.acl_arg)
        else:
          object_acl = AclTranslation.JsonToMessage(
              self.acl_arg, apitools_messages.ObjectAccessControl)
          object_metadata = apitools_messages.Object(acl=object_acl)
          gsutil_api.PatchObjectMetadata(url.bucket_name,
                                         url.object_name,
                                         object_metadata,
                                         provider=url.scheme,
                                         generation=url.generation)
    except ArgumentException as e:
      raise
    except ServiceException as e:
      if self.continue_on_error:
        self.everything_set_okay = False
        self.logger.error(e)
      else:
        raise

  def SetAclCommandHelper(self, acl_func, acl_excep_handler):
    """Sets ACLs on the self.args using the passed-in acl function.

    Args:
      acl_func: ACL function to be passed to Apply.
      acl_excep_handler: ACL exception handler to be passed to Apply.
    """
    acl_arg = self.args[0]
    url_args = self.args[1:]
    # Disallow multi-provider setacl requests, because there are differences in
    # the ACL models.
    if not UrlsAreForSingleProvider(url_args):
      raise CommandException('"%s" command spanning providers not allowed.' %
                             self.command_name)

    # Determine whether acl_arg names a file containing XML ACL text vs. the
    # string name of a canned ACL.
    if os.path.isfile(acl_arg):
      with codecs.open(acl_arg, 'r', UTF8) as f:
        acl_arg = f.read()
      self.canned = False
    else:
      # No file exists, so expect a canned ACL string.
      # validate=False because we allow wildcard urls.
      storage_uri = boto.storage_uri(
          url_args[0],
          debug=self.debug,
          validate=False,
          bucket_storage_uri_class=self.bucket_storage_uri_class)

      canned_acls = storage_uri.canned_acls()
      if acl_arg not in canned_acls:
        raise CommandException('Invalid canned ACL "%s".' % acl_arg)
      self.canned = True

    # Used to track if any ACLs failed to be set.
    self.everything_set_okay = True
    self.acl_arg = acl_arg

    self.ApplyAclFunc(acl_func, acl_excep_handler, url_args)
    if not self.everything_set_okay and not self.continue_on_error:
      raise CommandException('ACLs for some objects could not be set.')

  def _WarnServiceAccounts(self):
    """Warns service account users who have received an AccessDenied error.

    When one of the metadata-related commands fails due to AccessDenied, the
    user must ensure that they are listed as an Owner in the API console.
    """
    # Import this here so that the value will be set first in
    # gcs_oauth2_boto_plugin.
    # pylint: disable=g-import-not-at-top
    from gcs_oauth2_boto_plugin.oauth2_plugin import IS_SERVICE_ACCOUNT

    if IS_SERVICE_ACCOUNT:
      # This method is only called when canned ACLs are used, so the warning
      # definitely applies.
      self.logger.warning('\n'.join(
          textwrap.wrap(
              'It appears that your service account has been denied access while '
              'attempting to perform a metadata operation. If you believe that you '
              'should have access to this metadata (i.e., if it is associated with '
              'your account), please make sure that your service account\'s email '
              'address is listed as an Owner in the Permissions tab of the API '
              'console. See "gsutil help creds" for further information.\n')))

  def GetAndPrintAcl(self, url_str):
    """Prints the standard or default object ACL depending on self.command_name.

    Args:
      url_str: URL string to get ACL for.
    """
    blr = self.GetAclCommandBucketListingReference(url_str)
    url = StorageUrlFromString(url_str)
    if (self.gsutil_api.GetApiSelector(url.scheme) == ApiSelector.XML and
        url.scheme != 'gs'):
      # Need to use XML passthrough.
      try:
        acl = self.gsutil_api.XmlPassThroughGetAcl(url,
                                                   def_obj_acl=self.def_acl,
                                                   provider=url.scheme)
        print(acl.to_xml())
      except AccessDeniedException as _:
        self._WarnServiceAccounts()
        raise
    else:
      if self.command_name == 'defacl':
        acl = blr.root_object.defaultObjectAcl
        if not acl:
          self.logger.warn(
              'No default object ACL present for %s. This could occur if '
              'the default object ACL is private, in which case objects '
              'created in this bucket will be readable only by their '
              'creators. It could also mean you do not have OWNER permission '
              'on %s and therefore do not have permission to read the '
              'default object ACL. It could also mean that %s has Bucket '
              'Policy Only enabled and therefore object ACLs and default '
              'object ACLs are disabled (see '
              'https://cloud.google.com/storage/docs/bucket-policy-only).',
              url_str, url_str, url_str)
      else:
        acl = blr.root_object.acl
        # Use the access controls api to check if the acl is actually empty or
        # if the user has 403 access denied or 400 invalid argument.
        if not acl:
          self._ListAccessControlsAcl(url)

      print(AclTranslation.JsonFromMessage(acl))

  def _ListAccessControlsAcl(self, storage_url):
    """Returns either bucket or object access controls for a storage url.

    Args:
      storage_url: StorageUrl object representing the bucket or object.

    Returns:
      BucketAccessControls, ObjectAccessControls, or None if storage_url does
      not represent a cloud bucket or cloud object.

    Raises:
      ServiceException if there was an error in the request.
    """
    if storage_url.IsBucket():
      return self.gsutil_api.ListBucketAccessControls(
          storage_url.bucket_name, provider=storage_url.scheme)
    elif storage_url.IsObject():
      return self.gsutil_api.ListObjectAccessControls(
          storage_url.bucket_name,
          storage_url.object_name,
          provider=storage_url.scheme)
    else:
      return None

  def GetAclCommandBucketListingReference(self, url_str):
    """Gets a single bucket listing reference for an acl get command.

    Args:
      url_str: URL string to get the bucket listing reference for.

    Returns:
      BucketListingReference for the URL string.

    Raises:
      CommandException if string did not result in exactly one reference.
    """
    # We're guaranteed by caller that we have the appropriate type of url
    # string for the call (ex. we will never be called with an object string
    # by getdefacl).
    wildcard_url = StorageUrlFromString(url_str)
    if wildcard_url.IsObject():
      plurality_iter = PluralityCheckableIterator(
          self.WildcardIterator(url_str).IterObjects(
              bucket_listing_fields=['acl']))
    else:
      # Bucket or provider. We call IterBuckets explicitly here to ensure that
      # the root object is populated with the acl.
      if self.command_name == 'defacl':
        bucket_fields = ['defaultObjectAcl']
      else:
        bucket_fields = ['acl']
      plurality_iter = PluralityCheckableIterator(
          self.WildcardIterator(url_str).IterBuckets(
              bucket_fields=bucket_fields))
    if plurality_iter.IsEmpty():
      raise CommandException('No URLs matched')
    if plurality_iter.HasPlurality():
      raise CommandException(
          '%s matched more than one URL, which is not allowed by the %s '
          'command' % (url_str, self.command_name))
    return list(plurality_iter)[0]

  def GetSingleBucketUrlFromArg(self, arg, bucket_fields=None):
    """Gets a single bucket URL based on the command arguments.

    Args:
      arg: String argument to get bucket URL for.
      bucket_fields: Fields to populate for the bucket.

    Returns:
      (StorageUrl referring to a single bucket, Bucket metadata).

    Raises:
      CommandException if args did not match exactly one bucket.
    """
    plurality_checkable_iterator = self.GetBucketUrlIterFromArg(
        arg, bucket_fields=bucket_fields)
    if plurality_checkable_iterator.HasPlurality():
      raise CommandException('%s matched more than one URL, which is not\n'
                             'allowed by the %s command' %
                             (arg, self.command_name))
    blr = list(plurality_checkable_iterator)[0]
    return StorageUrlFromString(blr.url_string), blr.root_object

  def GetBucketUrlIterFromArg(self, arg, bucket_fields=None):
    """Gets a bucket URL iterator based on the command arguments.

    Args:
      arg: String argument to iterate over.
      bucket_fields: Fields to populate for the bucket.

    Returns:
      PluralityCheckableIterator over buckets.

    Raises:
      CommandException if iterator matched no buckets.
    """
    arg_url = StorageUrlFromString(arg)
    if not arg_url.IsCloudUrl() or arg_url.IsObject():
      raise CommandException('"%s" command must specify a bucket' %
                             self.command_name)

    plurality_checkable_iterator = PluralityCheckableIterator(
        self.WildcardIterator(arg).IterBuckets(bucket_fields=bucket_fields))
    if plurality_checkable_iterator.IsEmpty():
      raise CommandException('No URLs matched')
    return plurality_checkable_iterator

  ######################
  # Private functions. #
  ######################

  def _ResetConnectionPool(self):
    # Each OS process needs to establish its own set of connections to
    # the server to avoid writes from different OS processes interleaving
    # onto the same socket (and garbling the underlying SSL session).
    # We ensure each process gets its own set of connections here by
    # reinitializing state that tracks connections.
    connection_pool = StorageUri.provider_pool
    if connection_pool:
      for i in connection_pool:
        connection_pool[i].connection.close()

    StorageUri.provider_pool = {}
    StorageUri.connection = None

  def _GetProcessAndThreadCount(self,
                                process_count,
                                thread_count,
                                parallel_operations_override,
                                print_macos_warning=True):
    """Determines the values of process_count and thread_count.

    These values are used for parallel operations.
    If we're not performing operations in parallel, then ignore
    existing values and use process_count = thread_count = 1.

    Args:
      process_count: A positive integer or None. In the latter case, we read
                     the value from the .boto config file.
      thread_count: A positive integer or None. In the latter case, we read
                    the value from the .boto config file.
      parallel_operations_override: Used to override self.parallel_operations.
                                    This allows the caller to safely override
                                    the top-level flag for a single call.
      print_macos_warning: Print a warning about parallel processing on MacOS
                           if true.

    Returns:
      (process_count, thread_count): The number of processes and threads to use,
                                     respectively.
    """
    # Set OS process and python thread count as a function of options
    # and config.
    if self.parallel_operations or parallel_operations_override:
      if not process_count:
        process_count = boto.config.getint(
            'GSUtil', 'parallel_process_count',
            gslib.commands.config.DEFAULT_PARALLEL_PROCESS_COUNT)
      if process_count < 1:
        raise CommandException('Invalid parallel_process_count "%d".' %
                               process_count)
      if not thread_count:
        thread_count = boto.config.getint(
            'GSUtil', 'parallel_thread_count',
            gslib.commands.config.DEFAULT_PARALLEL_THREAD_COUNT)
      if thread_count < 1:
        raise CommandException('Invalid parallel_thread_count "%d".' %
                               thread_count)
    else:
      # If -m not specified, then assume 1 OS process and 1 Python thread.
      process_count = 1
      thread_count = 1

    should_prohibit_multiprocessing, os_name = ShouldProhibitMultiprocessing()
    if should_prohibit_multiprocessing and process_count > 1:
      raise CommandException('\n'.join(
          textwrap.wrap(
              ('It is not possible to set process_count > 1 on %s. Please '
               'update your config file(s) (located at %s) and set '
               '"parallel_process_count = 1".') %
              (os_name, ', '.join(GetFriendlyConfigFilePaths())))))
    is_main_thread = self.recursive_apply_level == 0
    if print_macos_warning and os_name == 'macOS' and process_count > 1 and is_main_thread:
      self.logger.info(
          'If you experience problems with multiprocessing on MacOS, they '
          'might be related to https://bugs.python.org/issue33725. You can '
          'disable multiprocessing by editing your .boto config or by adding '
          'the following flag to your command: '
          '`-o "GSUtil:parallel_process_count=1"`. Note that multithreading is '
          'still available even if you disable multiprocessing.\n')

    self.logger.debug('process count: %d', process_count)
    self.logger.debug('thread count: %d', thread_count)
    return (process_count, thread_count)
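
  # Worked example (hypothetical config values): with -m in effect and
  # parallel_process_count=12, parallel_thread_count=5 in the .boto config,
  # this returns (12, 5), i.e. up to 60 concurrent workers; without -m it
  # always returns (1, 1).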

  def _SetUpPerCallerState(self):
    """Set up the state for a caller id, corresponding to one Apply call."""
    # pylint: disable=global-variable-undefined,global-variable-not-assigned
    # These variables are initialized in InitializeMultiprocessingVariables or
    # InitializeThreadingVariables.
    global global_return_values_map, shared_vars_map, failure_count
    global caller_id_finished_count, shared_vars_list_map, total_tasks
    global need_pool_or_done_cond, call_completed_map, class_map
    global task_queues, caller_id_lock, caller_id_counter
    # Get a new caller ID.
    with caller_id_lock:
      caller_id_counter.Increment()
      caller_id = caller_id_counter.GetValue()

    # Create a copy of self with an incremented recursive level. This allows
    # the class to report its level correctly if the function called from it
    # also needs to call Apply.
    cls = copy.copy(self)
    cls.recursive_apply_level += 1

    # The thread-safe logger can't be pickled, so we remove it here and
    # recreate it later in the WorkerThread. This is not a problem since any
    # logger with the same name will be treated as a singleton.
    cls.logger = None

    # Likewise, the default API connection(s) can't be pickled, but are unused
    # anyway as each thread gets its own API delegator.
    cls.gsutil_api = None
    cls.seek_ahead_gsutil_api = None

    class_map[caller_id] = cls
    total_tasks[caller_id] = -1  # -1 => the producer hasn't finished yet.
    call_completed_map[caller_id] = False
    caller_id_finished_count[caller_id] = 0
    global_return_values_map[caller_id] = []
    return caller_id
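
  # Illustrative trace (hypothetical IDs): the first top-level Apply call
  # typically receives caller_id 1 and records, under that key:
  #
  #   class_map[1] = <copy of self with recursive_apply_level incremented>
  #   total_tasks[1] = -1               # producer hasn't finished yet
  #   call_completed_map[1] = False
  #   caller_id_finished_count[1] = 0
  #   global_return_values_map[1] = []
  #
  # A nested Apply issued from a worker increments the counter again and gets
  # its own independent bookkeeping entries.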

  def _CreateNewConsumerPool(self, num_processes, num_threads, status_queue):
    """Create a new pool of processes that call _ApplyThreads."""
    processes = []
    task_queue = _NewMultiprocessingQueue()
    task_queues.append(task_queue)

    current_max_recursive_level.Increment()
    if current_max_recursive_level.GetValue() > MAX_RECURSIVE_DEPTH:
      raise CommandException('Recursion depth of Apply calls is too great.')
    for _ in range(num_processes):
      recursive_apply_level = len(consumer_pools)
      p = multiprocessing_context.Process(target=self._ApplyThreads,
                                          args=(num_threads, num_processes,
                                                recursive_apply_level,
                                                status_queue))
      p.daemon = True
      processes.append(p)
      _CryptoRandomAtFork()
      p.start()
    consumer_pool = _ConsumerPool(processes, task_queue)
    consumer_pools.append(consumer_pool)

  class ParallelOverrideReason(object):
    """Enum class to describe purpose of overriding parallel operations."""
    # For the case when we use slice parallelism.
    SLICE = 'slice'
    # For the case when we run a helper Apply call (such as in the
    # _DiffIterator of rsync) and override to make the command go faster.
    SPEED = 'speed'
    # For when we run Apply calls in perfdiag.
    PERFDIAG = 'perfdiag'
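
  # Example (illustrative; mirrors the SPEED case described above): a helper
  # Apply call such as rsync's _DiffIterator can run in parallel regardless
  # of the top-level flag by passing
  #
  #   parallel_operations_override=self.ParallelOverrideReason.SPEED
  #
  # to self.Apply(), which overrides self.parallel_operations for that single
  # call only.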

  def Apply(self,
            func,
            args_iterator,
            exception_handler,
            shared_attrs=None,
            arg_checker=_UrlArgChecker,
            parallel_operations_override=None,
            process_count=None,
            thread_count=None,
            should_return_results=False,
            fail_on_error=False,
            seek_ahead_iterator=None):
    """Calls _Parallel/SequentialApply based on multiprocessing availability.

    Args:
      func: Function to call to process each argument.
      args_iterator: Iterable collection of arguments to be put into the
                     work queue.
      exception_handler: Exception handler for WorkerThread class.
      shared_attrs: List of attributes to manage across sub-processes.
      arg_checker: Used to determine whether we should process the current
                   argument or simply skip it. Also handles any logging that
                   is specific to a particular type of argument.
      parallel_operations_override: A string (see ParallelOverrideReason)
                                    describing the reason to override
                                    self.parallel_operations. This allows the
                                    caller to safely override the top-level
                                    flag for a single call.
      process_count: The number of processes to use. If not specified, then
                     the configured default will be used.
      thread_count: The number of threads per process. If not specified, then
                    the configured default will be used.
      should_return_results: If true, then return the results of all successful
                             calls to func in a list.
      fail_on_error: If true, then raise any exceptions encountered when
                     executing func. This is only applicable in the case of
                     process_count == thread_count == 1.
      seek_ahead_iterator: If present, a seek-ahead iterator that will
          provide an approximation of the total number of tasks and bytes that
          will be iterated by the ProducerThread. Used only if multiple
          processes and/or threads are used.

    Returns:
      Results from spawned threads.
    """
    # This is initialized in Initialize(Multiprocessing|Threading)Variables
    # pylint: disable=global-variable-not-assigned
    # pylint: disable=global-variable-undefined
    global thread_stats
    # pylint: enable=global-variable-not-assigned
    # pylint: enable=global-variable-undefined
    if shared_attrs:
      original_shared_vars_values = {}  # We'll add these back in at the end.
      for name in shared_attrs:
        original_shared_vars_values[name] = getattr(self, name)
        # By setting this to 0, we simplify the logic for computing deltas.
        # We'll add it back after all of the tasks have been performed.
        setattr(self, name, 0)

    (process_count, thread_count) = self._GetProcessAndThreadCount(
        process_count, thread_count, parallel_operations_override)

    is_main_thread = (self.recursive_apply_level == 0 and
                      self.sequential_caller_id == -1)

    if is_main_thread:
      # This initializes the initial performance summary parameters.
      LogPerformanceSummaryParams(num_processes=process_count,
                                  num_threads=thread_count)

    # We don't honor the fail_on_error flag in the case of multiple threads
    # or processes.
    fail_on_error = fail_on_error and (process_count * thread_count == 1)

    # Only check this from the first call in the main thread. Apart from the
    # fact that it's wasteful to try this multiple times in general, it also
    # will never work when called from a subprocess since we use daemon
    # processes, and daemons can't create other processes.
    if (is_main_thread and not self.multiprocessing_is_available and
        process_count > 1):
      # Run the check again and log the appropriate warnings. This was run
      # before, when the Command object was created, in order to calculate
      # self.multiprocessing_is_available, but we don't want to print the
      # warning until we're sure the user actually tried to use multiple
      # threads or processes.
      CheckMultiprocessingAvailableAndInit(logger=self.logger)

    caller_id = self._SetUpPerCallerState()

    # If any shared attributes were passed by the caller, create a dictionary
    # of shared memory variables for every element in the list of shared
    # attributes.
    if shared_attrs:
      shared_vars_list_map[caller_id] = shared_attrs
      for name in shared_attrs:
        shared_vars_map[(caller_id, name)] = 0

    # Make all of the requested function calls.
    usable_processes_count = (process_count
                              if self.multiprocessing_is_available else 1)
    if thread_count * usable_processes_count > 1:
      self._ParallelApply(
          func,
          args_iterator,
          exception_handler,
          caller_id,
          arg_checker,
          usable_processes_count,
          thread_count,
          should_return_results,
          fail_on_error,
          seek_ahead_iterator=seek_ahead_iterator,
          parallel_operations_override=parallel_operations_override)
      if is_main_thread:
        _AggregateThreadStats()
    else:
      self._SequentialApply(func, args_iterator, exception_handler, caller_id,
                            arg_checker, should_return_results, fail_on_error)

    if shared_attrs:
      for name in shared_attrs:
        # This allows us to retain the original value of the shared variable,
        # and simply apply the delta after what was done during the call to
        # apply.
        final_value = (original_shared_vars_values[name] + shared_vars_map.get(
            (caller_id, name)))
        setattr(self, name, final_value)

    if should_return_results:
      return global_return_values_map.get(caller_id)
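
  # Example (illustrative sketch; handler names follow the SetAclFuncWrapper
  # pattern mentioned in WorkerThread.PerformTask below): a typical command
  # calls
  #
  #   self.Apply(_SetAclFuncWrapper,
  #              name_expansion_iterator,
  #              _SetAclExceptionHandler,
  #              fail_on_error=not self.continue_on_error)
  #
  # and can aggregate per-worker counters by passing, e.g.,
  # shared_attrs=['everything_set_okay'], then reading the attribute back
  # from self after Apply returns.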

  def _MaybeSuggestGsutilDashM(self):
    """Outputs a suggestion to the user to use gsutil -m."""
    if not (boto.config.getint('GSUtil', 'parallel_process_count', 0) == 1 and
            boto.config.getint('GSUtil', 'parallel_thread_count', 0) == 1):
      self.logger.info('\n' + textwrap.fill(
          '==> NOTE: You are performing a sequence of gsutil operations that '
          'may run significantly faster if you instead use gsutil -m %s ...\n'
          'Please see the -m section under "gsutil help options" for further '
          'information about when gsutil -m can be advantageous.' %
          self.command_spec.command_name) + '\n')

  # pylint: disable=g-doc-args
  def _SequentialApply(self, func, args_iterator, exception_handler, caller_id,
                       arg_checker, should_return_results, fail_on_error):
    """Performs all function calls sequentially in the current thread.

    No other threads or processes will be spawned. This degraded functionality
    is used when the multiprocessing module is not available or the user
    requests only one thread and one process.
    """
    # Create a WorkerThread to handle all of the logic needed to actually call
    # the function. Note that this thread will never be started, and all work
    # is done in the current thread.
    worker_thread = WorkerThread(None,
                                 False,
                                 headers=self.non_metadata_headers,
                                 perf_trace_token=self.perf_trace_token,
                                 trace_token=self.trace_token,
                                 user_project=self.user_project)
    args_iterator = iter(args_iterator)
    # Count of sequential calls that have been made. Used for producing
    # the suggestion to use gsutil -m.
    sequential_call_count = 0
    while True:

      # Try to get the next argument, handling any exceptions that arise.
      try:
        args = next(args_iterator)
      except StopIteration as e:
        break
      except Exception as e:  # pylint: disable=broad-except
        _IncrementFailureCount()
        if fail_on_error:
          raise
        else:
          try:
            exception_handler(self, e)
          except Exception as _:  # pylint: disable=broad-except
            self.logger.debug(
                'Caught exception while handling exception for %s:\n%s', func,
                traceback.format_exc())
          continue

      sequential_call_count += 1
      if (sequential_call_count == OFFER_GSUTIL_M_SUGGESTION_THRESHOLD or
          sequential_call_count % OFFER_GSUTIL_M_SUGGESTION_FREQUENCY == 0):
        # Output suggestion near beginning of run, so user sees it early, and
        # every so often while the command is executing, so they can ^C and try
        # gsutil -m.
        self._MaybeSuggestGsutilDashM()
      if arg_checker(self, args):
        # Now that we actually have the next argument, perform the task.
        task = Task(func, args, caller_id, exception_handler,
                    should_return_results, arg_checker, fail_on_error)
        worker_thread.PerformTask(task, self)

    lines_since_suggestion_last_printed = (sequential_call_count %
                                           OFFER_GSUTIL_M_SUGGESTION_FREQUENCY)
    if lines_since_suggestion_last_printed >= GetTermLines():
      # Output suggestion at end of long run, in case user missed it at the
      # start and it scrolled off-screen.
      self._MaybeSuggestGsutilDashM()

    PutToQueueWithTimeout(self.gsutil_api.status_queue,
                          FinalMessage(time.time()))

    # If the final iterated argument results in an exception, and that
    # exception modifies shared_attrs, we need to publish the results.
    worker_thread.shared_vars_updater.Update(caller_id, self)

    # Now that all the work is done, log the types of source URLs encountered.
    self._ProcessSourceUrlTypes(args_iterator)

  # pylint: disable=g-doc-args
  def _ParallelApply(self,
                     func,
                     args_iterator,
                     exception_handler,
                     caller_id,
                     arg_checker,
                     process_count,
                     thread_count,
                     should_return_results,
                     fail_on_error,
                     seek_ahead_iterator=None,
                     parallel_operations_override=None):
    r"""Dispatches input arguments across a thread/process pool.

    Pools are composed of parallel OS processes and/or Python threads,
    based on options (-m or not) and settings in the user's config file.

    If only one OS process is requested/available, dispatch requests across
    threads in the current OS process.

    In the multi-process case, we will create one pool of worker processes for
    each level of the tree of recursive calls to Apply. E.g., if A calls
    Apply(B), and B ultimately calls Apply(C) followed by Apply(D), then we
    will only create two sets of worker processes - B will execute in the
    first, and C and D will execute in the second. If C is then changed to
    call Apply(E) and D is changed to call Apply(F), then we will
    automatically create a third set of processes (lazily, when needed) that
    will be used to execute calls to E and F. This might look something like:

    Pool1 Executes:                B
                                  / \
    Pool2 Executes:              C   D
                                /     \
    Pool3 Executes:            E       F

    Apply's parallelism is generally broken up into 4 cases (see the
    illustrative numbers in the comment just below):
    - If process_count == thread_count == 1, then all tasks will be executed
      by _SequentialApply.
    - If process_count > 1 and thread_count == 1, then the main thread will
      create a new pool of processes (if they don't already exist) and each of
      those processes will execute the tasks in a single thread.
    - If process_count == 1 and thread_count > 1, then this process will
      create a new pool of threads to execute the tasks.
    - If process_count > 1 and thread_count > 1, then the main thread will
      create a new pool of processes (if they don't already exist) and each of
      those processes will, upon creation, create a pool of threads to
      execute the tasks.

    Args:
      caller_id: The caller ID unique to this call to command.Apply.
      See command.Apply for description of other arguments.
    """
    # This is initialized in Initialize(Multiprocessing|Threading)Variables
    # pylint: disable=global-variable-not-assigned
    # pylint: disable=global-variable-undefined
    global glob_status_queue, ui_controller
    # pylint: enable=global-variable-not-assigned
    # pylint: enable=global-variable-undefined
    is_main_thread = self.recursive_apply_level == 0

    if (parallel_operations_override == self.ParallelOverrideReason.SLICE and
        self.recursive_apply_level <= 1):
      # The operation uses slice parallelism if the recursive apply level > 0 or
      # if we're executing _ParallelApply without the -m option.
      glob_status_queue.put(PerformanceSummaryMessage(time.time(), True))

    if not IS_WINDOWS and is_main_thread:
      # For multi-thread or multi-process scenarios, the main process must
      # kill itself on a terminating signal, because sys.exit(1) only exits
      # the currently executing thread, leaving orphaned processes. The main
      # thread is responsible for cleaning up multiprocessing variables such
      # as manager processes. Therefore, the main thread's signal handling
      # chain is:
      #   1: __main__._CleanupSignalHandler (clean up processes)
      #   2: MultithreadedMainSignalHandler (kill self)
      for signal_num in (signal.SIGINT, signal.SIGTERM):
        RegisterSignalHandler(signal_num,
                              MultithreadedMainSignalHandler,
                              is_final_handler=True)

    if not task_queues:
      # The process we create will need to access the next recursive level
      # of task queues if it makes a call to Apply, so we always keep around
      # one more queue than we know we need. OTOH, if we don't create a new
      # process, the existing process still needs a task queue to use.
      if process_count > 1:
        task_queues.append(_NewMultiprocessingQueue())
      else:
        task_queue = _NewThreadsafeQueue()
        task_queues.append(task_queue)
        # Create a top-level worker pool since this is the first execution
        # of ParallelApply on the main thread.
        WorkerPool(thread_count,
                   self.logger,
                   task_queue=task_queue,
                   bucket_storage_uri_class=self.bucket_storage_uri_class,
                   gsutil_api_map=self.gsutil_api_map,
                   debug=self.debug,
                   status_queue=glob_status_queue,
                   headers=self.non_metadata_headers,
                   perf_trace_token=self.perf_trace_token,
                   trace_token=self.trace_token,
                   user_project=self.user_project)

    if process_count > 1:  # Handle process pool creation.
      # Check whether this call will need a new set of workers.

      # Each worker must acquire a shared lock before notifying the main thread
      # that it needs a new worker pool, so that at most one worker asks for
      # a new worker pool at once.
      try:
        if not is_main_thread:
          worker_checking_level_lock.acquire()
        if self.recursive_apply_level >= current_max_recursive_level.GetValue():
          with need_pool_or_done_cond:
            # Only the main thread is allowed to create new processes -
            # otherwise, we will run into some Python bugs.
            if is_main_thread:
              self._CreateNewConsumerPool(process_count, thread_count,
                                          glob_status_queue)
            else:
              # Notify the main thread that we need a new consumer pool.
              new_pool_needed.Reset(reset_value=1)
              need_pool_or_done_cond.notify_all()
              # The main thread will notify us when it finishes.
              need_pool_or_done_cond.wait()
      finally:
        if not is_main_thread:
          worker_checking_level_lock.release()
    else:  # Handle new worker thread pool creation.
      if not is_main_thread:
        try:
          worker_checking_level_lock.acquire()
          if self.recursive_apply_level > _GetCurrentMaxRecursiveLevel():
            # We don't have a thread pool for this level of recursive apply
            # calls, so create a pool and corresponding task queue.
            _IncrementCurrentMaxRecursiveLevel()
            task_queue = _NewThreadsafeQueue()
            task_queues.append(task_queue)
            WorkerPool(thread_count,
                       self.logger,
                       task_queue=task_queue,
                       bucket_storage_uri_class=self.bucket_storage_uri_class,
                       gsutil_api_map=self.gsutil_api_map,
                       debug=self.debug,
                       status_queue=glob_status_queue,
                       headers=self.non_metadata_headers,
                       perf_trace_token=self.perf_trace_token,
                       trace_token=self.trace_token,
                       user_project=self.user_project)
        finally:
          worker_checking_level_lock.release()

    task_queue = task_queues[self.recursive_apply_level]

    # Only use the seek-ahead iterator in the main thread to provide an
    # overall estimate of operations.
    if seek_ahead_iterator and not is_main_thread:
      seek_ahead_iterator = None

    # Kick off a producer thread to throw tasks in the global task queue. We
    # do this asynchronously so that the main thread can be free to create new
    # consumer pools when needed (otherwise, any thread with a task that needs
    # a new consumer pool must block until we're completely done producing; in
    # the worst case, every worker blocks on such a call and the producer fills
    # up the task queue before it finishes, so we block forever).
    producer_thread = ProducerThread(
        copy.copy(self),
        args_iterator,
        caller_id,
        func,
        task_queue,
        should_return_results,
        exception_handler,
        arg_checker,
        fail_on_error,
        seek_ahead_iterator=seek_ahead_iterator,
        status_queue=(glob_status_queue if is_main_thread else None))

    # Start the UI thread that is responsible for displaying operation status
    # (aggregated across processes and threads) to the user.
    ui_thread = None
    if is_main_thread:
      ui_thread = UIThread(glob_status_queue, sys.stderr, ui_controller)

    # Wait here until either:
    #   1. We're the main thread in the multi-process case, and someone needs
    #      a new consumer pool - in which case we create one and continue
    #      waiting.
    #   2. Someone notifies us that all of the work we requested is done, in
    #      which case we retrieve the results (if applicable) and stop
    #      waiting.
    # At most one of these can be true, because the main thread is blocked on
    # its call to Apply, and a thread will not ask for a new consumer pool
    # unless it had more work to do.
    while True:
      with need_pool_or_done_cond:
        if call_completed_map[caller_id]:
          break
        elif (process_count > 1 and is_main_thread and
              new_pool_needed.GetValue()):
          new_pool_needed.Reset()
          self._CreateNewConsumerPool(process_count, thread_count,
                                      glob_status_queue)
          need_pool_or_done_cond.notify_all()

        # Note that we must check the above conditions before the wait() call;
        # otherwise, the notification can happen before we start waiting, in
        # which case we'll block forever.
        need_pool_or_done_cond.wait()

    # We've completed all tasks (or excepted), so signal the UI thread to
    # terminate.
    if is_main_thread:
      PutToQueueWithTimeout(glob_status_queue, ZERO_TASKS_TO_DO_ARGUMENT)
      ui_thread.join(timeout=UI_THREAD_JOIN_TIMEOUT)
      # Now that all the work is done, log the types of source URLs
      # encountered.
      self._ProcessSourceUrlTypes(producer_thread.args_iterator)

    # If we encountered an exception from the producer thread before any
    # arguments were enqueued, it wouldn't have been propagated, so we
    # explicitly raise it here.
    if producer_thread.unknown_exception:
      # pylint: disable=raising-bad-type
      raise producer_thread.unknown_exception

    # If we encountered an exception from the producer thread while iterating
    # over the arguments, raise it here if we're meant to fail on error.
    if producer_thread.iterator_exception and fail_on_error:
      # pylint: disable=raising-bad-type
      raise producer_thread.iterator_exception
    if is_main_thread and not parallel_operations_override:
      PutToQueueWithTimeout(glob_status_queue, FinalMessage(time.time()))

  def _ProcessSourceUrlTypes(self, args_iterator):
    """Logs the URL type information to analytics collection."""
    if not isinstance(args_iterator, CopyObjectsIterator):
      return
    LogPerformanceSummaryParams(is_daisy_chain=args_iterator.is_daisy_chain,
                                has_file_src=args_iterator.has_file_src,
                                has_cloud_src=args_iterator.has_cloud_src,
                                provider_types=args_iterator.provider_types)

  def _ApplyThreads(self, thread_count, process_count, recursive_apply_level,
                    status_queue):
    """Assigns the work from the multi-process global task queue.

    Work is assigned to an individual process for later consumption either by
    the WorkerThreads or (if thread_count == 1) this thread.

    Args:
      thread_count: The number of threads used to perform the work. If 1, then
                    perform all work in this thread.
      process_count: The number of processes used to perform the work.
      recursive_apply_level: The depth in the tree of recursive calls to Apply
                             of this thread.
      status_queue: Multiprocessing/threading queue for progress reporting and
                    performance aggregation.
    """
    assert process_count > 1, (
        'Invalid state, calling command._ApplyThreads with only one process.')

    _CryptoRandomAtFork()
    # Separate processes should exit on a terminating signal,
    # but to avoid race conditions only the main process should handle
    # multiprocessing cleanup. Override child processes to use a single signal
    # handler.
    for catch_signal in GetCaughtSignals():
      signal.signal(catch_signal, ChildProcessSignalHandler)

    self._ResetConnectionPool()
    self.recursive_apply_level = recursive_apply_level

    task_queue = task_queues[recursive_apply_level]

    # Ensure fairness across processes by filling our WorkerPool
    # only with as many tasks as it has WorkerThreads. This semaphore is
    # acquired each time that a task is retrieved from the queue and released
    # each time a task is completed by a WorkerThread.
    worker_semaphore = threading.BoundedSemaphore(thread_count)

    # TODO: Presently, this pool gets recreated with each call to Apply. We
    # should be able to do it just once, at process creation time.
    worker_pool = WorkerPool(
        thread_count,
        self.logger,
        worker_semaphore=worker_semaphore,
        bucket_storage_uri_class=self.bucket_storage_uri_class,
        gsutil_api_map=self.gsutil_api_map,
        debug=self.debug,
        status_queue=status_queue,
        headers=self.non_metadata_headers,
        perf_trace_token=self.perf_trace_token,
        trace_token=self.trace_token,
        user_project=self.user_project)

    num_enqueued = 0
    while True:
      while not worker_semaphore.acquire(blocking=False):
        # Because Python signal handlers are only called in between atomic
        # instructions, if we block the main thread on an available worker
        # thread, we won't be able to respond to signals such as a
        # user-initiated CTRL-C until a worker thread completes a task.
        # We poll the semaphore periodically as a compromise between
        # efficiency and user responsiveness.
        time.sleep(0.01)
      task = task_queue.get()

      if task.args != ZERO_TASKS_TO_DO_ARGUMENT:
        # If we have no tasks to do and we're performing a blocking call, we
        # need a special signal to tell us to stop - otherwise, we block on
        # the call to task_queue.get() forever.
        worker_pool.AddTask(task)
        num_enqueued += 1
      else:
        # No tasks remain; since no work was dispatched to a thread, don't
        # block the semaphore on a WorkerThread completion.
        worker_semaphore.release()


# Below here lie classes and functions related to controlling the flow of tasks
# between various threads and processes.
class _ConsumerPool(object):

  def __init__(self, processes, task_queue):
    self.processes = processes
    self.task_queue = task_queue

  def ShutDown(self):
    for process in self.processes:
      KillProcess(process.pid)


class Task(
    namedtuple('Task', (
        'func args caller_id exception_handler should_return_results '
        'arg_checker fail_on_error'))):
  """Task class representing work to be completed.

  Args:
    func: The function to be executed.
    args: The arguments to func.
    caller_id: The globally-unique caller ID corresponding to the Apply call.
    exception_handler: The exception handler to use if the call to func fails.
    should_return_results: True iff the results of this function should be
                           returned from the Apply call.
    arg_checker: Used to determine whether we should process the current
                 argument or simply skip it. Also handles any logging that
                 is specific to a particular type of argument.
    fail_on_error: If true, then raise any exceptions encountered when
                   executing func. This is only applicable in the case of
                   process_count == thread_count == 1.
  """
  pass
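

# Example (illustrative): the ProducerThread below builds one Task per
# argument, e.g.
#
#   cur_task = Task(self.func, args, self.caller_id, self.exception_handler,
#                   self.should_return_results, self.arg_checker,
#                   self.fail_on_error)
#
# and enqueues the sentinel Task(None, ZERO_TASKS_TO_DO_ARGUMENT, caller_id,
# None, None, None, None) when there is no real work, so that blocked
# consumers can wake up and exit.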


# TODO: Refactor the various threading code that doesn't need to depend on
# command.py globals (ProducerThread, UIThread) to different files to aid
# readability and reduce the size of command.py.
def _StartSeekAheadThread(seek_ahead_iterator, seek_ahead_thread_cancel_event):
  """Initializes and runs the seek-ahead thread.

  We defer starting this thread until it is needed, since it is only useful
  when the ProducerThread iterates more results than it can store on the global
  task queue.

  Args:
    seek_ahead_iterator: Iterator that yields SeekAheadResults.
    seek_ahead_thread_cancel_event: threading.Event for signaling the
        seek-ahead thread to terminate.

  Returns:
    The thread object for the initialized thread.
  """
  # This is initialized in Initialize(Multiprocessing|Threading)Variables
  # pylint: disable=global-variable-not-assigned
  # pylint: disable=global-variable-undefined
  global glob_status_queue
  # pylint: enable=global-variable-not-assigned
  # pylint: enable=global-variable-undefined
  return SeekAheadThread(seek_ahead_iterator, seek_ahead_thread_cancel_event,
                         glob_status_queue)


class ProducerThread(threading.Thread):
  """Thread used to enqueue work for other processes and threads."""

  def __init__(self,
               cls,
               args_iterator,
               caller_id,
               func,
               task_queue,
               should_return_results,
               exception_handler,
               arg_checker,
               fail_on_error,
               seek_ahead_iterator=None,
               status_queue=None):
    """Initializes the producer thread.

    Args:
      cls: Instance of Command for which this ProducerThread was created.
      args_iterator: Iterable collection of arguments to be put into the
                     work queue.
      caller_id: Globally-unique caller ID corresponding to this call to Apply.
      func: The function to be called on each element of args_iterator.
      task_queue: The queue into which tasks will be put, to later be consumed
                  by Command._ApplyThreads.
      should_return_results: True iff the results for this call to
                             command.Apply were requested.
      exception_handler: The exception handler to use when errors are
                         encountered during calls to func.
      arg_checker: Used to determine whether we should process the current
                   argument or simply skip it. Also handles any logging that
                   is specific to a particular type of argument.
      fail_on_error: If true, then raise any exceptions encountered when
                     executing func. This is only applicable in the case of
                     process_count == thread_count == 1.
      seek_ahead_iterator: If present, a seek-ahead iterator that will
          provide an approximation of the total number of tasks and bytes that
          will be iterated by the ProducerThread.
      status_queue: Queue used to report task-count estimates; only set when
          calling from the main thread, else None. Even on the main thread,
          estimates are only produced if args is a collection of
          NameExpansionResults, the type that gives us initial information
          about files to be processed; otherwise, nothing is added to the
          queue.
    """
    super(ProducerThread, self).__init__()
    self.func = func
    self.cls = cls
    self.args_iterator = args_iterator
    self.caller_id = caller_id
    self.task_queue = task_queue
    self.arg_checker = arg_checker
    self.exception_handler = exception_handler
    self.should_return_results = should_return_results
    self.fail_on_error = fail_on_error
    self.shared_variables_updater = _SharedVariablesUpdater()
    self.daemon = True
    self.unknown_exception = None
    self.iterator_exception = None
    self.seek_ahead_iterator = seek_ahead_iterator
    self.status_queue = status_queue
    self.start()

  def run(self):
    num_tasks = 0
    cur_task = None
    last_task = None
    task_estimation_threshold = None
    seek_ahead_thread = None
    seek_ahead_thread_cancel_event = None
    seek_ahead_thread_considered = False
    args = None
    try:
      total_size = 0
      self.args_iterator = iter(self.args_iterator)
      while True:
        try:
          args = next(self.args_iterator)
        except StopIteration as e:
          break
        except Exception as e:  # pylint: disable=broad-except
          _IncrementFailureCount()
          if self.fail_on_error:
            self.iterator_exception = e
            raise
          else:
            try:
              self.exception_handler(self.cls, e)
            except Exception as _:  # pylint: disable=broad-except
              self.cls.logger.debug(
                  'Caught exception while handling exception for %s:\n%s',
                  self.func, traceback.format_exc())
            self.shared_variables_updater.Update(self.caller_id, self.cls)
            continue

        if self.arg_checker(self.cls, args):
          num_tasks += 1
          if self.status_queue:
            if not num_tasks % 100:
              # Time to update the total number of tasks.
              if (isinstance(args, NameExpansionResult) or
                  isinstance(args, CopyObjectInfo) or
                  isinstance(args, RsyncDiffToApply)):
                PutToQueueWithTimeout(
                    self.status_queue,
                    ProducerThreadMessage(num_tasks, total_size, time.time()))
            if (isinstance(args, NameExpansionResult) or
                isinstance(args, CopyObjectInfo)):
              if args.expanded_result:
                json_expanded_result = json.loads(args.expanded_result)
                if 'size' in json_expanded_result:
                  total_size += int(json_expanded_result['size'])
            elif isinstance(args, RsyncDiffToApply):
              if args.copy_size:
                total_size += int(args.copy_size)

          if not seek_ahead_thread_considered:
            if task_estimation_threshold is None:
              task_estimation_threshold = _GetTaskEstimationThreshold()
            if task_estimation_threshold <= 0:
              # Disable the seek-ahead thread (never start it).
              seek_ahead_thread_considered = True
            elif num_tasks >= task_estimation_threshold:
              if self.seek_ahead_iterator:
                seek_ahead_thread_cancel_event = threading.Event()
                seek_ahead_thread = _StartSeekAheadThread(
                    self.seek_ahead_iterator, seek_ahead_thread_cancel_event)
                # For integration testing only, force estimation to complete
                # prior to producing further results.
                if boto.config.get('GSUtil', 'task_estimation_force', None):
                  seek_ahead_thread.join(timeout=SEEK_AHEAD_JOIN_TIMEOUT)

              seek_ahead_thread_considered = True

          last_task = cur_task
          cur_task = Task(self.func, args, self.caller_id,
                          self.exception_handler, self.should_return_results,
                          self.arg_checker, self.fail_on_error)
          if last_task:
            self.task_queue.put(last_task)
    except Exception as e:  # pylint: disable=broad-except
      # This will also catch any exception raised due to an error in the
      # iterator when fail_on_error is set, so check that we failed for some
      # other reason before claiming that we had an unknown exception.
      if not self.iterator_exception:
        self.unknown_exception = e
    finally:
      # We need to make sure to update total_tasks[caller_id] before we enqueue
      # the last task. Otherwise, a worker can retrieve the last task and
      # complete it, then check total_tasks and determine that we're not done
      # producing all the tasks before we update total_tasks. This approach
      # forces workers to wait on the last task until after we've updated
      # total_tasks.
      total_tasks[self.caller_id] = num_tasks
      if not cur_task:
        # This happens if there were zero arguments to be put in the queue.
        cur_task = Task(None, ZERO_TASKS_TO_DO_ARGUMENT, self.caller_id, None,
                        None, None, None)
      self.task_queue.put(cur_task)

      # If the seek-ahead thread is still running, cancel it and wait for it
      # to exit, since we've enumerated all of the tasks already. We don't want
      # to delay command completion on an estimate that has become meaningless.
      if seek_ahead_thread is not None:
        seek_ahead_thread_cancel_event.set()
        # It's possible that the seek-ahead thread may attempt to put to the
        # status queue after it has been torn down, for example if the system
        # is overloaded. Because the put uses a timeout, it should never block
        # command termination or signal handling.
        seek_ahead_thread.join(timeout=SEEK_AHEAD_JOIN_TIMEOUT)
      # Send a final ProducerThread message that definitively states
      # the amount of actual work performed.
      if (self.status_queue and
          (isinstance(args, NameExpansionResult) or isinstance(
              args, CopyObjectInfo) or isinstance(args, RsyncDiffToApply))):
        PutToQueueWithTimeout(
            self.status_queue,
            ProducerThreadMessage(num_tasks,
                                  total_size,
                                  time.time(),
                                  finished=True))

      # It's possible that the workers finished before we updated total_tasks,
      # so we need to check here as well.
      _NotifyIfDone(self.caller_id,
                    caller_id_finished_count.get(self.caller_id))


class WorkerPool(object):
  """Pool of worker threads to which tasks can be added."""

  def __init__(self,
               thread_count,
               logger,
               worker_semaphore=None,
               task_queue=None,
               bucket_storage_uri_class=None,
               gsutil_api_map=None,
               debug=0,
               status_queue=None,
               headers=None,
               perf_trace_token=None,
               trace_token=None,
               user_project=None):
    # In the multi-process case, a worker semaphore is required to ensure
    # even work distribution.
    #
    # In the single-process case, the input task queue directly feeds worker
    # threads from the ProducerThread. Since worker threads will consume only
    # one task at a time from the queue, there is no need for a semaphore to
    # ensure even work distribution.
    #
    # Thus, exactly one of task_queue or worker_semaphore must be provided.
    assert (worker_semaphore is None) != (task_queue is None)
    self.headers = headers
    self.perf_trace_token = perf_trace_token
    self.trace_token = trace_token
    self.user_project = user_project

    self.task_queue = task_queue or _NewThreadsafeQueue()
    self.threads = []
    for _ in range(thread_count):
      worker_thread = WorkerThread(
          self.task_queue,
          logger,
          worker_semaphore=worker_semaphore,
          bucket_storage_uri_class=bucket_storage_uri_class,
          gsutil_api_map=gsutil_api_map,
          debug=debug,
          status_queue=status_queue,
          headers=self.headers,
          perf_trace_token=self.perf_trace_token,
          trace_token=self.trace_token,
          user_project=self.user_project)
      self.threads.append(worker_thread)
      worker_thread.start()

  def AddTask(self, task):
    """Adds a task to the task queue; used only in the multi-process case."""
    self.task_queue.put(task)
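

# Usage note (recapping the two call sites above): WorkerPool is constructed
# in exactly one of two modes, matching the assertion in __init__:
#
#   # Single-process case: threads consume ProducerThread tasks directly.
#   WorkerPool(thread_count, logger, task_queue=task_queue, ...)
#
#   # Multi-process case: _ApplyThreads feeds the pool and throttles it with
#   # a BoundedSemaphore.
#   WorkerPool(thread_count, logger, worker_semaphore=worker_semaphore, ...)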


class WorkerThread(threading.Thread):
  """Thread where all the work will be performed.

  This makes the function calls for Apply and takes care of all error handling,
  return value propagation, and shared_vars.

  Note that this thread is NOT started upon instantiation because the function-
  calling logic is also used in the single-threaded case.
  """
  # This is initialized in Initialize(Multiprocessing|Threading)Variables
  # pylint: disable=global-variable-not-assigned
  # pylint: disable=global-variable-undefined
  global thread_stats

  # pylint: enable=global-variable-not-assigned
  # pylint: enable=global-variable-undefined

  def __init__(self,
               task_queue,
               logger,
               worker_semaphore=None,
               bucket_storage_uri_class=None,
               gsutil_api_map=None,
               debug=0,
               status_queue=None,
               headers=None,
               perf_trace_token=None,
               trace_token=None,
               user_project=None):
    """Initializes the worker thread.

    Args:
      task_queue: The thread-safe queue from which this thread should obtain
                  its work.
      logger: Logger to use for this thread.
      worker_semaphore: threading.BoundedSemaphore to be released each time a
                        task is completed, or None for single-threaded
                        execution.
      bucket_storage_uri_class: Class to instantiate for cloud StorageUris.
                                Settable for testing/mocking.
      gsutil_api_map: Map of providers and API selector tuples to api classes
                      which can be used to communicate with those providers.
                      Used when instantiating the CloudApiDelegator class.
      debug: debug level for the CloudApiDelegator class.
      status_queue: Queue for reporting status updates.
      user_project: Project to be billed for this request.
    """
    super(WorkerThread, self).__init__()

    self.pid = os.getpid()
    self.init_time = time.time()
    self.task_queue = task_queue
    self.worker_semaphore = worker_semaphore
    self.daemon = True
    self.cached_classes = {}
    self.shared_vars_updater = _SharedVariablesUpdater()
    self.headers = headers
    self.perf_trace_token = perf_trace_token
    self.trace_token = trace_token
    self.user_project = user_project

    # Note that thread_gsutil_api is not initialized in the sequential
    # case; task functions should use utils.cloud_api_helper.GetCloudApiInstance
    # to retrieve the main thread's CloudApiDelegator in that case.
    self.thread_gsutil_api = None
    if bucket_storage_uri_class and gsutil_api_map:
      self.thread_gsutil_api = CloudApiDelegator(
          bucket_storage_uri_class,
          gsutil_api_map,
          logger,
          status_queue,
          debug=debug,
          http_headers=self.headers,
          perf_trace_token=self.perf_trace_token,
          trace_token=self.trace_token,
          user_project=self.user_project)

  @CaptureThreadStatException
  def _StartBlockedTime(self):
    """Update the thread_stats AtomicDict before task_queue.get() is called."""
    if thread_stats.get((self.pid, self.ident)) is None:
      thread_stats[(self.pid, self.ident)] = _ThreadStat(self.init_time)
    # While this read/modify/write is not an atomic operation on the dict,
    # we are protected since the (process ID, thread ID) tuple is unique
    # to this thread, making this thread the only reader/writer for this key.
    thread_stat = thread_stats[(self.pid, self.ident)]
    thread_stat.StartBlockedTime()
    thread_stats[(self.pid, self.ident)] = thread_stat

  @CaptureThreadStatException
  def _EndBlockedTime(self):
    """Update the thread_stats AtomicDict after task_queue.get() is called."""
    thread_stat = thread_stats[(self.pid, self.ident)]
    thread_stat.EndBlockedTime()
    thread_stats[(self.pid, self.ident)] = thread_stat

  def PerformTask(self, task, cls):
    """Makes the function call for a task.

    Args:
      task: The Task to perform.
      cls: The instance of a class which gives context to the functions called
           by the Task's function. E.g., see SetAclFuncWrapper.
    """
    caller_id = task.caller_id
    try:
      results = task.func(cls, task.args, thread_state=self.thread_gsutil_api)
      if task.should_return_results:
        global_return_values_map.Increment(caller_id, [results],
                                           default_value=[])
    except Exception as e:  # pylint: disable=broad-except
      _IncrementFailureCount()
      if task.fail_on_error:
        raise  # Only happens for single thread and process case.
      else:
        try:
          task.exception_handler(cls, e)
        except Exception as _:  # pylint: disable=broad-except
          # Don't allow callers to raise exceptions here and kill the worker
          # threads.
          cls.logger.debug(
              'Caught exception while handling exception for %s:\n%s', task,
              traceback.format_exc())
    finally:
      if self.worker_semaphore:
        self.worker_semaphore.release()
      self.shared_vars_updater.Update(caller_id, cls)

      # Even if we encounter an exception, we still need to claim that the
      # function finished executing. Otherwise, we won't know when to stop
      # waiting and return results.
      num_done = caller_id_finished_count.Increment(caller_id, 1)
      _NotifyIfDone(caller_id, num_done)

  def run(self):
    while True:
      self._StartBlockedTime()
      task = self.task_queue.get()
      self._EndBlockedTime()
      if task.args == ZERO_TASKS_TO_DO_ARGUMENT:
        # This can happen in the single-process case because worker threads
        # consume ProducerThread tasks directly.
        continue
      caller_id = task.caller_id

      # Get the instance of the command with the appropriate context.
      cls = self.cached_classes.get(caller_id, None)
      if not cls:
        cls = copy.copy(class_map[caller_id])
        cls.logger = CreateOrGetGsutilLogger(cls.command_name)
        self.cached_classes[caller_id] = cls

      self.PerformTask(task, cls)


class _ThreadStat(object):
  """Stores thread idle and execution time statistics."""

  def __init__(self, init_time):
    self.total_idle_time = 0
    # The last time EndBlockedTime was called, which is the last time a
    # task_queue.get() completed or when we initialized the thread.
    self.end_block_time = init_time
    # The last time StartBlockedTime was called, which is the last time a
    # task_queue.get() call started.
    self.start_block_time = time.time()
    # Between now and thread initialization, we were not blocked.
    self.total_execution_time = 0

  def StartBlockedTime(self):
    self.start_block_time = time.time()
    exec_time = self.start_block_time - self.end_block_time
    self.total_execution_time += exec_time

  def EndBlockedTime(self):
    self.end_block_time = time.time()
    idle_time = self.end_block_time - self.start_block_time
    self.total_idle_time += idle_time

  def AggregateStat(self, end_time):
    """Decide final stats upon Apply completion."""
    if self.end_block_time > self.start_block_time:
      # Apply ended before we blocked on task_queue.get(), or there was an
      # exception during StartBlockedTime. In both of these cases, we were not
      # blocked on task_queue.get() and so can add this time to execution time.
      self.total_execution_time += end_time - self.end_block_time
    else:
      # Apply ended while we were blocked on task_queue.get(), or there was an
      # exception during EndBlockedTime. In both of these cases, we were in the
      # midst of or just finishing a task_queue.get() call, and so can add this
      # time to idle time.
      self.total_idle_time += end_time - self.start_block_time
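

# Worked example (hypothetical timestamps) for _ThreadStat: a thread created
# at t=0 that calls task_queue.get() at t=2, receives a task at t=5, and sees
# Apply complete at t=6 accumulates:
#
#   StartBlockedTime() at t=2: total_execution_time += 2 - 0  # -> 2
#   EndBlockedTime()   at t=5: total_idle_time      += 5 - 2  # -> 3
#   AggregateStat(6):          total_execution_time += 6 - 5  # -> 3
#
# The last step takes the execution branch because end_block_time (5) is
# greater than start_block_time (2) when aggregation runs.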


def _AggregateThreadStats():
  """At the end of the top-level Apply call, aggregate the thread stats dict.

  This should only be called in the main process and thread because it logs to
  the MetricsCollector.
  """
  cur_time = time.time()
  total_idle_time = total_execution_time = 0
  for thread_stat in thread_stats.values():
    thread_stat.AggregateStat(cur_time)
    total_idle_time += thread_stat.total_idle_time
    total_execution_time += thread_stat.total_execution_time
  LogPerformanceSummaryParams(thread_idle_time=total_idle_time,
                              thread_execution_time=total_execution_time)


class _SharedVariablesUpdater(object):
  """Used to update shared variables for a class in the global map.

  Note that each thread will have its own instance of the calling class for
  context, and it will also have its own instance of a
  _SharedVariablesUpdater. This is used in the following way:

  1. Before any tasks are performed, each thread will get a copy of the
     calling class, and the globally-consistent value of this shared variable
     will be initialized to whatever it was before the call to Apply began.

  2. After each time a thread performs a task, it will look at the current
     values of the shared variables in its instance of the calling class.

     2.A. For each such variable, it computes the delta of this variable
          between the last known value for this class (which is stored in
          a dict local to this class) and the current value of the variable
          in the class.

     2.B. Using this delta, we update the last known value locally as well
          as the globally-consistent value shared across all classes (the
          globally consistent value is simply increased by the computed
          delta).
  """

  def __init__(self):
    self.last_shared_var_values = {}

  def Update(self, caller_id, cls):
    """Update any shared variables with their deltas."""
    shared_vars = shared_vars_list_map.get(caller_id, None)
    if shared_vars:
      for name in shared_vars:
        key = (caller_id, name)
        last_value = self.last_shared_var_values.get(key, 0)
        # Compute the change made since the last time we updated here. This is
        # calculated by simply subtracting the last known value from the
        # current value in the class instance.
        delta = getattr(cls, name) - last_value
        self.last_shared_var_values[key] = delta + last_value

        # Update the globally-consistent value by simply increasing it by the
        # computed delta.
        shared_vars_map.Increment(key, delta)
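

# Worked example (hypothetical values) for _SharedVariablesUpdater.Update:
# suppose a shared attribute (e.g., a hypothetical 'bytes_transferred'
# counter) was last recorded as 10 for this (caller_id, name) key and the
# thread's class copy now holds 25. Then:
#
#   delta = 25 - 10                              # 15
#   self.last_shared_var_values[key] = 15 + 10   # 25, the new local baseline
#   shared_vars_map.Increment(key, 15)           # global total grows by delta
#
# so concurrent threads each contribute only their own deltas to the shared
# total.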


def _NotifyIfDone(caller_id, num_done):
  """Notify any threads waiting for results that something has finished.

  Each waiting thread will then need to check the call_completed_map to see if
  its work is done.

  Note that num_done could be calculated here, but it is passed in as an
  optimization so that we have one less call to a globally-locked data
  structure.

  Args:
    caller_id: The caller_id of the function whose progress we're checking.
    num_done: The number of tasks currently completed for that caller_id.
  """
  num_to_do = total_tasks[caller_id]
  if num_to_do == num_done and num_to_do >= 0:
    # Notify the Apply call that's sleeping that it's ready to return.
    with need_pool_or_done_cond:
      call_completed_map[caller_id] = True
      need_pool_or_done_cond.notify_all()


# pylint: disable=global-variable-not-assigned,global-variable-undefined
def ShutDownGsutil():
  """Shut down all processes in consumer pools in preparation for exiting."""
  global glob_status_queue
  for q in queues:
    try:
      q.cancel_join_thread()
    except:  # pylint: disable=bare-except
      pass
  for consumer_pool in consumer_pools:
    consumer_pool.ShutDown()
  try:
    glob_status_queue.cancel_join_thread()
  except:  # pylint: disable=bare-except
    pass


def _GetCurrentMaxRecursiveLevel():
  global current_max_recursive_level
  return current_max_recursive_level.GetValue()


def _IncrementCurrentMaxRecursiveLevel():
  global current_max_recursive_level
  current_max_recursive_level.Increment()


def _IncrementFailureCount():
  global failure_count
  failure_count.Increment()


def DecrementFailureCount():
  global failure_count
  failure_count.Decrement()


def GetFailureCount():
  """Returns the number of failures processed during calls to Apply()."""
  global failure_count
  return failure_count.GetValue()


def ResetFailureCount():
  """Resets the failure_count variable to 0 - useful if error is expected."""
  global failure_count
  failure_count.Reset()