# -*- coding: utf-8 -*- #
# Copyright 2017 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Library for ignoring files for upload.

This library very closely mimics the semantics of Git's gitignore file:
https://git-scm.com/docs/gitignore

See `gcloud topic gcloudignore` for details.

A typical use would be:

  file_chooser = gcloudignore.GetFileChooserForDir(upload_directory)
  for f in file_chooser.GetIncludedFiles('some/path'):
    print 'uploading {}'.format(f)
    # actually do the upload, too
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals

import fnmatch
import os
import re


_GCLOUDIGNORE_PATH_SEP = '/'

# Matches a line that ends in an odd number of backslashes, i.e. a line whose
# final backslash does not escape anything.
# NOTE(review): everything after `r'(?` was lost from the reviewed copy of this
# file; the pattern below is reconstructed from the constant's name and its use
# in Glob.FromString. Confirm against the upstream source.
_ENDS_IN_ODD_NUMBER_SLASHES_RE = r'(?<!\\)\\(\\\\)*$'


# NOTE(review): the exception definitions were lost from the reviewed copy of
# this file. InvalidLineError is required by Glob.FromString; the base class is
# an assumption to be confirmed against the upstream source.
class InternalParserError(Exception):
  """An internal error in ignore-file parsing."""


class InvalidLineError(InternalParserError):
  """Error indicating that a line of the ignore file was invalid."""


def _HandleSpaces(line):
  """Handles spaces in a line.

  Unescaped trailing spaces are stripped, and every escaped space (a backslash
  followed by a space) is replaced by a plain space.

  Args:
    line: str, the line

  Returns:
    str, the line with spaces handled
  """
  def _Rstrip(line):
    """Strips unescaped trailing spaces."""
    # First, split the string into "tokens": a backslash followed by any
    # character is one token; any other character is a token on its own.
    # NOTE(review): the first lines of this loop were lost from the reviewed
    # copy of this file and are reconstructed from the surviving remainder.
    tokens = []
    i = 0
    while i < len(line):
      curr = line[i]
      if curr == '\\':
        if i + 1 >= len(line):
          tokens.append(curr)
          break  # Pass through trailing backslash
        tokens.append(curr + line[i+1])
        i += 2
      else:
        tokens.append(curr)
        i += 1

    # Then, strip the trailing space tokens. An escaped space is the
    # two-character token '\\ ', so it never equals ' ' and stops the strip.
    while tokens and tokens[-1] == ' ':
      tokens.pop()
    return ''.join(tokens)

  def _UnescapeSpaces(line):
    """Unescapes all spaces in a line."""
    return line.replace('\\ ', ' ')

  return _UnescapeSpaces(_Rstrip(line))


def _Unescape(line):
  r"""Unescapes a line.

  The escape character is '\'. An escaped backslash turns into one backslash;
  for any other escaped character the backslash is dropped and the character
  itself is kept.

  Args:
    line: str, the line to unescape

  Returns:
    str, the unescaped line
  """
  # Drop the backslash in front of every non-backslash character first, then
  # collapse each remaining pair of backslashes into a single one.
  return re.sub(r'\\([^\\])', r'\1', line).replace('\\\\', '\\')


def GetPathPrefixes(path):
  """Returns all prefixes for the given path, inclusive.

  That is, for 'foo/bar/baz', returns ['', 'foo', 'foo/bar', 'foo/bar/baz'].

  Args:
    path: str, the path for which to get prefixes.

  Returns:
    list of str, the prefixes, shortest first.
  """
  # Collected longest-first and reversed at the end, which avoids repeatedly
  # inserting at the front of the list.
  prefixes = [path]
  remainder = True
  # Apparently which one is empty when operating on top-level directory depends
  # on your configuration, so stop as soon as either of them is.
  while path and remainder:
    path, remainder = os.path.split(path)
    prefixes.append(path)
  prefixes.reverse()
  return prefixes


def _IsRootDir(path):
  """Returns True if path is nothing but a single path separator."""
  return path in (os.sep, os.altsep)


class Glob(object):
  """A file-matching glob pattern.

  See https://git-scm.com/docs/gitignore for full syntax specification.

  Attributes:
    pattern: str, a globbing pattern.
    must_be_dir: bool, true if only dirs match.
  """

  def __init__(self, pattern, must_be_dir=False):
    self.pattern = pattern
    self.must_be_dir = must_be_dir

  def _MatchesHelper(self, pattern_parts, path):
    """Determines whether the given pattern matches the given path.

    Args:
      pattern_parts: list of str, the list of pattern parts that must all match
        the path.
      path: str or None, the path to match. None means the path was exhausted
        by an earlier step.

    Returns:
      bool, whether the path matches the pattern_parts.
    """
    # This method works right-to-left. It checks that the right-most pattern
    # part matches the right-most path part, and that the remaining pattern
    # matches the remaining path.
    if not pattern_parts:
      # We've reached the end of the pattern! Success.
      return True
    if path is None:
      # Distinguish between '*' and '/*'. The former should match '' (the root
      # directory) but the latter should not.
      return False

    pattern_part = pattern_parts[-1]
    remaining_pattern = pattern_parts[:-1]
    if path:  # normpath turns '' into '.', which confuses fnmatch later
      path = os.path.normpath(path)
    remaining_path, path_part = os.path.split(path)
    if not path_part:
      # See note above.
      remaining_path = None

    if pattern_part == '**':
      # If the path would match the remaining pattern_parts after skipping 0-all
      # of the trailing path parts, the whole pattern matches.
      #
      # That is, if we have `<pattern>/**` as a pattern and `foo/bar` as our
      # path, if any of ``, `foo`, and `foo/bar` match `<pattern>`, we return
      # true.
      path_prefixes = GetPathPrefixes(path)
      # '**' patterns only match against the full path (essentially, they have
      # an implicit '/' at the front of the pattern). An empty string at the
      # beginning of remaining_pattern simulates this.
      #
      # pylint: disable=g-explicit-bool-comparison
      # In this case, it's much clearer to show what we're checking for.
      if not (remaining_pattern and remaining_pattern[0] == ''):
        remaining_pattern.insert(0, '')
      # pylint: enable=g-explicit-bool-comparison
      return any(self._MatchesHelper(remaining_pattern, prefix)
                 for prefix in path_prefixes)

    if pattern_part == '*' and not remaining_pattern:
      # We need to ensure that a '*' at the beginning of a pattern does not
      # match a part with a '/' in it. That should only happen when '**' is
      # used.
      # For example: '*/bar' should match 'foo/bar', but not 'foo/qux/bar'.
      #
      # Only an empty parent or a bare root separator may remain to the left.
      # (Checking the parent's length instead would wrongly accept any
      # single-character directory name, e.g. 'a/qux/bar'.)
      if remaining_path and not _IsRootDir(remaining_path):
        return False

    if not fnmatch.fnmatch(path_part, pattern_part):
      # If the current pattern part doesn't match the current path part, the
      # whole pattern can't match the whole path. Give up!
      return False

    return self._MatchesHelper(remaining_pattern, remaining_path)

  def Matches(self, path, is_dir=False):
    """Returns whether this pattern matches the given path.

    Args:
      path: str, the path to test against the pattern.
      is_dir: bool, whether the path refers to a directory.

    Returns:
      bool, True if the pattern matches. A pattern with must_be_dir set never
      matches when is_dir is False.
    """
    if self.must_be_dir and not is_dir:
      return False
    return self._MatchesHelper(
        self.pattern.split(_GCLOUDIGNORE_PATH_SEP), path)

  @classmethod
  def FromString(cls, line):
    """Creates a pattern for an individual line of an ignore file.

    Windows-style newlines must be removed.

    Args:
      line: str, The line to parse.

    Returns:
      Glob, the pattern described by the line.

    Raises:
      InvalidLineError: if the line ends in an odd number of backslashes or is
        blank once spaces and escapes have been processed.
    """
    # Handle spaces before looking for the trailing '/': unescaped trailing
    # spaces are not part of the pattern, so 'foo/ ' must still be recognized
    # as a directory pattern, and the space in 'foo /' must be preserved.
    line = _HandleSpaces(line)
    if line.endswith('/'):
      line = line[:-1]
      must_be_dir = True
    else:
      must_be_dir = False
    if re.search(_ENDS_IN_ODD_NUMBER_SLASHES_RE, line):
      raise InvalidLineError(
          'Line [{}] ends in an odd number of [\\]s.'.format(line))
    line = _Unescape(line)
    if not line:
      raise InvalidLineError('Line [{}] is blank.'.format(line))
    return cls(line, must_be_dir=must_be_dir)