407 lines
12 KiB
Python
407 lines
12 KiB
Python
# patch.py -- For dealing with packed-style patches.
|
|
# Copyright (C) 2009-2013 Jelmer Vernooij <jelmer@jelmer.uk>
|
|
#
|
|
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
|
|
# General Public License as public by the Free Software Foundation; version 2.0
|
|
# or (at your option) any later version. You can redistribute it and/or
|
|
# modify it under the terms of either of these two licenses.
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
# You should have received a copy of the licenses; if not, see
|
|
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
|
|
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
|
|
# License, Version 2.0.
|
|
#
|
|
|
|
"""Classes for dealing with git am-style patches.
|
|
|
|
These patches are basically unified diffs with some extra metadata tacked
|
|
on.
|
|
"""
|
|
|
|
from difflib import SequenceMatcher
|
|
import email.parser
|
|
import time
|
|
|
|
from dulwich.objects import (
|
|
Blob,
|
|
Commit,
|
|
S_ISGITLINK,
|
|
)
|
|
|
|
FIRST_FEW_BYTES = 8000
|
|
|
|
|
|
def write_commit_patch(f, commit, contents, progress, version=None, encoding=None):
|
|
"""Write a individual file patch.
|
|
|
|
Args:
|
|
commit: Commit object
|
|
progress: Tuple with current patch number and total.
|
|
Returns:
|
|
tuple with filename and contents
|
|
"""
|
|
encoding = encoding or getattr(f, "encoding", "ascii")
|
|
if isinstance(contents, str):
|
|
contents = contents.encode(encoding)
|
|
(num, total) = progress
|
|
f.write(
|
|
b"From "
|
|
+ commit.id
|
|
+ b" "
|
|
+ time.ctime(commit.commit_time).encode(encoding)
|
|
+ b"\n"
|
|
)
|
|
f.write(b"From: " + commit.author + b"\n")
|
|
f.write(
|
|
b"Date: " + time.strftime("%a, %d %b %Y %H:%M:%S %Z").encode(encoding) + b"\n"
|
|
)
|
|
f.write(
|
|
("Subject: [PATCH %d/%d] " % (num, total)).encode(encoding)
|
|
+ commit.message
|
|
+ b"\n"
|
|
)
|
|
f.write(b"\n")
|
|
f.write(b"---\n")
|
|
try:
|
|
import subprocess
|
|
|
|
p = subprocess.Popen(
|
|
["diffstat"], stdout=subprocess.PIPE, stdin=subprocess.PIPE
|
|
)
|
|
except (ImportError, OSError):
|
|
pass # diffstat not available?
|
|
else:
|
|
(diffstat, _) = p.communicate(contents)
|
|
f.write(diffstat)
|
|
f.write(b"\n")
|
|
f.write(contents)
|
|
f.write(b"-- \n")
|
|
if version is None:
|
|
from dulwich import __version__ as dulwich_version
|
|
|
|
f.write(b"Dulwich %d.%d.%d\n" % dulwich_version)
|
|
else:
|
|
f.write(version.encode(encoding) + b"\n")
|
|
|
|
|
|
def get_summary(commit):
|
|
"""Determine the summary line for use in a filename.
|
|
|
|
Args:
|
|
commit: Commit
|
|
Returns: Summary string
|
|
"""
|
|
decoded = commit.message.decode(errors="replace")
|
|
return decoded.splitlines()[0].replace(" ", "-")
|
|
|
|
|
|
# Unified Diff
|
|
def _format_range_unified(start, stop):
|
|
'Convert range to the "ed" format'
|
|
# Per the diff spec at http://www.unix.org/single_unix_specification/
|
|
beginning = start + 1 # lines start numbering with one
|
|
length = stop - start
|
|
if length == 1:
|
|
return "{}".format(beginning)
|
|
if not length:
|
|
beginning -= 1 # empty ranges begin at line just before the range
|
|
return "{},{}".format(beginning, length)
|
|
|
|
|
|
def unified_diff(
|
|
a,
|
|
b,
|
|
fromfile="",
|
|
tofile="",
|
|
fromfiledate="",
|
|
tofiledate="",
|
|
n=3,
|
|
lineterm="\n",
|
|
tree_encoding="utf-8",
|
|
output_encoding="utf-8",
|
|
):
|
|
"""difflib.unified_diff that can detect "No newline at end of file" as
|
|
original "git diff" does.
|
|
|
|
Based on the same function in Python2.7 difflib.py
|
|
"""
|
|
started = False
|
|
for group in SequenceMatcher(None, a, b).get_grouped_opcodes(n):
|
|
if not started:
|
|
started = True
|
|
fromdate = "\t{}".format(fromfiledate) if fromfiledate else ""
|
|
todate = "\t{}".format(tofiledate) if tofiledate else ""
|
|
yield "--- {}{}{}".format(
|
|
fromfile.decode(tree_encoding), fromdate, lineterm
|
|
).encode(output_encoding)
|
|
yield "+++ {}{}{}".format(
|
|
tofile.decode(tree_encoding), todate, lineterm
|
|
).encode(output_encoding)
|
|
|
|
first, last = group[0], group[-1]
|
|
file1_range = _format_range_unified(first[1], last[2])
|
|
file2_range = _format_range_unified(first[3], last[4])
|
|
yield "@@ -{} +{} @@{}".format(file1_range, file2_range, lineterm).encode(
|
|
output_encoding
|
|
)
|
|
|
|
for tag, i1, i2, j1, j2 in group:
|
|
if tag == "equal":
|
|
for line in a[i1:i2]:
|
|
yield b" " + line
|
|
continue
|
|
if tag in ("replace", "delete"):
|
|
for line in a[i1:i2]:
|
|
if not line[-1:] == b"\n":
|
|
line += b"\n\\ No newline at end of file\n"
|
|
yield b"-" + line
|
|
if tag in ("replace", "insert"):
|
|
for line in b[j1:j2]:
|
|
if not line[-1:] == b"\n":
|
|
line += b"\n\\ No newline at end of file\n"
|
|
yield b"+" + line
|
|
|
|
|
|
def is_binary(content):
|
|
"""See if the first few bytes contain any null characters.
|
|
|
|
Args:
|
|
content: Bytestring to check for binary content
|
|
"""
|
|
return b"\0" in content[:FIRST_FEW_BYTES]
|
|
|
|
|
|
def shortid(hexsha):
|
|
if hexsha is None:
|
|
return b"0" * 7
|
|
else:
|
|
return hexsha[:7]
|
|
|
|
|
|
def patch_filename(p, root):
|
|
if p is None:
|
|
return b"/dev/null"
|
|
else:
|
|
return root + b"/" + p
|
|
|
|
|
|
def write_object_diff(f, store, old_file, new_file, diff_binary=False):
|
|
"""Write the diff for an object.
|
|
|
|
Args:
|
|
f: File-like object to write to
|
|
store: Store to retrieve objects from, if necessary
|
|
old_file: (path, mode, hexsha) tuple
|
|
new_file: (path, mode, hexsha) tuple
|
|
diff_binary: Whether to diff files even if they
|
|
are considered binary files by is_binary().
|
|
|
|
Note: the tuple elements should be None for nonexistant files
|
|
"""
|
|
(old_path, old_mode, old_id) = old_file
|
|
(new_path, new_mode, new_id) = new_file
|
|
patched_old_path = patch_filename(old_path, b"a")
|
|
patched_new_path = patch_filename(new_path, b"b")
|
|
|
|
def content(mode, hexsha):
|
|
if hexsha is None:
|
|
return Blob.from_string(b"")
|
|
elif S_ISGITLINK(mode):
|
|
return Blob.from_string(b"Subproject commit " + hexsha + b"\n")
|
|
else:
|
|
return store[hexsha]
|
|
|
|
def lines(content):
|
|
if not content:
|
|
return []
|
|
else:
|
|
return content.splitlines()
|
|
|
|
f.writelines(
|
|
gen_diff_header((old_path, new_path), (old_mode, new_mode), (old_id, new_id))
|
|
)
|
|
old_content = content(old_mode, old_id)
|
|
new_content = content(new_mode, new_id)
|
|
if not diff_binary and (is_binary(old_content.data) or is_binary(new_content.data)):
|
|
binary_diff = (
|
|
b"Binary files "
|
|
+ patched_old_path
|
|
+ b" and "
|
|
+ patched_new_path
|
|
+ b" differ\n"
|
|
)
|
|
f.write(binary_diff)
|
|
else:
|
|
f.writelines(
|
|
unified_diff(
|
|
lines(old_content),
|
|
lines(new_content),
|
|
patched_old_path,
|
|
patched_new_path,
|
|
)
|
|
)
|
|
|
|
|
|
# TODO(user): Support writing unicode, rather than bytes.
|
|
def gen_diff_header(paths, modes, shas):
|
|
"""Write a blob diff header.
|
|
|
|
Args:
|
|
paths: Tuple with old and new path
|
|
modes: Tuple with old and new modes
|
|
shas: Tuple with old and new shas
|
|
"""
|
|
(old_path, new_path) = paths
|
|
(old_mode, new_mode) = modes
|
|
(old_sha, new_sha) = shas
|
|
if old_path is None and new_path is not None:
|
|
old_path = new_path
|
|
if new_path is None and old_path is not None:
|
|
new_path = old_path
|
|
old_path = patch_filename(old_path, b"a")
|
|
new_path = patch_filename(new_path, b"b")
|
|
yield b"diff --git " + old_path + b" " + new_path + b"\n"
|
|
|
|
if old_mode != new_mode:
|
|
if new_mode is not None:
|
|
if old_mode is not None:
|
|
yield ("old file mode %o\n" % old_mode).encode("ascii")
|
|
yield ("new file mode %o\n" % new_mode).encode("ascii")
|
|
else:
|
|
yield ("deleted file mode %o\n" % old_mode).encode("ascii")
|
|
yield b"index " + shortid(old_sha) + b".." + shortid(new_sha)
|
|
if new_mode is not None and old_mode is not None:
|
|
yield (" %o" % new_mode).encode("ascii")
|
|
yield b"\n"
|
|
|
|
|
|
# TODO(user): Support writing unicode, rather than bytes.
|
|
def write_blob_diff(f, old_file, new_file):
|
|
"""Write blob diff.
|
|
|
|
Args:
|
|
f: File-like object to write to
|
|
old_file: (path, mode, hexsha) tuple (None if nonexisting)
|
|
new_file: (path, mode, hexsha) tuple (None if nonexisting)
|
|
|
|
Note: The use of write_object_diff is recommended over this function.
|
|
"""
|
|
(old_path, old_mode, old_blob) = old_file
|
|
(new_path, new_mode, new_blob) = new_file
|
|
patched_old_path = patch_filename(old_path, b"a")
|
|
patched_new_path = patch_filename(new_path, b"b")
|
|
|
|
def lines(blob):
|
|
if blob is not None:
|
|
return blob.splitlines()
|
|
else:
|
|
return []
|
|
|
|
f.writelines(
|
|
gen_diff_header(
|
|
(old_path, new_path),
|
|
(old_mode, new_mode),
|
|
(getattr(old_blob, "id", None), getattr(new_blob, "id", None)),
|
|
)
|
|
)
|
|
old_contents = lines(old_blob)
|
|
new_contents = lines(new_blob)
|
|
f.writelines(
|
|
unified_diff(old_contents, new_contents, patched_old_path, patched_new_path)
|
|
)
|
|
|
|
|
|
def write_tree_diff(f, store, old_tree, new_tree, diff_binary=False):
|
|
"""Write tree diff.
|
|
|
|
Args:
|
|
f: File-like object to write to.
|
|
old_tree: Old tree id
|
|
new_tree: New tree id
|
|
diff_binary: Whether to diff files even if they
|
|
are considered binary files by is_binary().
|
|
"""
|
|
changes = store.tree_changes(old_tree, new_tree)
|
|
for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in changes:
|
|
write_object_diff(
|
|
f,
|
|
store,
|
|
(oldpath, oldmode, oldsha),
|
|
(newpath, newmode, newsha),
|
|
diff_binary=diff_binary,
|
|
)
|
|
|
|
|
|
def git_am_patch_split(f, encoding=None):
|
|
"""Parse a git-am-style patch and split it up into bits.
|
|
|
|
Args:
|
|
f: File-like object to parse
|
|
encoding: Encoding to use when creating Git objects
|
|
Returns: Tuple with commit object, diff contents and git version
|
|
"""
|
|
encoding = encoding or getattr(f, "encoding", "ascii")
|
|
encoding = encoding or "ascii"
|
|
contents = f.read()
|
|
if isinstance(contents, bytes) and getattr(email.parser, "BytesParser", None):
|
|
parser = email.parser.BytesParser()
|
|
msg = parser.parsebytes(contents)
|
|
else:
|
|
parser = email.parser.Parser()
|
|
msg = parser.parsestr(contents)
|
|
return parse_patch_message(msg, encoding)
|
|
|
|
|
|
def parse_patch_message(msg, encoding=None):
|
|
"""Extract a Commit object and patch from an e-mail message.
|
|
|
|
Args:
|
|
msg: An email message (email.message.Message)
|
|
encoding: Encoding to use to encode Git commits
|
|
Returns: Tuple with commit object, diff contents and git version
|
|
"""
|
|
c = Commit()
|
|
c.author = msg["from"].encode(encoding)
|
|
c.committer = msg["from"].encode(encoding)
|
|
try:
|
|
patch_tag_start = msg["subject"].index("[PATCH")
|
|
except ValueError:
|
|
subject = msg["subject"]
|
|
else:
|
|
close = msg["subject"].index("] ", patch_tag_start)
|
|
subject = msg["subject"][close + 2 :]
|
|
c.message = (subject.replace("\n", "") + "\n").encode(encoding)
|
|
first = True
|
|
|
|
body = msg.get_payload(decode=True)
|
|
lines = body.splitlines(True)
|
|
line_iter = iter(lines)
|
|
|
|
for line in line_iter:
|
|
if line == b"---\n":
|
|
break
|
|
if first:
|
|
if line.startswith(b"From: "):
|
|
c.author = line[len(b"From: ") :].rstrip()
|
|
else:
|
|
c.message += b"\n" + line
|
|
first = False
|
|
else:
|
|
c.message += line
|
|
diff = b""
|
|
for line in line_iter:
|
|
if line == b"-- \n":
|
|
break
|
|
diff += line
|
|
try:
|
|
version = next(line_iter).rstrip(b"\n")
|
|
except StopIteration:
|
|
version = None
|
|
return c, diff, version
|