584 lines
18 KiB
Python
584 lines
18 KiB
Python
# protocol.py -- Shared parts of the git protocols
|
|
# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
|
|
# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@jelmer.uk>
|
|
#
|
|
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
|
|
# General Public License as published by the Free Software Foundation; version 2.0
|
|
# or (at your option) any later version. You can redistribute it and/or
|
|
# modify it under the terms of either of these two licenses.
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
# You should have received a copy of the licenses; if not, see
|
|
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
|
|
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
|
|
# License, Version 2.0.
|
|
#
|
|
|
|
"""Generic functions for talking the git smart server protocol."""
|
|
|
|
from io import BytesIO
|
|
from os import (
|
|
SEEK_END,
|
|
)
|
|
import socket
|
|
|
|
import dulwich
|
|
from dulwich.errors import (
|
|
HangupException,
|
|
GitProtocolError,
|
|
)
|
|
|
|
# TCP port number used for the git:// protocol.
TCP_GIT_PORT = 9418

# Forty ASCII "0" characters: the all-zero object id.
ZERO_SHA = b"0" * 40

# Acknowledgement modes, as returned by ack_type().
SINGLE_ACK = 0
MULTI_ACK = 1
MULTI_ACK_DETAILED = 2

# Side-band channel numbers.
SIDE_BAND_CHANNEL_DATA = 1  # pack data
SIDE_BAND_CHANNEL_PROGRESS = 2  # progress messages
SIDE_BAND_CHANNEL_FATAL = 3  # fatal error message just before stream aborts

# Capability names as they appear on the wire.
CAPABILITY_ATOMIC = b"atomic"
CAPABILITY_DEEPEN_SINCE = b"deepen-since"
CAPABILITY_DEEPEN_NOT = b"deepen-not"
CAPABILITY_DEEPEN_RELATIVE = b"deepen-relative"
CAPABILITY_DELETE_REFS = b"delete-refs"
CAPABILITY_INCLUDE_TAG = b"include-tag"
CAPABILITY_MULTI_ACK = b"multi_ack"
CAPABILITY_MULTI_ACK_DETAILED = b"multi_ack_detailed"
CAPABILITY_NO_DONE = b"no-done"
CAPABILITY_NO_PROGRESS = b"no-progress"
CAPABILITY_OFS_DELTA = b"ofs-delta"
CAPABILITY_QUIET = b"quiet"
CAPABILITY_REPORT_STATUS = b"report-status"
CAPABILITY_SHALLOW = b"shallow"
CAPABILITY_SIDE_BAND = b"side-band"
CAPABILITY_SIDE_BAND_64K = b"side-band-64k"
CAPABILITY_THIN_PACK = b"thin-pack"
CAPABILITY_AGENT = b"agent"
CAPABILITY_SYMREF = b"symref"
CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want"
CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want"

# Magic ref that is used to attach capabilities to when
# there are no refs. Should always be set to ZERO_SHA.
CAPABILITIES_REF = b"capabilities^{}"

# Capabilities shared by the upload and receive capability sets below.
COMMON_CAPABILITIES = [
    CAPABILITY_OFS_DELTA,
    CAPABILITY_SIDE_BAND,
    CAPABILITY_SIDE_BAND_64K,
    CAPABILITY_AGENT,
    CAPABILITY_NO_PROGRESS,
]

# Common capabilities plus the upload-specific ones.
KNOWN_UPLOAD_CAPABILITIES = set(COMMON_CAPABILITIES) | {
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
    CAPABILITY_INCLUDE_TAG,
    CAPABILITY_DEEPEN_SINCE,
    CAPABILITY_SYMREF,
    CAPABILITY_SHALLOW,
    CAPABILITY_DEEPEN_NOT,
    CAPABILITY_DEEPEN_RELATIVE,
    CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
    CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
}

# Common capabilities plus the receive-specific ones.
KNOWN_RECEIVE_CAPABILITIES = set(COMMON_CAPABILITIES) | {
    CAPABILITY_REPORT_STATUS,
    CAPABILITY_DELETE_REFS,
    CAPABILITY_QUIET,
    CAPABILITY_ATOMIC,
}
|
|
|
|
|
|
def agent_string():
    """Return the agent identifier for this dulwich version as ASCII bytes.

    The value is ``dulwich/X.Y.Z`` where the three integers are taken from
    ``dulwich.__version__``.
    """
    identifier = "dulwich/%d.%d.%d" % dulwich.__version__
    return identifier.encode("ascii")
|
|
|
|
|
|
def capability_agent():
    """Return the ``agent=<agent string>`` capability as bytes."""
    return b"=".join((CAPABILITY_AGENT, agent_string()))
|
|
|
|
|
|
def capability_symref(from_ref, to_ref):
    """Return the ``symref=<from_ref>:<to_ref>`` capability.

    Args:
      from_ref: Name of the symbolic ref, as bytes.
      to_ref: Name of the ref it points at, as bytes.
    """
    prefix = CAPABILITY_SYMREF + b"="
    return prefix + from_ref + b":" + to_ref
|
|
|
|
|
|
def extract_capability_names(capabilities):
    """Return the set of capability names, with any ``=value`` part dropped.

    Args:
      capabilities: Iterable of capability byte strings.
    """
    return {parse_capability(entry)[0] for entry in capabilities}
|
|
|
|
|
|
def parse_capability(capability):
    """Split a capability into its name and optional value.

    Args:
      capability: A capability byte string, either ``name`` or ``name=value``.
    Returns: Tuple of (name, value); value is None when there is no ``=``.
      Only the first ``=`` separates name from value.
    """
    name, separator, value = capability.partition(b"=")
    if not separator:
        return (name, None)
    return (name, value)
|
|
|
|
|
|
def symref_capabilities(symrefs):
    """Return one symref capability per entry of ``symrefs``.

    Args:
      symrefs: Iterable of argument sequences for capability_symref(),
        i.e. (from_ref, to_ref) pairs.
    """
    return [capability_symref(*ref_pair) for ref_pair in symrefs]
|
|
|
|
|
|
# Command keywords, as byte strings.
COMMAND_DEEPEN = b"deepen"
COMMAND_SHALLOW = b"shallow"
COMMAND_UNSHALLOW = b"unshallow"
COMMAND_DONE = b"done"
COMMAND_WANT = b"want"
COMMAND_HAVE = b"have"
|
|
|
|
|
|
class ProtocolFile(object):
    """A dummy file for network ops that expect file-like objects.

    The ``read`` and ``write`` callables are exposed unchanged as attributes;
    ``tell()`` and ``close()`` do nothing and return None.
    """

    def __init__(self, read, write):
        self.read, self.write = read, write

    def tell(self):
        """Do nothing; no position is tracked."""
        return None

    def close(self):
        """Do nothing; there is no resource to release here."""
        return None
|
|
|
|
|
|
def format_cmd_pkt(cmd, *args):
    """Build the payload of a command packet.

    Args:
      cmd: The command name, as bytes.
      args: Arguments, as bytes; each one is followed by a NUL byte.
    Returns: ``cmd``, a single space, then the NUL-terminated arguments.
    """
    terminated_args = b"".join(arg + b"\0" for arg in args)
    return cmd + b" " + terminated_args
|
|
|
|
|
|
def parse_cmd_pkt(line):
    """Parse the payload of a command packet built by format_cmd_pkt().

    Args:
      line: Packet payload: the command, a single space, then zero or more
        arguments that are each terminated by a NUL byte.
    Returns: Tuple of (command, [list of arguments]).
    Raises:
      GitProtocolError: If the payload contains no space separator.
      AssertionError: If the argument section does not end with a NUL byte.
    """
    cmd, separator, args = line.partition(b" ")
    if not separator:
        # Without this check a missing space would silently chop the last
        # byte off the command and hand the whole line back as arguments.
        raise GitProtocolError(
            "Malformed command packet (no space separator): %r" % (line,)
        )
    # Kept as an assert so the exception type seen by callers is unchanged.
    assert args[-1:] == b"\x00"
    return cmd, args[:-1].split(b"\0")
|
|
|
|
|
|
def pkt_line(data):
    """Wrap data in a pkt-line.

    Args:
      data: The payload to wrap, as bytes, or None.
    Returns: The payload prefixed with its length (payload length plus the
      four header bytes) as four lowercase hex digits; if data was None,
      returns the flush-pkt (b'0000').
    """
    if data is not None:
        header = "{:04x}".format(len(data) + 4)
        return header.encode("ascii") + data
    return b"0000"
|
|
|
|
|
|
class Protocol(object):
    """Class for interacting with a remote git process over the wire.

    Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line
    consists of the length of the line as a 4-byte hex string, followed by the
    payload data. The length includes the 4-byte header. The special line
    '0000' indicates the end of a section of input and is called a 'flush-pkt'.

    For details on the pkt-line format, see the cgit distribution:
        Documentation/technical/protocol-common.txt
    """

    def __init__(self, read, write, close=None, report_activity=None):
        """Initialize the protocol.

        Args:
          read: Callable taking a byte count and returning the bytes read.
          write: Callable taking the bytes to send.
          close: Optional callable invoked by close().
          report_activity: Optional callable invoked as
            ``report_activity(byte_count, "read" or "write")``.
        """
        self.read = read
        self.write = write
        self._close = close
        self.report_activity = report_activity
        # Holds at most one unread pkt-line, see unread_pkt_line().
        self._readahead = None

    def close(self):
        """Invoke the close callback, if one was supplied."""
        if self._close:
            self._close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def read_pkt_line(self):
        """Reads a pkt-line from the remote git process.

        This method may read from the readahead buffer; see unread_pkt_line.

        Returns: The next payload from the stream as bytes, without the
          length prefix, or None for a flush-pkt ('0000').
        Raises:
          HangupException: If nothing could be read for the length prefix.
          GitProtocolError: On a socket error, on a length prefix between 1
            and 3 (shorter than the header itself), or when fewer payload
            bytes arrive than the prefix announced.
        """
        if self._readahead is None:
            read = self.read
        else:
            read = self._readahead.read
            self._readahead = None

        try:
            sizestr = read(4)
            if not sizestr:
                raise HangupException()
            size = int(sizestr, 16)
            if size == 0:
                if self.report_activity:
                    self.report_activity(4, "read")
                return None
            if size < 4:
                # The length includes the 4-byte header, so this is
                # malformed. Reject it before it becomes a negative read
                # size, which on file-like readers means "read until EOF".
                raise GitProtocolError(
                    "Invalid pkt-line length prefix %04x" % size
                )
            if self.report_activity:
                self.report_activity(size, "read")
            if size == 4:
                # Empty payload: nothing to read. Skipping the call avoids
                # read(0), which ReceivableProtocol.read() rejects.
                pkt_contents = b""
            else:
                pkt_contents = read(size - 4)
        except socket.error as e:
            raise GitProtocolError(e)
        else:
            if len(pkt_contents) + 4 != size:
                raise GitProtocolError(
                    "Length of pkt read %04x does not match length prefix %04x"
                    % (len(pkt_contents) + 4, size)
                )
            return pkt_contents

    def eof(self):
        """Test whether the protocol stream has reached EOF.

        Note that this refers to the actual stream EOF and not just a
        flush-pkt.

        Returns: True if the stream is at EOF, False otherwise.
        """
        try:
            next_line = self.read_pkt_line()
        except HangupException:
            return True
        # Not at EOF: push the line back so the next read sees it again.
        self.unread_pkt_line(next_line)
        return False

    def unread_pkt_line(self, data):
        """Unread a single line of data into the readahead buffer.

        This method can be used to unread a single pkt-line into a fixed
        readahead buffer.

        Args:
          data: The data to unread, without the length prefix.
        Raises:
          ValueError: If more than one pkt-line is unread.
        """
        if self._readahead is not None:
            raise ValueError("Attempted to unread multiple pkt-lines.")
        self._readahead = BytesIO(pkt_line(data))

    def read_pkt_seq(self):
        """Read a sequence of pkt-lines from the remote git process.

        Returns: Yields each line of data up to but not including the next
          flush-pkt. An empty pkt-line ('0004') is yielded as b'' and does
          not end the sequence.
        """
        pkt = self.read_pkt_line()
        # Compare against None: an empty payload is falsy but is not a
        # flush-pkt.
        while pkt is not None:
            yield pkt
            pkt = self.read_pkt_line()

    def write_pkt_line(self, line):
        """Sends a pkt-line to the remote git process.

        Args:
          line: Bytes containing the data to send, without the length
            prefix, or None to send a flush-pkt.
        Raises:
          GitProtocolError: If the write fails with a socket error.
        """
        try:
            line = pkt_line(line)
            self.write(line)
            if self.report_activity:
                self.report_activity(len(line), "write")
        except socket.error as e:
            raise GitProtocolError(e)

    def write_file(self):
        """Return a writable file-like object for this protocol."""

        class ProtocolFile(object):
            """Forwards writes to the protocol and counts the bytes sent."""

            def __init__(self, proto):
                self._proto = proto
                self._offset = 0

            def write(self, data):
                self._proto.write(data)
                self._offset += len(data)

            def tell(self):
                return self._offset

            def close(self):
                pass

        return ProtocolFile(self)

    def write_sideband(self, channel, blob):
        """Write multiplexed data to the sideband.

        Args:
          channel: An int specifying the channel to write to.
          blob: A blob of data (as bytes) to send on this channel.
        """
        # a pktline can be a max of 65520. a sideband line can therefore be
        # 65520-5 = 65515
        # Note: the length prefix is ASCII hex, whereas the channel number is
        # sent as a single raw byte.
        while blob:
            self.write_pkt_line(bytes(bytearray([channel])) + blob[:65515])
            blob = blob[65515:]

    def send_cmd(self, cmd, *args):
        """Send a command and some arguments to a git server.

        Only used for the TCP git protocol (git://).

        Args:
          cmd: The remote service to access.
          args: List of arguments to send to remote service.
        """
        self.write_pkt_line(format_cmd_pkt(cmd, *args))

    def read_cmd(self):
        """Read a command and some arguments from the git client

        Only used for the TCP git protocol (git://).

        Returns: A tuple of (command, [list of arguments]).
        """
        line = self.read_pkt_line()
        return parse_cmd_pkt(line)
|
|
|
|
|
|
_RBUFSIZE = 8192  # Default read buffer size.


class ReceivableProtocol(Protocol):
    """Variant of Protocol that allows reading up to a size without blocking.

    This class has a recv() method that behaves like socket.recv() in addition
    to a read() method.

    If you want to read n bytes from the wire and block until exactly n bytes
    (or EOF) are read, use read(n). If you want to read at most n bytes from
    the wire but don't care if you get less, use recv(n). Note that recv(n)
    will still block until at least one byte is read.
    """

    def __init__(
        self, recv, write, close=None, report_activity=None, rbufsize=_RBUFSIZE
    ):
        """Initialize the protocol.

        Args:
          recv: Callable taking a maximum byte count and returning the bytes
            received; an empty result is treated as end of stream by read().
          write: Passed through to Protocol.
          close: Passed through to Protocol.
          report_activity: Passed through to Protocol.
          rbufsize: Byte count requested from ``recv`` when recv() has to
            refill the internal buffer.
        """
        # The base class reads through this object's own buffered read().
        super(ReceivableProtocol, self).__init__(
            self.read, write, close=close, report_activity=report_activity
        )
        self._recv = recv
        # Bytes between the buffer's current position and its end are
        # received but not yet handed out.
        self._rbuf = BytesIO()
        self._rbufsize = rbufsize

    def read(self, size):
        """Read ``size`` bytes, calling the recv callback until satisfied.

        Args:
          size: Number of bytes wanted; must be greater than zero.
        Returns: ``size`` bytes, or fewer if the recv callback returned no
          data before enough bytes had arrived.
        """
        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
        # with the following modifications:
        #  - omit the size <= 0 branch
        #  - seek back to start rather than 0 in case some buffer has been
        #    consumed.
        #  - use SEEK_END instead of the magic number.
        # Copyright (c) 2001-2010 Python Software Foundation; All Rights
        # Reserved
        # Licensed under the Python Software Foundation License.
        # TODO: see if buffer is more efficient than cBytesIO.
        assert size > 0

        # Our use of BytesIO rather than lists of string objects returned by
        # recv() minimizes memory usage and fragmentation that occurs when
        # rbufsize is large compared to the typical return value of recv().
        buf = self._rbuf
        start = buf.tell()
        buf.seek(0, SEEK_END)
        # buffer may have been partially consumed by recv()
        buf_len = buf.tell() - start
        if buf_len >= size:
            # Already have size bytes in our buffer?  Extract and return.
            buf.seek(start)
            rv = buf.read(size)
            # Keep whatever is left over in a fresh buffer rewound to 0.
            self._rbuf = BytesIO()
            self._rbuf.write(buf.read())
            self._rbuf.seek(0)
            return rv

        self._rbuf = BytesIO()  # reset _rbuf.  we consume it via buf.
        while True:
            left = size - buf_len
            # recv() will malloc the amount of memory given as its
            # parameter even though it often returns much less data
            # than that.  The returned data string is short lived
            # as we copy it into a BytesIO and free it.  This avoids
            # fragmentation issues on many platforms.
            data = self._recv(left)
            if not data:
                # Nothing more to receive; return what has been collected.
                break
            n = len(data)
            if n == size and not buf_len:
                # Shortcut.  Avoid buffer data copies when:
                # - We have no data in our buffer.
                # AND
                # - Our call to recv returned exactly the
                #   number of bytes we were asked to read.
                return data
            if n == left:
                buf.write(data)
                del data  # explicit free
                break
            assert n <= left, "_recv(%d) returned %d bytes" % (left, n)
            buf.write(data)
            buf_len += n
            del data  # explicit free
            # assert buf_len == buf.tell()
        # Hand out everything from the first unconsumed byte onwards.
        buf.seek(start)
        return buf.read()

    def recv(self, size):
        """Return at most ``size`` bytes.

        Unread bytes in the internal buffer are served first. Only when the
        buffer is exhausted is the recv callback called, once, asking for
        ``rbufsize`` bytes; anything beyond ``size`` stays buffered.

        Args:
          size: Maximum number of bytes wanted; must be greater than zero.
        """
        assert size > 0

        buf = self._rbuf
        start = buf.tell()
        buf.seek(0, SEEK_END)
        buf_len = buf.tell()
        buf.seek(start)

        # Number of buffered bytes not yet handed out.
        left = buf_len - start
        if not left:
            # only read from the wire if our read buffer is exhausted
            data = self._recv(self._rbufsize)
            if len(data) == size:
                # shortcut: skip the buffer if we read exactly size bytes
                return data
            buf = BytesIO()
            buf.write(data)
            buf.seek(0)
            del data  # explicit free
            self._rbuf = buf
        return buf.read(size)
|
|
|
|
|
|
def extract_capabilities(text):
    """Extract a capabilities list from a line, if present.

    Capabilities follow a NUL byte and are separated by single spaces.

    Args:
      text: Bytes to extract from
    Returns: Tuple with text with capabilities removed and list of capabilities
    """
    if text.find(b"\0") < 0:
        # No NUL separator, so no capabilities; the line is returned as is.
        return text, []
    stripped = text.rstrip()
    line, raw_capabilities = stripped.split(b"\0")
    capability_list = raw_capabilities.strip().split(b" ")
    return (line, capability_list)
|
|
|
|
|
|
def extract_want_line_capabilities(text):
    """Extract a capabilities list from a want line, if present.

    Note that want lines have capabilities separated from the rest of the line
    by a space instead of a null byte. Thus want lines have the form:

        want obj-id cap1 cap2 ...

    Args:
      text: Want line to extract from
    Returns: Tuple with text with capabilities removed and list of capabilities
    """
    fields = text.rstrip().split(b" ")
    if len(fields) >= 3:
        # The first two fields are the want line; the rest are capabilities.
        want_part = b" ".join(fields[:2])
        return (want_part, fields[2:])
    # No capabilities: the line is returned untouched (not even stripped).
    return text, []
|
|
|
|
|
|
def ack_type(capabilities):
    """Extract the ack type from a capabilities list.

    Returns: MULTI_ACK_DETAILED if ``multi_ack_detailed`` is present,
      otherwise MULTI_ACK if ``multi_ack`` is present, otherwise SINGLE_ACK.
    """
    # Ordered by precedence: the detailed variant wins when both are offered.
    precedence = (
        (b"multi_ack_detailed", MULTI_ACK_DETAILED),
        (b"multi_ack", MULTI_ACK),
    )
    for capability_name, mode in precedence:
        if capability_name in capabilities:
            return mode
    return SINGLE_ACK
|
|
|
|
|
|
class BufferedPktLineWriter(object):
    """Writer that wraps its data in pkt-lines and has an independent buffer.

    Consecutive calls to write() wrap the data in a pkt-line and then buffers
    it until enough lines have been written such that their total length
    (including length prefix) reach the buffer size.
    """

    def __init__(self, write, bufsize=65515):
        """Initialize the BufferedPktLineWriter.

        Args:
          write: A write callback for the underlying writer.
          bufsize: The internal buffer size, including length prefixes.
        """
        self._write = write
        self._bufsize = bufsize
        self._wbuf = BytesIO()
        # Number of bytes currently held in _wbuf.
        self._buflen = 0

    def write(self, data):
        """Write data, wrapping it in a pkt-line.

        When the buffered bytes plus the new pkt-line reach the buffer size,
        the buffer is topped up to exactly ``bufsize`` bytes and flushed; the
        rest of the pkt-line starts the next buffer.
        """
        line = pkt_line(data)
        line_len = len(line)
        over = self._buflen + line_len - self._bufsize
        if over >= 0:
            # Only the first ``start`` bytes still fit into this buffer.
            start = line_len - over
            self._wbuf.write(line[:start])
            self.flush()
        else:
            start = 0
        saved = line[start:]
        self._wbuf.write(saved)
        self._buflen += len(saved)

    def flush(self):
        """Flush all data from the buffer."""
        data = self._wbuf.getvalue()
        if data:
            self._write(data)
        # Reset the counter that write() consults. Leaving it untouched makes
        # every write after the first flush look like an overflow.
        self._buflen = 0
        self._wbuf = BytesIO()
|
|
|
|
|
|
class PktLineParser(object):
    """Packet line parser that hands completed packets off to a callback."""

    def __init__(self, handle_pkt):
        """Initialize the parser.

        Args:
          handle_pkt: Callback invoked with the payload of every completed
            pkt-line, or with None for a flush-pkt.
        """
        self.handle_pkt = handle_pkt
        self._readahead = BytesIO()

    def parse(self, data):
        """Parse a fragment of data and call back for any completed packets."""
        self._readahead.write(data)
        pending = self._readahead.getvalue()
        if len(pending) < 4:
            # Not even a complete length prefix yet; wait for more data.
            return
        while len(pending) >= 4:
            length = int(pending[:4], 16)
            if length == 0:
                # flush-pkt: only the four header bytes are consumed.
                payload, consumed = None, 4
            elif length <= len(pending):
                payload, consumed = pending[4:length], length
            else:
                # The packet is still incomplete; keep it for the next call.
                break
            self.handle_pkt(payload)
            pending = pending[consumed:]
        leftover = BytesIO()
        leftover.write(pending)
        self._readahead = leftover

    def get_tail(self):
        """Read back any unused data."""
        return self._readahead.getvalue()
|