"""This module implements encoding and decoding of bittorrent's bencoding."""
import io
class DecodeError(Exception):
    """Describe a decoding error."""
class EncodeError(Exception):
    """Describe an encoding error."""
def _consume(data, charset, many=False, optional=False):
    """Split a leading run of ``charset`` bytes off the front of ``data``.

    :param bytes data: The input that has not been decoded yet.
    :param bytes charset: The byte values that may be consumed.
    :param bool many: Consume every leading byte found in ``charset``
        instead of at most one.
    :param bool optional: Return an empty match instead of raising when the
        first byte is not in ``charset`` (or ``data`` is empty).
    :returns: A tuple ``(consumed, rest)`` of two slices of ``data``.
    :raises: DecodeError if nothing matched and ``optional`` is false.
    """
    # Without ``many`` only the very first byte is eligible.
    limit = len(data) if many else min(len(data), 1)
    found = 0
    while found < limit and data[found] in charset:
        found += 1
    if found == 0 and not optional:
        # %r renders bytes exactly like %s did, but does not trigger
        # BytesWarning under ``python -b``.  Only a short prefix is quoted so
        # that a large corrupt input does not end up inside the message.
        raise DecodeError("Parse error at %r" % (data[:32],))
    return (data[:found], data[found:])
def _decode_int(data):
    """Decode a bencoded integer (``i<digits>e``) from the front of ``data``.

    An optional single ``-`` sign is accepted in front of the digits.

    :param bytes data: Input starting with the ``i`` marker.
    :returns: A tuple ``(value, rest)`` with the integer and the unparsed
        remainder of ``data``.
    :raises: DecodeError if the markers or digits are missing, or if the
        digits cannot be converted to an ``int``.
    """
    _, data = _consume(data, b'i')
    sign, data = _consume(data, b'-', optional=True)
    digits, data = _consume(data, b'0123456789', many=True)
    _, data = _consume(data, b'e')
    try:
        value = int(sign + digits)
    except ValueError as err:
        # int() refuses very long digit strings (integer string conversion
        # length limit); report that as a decoding problem rather than
        # leaking ValueError to callers of decode().
        raise DecodeError('Parse error, cannot convert integer of %d digits'
                          % len(digits)) from err
    return (value, data)
def _decode_str(data):
    """Decode a bencoded byte string (``<length>:<payload>``).

    :param bytes data: Input starting with the decimal length prefix.
    :returns: A tuple ``(payload, rest)`` where ``payload`` is the next
        ``length`` bytes after the colon and ``rest`` is what follows.
    :raises: DecodeError if the prefix or colon is missing, the length
        cannot be converted to an ``int``, or fewer than ``length`` bytes
        remain.
    """
    digits, data = _consume(data, b'0123456789', many=True)
    try:
        count = int(digits)
    except ValueError as err:
        # int() refuses very long digit strings (integer string conversion
        # length limit); keep the documented DecodeError contract.
        raise DecodeError('Parse error, cannot convert string length of %d '
                          'digits' % len(digits)) from err
    _, data = _consume(data, b':')
    if len(data) < count:
        # %r avoids BytesWarning under ``python -b``; only a short prefix of
        # the remaining input is quoted to keep the message bounded.
        raise DecodeError('Parse error at %r, not enough data to parse string'
                          % (data[:32],))
    return (data[:count], data[count:])
def _decode_list(data):
    """Decode a bencoded list (``l<items>e``) from the front of ``data``.

    :param bytes data: Input starting with the ``l`` marker.
    :returns: A tuple ``(items, rest)`` with the decoded list and the
        unparsed remainder of ``data``.
    :raises: DecodeError
    """
    _, data = _consume(data, b'l')
    items = []
    while True:
        item, data = _decode(data, end=True)
        # _decode() yields None only when it consumed the closing ``e``.
        if item is None:
            break
        items.append(item)
    return (items, data)
def _decode_dict(data):
    """Decode a bencoded dictionary (``d<key><value>...e``).

    Key order is not checked, and when a key occurs more than once the last
    value wins.

    :param bytes data: Input starting with the ``d`` marker.
    :returns: A tuple ``(mapping, rest)`` with the decoded dict and the
        unparsed remainder of ``data``.
    :raises: DecodeError if the input is malformed or a key is not a byte
        string.
    """
    _, data = _consume(data, b'd')
    mapping = {}
    while True:
        key, data = _decode(data, end=True)
        # _decode() yields None only when it consumed the closing ``e``.
        if key is None:
            break
        if not isinstance(key, bytes):
            # Bencoded dictionaries are keyed by strings.  Lists and dicts
            # used as keys would otherwise blow up with TypeError
            # (unhashable) when stored below.
            raise DecodeError('Dictionary keys must be byte strings, got %s'
                              % type(key).__name__)
        value, data = _decode(data)
        mapping[key] = value
    return (mapping, data)
def _decode(data, end=False):
    """Decode the single bencoded value at the front of ``data``.

    The first byte selects the decoder: ``i`` for integers, a digit for byte
    strings, ``l`` for lists and ``d`` for dictionaries.

    :param bytes data: The input that has not been decoded yet.
    :param bool end: Whether the ``e`` terminator of an enclosing list or
        dictionary is acceptable at this position.
    :returns: A tuple ``(value, rest)``.  ``value`` is ``None`` when ``end``
        is true and the terminator was consumed.
    :raises: DecodeError if ``data`` is empty or starts with an unexpected
        byte.
    """
    if not data:
        raise DecodeError('No data to decode')
    lead = data[0]
    if lead in b'i':
        return _decode_int(data)
    if lead in b'0123456789':
        return _decode_str(data)
    if lead in b'l':
        return _decode_list(data)
    if lead in b'd':
        return _decode_dict(data)
    if end and lead in b'e':
        return None, data[1:]
    # There *is* data here, it just does not start a value; quote a short
    # prefix of it instead of claiming that nothing was left to decode.
    raise DecodeError('Parse error at %r' % (data[:32],))
def decode(data):
    """Decode a bencoded bytes object.

    :param bytes data: A valid bencoded representation of some object.
        ``bytearray`` and ``memoryview`` input is copied to ``bytes`` first.
    :returns: The decoded object.
    :rtype: bytes, int, dict, list
    :raises: DecodeError
    """
    if isinstance(data, (bytearray, memoryview)):
        # The decoder hands out slices of its input as strings and dict
        # keys; normalise so those are always hashable ``bytes`` objects.
        data = bytes(data)
    value, rest = _decode(data)
    if rest:
        raise DecodeError('Trailing data after decoding')
    return value
def _encode_int(buf, val):
    """Write ``val`` to ``buf`` as a bencoded integer (``i<digits>e``).

    :param buf: A binary file-like object with a ``write`` method.
    :param int val: The integer to encode.  ``bool`` and other ``int``
        subclasses are written by their integer value.
    :raises: EncodeError if ``val`` cannot be rendered as decimal digits.
    """
    try:
        # Go through int() first: str() of an int subclass is not
        # necessarily decimal digits (str(True) is 'True', which would
        # produce the invalid encoding b'iTruee').
        digits = str(int(val))
    except ValueError as err:
        # str() refuses extremely large integers (integer string conversion
        # length limit); surface that as the documented error type.
        raise EncodeError("Can't encode integer: %s" % (err,)) from err
    buf.write(b'i')
    buf.write(digits.encode())
    buf.write(b'e')
def _encode_bytes(buf, val):
    """Write ``val`` to ``buf`` as a bencoded string (``<length>:<payload>``).

    :param buf: A binary file-like object with a ``write`` method.
    :param bytes val: The payload; its ``len()`` becomes the decimal prefix.
    """
    prefix = str(len(val)).encode() + b':'
    buf.write(prefix)
    buf.write(val)
def _encode_list(buf, val):
    """Write ``val`` to ``buf`` as a bencoded list (``l<items>e``).

    :param buf: A binary file-like object with a ``write`` method.
    :param list val: The items, each of which is encoded in order.
    """
    opener, closer = b'l', b'e'
    buf.write(opener)
    for element in val:
        _encode(buf, element)
    buf.write(closer)
def _encode_dict(buf, rawval):
    """Write ``rawval`` to ``buf`` as a bencoded dictionary.

    :param buf: A binary file-like object with a ``write`` method.
    :param dict rawval: The mapping to encode.  Keys must be ``str`` or
        ``bytes`` (or subclasses of those).
    :raises: EncodeError if a key has another type or if two keys have the
        same ``bytes`` encoding.
    """
    # Dicts are annoying, because they need to be sorted and can't contain the
    # same key twice. This creates two problems:
    #  1. a key 'hello' and b'hello' would encode to the same key (not allowed)
    #  2. a key 'hello' and b'hello' can't be compared (by the default sorted())
    # To solve this problem we convert the entire dictionary to something that
    # only has bytes() keys and bail out if we can't do that unambiguously.
    normalized = {}
    for key, value in rawval.items():
        # isinstance() rather than an exact type() comparison, so that
        # subclasses of str/bytes are handled the same way _encode() does.
        if isinstance(key, str):
            bkey = key.encode()
        elif isinstance(key, bytes):
            bkey = key
        else:
            raise EncodeError("Can't encode dict keys of type %s" % type(key))
        if bkey in normalized:
            raise EncodeError('Ambiguous key in dictionary, multiple keys '
                              'encode to %r' % (bkey,))
        normalized[bkey] = value
    buf.write(b'd')
    for bkey in sorted(normalized):
        _encode_bytes(buf, bkey)
        _encode(buf, normalized[bkey])
    buf.write(b'e')
def _encode(buf, val):
    """Write the bencoded form of ``val`` to ``buf``.

    ``str`` values are encoded to ``bytes`` first; ``bytes``, ``list``,
    ``dict`` and ``int`` values are handed to their dedicated encoder.

    :param buf: A binary file-like object with a ``write`` method.
    :param val: The value to encode.
    :raises: EncodeError if ``val`` is of none of the supported types.
    """
    if isinstance(val, str):
        _encode_bytes(buf, val.encode())
        return
    # Checked in this order; the first matching type wins.
    handlers = (
        (bytes, _encode_bytes),
        (list, _encode_list),
        (dict, _encode_dict),
        (int, _encode_int),
    )
    for kind, handler in handlers:
        if isinstance(val, kind):
            handler(buf, val)
            return
    raise EncodeError("Can't encode object of type %s" % type(val))
def encode(val):
    """Encode a value as bencoded bytes.

    :param val: The value that is to be encoded. Can be of type ``dict``,
        ``list``, ``int``, ``string``, ``bytes``. Dicts and lists can only
        contain those types. Note that ``dict`` objects may not contain keys
        with a type other than ``bytes`` or ``string`` and that all keys must
        have a unique ``bytes`` encoding. The following is not valid::

            {'hello': 1, b'hello': 2}

        because::

            'hello'.encode() == b'hello'

        Besides the aforementioned types, any type that derives from them is
        also supported with the expectation that they behave in a sane way.
        If you supply derived types that don't behave as we expect these
        types to behave the results are undefined.
    :returns: The bencoded representation of ``val``
    :rtype: bytes
    :raises: EncodeError
    """
    with io.BytesIO() as sink:
        _encode(sink, val)
        encoded = sink.getvalue()
    return encoded