From: Brian Warner Date: Fri, 15 Feb 2008 01:06:41 +0000 (-0700) Subject: remove bencode, yay X-Git-Tag: allmydata-tahoe-0.8.0~53 X-Git-Url: https://git.rkrishnan.org/components/com_hotproperty/simplejson/encoder.py.html?a=commitdiff_plain;h=09bfa3bea21c64c11449080ed7239e41f5bcdcf8;p=tahoe-lafs%2Ftahoe-lafs.git remove bencode, yay --- diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py index 52e7c0bd..65c571e4 100644 --- a/src/allmydata/test/test_util.py +++ b/src/allmydata/test/test_util.py @@ -6,7 +6,7 @@ from twisted.trial import unittest from twisted.internet import defer from twisted.python import failure -from allmydata.util import base32, bencode, idlib, humanreadable, mathutil, hashutil +from allmydata.util import bencode, idlib, humanreadable, mathutil, hashutil from allmydata.util import assertutil, fileutil, testutil, deferredutil @@ -63,60 +63,6 @@ class HumanReadable(unittest.TestCase): class MyList(list): pass -class Bencode(unittest.TestCase): - def test_bencode(self): - e = bencode.bencode - self.failUnlessEqual(e(4), "i4e") - self.failUnlessEqual(e([1,2]), "li1ei2ee") - self.failUnlessEqual(e(MyList([1,2])), "li1ei2ee") - self.failUnlessEqual(e({1:2}), "di1ei2ee") - self.failUnlessEqual(e(u"a"), "u1:a") - self.failUnlessEqual(e([True,False]), "lb1b0e") - self.failUnlessEqual(e(1.5), "f1.5e") - self.failUnlessEqual(e("foo"), "3:foo") - d = bencode.bdecode - self.failUnlessEqual(d("li1ei2ee"), [1,2]) - self.failUnlessEqual(d("u1:a"), u"a") - self.failUnlessRaises(ValueError, d, "u10:short") - self.failUnlessEqual(d("lb1b0e"), [True,False]) - self.failUnlessRaises(ValueError, d, "b2") - self.failUnlessEqual(d("f1.5e"), 1.5) - self.failUnlessEqual(d("3:foo"), "foo") - self.failUnlessRaises(ValueError, d, - "38:When doing layout, always plan ah") - # ooh! fascinating! bdecode requires string keys! I think this ought - # to be changed - #self.failUnlessEqual(d("di1ei2ee"), {1:2}) - self.failUnlessEqual(d("d1:ai2eu1:bi3ee"), {"a":2, u"b":3}) - self.failUnlessRaises(ValueError, d, "di1ei2ee") - self.failUnlessRaises(ValueError, d, "d1:ai1e1:ai2ee") - - self.failUnlessRaises(ValueError, d, "i1ei2e") - - # now run all the module's builtin tests - bencode.test_decode_raw_string() - bencode.test_encode_and_decode_unicode_results_in_unicode_type() - bencode.test_encode_and_decode_unicode_at_least_preserves_the_content_even_if_it_flattens_the_type() - bencode.test_dict_forbids_non_string_key() - bencode.test_dict_forbids_key_repeat() - bencode.test_empty_dict() - bencode.test_dict_allows_unicode_keys() - bencode.test_ValueError_in_decode_unknown() - bencode.test_encode_and_decode_none() - bencode.test_encode_and_decode_long() - bencode.test_encode_and_decode_int() - bencode.test_encode_and_decode_float() - bencode.test_encode_and_decode_bool() - #bencode.test_decode_noncanonical_int() - bencode.test_encode_and_decode_dict() - bencode.test_encode_and_decode_list() - bencode.test_encode_and_decode_tuple() - bencode.test_encode_and_decode_empty_dict() - bencode.test_encode_and_decode_complex_object() - bencode.test_unfinished_list() - bencode.test_unfinished_dict() - bencode.test_unsupported_type() - class Math(unittest.TestCase): def test_div_ceil(self): f = mathutil.div_ceil diff --git a/src/allmydata/util/bencode.py b/src/allmydata/util/bencode.py deleted file mode 100644 index 0cafa821..00000000 --- a/src/allmydata/util/bencode.py +++ /dev/null @@ -1,434 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -A library for streaming and unstreaming of simple objects, designed -for speed, compactness, and ease of implementation. - -The basic functions are bencode and bdecode. bencode takes an object -and returns a string, bdecode takes a string and returns an object. -bdecode raises a ValueError if you give it an invalid string. - -The objects passed in may be nested dicts, lists, ints, floats, strings, -and Python boolean and None types. For example, all of the following -may be bencoded - - -{'a': [0, 1], 'b': None} - -[None, ['a', 2, ['c', None]]] - -{'spam': (2,3,4)} - -{'name': 'Cronus', 'spouse': 'Rhea', 'children': ['Hades', 'Poseidon']} - -In general bdecode(bencode(spam)) == spam, but tuples and lists are -encoded the same, so bdecode(bencode((0, 1))) is [0, 1] rather -than (0, 1). Longs and ints are also encoded the same way, so -bdecode(bencode(4)) is a long. - -Dict keys are required to be basestrings (byte strings or unicode objects), -to avoid a mess of potential implementation incompatibilities. bencode is -intended to be used for protocols which are going to be re-implemented many -times, so it's very conservative in that regard. - -Which type is encoded is determined by the first character, 'i', 'n', 'f', -'d', 'l', 'b', 'u', and any digit. They indicate integer, null, float, -dict, list, boolean, unicode string, and string, respectively. - -Strings are length-prefixed in base 10, followed by a colon. - -bencode('spam') == '4:spam' - -Unicode string objects are indicated with an initial u, a base 10 -length-prefix, and the remaining bytes in utf-8 encoding. - -bencode(u'\u00bfHabla espa\u00f1ol?') == '¿Habla español?' - -Nulls are indicated by a single 'n'. - -bencode(None) == 'n' - -Integers are encoded base 10 and terminated with an 'e' - - -bencode(3) == 'i3e' -bencode(-20) == 'i-20e' - -Floats are encoded in base 10 and terminated with an 'e' - - -bencode(3.2) == 'f3.2e' -bencode(-23.4532) == 'f-23.4532e' - -Lists are encoded in list order, terminated by an 'e' - - -bencode(['abc', 'd']) == 'l3:abc1:de' -bencode([2, 'f']) == 'li2e1:fe' - -Dicts are encoded by containing alternating keys and values. -The keys are encoded in sorted order, but sort order is not -enforced on the decode. Dicts are terminated by an 'e'. Dict -keys can be either bytestrings or unicode strings. For example - - -bencode({'spam': 'eggs'}) == 'd4:spam4:eggse' -bencode({'ab': 2, 'a': None}) == 'd1:an2:abi2ee' -bencode({'a' : 1, u'\xab': 2}) == 'd1:ai1eu4:\xfe\xff\x00\xa8i2ee' - -Truncated strings come first, so in sort order 'a' comes before 'abc'. -""" - -# This file is licensed under the GNU Lesser General Public License v2.1. -# -# Originally written by Mojo Nation. -# Rewritten by Bram Cohen. -# Further enhanced by Allmydata to support additional Python types (Boolean -# None, Float, and Unicode strings.) - -from types import IntType, LongType, FloatType, ListType, TupleType, DictType, StringType, UnicodeType, BooleanType, NoneType -from cStringIO import StringIO -import string - -def bencode(data): - """ - encodes objects as strings, see module documentation for more info - """ - result = StringIO() - bwrite(data, result) - return result.getvalue() - -def bwrite(data, result): - # a generic using pje's type dispatch will be faster here - try: - encoder = encoders[type(data)] - except KeyError: - encoder = None - # Catch subclasses of built-in types - for t,coder in encoders.items(): - if isinstance(data, t): - encoder = coder - break - if not encoder: - raise ValueError("unsupported data type: %s" % type(data)) - encoder(data, result) - -encoders = {} - -def encode_int(data, result): - result.write('i' + str(data) + 'e') - -encoders[IntType] = encode_int -encoders[LongType] = encode_int - -def encode_float(data, result): - result.write('f' + str(data) + 'e') - -encoders[FloatType] = encode_float - -def encode_bool(data, result): - if data: - result.write('b1') - else: - result.write('b0') - -encoders[BooleanType] = encode_bool - -def encode_list(data, result): - result.write('l') - _bwrite = bwrite - for item in data: - _bwrite(item, result) - result.write('e') - -encoders[TupleType] = encode_list -encoders[ListType] = encode_list -encoders[set] = encode_list - -def encode_string(data, result): - result.write(str(len(data)) + ':' + data) - -encoders[StringType] = encode_string - -def encode_unicode(data, result): - payload = data.encode('utf-8') - result.write('u' + str(len(payload)) + ':' + payload) - -encoders[UnicodeType] = encode_unicode - -def encode_dict(data, result): - result.write('d') - _bwrite = bwrite - keylist = data.keys() - keylist.sort() - for key in keylist: - _bwrite(key, result) - _bwrite(data[key], result) - result.write('e') - -encoders[DictType] = encode_dict - -encoders[NoneType] = lambda data, result: result.write('n') - -def bdecode(s): - """ - Does the opposite of bencode. Raises a ValueError if there's a problem. - """ - try: - result, index = bread(s, 0) - if index != len(s): - raise ValueError('left over stuff at end: %s' % s[index:]) - return result - except IndexError, e: - raise ValueError(str(e)) - except KeyError, e: - raise ValueError(str(e)) - -def bread(s, index): - return decoders[s[index]](s, index) - -decoders = {} - -def decode_raw_string(s, index): - ci = s.index(":", index) - ei = ci + int(s[index:ci]) + 1 - if ei > len(s): - raise ValueError('length encoding indicates premature end of string') - return (s[ci+1:ei], ei) - -for c in string.digits: - decoders[c] = decode_raw_string - -def decode_unicode_string(s, index): - ci = s.index(":", index) - ei = ci + int(s[index+1:ci]) + 1 - if ei > len(s): - raise ValueError('length encoding indicates premature end of string') - return (unicode(s[ci+1:ei], 'utf-8'), ei) - -decoders['u'] = decode_unicode_string - -def decode_int(s, index): - ei = s.index('e', index) - return (long(s[index+1:ei]), ei+1) - -decoders['i'] = decode_int - -def decode_float(s, index): - ei = s.index('e', index) - return (float(s[index+1:ei]), ei+1) - -decoders['f'] = decode_float - -def decode_bool(s, index): - val = s[index+1] - if val == '1': - return True, index+2 - elif val == '0': - return False, index+2 - else: - raise ValueError('invalid boolean encoding: %s' % s[index:index+2]) - -decoders['b'] = decode_bool - -# decoders['n'] = lambda s, index: decoders_n.inc('n') or (None, index + 1) -decoders['n'] = lambda s, index: (None, index + 1) - -def decode_list(s, index): - # decoders_n.inc('l') - result = [] - index += 1 - _bread = bread - while s[index] != 'e': - next, index = _bread(s, index) - result.append(next) - return result, index + 1 - -decoders['l'] = decode_list - -def decode_dict(s, index): - # decoders_n.inc('d') - result = {} - index += 1 - _decode_string = decode_raw_string - _decode_unicode = decode_unicode_string - _bread = bread - while s[index] != 'e': - if s[index] in string.digits: - key, index = _decode_string(s, index) - elif s[index] == "u": - key, index = _decode_unicode(s, index) - else: - raise ValueError("dict key must be basestring") - if key in result: - raise ValueError("dict key was repeated") - value, index = _bread(s, index) - result[key] = value - return result, index + 1 - -decoders['d'] = decode_dict - -def test_decode_raw_string(): - assert decode_raw_string('1:a', 0) == ('a', 3) - assert decode_raw_string('0:', 0) == ('', 2) - assert decode_raw_string('10:aaaaaaaaaaaaaaaaaaaaaaaaa', 0) == ('aaaaaaaaaa', 13) - assert decode_raw_string('10:', 1) == ('', 3) -# non-reexp version does not check for this case -# try: -# decode_raw_string('01:a', 0) -# assert 0, 'failed' -# except ValueError: -# pass - try: - decode_raw_string('--1:a', 0) - assert 0, 'failed' - except ValueError: - pass - try: - decode_raw_string('h', 0) - assert 0, 'failed' - except ValueError: - pass - try: - decode_raw_string('h:', 0) - assert 0, 'failed' - except ValueError: - pass - try: - decode_raw_string('1', 0) - assert 0, 'failed' - except ValueError: - pass - try: - decode_raw_string('', 0) - assert 0, 'failed' - except ValueError: - pass - try: - decode_raw_string('5:a', 0) - assert 0, 'failed' - except ValueError: - pass - -def test_encode_and_decode_unicode_results_in_unicode_type(): - assert bdecode(bencode(u'\u00bfHabla espa\u00f1ol?')) == u'\u00bfHabla espa\u00f1ol?' - -def test_encode_and_decode_unicode_at_least_preserves_the_content_even_if_it_flattens_the_type(): - test_string = bdecode(bencode(u'\u00bfHabla espa\u00f1ol?')) - if isinstance(test_string, unicode): - assert test_string == u'\u00bfHabla espa\u00f1ol?' - elif isinstance(test_string, str): - assert test_string.decode('utf-8') == u'\u00bfHabla espa\u00f1ol?' - else: - assert 0, 'flunked' - -def test_dict_forbids_non_string_key(): - try: - bdecode('di3ene') - assert 0, 'failed' - except ValueError: - pass - -def test_dict_forbids_key_repeat(): - try: - bdecode('d1:an1:ane') - assert 0, 'failed' - except ValueError: - pass - -def test_empty_dict(): - assert bdecode('de') == {} - -def test_dict_allows_unicode_keys(): - assert bdecode(bencode({'a': 1, u'\xa8': 2})) == {'a': 1L, u'\xa8': 2L} - -def test_ValueError_in_decode_unknown(): - try: - bdecode('x') - assert 0, 'flunked' - except ValueError: - pass - -def test_encode_and_decode_none(): - assert bdecode(bencode(None)) == None - -def test_encode_and_decode_long(): - assert bdecode(bencode(-23452422452342L)) == -23452422452342L - -def test_encode_and_decode_int(): - assert bdecode(bencode(2)) == 2 - -def test_encode_and_decode_float(): - assert bdecode(bencode(3.4)) == 3.4 - assert bdecode(bencode(0.0)) == 0.0 - assert bdecode(bencode(-4.56)) == -4.56 - assert bdecode(bencode(-0.0)) == -0.0 - -def test_encode_and_decode_bool(): - assert bdecode(bencode(True)) == True - assert bdecode(bencode(False)) == False - -# the non-regexp methods no longer check for canonical ints, but we -# don't parse input we did not generate using bencode, so I will leave -# these commented out for now -#def test_decode_noncanonical_int(): -# try: -# bdecode('i03e') -# assert 0 -# except ValueError: -# pass -# try: -# bdecode('i3 e') -# assert 0 -# except ValueError: -# pass -# try: -# bdecode('i 3e') -# assert 0 -# except ValueError: -# pass -# try: -# bdecode('i-0e') -# assert 0 -# except ValueError: -# pass - -def test_encode_and_decode_dict(): - x = {'42': 3} - assert bdecode(bencode(x)) == x - -def test_encode_and_decode_list(): - assert bdecode(bencode([])) == [] - -def test_encode_and_decode_tuple(): - assert bdecode(bencode(())) == [] - -def test_encode_and_decode_empty_dict(): - assert bdecode(bencode({})) == {} - -def test_encode_and_decode_complex_object(): - spam = [[], 0, -3, -345234523543245234523L, {}, 'spam', None, {'a': [3]}, {}, {'a': 1L, u'\xa8': 2L}] - assert bencode(bdecode(bencode(spam))) == bencode(spam) - assert bdecode(bencode(spam)) == spam - -def test_unfinished_list(): - try: - bdecode('ln') - assert 0 - except ValueError: - pass - -def test_unfinished_dict(): - try: - bdecode('d') - assert 0 - except ValueError: - pass - try: - bdecode('d1:a') - assert 0 - except ValueError: - pass - -def test_unsupported_type(): - try: - bencode(lambda: None) - assert 0 - except ValueError: - pass