From: Zooko O'Whielacronx Date: Sun, 5 Jul 2009 02:51:09 +0000 (-0700) Subject: directories: keep track of your position as you decode netstring after netstring... X-Git-Tag: trac-4000~31 X-Git-Url: https://git.rkrishnan.org/vdrive/%22news.html/simplejson/$rel_link?a=commitdiff_plain;h=efafcfb91a09b4de4d955d35f98dc0e82753da02;p=tahoe-lafs%2Ftahoe-lafs.git directories: keep track of your position as you decode netstring after netstring from an input buffer instead of copying the trailing part This makes decoding linear in the number of netstrings instead of O(N^2). --- diff --git a/src/allmydata/dirnode.py b/src/allmydata/dirnode.py index 73143199..8f72709b 100644 --- a/src/allmydata/dirnode.py +++ b/src/allmydata/dirnode.py @@ -204,15 +204,16 @@ class NewDirectoryNode: # rocap, rwcap, metadata), in which the name,rocap,metadata are in # cleartext. The 'name' is UTF-8 encoded. The rwcap is formatted as: # pack("16ss32s", iv, AES(H(writekey+iv), plaintextrwcap), mac) - assert isinstance(data, str) + assert isinstance(data, str), (repr(data), type(data)) # an empty directory is serialized as an empty string if data == "": return {} writeable = not self.is_readonly() children = {} - while len(data) > 0: - entry, data = split_netstring(data, 1, True) - name, rocap, rwcapdata, metadata_s = split_netstring(entry, 4) + position = 0 + while position < len(data): + entries, position = split_netstring(data, 1, position) + (name, rocap, rwcapdata, metadata_s), subpos = split_netstring(entries[0], 4) name = name.decode("utf-8") rwcap = None if writeable: diff --git a/src/allmydata/test/test_netstring.py b/src/allmydata/test/test_netstring.py index 5c8199a6..8fecdc49 100644 --- a/src/allmydata/test/test_netstring.py +++ b/src/allmydata/test/test_netstring.py @@ -5,37 +5,32 @@ from allmydata.util.netstring import netstring, split_netstring class Netstring(unittest.TestCase): def test_split(self): a = netstring("hello") + netstring("world") - self.failUnlessEqual(split_netstring(a, 2), ("hello", "world")) - self.failUnlessEqual(split_netstring(a, 2, False), ("hello", "world")) - self.failUnlessEqual(split_netstring(a, 2, True), - ("hello", "world", "")) + self.failUnlessEqual(split_netstring(a, 2), (["hello", "world"], len(a))) + self.failUnlessEqual(split_netstring(a, 2, required_trailer=""), (["hello", "world"], len(a))) self.failUnlessRaises(ValueError, split_netstring, a, 3) - self.failUnlessRaises(ValueError, split_netstring, a+" extra", 2) - self.failUnlessRaises(ValueError, split_netstring, a+" extra", 2, False) + self.failUnlessRaises(ValueError, split_netstring, a+" extra", 2, required_trailer="") + self.failUnlessEqual(split_netstring(a+" extra", 2), (["hello", "world"], len(a))) self.failUnlessEqual(split_netstring(a+"++", 2, required_trailer="++"), - ("hello", "world")) + (["hello", "world"], len(a)+2)) self.failUnlessRaises(ValueError, split_netstring, a+"+", 2, required_trailer="not") def test_extra(self): a = netstring("hello") - self.failUnlessEqual(split_netstring(a, 1, True), ("hello", "")) + self.failUnlessEqual(split_netstring(a, 1), (["hello"], len(a))) b = netstring("hello") + "extra stuff" - self.failUnlessEqual(split_netstring(b, 1, True), - ("hello", "extra stuff")) + self.failUnlessEqual(split_netstring(b, 1), + (["hello"], len(a))) def test_nested(self): a = netstring("hello") + netstring("world") + "extra stuff" b = netstring("a") + netstring("is") + netstring(a) + netstring(".") - top = split_netstring(b, 4) + (top, pos) = split_netstring(b, 4) self.failUnlessEqual(len(top), 4) self.failUnlessEqual(top[0], "a") self.failUnlessEqual(top[1], "is") self.failUnlessEqual(top[2], a) self.failUnlessEqual(top[3], ".") - self.failUnlessRaises(ValueError, split_netstring, a, 2) - self.failUnlessRaises(ValueError, split_netstring, a, 2, False) - bottom = split_netstring(a, 2, True) - self.failUnlessEqual(bottom, ("hello", "world", "extra stuff")) - - + self.failUnlessRaises(ValueError, split_netstring, a, 2, required_trailer="") + bottom = split_netstring(a, 2) + self.failUnlessEqual(bottom, (["hello", "world"], len(netstring("hello")+netstring("world")))) diff --git a/src/allmydata/util/netstring.py b/src/allmydata/util/netstring.py index a1fe8cb9..73f15658 100644 --- a/src/allmydata/util/netstring.py +++ b/src/allmydata/util/netstring.py @@ -5,34 +5,34 @@ def netstring(s): return "%d:%s," % (len(s), s,) def split_netstring(data, numstrings, - allow_leftover=False, - required_trailer=""): - """like string.split(), but extracts netstrings. If allow_leftover=False, - I return numstrings elements, and throw ValueError if there was leftover - data that does not exactly equal 'required_trailer'. If - allow_leftover=True, required_trailer must be empty, and I return - numstrings+1 elements, in which the last element is the leftover data - (possibly an empty string)""" - - assert not (allow_leftover and required_trailer) + position=0, + required_trailer=None): + """like string.split(), but extracts netstrings. Ignore all bytes of data + before the 'position' byte. Return a tuple of (list of elements (numstrings + in length), new position index). The new position index points to the first + byte which was not consumed (the 'required_trailer', if any, counts as + consumed). If 'required_trailer' is not None, throw ValueError if leftover + data does not exactly equal 'required_trailer'.""" + assert type(position) in (int, long), (repr(position), type(position)) elements = [] assert numstrings >= 0 - while data: - colon = data.index(":") - length = int(data[:colon]) + while position < len(data): + colon = data.index(":", position) + length = int(data[position:colon]) string = data[colon+1:colon+1+length] - assert len(string) == length + assert len(string) == length, (len(string), length) elements.append(string) - assert data[colon+1+length] == "," - data = data[colon+1+length+1:] + position = colon+1+length + assert data[position] == ",", position + position += 1 if len(elements) == numstrings: break if len(elements) < numstrings: raise ValueError("ran out of netstrings") - if allow_leftover: - return tuple(elements + [data]) - if data != required_trailer: - raise ValueError("leftover data in netstrings") - return tuple(elements) - + if required_trailer is not None: + if ((len(data) - position) != len(required_trailer)) or (data[position:] != required_trailer): + raise ValueError("leftover data in netstrings") + return (elements, position + len(required_trailer)) + else: + return (elements, position)