From: Brian Warner Date: Sat, 17 Oct 2009 18:00:05 +0000 (-0700) Subject: move dirnode.CachingDict to dictutil.AuxValueDict, generalize method names, X-Git-Tag: trac-4100~14 X-Git-Url: https://git.rkrishnan.org/%5B/FOOURL?a=commitdiff_plain;h=ea373de042c49ba10abca1c9b30003a1a19c907f;p=tahoe-lafs%2Ftahoe-lafs.git move dirnode.CachingDict to dictutil.AuxValueDict, generalize method names, improve tests. Let dirnode _pack_children accept either dict or AuxValueDict. --- diff --git a/src/allmydata/dirnode.py b/src/allmydata/dirnode.py index dfb2f1b5..66029f26 100644 --- a/src/allmydata/dirnode.py +++ b/src/allmydata/dirnode.py @@ -21,23 +21,7 @@ from allmydata.util.netstring import netstring, split_netstring from allmydata.uri import DirectoryURI, ReadonlyDirectoryURI, \ LiteralFileURI, from_string from pycryptopp.cipher.aes import AES - -class CachingDict(dict): - def __init__(self, *args): - super(CachingDict, self).__init__(*args) - self.serialized = {} - - def __setitem__(self, k, v): - super(CachingDict, self).__setitem__(k, v) - self.serialized[k] = None - - def get_both_items(self, k): - return (self.serialized.setdefault(k, None), - super(CachingDict, self).__getitem__(k)) - - def set_both_items(self, key, serialized, t): - self.serialized[key] = serialized - super(CachingDict, self).__setitem__(key, t) +from allmydata.util.dictutil import AuxValueDict class Deleter: def __init__(self, node, name, must_exist=True): @@ -209,9 +193,9 @@ class DirectoryNode: assert isinstance(data, str), (repr(data), type(data)) # an empty directory is serialized as an empty string if data == "": - return CachingDict() + return AuxValueDict() writeable = not self.is_readonly() - children = CachingDict() + children = AuxValueDict() position = 0 while position < len(data): entries, position = split_netstring(data, 1, position) @@ -228,18 +212,20 @@ class DirectoryNode: child = self._create_node(rwcap, rocap) metadata = simplejson.loads(metadata_s) assert isinstance(metadata, dict) - 
children.set_both_items(name, entry, (child, metadata)) + children.set_with_aux(name, (child, metadata), auxilliary=entry) return children def _pack_contents(self, children): # expects children in the same format as _unpack_contents - assert isinstance(children, CachingDict) + has_aux = isinstance(children, AuxValueDict) entries = [] for name in sorted(children.keys()): - entry, metadata = children.get_both_items(name) - if entry == None: - child, metadata = metadata - assert isinstance(name, unicode) + assert isinstance(name, unicode) + entry = None + if has_aux: + entry = children.get_aux(name) + if not entry: + child, metadata = children.get(name) assert IFilesystemNode.providedBy(child), (name,child) assert isinstance(metadata, dict) rwcap = child.get_uri() # might be RO if the child is not writeable diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py index 4330ce9e..d1c5881e 100644 --- a/src/allmydata/test/test_dirnode.py +++ b/src/allmydata/test/test_dirnode.py @@ -776,21 +776,6 @@ class Packing(unittest.TestCase): self.failUnlessEqual(file1_rwcap, children[u'file1'][0].get_uri()) - def test_caching_dict(self): - d = dirnode.CachingDict() - d.set_both_items("test", "test2", ("test3", "test4")) - cached, value = d.get_both_items("test") - - self.failUnlessEqual(cached, "test2") - self.failUnlessEqual(value, ("test3", "test4")) - - d['test'] = ("test3", "test2") - - cached, value = d.get_both_items("test") - - self.failUnlessEqual(cached, None) - self.failUnlessEqual(value, ("test3", "test2")) - class FakeMutableFile: counter = 0 def __init__(self, initial_contents=""): diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py index 1aef24ed..c7cdd04a 100644 --- a/src/allmydata/test/test_util.py +++ b/src/allmydata/test/test_util.py @@ -1274,6 +1274,53 @@ class DictUtil(unittest.TestCase): self.failUnlessEqual(x, "b") self.failUnlessEqual(d.items(), [("c", 1), ("a", 3)]) + def test_auxdict(self): + d = 
dictutil.AuxValueDict() + # we put the serialized form in the auxdata + d.set_with_aux("key", ("filecap", "metadata"), "serialized") + + self.failUnlessEqual(d.keys(), ["key"]) + self.failUnlessEqual(d["key"], ("filecap", "metadata")) + self.failUnlessEqual(d.get_aux("key"), "serialized") + def _get_missing(key): + return d[key] + self.failUnlessRaises(KeyError, _get_missing, "nonkey") + self.failUnlessEqual(d.get("nonkey"), None) + self.failUnlessEqual(d.get("nonkey", "nonvalue"), "nonvalue") + self.failUnlessEqual(d.get_aux("nonkey"), None) + self.failUnlessEqual(d.get_aux("nonkey", "nonvalue"), "nonvalue") + + d["key"] = ("filecap2", "metadata2") + self.failUnlessEqual(d["key"], ("filecap2", "metadata2")) + self.failUnlessEqual(d.get_aux("key"), None) + + d.set_with_aux("key2", "value2", "aux2") + self.failUnlessEqual(sorted(d.keys()), ["key", "key2"]) + del d["key2"] + self.failUnlessEqual(d.keys(), ["key"]) + self.failIf("key2" in d) + self.failUnlessRaises(KeyError, _get_missing, "key2") + self.failUnlessEqual(d.get("key2"), None) + self.failUnlessEqual(d.get_aux("key2"), None) + d["key2"] = "newvalue2" + self.failUnlessEqual(d.get("key2"), "newvalue2") + self.failUnlessEqual(d.get_aux("key2"), None) + + d = dictutil.AuxValueDict({1:2,3:4}) + self.failUnlessEqual(sorted(d.keys()), [1,3]) + self.failUnlessEqual(d[1], 2) + self.failUnlessEqual(d.get_aux(1), None) + + d = dictutil.AuxValueDict([ (1,2), (3,4) ]) + self.failUnlessEqual(sorted(d.keys()), [1,3]) + self.failUnlessEqual(d[1], 2) + self.failUnlessEqual(d.get_aux(1), None) + + d = dictutil.AuxValueDict(one=1, two=2) + self.failUnlessEqual(sorted(d.keys()), ["one","two"]) + self.failUnlessEqual(d["one"], 1) + self.failUnlessEqual(d.get_aux("one"), None) + class Pipeline(unittest.TestCase): def pause(self, *args, **kwargs): d = defer.Deferred() diff --git a/src/allmydata/util/dictutil.py b/src/allmydata/util/dictutil.py index 2eee85a6..3dc815b3 100644 --- a/src/allmydata/util/dictutil.py +++ 
# src/allmydata/util/dictutil.py, hunk @@ -605,3 +605,43 @@: the class below
# is appended to the module after the end of class ValueOrderedDict.

class AuxValueDict(dict):
    """I behave like a regular dict, but each key is associated with two
    values: the main value, and an auxiliary one. Setting the main value
    (with the usual d[key]=value, or with update()) clears the auxvalue.
    Removing a key (del, pop, popitem, clear) discards its auxvalue too. You
    can set both main and auxvalue at the same time, and can retrieve the
    values separately.

    The main use case is a dictionary that represents unpacked child values
    for a directory node, where a common pattern is to modify one or more
    children and then pass the dict back to a packing function. The original
    packed representation can be cached in the auxvalue, and the packing
    function can use it directly on all unmodified children. On large
    directories with a complex packing function, this can save considerable
    time.

    Keys supplied to the constructor start out with no auxvalue.

    Note: the attribute and keyword name 'auxilliary' is misspelled, but it
    is part of the interface (callers pass auxilliary=...), so it is kept.
    """

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as dict(); no auxvalues are set."""
        super(AuxValueDict, self).__init__(*args, **kwargs)
        self.auxilliary = {}

    def __setitem__(self, key, value):
        """Set the main value and clear any auxvalue cached for key."""
        super(AuxValueDict, self).__setitem__(key, value)
        self.auxilliary[key] = None # clear the auxvalue

    def __delitem__(self, key):
        """Delete key. Raises KeyError only if the main key is missing."""
        super(AuxValueDict, self).__delitem__(key)
        # Keys that came in through the constructor have no aux entry, so
        # a missing aux entry must not turn a successful delete into an
        # error.
        self.auxilliary.pop(key, None)

    def update(self, *args, **kwargs):
        """Like dict.update(), but clears the auxvalue of every key that is
        written. dict.update() does not go through __setitem__, which would
        leave a stale auxvalue attached to a changed main value."""
        for key, value in dict(*args, **kwargs).items():
            self[key] = value

    def pop(self, key, *default):
        """Like dict.pop(); also discards the auxvalue of the removed key."""
        value = super(AuxValueDict, self).pop(key, *default)
        self.auxilliary.pop(key, None)
        return value

    def popitem(self):
        """Like dict.popitem(); also discards the removed key's auxvalue."""
        key, value = super(AuxValueDict, self).popitem()
        self.auxilliary.pop(key, None)
        return (key, value)

    def clear(self):
        """Remove all keys together with all of their auxvalues."""
        super(AuxValueDict, self).clear()
        self.auxilliary.clear()

    def get_aux(self, key, default=None):
        """Retrieve the auxiliary value. There is no way to distinguish
        between an auxvalue of 'None' and a key that does not have an
        auxvalue, and get_aux() will not raise KeyError when called with a
        missing key."""
        return self.auxilliary.get(key, default)

    def set_with_aux(self, key, value, auxilliary):
        """Set both the main value and the auxiliary value. There is no way
        to distinguish between an auxvalue of 'None' and a key that does not
        have an auxvalue."""
        # bypass our own __setitem__, which would clear the auxvalue
        super(AuxValueDict, self).__setitem__(key, value)
        self.auxilliary[key] = auxilliary