From ea373de042c49ba10abca1c9b30003a1a19c907f Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@lothar.com>
Date: Sat, 17 Oct 2009 11:00:05 -0700
Subject: [PATCH] move dirnode.CachingDict to dictutil.AuxValueDict, generalize
 method names, improve tests. Let dirnode _pack_contents accept either dict or
 AuxValueDict.

---
 src/allmydata/dirnode.py           | 36 +++++++----------------
 src/allmydata/test/test_dirnode.py | 15 ----------
 src/allmydata/test/test_util.py    | 47 ++++++++++++++++++++++++++++++
 src/allmydata/util/dictutil.py     | 40 +++++++++++++++++++++++++
 4 files changed, 98 insertions(+), 40 deletions(-)

diff --git a/src/allmydata/dirnode.py b/src/allmydata/dirnode.py
index dfb2f1b5..66029f26 100644
--- a/src/allmydata/dirnode.py
+++ b/src/allmydata/dirnode.py
@@ -21,23 +21,7 @@ from allmydata.util.netstring import netstring, split_netstring
 from allmydata.uri import DirectoryURI, ReadonlyDirectoryURI, \
      LiteralFileURI, from_string
 from pycryptopp.cipher.aes import AES
-
-class CachingDict(dict):
-    def __init__(self, *args):
-        super(CachingDict, self).__init__(*args)
-        self.serialized = {}
-
-    def __setitem__(self, k, v):
-        super(CachingDict, self).__setitem__(k, v)
-        self.serialized[k] = None
-
-    def get_both_items(self, k):
-        return (self.serialized.setdefault(k, None),
-                super(CachingDict, self).__getitem__(k))
-
-    def set_both_items(self, key, serialized, t):
-        self.serialized[key] = serialized
-        super(CachingDict, self).__setitem__(key, t)
+from allmydata.util.dictutil import AuxValueDict
 
 class Deleter:
     def __init__(self, node, name, must_exist=True):
@@ -209,9 +193,9 @@ class DirectoryNode:
         assert isinstance(data, str), (repr(data), type(data))
         # an empty directory is serialized as an empty string
         if data == "":
-            return CachingDict()
+            return AuxValueDict()
         writeable = not self.is_readonly()
-        children = CachingDict()
+        children = AuxValueDict()
         position = 0
         while position < len(data):
             entries, position = split_netstring(data, 1, position)
@@ -228,18 +212,20 @@ class DirectoryNode:
             child = self._create_node(rwcap, rocap)
             metadata = simplejson.loads(metadata_s)
             assert isinstance(metadata, dict)
-            children.set_both_items(name, entry, (child, metadata))
+            children.set_with_aux(name, (child, metadata), auxilliary=entry)
         return children
 
     def _pack_contents(self, children):
         # expects children in the same format as _unpack_contents
-        assert isinstance(children, CachingDict)
+        has_aux = isinstance(children, AuxValueDict)
         entries = []
         for name in sorted(children.keys()):
-            entry, metadata = children.get_both_items(name)
-            if entry == None:
-                child, metadata = metadata
-                assert isinstance(name, unicode)
+            assert isinstance(name, unicode)
+            entry = None
+            if has_aux:
+                entry = children.get_aux(name)
+            if not entry:
+                child, metadata = children.get(name)
                 assert IFilesystemNode.providedBy(child), (name,child)
                 assert isinstance(metadata, dict)
                 rwcap = child.get_uri() # might be RO if the child is not writeable
diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py
index 4330ce9e..d1c5881e 100644
--- a/src/allmydata/test/test_dirnode.py
+++ b/src/allmydata/test/test_dirnode.py
@@ -776,21 +776,6 @@ class Packing(unittest.TestCase):
         self.failUnlessEqual(file1_rwcap,
                              children[u'file1'][0].get_uri())
 
-    def test_caching_dict(self):
-        d = dirnode.CachingDict()
-        d.set_both_items("test", "test2", ("test3", "test4"))
-        cached, value = d.get_both_items("test")
-
-        self.failUnlessEqual(cached, "test2")
-        self.failUnlessEqual(value, ("test3", "test4"))
-
-        d['test'] = ("test3", "test2")
-
-        cached, value = d.get_both_items("test")
-
-        self.failUnlessEqual(cached, None)
-        self.failUnlessEqual(value, ("test3", "test2"))
-
 class FakeMutableFile:
     counter = 0
     def __init__(self, initial_contents=""):
diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py
index 1aef24ed..c7cdd04a 100644
--- a/src/allmydata/test/test_util.py
+++ b/src/allmydata/test/test_util.py
@@ -1274,6 +1274,53 @@ class DictUtil(unittest.TestCase):
         self.failUnlessEqual(x, "b")
         self.failUnlessEqual(d.items(), [("c", 1), ("a", 3)])
 
+    def test_auxdict(self):
+        d = dictutil.AuxValueDict()
+        # we put the serialized form in the auxdata
+        d.set_with_aux("key", ("filecap", "metadata"), "serialized")
+        # values read back separately; get()/get_aux() default on a missing key
+        self.failUnlessEqual(d.keys(), ["key"])
+        self.failUnlessEqual(d["key"], ("filecap", "metadata"))
+        self.failUnlessEqual(d.get_aux("key"), "serialized")
+        def _get_missing(key):
+            return d[key]
+        self.failUnlessRaises(KeyError, _get_missing, "nonkey")
+        self.failUnlessEqual(d.get("nonkey"), None)
+        self.failUnlessEqual(d.get("nonkey", "nonvalue"), "nonvalue")
+        self.failUnlessEqual(d.get_aux("nonkey"), None)
+        self.failUnlessEqual(d.get_aux("nonkey", "nonvalue"), "nonvalue")
+        # plain assignment replaces the main value and clears the auxvalue
+        d["key"] = ("filecap2", "metadata2")
+        self.failUnlessEqual(d["key"], ("filecap2", "metadata2"))
+        self.failUnlessEqual(d.get_aux("key"), None)
+        # deleting a key drops its auxvalue too; re-adding it starts with none
+        d.set_with_aux("key2", "value2", "aux2")
+        self.failUnlessEqual(sorted(d.keys()), ["key", "key2"])
+        del d["key2"]
+        self.failUnlessEqual(d.keys(), ["key"])
+        self.failIf("key2" in d)
+        self.failUnlessRaises(KeyError, _get_missing, "key2")
+        self.failUnlessEqual(d.get("key2"), None)
+        self.failUnlessEqual(d.get_aux("key2"), None)
+        d["key2"] = "newvalue2"
+        self.failUnlessEqual(d.get("key2"), "newvalue2")
+        self.failUnlessEqual(d.get_aux("key2"), None)
+        # every dict-style constructor form starts with no auxvalues: mapping,
+        d = dictutil.AuxValueDict({1:2,3:4})
+        self.failUnlessEqual(sorted(d.keys()), [1,3])
+        self.failUnlessEqual(d[1], 2)
+        self.failUnlessEqual(d.get_aux(1), None)
+        # sequence of pairs,
+        d = dictutil.AuxValueDict([ (1,2), (3,4) ])
+        self.failUnlessEqual(sorted(d.keys()), [1,3])
+        self.failUnlessEqual(d[1], 2)
+        self.failUnlessEqual(d.get_aux(1), None)
+        # and keyword arguments
+        d = dictutil.AuxValueDict(one=1, two=2)
+        self.failUnlessEqual(sorted(d.keys()), ["one","two"])
+        self.failUnlessEqual(d["one"], 1)
+        self.failUnlessEqual(d.get_aux("one"), None)
+
 class Pipeline(unittest.TestCase):
     def pause(self, *args, **kwargs):
         d = defer.Deferred()
diff --git a/src/allmydata/util/dictutil.py b/src/allmydata/util/dictutil.py
index 2eee85a6..3dc815b3 100644
--- a/src/allmydata/util/dictutil.py
+++ b/src/allmydata/util/dictutil.py
@@ -605,3 +605,43 @@ class ValueOrderedDict:
         le = self.l.pop(i)
         del self.d[le[1]]
         return le[1]
+
+class AuxValueDict(dict):
+    """I behave like a regular dict, but each key is associated with two
+    values: the main value, and an auxiliary one. Setting the main value
+    (with the usual d[key]=value) clears the auxvalue. You can set both main
+    and auxvalue at the same time, and can retrieve the values separately.
+
+    The main use case is a dictionary that represents unpacked child values
+    for a directory node, where a common pattern is to modify one or more
+    children and then pass the dict back to a packing function. The original
+    packed representation can be cached in the auxvalue, and the packing
+    function can use it directly on all unmodified children. On large
+    directories with a complex packing function, this can save considerable
+    time."""
+
+    def __init__(self, *args, **kwargs):
+        super(AuxValueDict, self).__init__(*args, **kwargs)
+        self.auxilliary = {} # initial items start with no auxvalue entry
+
+    def __setitem__(self, key, value):
+        super(AuxValueDict, self).__setitem__(key, value)
+        self.auxilliary[key] = None # clear the auxvalue
+
+    def __delitem__(self, key):
+        super(AuxValueDict, self).__delitem__(key) # KeyError if key is absent
+        self.auxilliary.pop(key, None) # constructor-seeded keys have no entry
+
+    def get_aux(self, key, default=None):
+        """Retrieve the auxiliary value. There is no way to distinguish
+        between an auxvalue of 'None' and a key that does not have an
+        auxvalue, and get_aux() will not raise KeyError when called with a
+        missing key."""
+        return self.auxilliary.get(key, default)
+
+    def set_with_aux(self, key, value, auxilliary):
+        """Set both the main value and the auxiliary value. There is no way
+        to distinguish between an auxvalue of 'None' and a key that does not
+        have an auxvalue."""
+        super(AuxValueDict, self).__setitem__(key, value)
+        self.auxilliary[key] = auxilliary
-- 
2.45.2