dirnodes: fix normalization hole where childnames in directories created by nodemaker...
authordavid-sarah <david-sarah@jacaranda.org>
Fri, 18 Jun 2010 00:02:49 +0000 (17:02 -0700)
committerdavid-sarah <david-sarah@jacaranda.org>
Fri, 18 Jun 2010 00:02:49 +0000 (17:02 -0700)
src/allmydata/dirnode.py
src/allmydata/nodemaker.py
src/allmydata/test/test_dirnode.py

index 53aaddded22b479deeecf5fe923c72ed371925a8..45f6c15c6eb8bc2832a1927174892fbe537aa7cc 100644 (file)
@@ -163,7 +163,7 @@ class Adder:
             precondition(IFilesystemNode.providedBy(child), child)
 
             # Strictly speaking this is redundant because we would raise the
-            # error again in pack_children.
+            # error again in _pack_normalized_children.
             child.raise_error()
 
             metadata = None
@@ -199,9 +199,21 @@ def _encrypt_rw_uri(filenode, rw_uri):
     # The MAC is not checked by readers in Tahoe >= 1.3.0, but we still
     # produce it for the sake of older readers.
 
-def pack_children(filenode, children, deep_immutable=False):
+
+def pack_children(filenode, childrenx, deep_immutable=False):
+    # initial_children must have metadata (i.e. {} instead of None)
+    children = {}
+    for (namex, (node, metadata)) in childrenx.iteritems():
+        precondition(isinstance(metadata, dict),
+                     "directory creation requires metadata to be a dict, not None", metadata)
+        children[normalize(namex)] = (node, metadata)
+
+    return _pack_normalized_children(filenode, children, deep_immutable=deep_immutable)
+
+
+def _pack_normalized_children(filenode, children, deep_immutable=False):
     """Take a dict that maps:
-         children[unicode_name] = (IFileSystemNode, metadata_dict)
+         children[unicode_nfc_name] = (IFileSystemNode, metadata_dict)
     and pack it into a single string, for use as the contents of the backing
     file. This is the same format as is returned by _unpack_contents. I also
     accept an AuxValueDict, in which case I'll use the auxilliary cached data
@@ -368,7 +380,7 @@ class DirectoryNode:
 
     def _pack_contents(self, children):
         # expects children in the same format as _unpack_contents
-        return pack_children(self._node, children)
+        return _pack_normalized_children(self._node, children)
 
     def is_readonly(self):
         return self._node.is_readonly()
index a30efbffdb209f0c67146da4237ac245061c7fe5..3d0819e82b640a50825a40b8adf5a8a7200ce95e 100644 (file)
@@ -1,7 +1,6 @@
 import weakref
 from zope.interface import implements
-from allmydata.util.assertutil import precondition
-from allmydata.interfaces import INodeMaker, MustBeDeepImmutableError
+from allmydata.interfaces import INodeMaker
 from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode
 from allmydata.immutable.upload import Data
 from allmydata.mutable.filenode import MutableFileNode
@@ -97,11 +96,6 @@ class NodeMaker:
         return d
 
     def create_new_mutable_directory(self, initial_children={}):
-        # initial_children must have metadata (i.e. {} instead of None)
-        for (name, (node, metadata)) in initial_children.iteritems():
-            precondition(isinstance(metadata, dict),
-                         "create_new_mutable_directory requires metadata to be a dict, not None", metadata)
-            node.raise_error()
         d = self.create_mutable_file(lambda n:
                                      pack_children(n, initial_children))
         d.addCallback(self._create_dirnode)
@@ -110,14 +104,8 @@ class NodeMaker:
     def create_immutable_directory(self, children, convergence=None):
         if convergence is None:
             convergence = self.secret_holder.get_convergence_secret()
-        for (name, (node, metadata)) in children.iteritems():
-            precondition(isinstance(metadata, dict),
-                         "create_immutable_directory requires metadata to be a dict, not None", metadata)
-            node.raise_error()
-            if not node.is_allowed_in_immutable_directory():
-                raise MustBeDeepImmutableError("%s is not immutable" % (node,), name)
         n = DummyImmutableFileNode() # writekey=None
-        packed = pack_children(n, children)
+        packed = pack_children(n, children, deep_immutable=True)
         uploadable = Data(packed, convergence)
         d = self.uploader.upload(uploadable, history=self.history)
         d.addCallback(lambda results: self.create_from_cap(None, results.uri))
index 6d872d000da09b0bd5ca32f5e32ec1dd8007aaf4..52913e6cbac39d0886dee1faa441d39674b5a395 100644 (file)
@@ -1,5 +1,6 @@
 
 import time
+import unicodedata
 from zope.interface import implements
 from twisted.trial import unittest
 from twisted.internet import defer
@@ -261,7 +262,7 @@ class Dirnode(GridTestMixin, unittest.TestCase,
         bad_kids2 = {one_nfd: (bad_future_node2, {})}
         d.addCallback(lambda ign:
                       self.shouldFail(MustBeDeepImmutableError, "bad_kids2",
-                                      "is not immutable",
+                                      "is not allowed in an immutable directory",
                                       c.create_immutable_dirnode,
                                       bad_kids2))
         bad_kids3 = {one_nfd: (nm.create_from_cap(one_uri), None)}
@@ -273,13 +274,13 @@ class Dirnode(GridTestMixin, unittest.TestCase,
         bad_kids4 = {one_nfd: (nm.create_from_cap(mut_write_uri), {})}
         d.addCallback(lambda ign:
                       self.shouldFail(MustBeDeepImmutableError, "bad_kids4",
-                                      "is not immutable",
+                                      "is not allowed in an immutable directory",
                                       c.create_immutable_dirnode,
                                       bad_kids4))
         bad_kids5 = {one_nfd: (nm.create_from_cap(mut_read_uri), {})}
         d.addCallback(lambda ign:
                       self.shouldFail(MustBeDeepImmutableError, "bad_kids5",
-                                      "is not immutable",
+                                      "is not allowed in an immutable directory",
                                       c.create_immutable_dirnode,
                                       bad_kids5))
         d.addCallback(lambda ign: c.create_immutable_dirnode({}))
@@ -336,15 +337,15 @@ class Dirnode(GridTestMixin, unittest.TestCase,
             bad_kids = {one_nfd: (nm.create_from_cap(mut_write_uri), {})}
             d.addCallback(lambda ign:
                           self.shouldFail(MustBeDeepImmutableError, "YZ",
-                                          "is not immutable",
+                                          "is not allowed in an immutable directory",
                                           n.create_subdirectory,
                                           u"sub2", bad_kids, mutable=False))
             return d
         d.addCallback(_made_parent)
         return d
 
-    def test_spaces_are_stripped_on_the_way_out(self):
-        self.basedir = "dirnode/Dirnode/test_spaces_are_stripped_on_the_way_out"
+    def test_directory_representation(self):
+        self.basedir = "dirnode/Dirnode/test_directory_representation"
         self.set_up_grid()
         c = self.g.clients[0]
         nm = c.nodemaker
@@ -352,6 +353,8 @@ class Dirnode(GridTestMixin, unittest.TestCase,
         # This test checks that any trailing spaces in URIs are retained in the
         # encoded directory, but stripped when we get them out of the directory.
         # See ticket #925 for why we want that.
+        # It also tests that we store child names as UTF-8 NFC, and normalize
+        # them again when retrieving them.
 
         stripped_write_uri = "lafs://from_the_future\t"
         stripped_read_uri = "lafs://readonly_from_the_future\t"
@@ -362,9 +365,13 @@ class Dirnode(GridTestMixin, unittest.TestCase,
         self.failUnlessEqual(child.get_write_uri(), spacedout_write_uri)
         self.failUnlessEqual(child.get_readonly_uri(), "ro." + spacedout_read_uri)
 
-        kids = {u"child": (child, {})}
-        d = c.create_dirnode(kids)
-        
+        child_dottedi = u"ch\u0131\u0307ld"
+
+        kids_in   = {child_dottedi: (child, {}), one_nfd: (child, {})}
+        kids_out  = {child_dottedi: (child, {}), one_nfc: (child, {})}
+        kids_norm = {u"child":      (child, {}), one_nfc: (child, {})}
+        d = c.create_dirnode(kids_in)
+
         def _created(dn):
             self.failUnless(isinstance(dn, dirnode.DirectoryNode))
             self.failUnless(dn.is_mutable())
@@ -377,7 +384,8 @@ class Dirnode(GridTestMixin, unittest.TestCase,
 
         def _check_data(data):
             # Decode the netstring representation of the directory to check that the
-            # spaces are retained when the URIs are stored.
+            # spaces are retained when the URIs are stored, and that the names are stored
+            # as NFC.
             position = 0
             numkids = 0
             while position < len(data):
@@ -386,21 +394,42 @@ class Dirnode(GridTestMixin, unittest.TestCase,
                 (name_utf8, ro_uri, rwcapdata, metadata_s), subpos = split_netstring(entry, 4)
                 name = name_utf8.decode("utf-8")
                 rw_uri = self.rootnode._decrypt_rwcapdata(rwcapdata)
-                self.failUnless(name in kids)
-                (expected_child, ign) = kids[name]
+                self.failUnlessIn(name, kids_out)
+                (expected_child, ign) = kids_out[name]
                 self.failUnlessEqual(rw_uri, expected_child.get_write_uri())
                 self.failUnlessEqual("ro." + ro_uri, expected_child.get_readonly_uri())
                 numkids += 1
 
-            self.failUnlessEqual(numkids, 1)
-            return self.rootnode.list()
+            self.failUnlessEqual(numkids, len(kids_out))
+            return self.rootnode
         d.addCallback(_check_data)
-        
-        # Now when we use the real directory listing code, the trailing spaces
-        # should have been stripped (and "ro." should have been prepended to the
-        # ro_uri, since it's unknown).
+
+        # Mock up a hypothetical future version of Unicode that adds a canonical equivalence
+        # between dotless-i + dot-above, and 'i'. That would actually be prohibited by the
+        # stability rules, but similar additions involving currently-unassigned characters
+        # would not be.
+        old_normalize = unicodedata.normalize
+        def future_normalize(form, s):
+            assert form == 'NFC', form
+            return old_normalize(form, s).replace(u"\u0131\u0307", u"i")
+
+        def _list(node):
+            unicodedata.normalize = future_normalize
+            d2 = node.list()
+            def _undo_mock(res):
+                unicodedata.normalize = old_normalize
+                return res
+            d2.addBoth(_undo_mock)
+            return d2
+        d.addCallback(_list)
+
         def _check_kids(children):
-            self.failUnlessEqual(set(children.keys()), set([u"child"]))
+            # Now when we use the real directory listing code, the trailing spaces
+            # should have been stripped (and "ro." should have been prepended to the
+            # ro_uri, since it's unknown). Also the dotless-i + dot-above should have been
+            # normalized to 'i'.
+
+            self.failUnlessEqual(set(children.keys()), set(kids_norm.keys()))
             child_node, child_metadata = children[u"child"]
 
             self.failUnlessEqual(child_node.get_write_uri(), stripped_write_uri)
@@ -408,7 +437,7 @@ class Dirnode(GridTestMixin, unittest.TestCase,
         d.addCallback(_check_kids)
 
         d.addCallback(lambda ign: nm.create_from_cap(self.cap.to_string()))
-        d.addCallback(lambda n: n.list())
+        d.addCallback(_list)
         d.addCallback(_check_kids)  # again with dirnode recreated from cap
         return d