From: david-sarah Date: Fri, 18 Jun 2010 00:02:49 +0000 (-0700) Subject: dirnodes: fix normalization hole where childnames in directories created by nodemaker... X-Git-Tag: trac-4500~7 X-Git-Url: https://git.rkrishnan.org/status?a=commitdiff_plain;h=a9fe3792ded50a26cc8287ab944cd654ec9a0e6a;p=tahoe-lafs%2Ftahoe-lafs.git dirnodes: fix normalization hole where childnames in directories created by nodemaker.create_mutable/immutable_directory would not be normalized. Add a test that we normalize names coming out of a directory. --- diff --git a/src/allmydata/dirnode.py b/src/allmydata/dirnode.py index 53aaddde..45f6c15c 100644 --- a/src/allmydata/dirnode.py +++ b/src/allmydata/dirnode.py @@ -163,7 +163,7 @@ class Adder: precondition(IFilesystemNode.providedBy(child), child) # Strictly speaking this is redundant because we would raise the - # error again in pack_children. + # error again in _pack_normalized_children. child.raise_error() metadata = None @@ -199,9 +199,21 @@ def _encrypt_rw_uri(filenode, rw_uri): # The MAC is not checked by readers in Tahoe >= 1.3.0, but we still # produce it for the sake of older readers. -def pack_children(filenode, children, deep_immutable=False): + +def pack_children(filenode, childrenx, deep_immutable=False): + # initial_children must have metadata (i.e. {} instead of None) + children = {} + for (namex, (node, metadata)) in childrenx.iteritems(): + precondition(isinstance(metadata, dict), + "directory creation requires metadata to be a dict, not None", metadata) + children[normalize(namex)] = (node, metadata) + + return _pack_normalized_children(filenode, children, deep_immutable=deep_immutable) + + +def _pack_normalized_children(filenode, children, deep_immutable=False): """Take a dict that maps: - children[unicode_name] = (IFileSystemNode, metadata_dict) + children[unicode_nfc_name] = (IFileSystemNode, metadata_dict) and pack it into a single string, for use as the contents of the backing file. This is the same format as is returned by _unpack_contents. I also accept an AuxValueDict, in which case I'll use the auxilliary cached data @@ -368,7 +380,7 @@ class DirectoryNode: def _pack_contents(self, children): # expects children in the same format as _unpack_contents - return pack_children(self._node, children) + return _pack_normalized_children(self._node, children) def is_readonly(self): return self._node.is_readonly() diff --git a/src/allmydata/nodemaker.py b/src/allmydata/nodemaker.py index a30efbff..3d0819e8 100644 --- a/src/allmydata/nodemaker.py +++ b/src/allmydata/nodemaker.py @@ -1,7 +1,6 @@ import weakref from zope.interface import implements -from allmydata.util.assertutil import precondition -from allmydata.interfaces import INodeMaker, MustBeDeepImmutableError +from allmydata.interfaces import INodeMaker from allmydata.immutable.filenode import ImmutableFileNode, LiteralFileNode from allmydata.immutable.upload import Data from allmydata.mutable.filenode import MutableFileNode @@ -97,11 +96,6 @@ class NodeMaker: return d def create_new_mutable_directory(self, initial_children={}): - # initial_children must have metadata (i.e. {} instead of None) - for (name, (node, metadata)) in initial_children.iteritems(): - precondition(isinstance(metadata, dict), - "create_new_mutable_directory requires metadata to be a dict, not None", metadata) - node.raise_error() d = self.create_mutable_file(lambda n: pack_children(n, initial_children)) d.addCallback(self._create_dirnode) @@ -110,14 +104,8 @@ class NodeMaker: def create_immutable_directory(self, children, convergence=None): if convergence is None: convergence = self.secret_holder.get_convergence_secret() - for (name, (node, metadata)) in children.iteritems(): - precondition(isinstance(metadata, dict), - "create_immutable_directory requires metadata to be a dict, not None", metadata) - node.raise_error() - if not node.is_allowed_in_immutable_directory(): - raise MustBeDeepImmutableError("%s is not immutable" % (node,), name) n = DummyImmutableFileNode() # writekey=None - packed = pack_children(n, children) + packed = pack_children(n, children, deep_immutable=True) uploadable = Data(packed, convergence) d = self.uploader.upload(uploadable, history=self.history) d.addCallback(lambda results: self.create_from_cap(None, results.uri)) diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py index 6d872d00..52913e6c 100644 --- a/src/allmydata/test/test_dirnode.py +++ b/src/allmydata/test/test_dirnode.py @@ -1,5 +1,6 @@ import time +import unicodedata from zope.interface import implements from twisted.trial import unittest from twisted.internet import defer @@ -261,7 +262,7 @@ class Dirnode(GridTestMixin, unittest.TestCase, bad_kids2 = {one_nfd: (bad_future_node2, {})} d.addCallback(lambda ign: self.shouldFail(MustBeDeepImmutableError, "bad_kids2", - "is not immutable", + "is not allowed in an immutable directory", c.create_immutable_dirnode, bad_kids2)) bad_kids3 = {one_nfd: (nm.create_from_cap(one_uri), None)} @@ -273,13 +274,13 @@ class Dirnode(GridTestMixin, unittest.TestCase, bad_kids4 = {one_nfd: (nm.create_from_cap(mut_write_uri), {})} d.addCallback(lambda ign: self.shouldFail(MustBeDeepImmutableError, "bad_kids4", - "is not immutable", + "is not allowed in an immutable directory", c.create_immutable_dirnode, bad_kids4)) bad_kids5 = {one_nfd: (nm.create_from_cap(mut_read_uri), {})} d.addCallback(lambda ign: self.shouldFail(MustBeDeepImmutableError, "bad_kids5", - "is not immutable", + "is not allowed in an immutable directory", c.create_immutable_dirnode, bad_kids5)) d.addCallback(lambda ign: c.create_immutable_dirnode({})) @@ -336,15 +337,15 @@ class Dirnode(GridTestMixin, unittest.TestCase, bad_kids = {one_nfd: (nm.create_from_cap(mut_write_uri), {})} d.addCallback(lambda ign: self.shouldFail(MustBeDeepImmutableError, "YZ", - "is not immutable", + "is not allowed in an immutable directory", n.create_subdirectory, u"sub2", bad_kids, mutable=False)) return d d.addCallback(_made_parent) return d - def test_spaces_are_stripped_on_the_way_out(self): - self.basedir = "dirnode/Dirnode/test_spaces_are_stripped_on_the_way_out" + def test_directory_representation(self): + self.basedir = "dirnode/Dirnode/test_directory_representation" self.set_up_grid() c = self.g.clients[0] nm = c.nodemaker @@ -352,6 +353,8 @@ class Dirnode(GridTestMixin, unittest.TestCase, # This test checks that any trailing spaces in URIs are retained in the # encoded directory, but stripped when we get them out of the directory. # See ticket #925 for why we want that. + # It also tests that we store child names as UTF-8 NFC, and normalize + # them again when retrieving them. stripped_write_uri = "lafs://from_the_future\t" stripped_read_uri = "lafs://readonly_from_the_future\t" @@ -362,9 +365,13 @@ class Dirnode(GridTestMixin, unittest.TestCase, self.failUnlessEqual(child.get_write_uri(), spacedout_write_uri) self.failUnlessEqual(child.get_readonly_uri(), "ro." + spacedout_read_uri) - kids = {u"child": (child, {})} - d = c.create_dirnode(kids) - + child_dottedi = u"ch\u0131\u0307ld" + + kids_in = {child_dottedi: (child, {}), one_nfd: (child, {})} + kids_out = {child_dottedi: (child, {}), one_nfc: (child, {})} + kids_norm = {u"child": (child, {}), one_nfc: (child, {})} + d = c.create_dirnode(kids_in) + def _created(dn): self.failUnless(isinstance(dn, dirnode.DirectoryNode)) self.failUnless(dn.is_mutable()) @@ -377,7 +384,8 @@ class Dirnode(GridTestMixin, unittest.TestCase, def _check_data(data): # Decode the netstring representation of the directory to check that the - # spaces are retained when the URIs are stored. + # spaces are retained when the URIs are stored, and that the names are stored + # as NFC. position = 0 numkids = 0 while position < len(data): @@ -386,21 +394,42 @@ class Dirnode(GridTestMixin, unittest.TestCase, (name_utf8, ro_uri, rwcapdata, metadata_s), subpos = split_netstring(entry, 4) name = name_utf8.decode("utf-8") rw_uri = self.rootnode._decrypt_rwcapdata(rwcapdata) - self.failUnless(name in kids) - (expected_child, ign) = kids[name] + self.failUnlessIn(name, kids_out) + (expected_child, ign) = kids_out[name] self.failUnlessEqual(rw_uri, expected_child.get_write_uri()) self.failUnlessEqual("ro." + ro_uri, expected_child.get_readonly_uri()) numkids += 1 - self.failUnlessEqual(numkids, 1) - return self.rootnode.list() + self.failUnlessEqual(numkids, len(kids_out)) + return self.rootnode d.addCallback(_check_data) - - # Now when we use the real directory listing code, the trailing spaces - # should have been stripped (and "ro." should have been prepended to the - # ro_uri, since it's unknown). + + # Mock up a hypothetical future version of Unicode that adds a canonical equivalence + # between dotless-i + dot-above, and 'i'. That would actually be prohibited by the + # stability rules, but similar additions involving currently-unassigned characters + # would not be. + old_normalize = unicodedata.normalize + def future_normalize(form, s): + assert form == 'NFC', form + return old_normalize(form, s).replace(u"\u0131\u0307", u"i") + + def _list(node): + unicodedata.normalize = future_normalize + d2 = node.list() + def _undo_mock(res): + unicodedata.normalize = old_normalize + return res + d2.addBoth(_undo_mock) + return d2 + d.addCallback(_list) + def _check_kids(children): - self.failUnlessEqual(set(children.keys()), set([u"child"])) + # Now when we use the real directory listing code, the trailing spaces + # should have been stripped (and "ro." should have been prepended to the + # ro_uri, since it's unknown). Also the dotless-i + dot-above should have been + # normalized to 'i'. + + self.failUnlessEqual(set(children.keys()), set(kids_norm.keys())) child_node, child_metadata = children[u"child"] self.failUnlessEqual(child_node.get_write_uri(), stripped_write_uri) @@ -408,7 +437,7 @@ class Dirnode(GridTestMixin, unittest.TestCase, d.addCallback(_check_kids) d.addCallback(lambda ign: nm.create_from_cap(self.cap.to_string())) - d.addCallback(lambda n: n.list()) + d.addCallback(_list) d.addCallback(_check_kids) # again with dirnode recreated from cap return d