From: david-sarah Date: Wed, 16 Jun 2010 03:14:50 +0000 (-0700) Subject: Provisional patch to NFC-normalize filenames going in and out of Tahoe directories. X-Git-Tag: trac-4500~15 X-Git-Url: https://git.rkrishnan.org/%5B/%5D%20/uri/schema.xhtml?a=commitdiff_plain;h=c8d99b77a32b9bfd5728558ceb03039cba64fc87;p=tahoe-lafs%2Ftahoe-lafs.git Provisional patch to NFC-normalize filenames going in and out of Tahoe directories. --- diff --git a/src/allmydata/dirnode.py b/src/allmydata/dirnode.py index 6150fb23..2ae819e7 100644 --- a/src/allmydata/dirnode.py +++ b/src/allmydata/dirnode.py @@ -1,5 +1,5 @@ -import time, math +import time, math, unicodedata from zope.interface import implements from twisted.internet import defer @@ -16,6 +16,7 @@ from allmydata.check_results import DeepCheckResults, \ DeepCheckAndRepairResults from allmydata.monitor import Monitor from allmydata.util import hashutil, mathutil, base32, log +from allmydata.util.stringutils import quote_output from allmydata.util.assertutil import precondition from allmydata.util.netstring import netstring, split_netstring from allmydata.util.consumer import download_to_data @@ -75,15 +76,17 @@ def update_metadata(metadata, new_metadata, now): return metadata +def normalize(namex): + return unicodedata.normalize('NFC', namex) # TODO: {Deleter,MetadataSetter,Adder}.modify all start by unpacking the # contents and end by repacking them. It might be better to apply them to # the unpacked contents. class Deleter: - def __init__(self, node, name, must_exist=True, must_be_directory=False, must_be_file=False): + def __init__(self, node, namex, must_exist=True, must_be_directory=False, must_be_file=False): self.node = node - self.name = name + self.name = normalize(namex) self.must_exist = must_exist self.must_be_directory = must_be_directory self.must_be_file = must_be_file @@ -109,9 +112,9 @@ class Deleter: class MetadataSetter: - def __init__(self, node, name, metadata, create_readonly_node=None): + def __init__(self, node, namex, metadata, create_readonly_node=None): self.node = node - self.name = name + self.name = normalize(namex) self.metadata = metadata self.create_readonly_node = create_readonly_node @@ -139,20 +142,20 @@ class Adder: if entries is None: entries = {} precondition(isinstance(entries, dict), entries) + # keys of 'entries' may not be normalized. self.entries = entries self.overwrite = overwrite self.create_readonly_node = create_readonly_node - def set_node(self, name, node, metadata): - precondition(isinstance(name, unicode), name) + def set_node(self, namex, node, metadata): precondition(IFilesystemNode.providedBy(node), node) - self.entries[name] = (node, metadata) + self.entries[namex] = (node, metadata) def modify(self, old_contents, servermap, first_time): children = self.node._unpack_contents(old_contents) now = time.time() - for (name, (child, new_metadata)) in self.entries.iteritems(): - precondition(isinstance(name, unicode), name) + for (namex, (child, new_metadata)) in self.entries.iteritems(): + name = normalize(namex) precondition(IFilesystemNode.providedBy(child), child) # Strictly speaking this is redundant because we would raise the @@ -162,10 +165,10 @@ class Adder: metadata = None if name in children: if not self.overwrite: - raise ExistingChildError("child '%s' already exists" % name) + raise ExistingChildError("child %s already exists" % quote_output(name, encoding='utf-8')) if self.overwrite == "only-files" and IDirectoryNode.providedBy(children[name][0]): - raise ExistingChildError("child '%s' already exists" % name) + raise ExistingChildError("child %s already exists" % quote_output(name, encoding='utf-8')) metadata = children[name][1].copy() metadata = update_metadata(metadata, new_metadata, now) @@ -176,6 +179,7 @@ class Adder: new_contents = self.node._pack_contents(children) return new_contents + def _encrypt_rw_uri(filenode, rw_uri): assert isinstance(rw_uri, str) writekey = filenode.get_writekey() @@ -212,7 +216,8 @@ def pack_children(filenode, children, deep_immutable=False): (child, metadata) = children[name] child.raise_error() if deep_immutable and not child.is_allowed_in_immutable_directory(): - raise MustBeDeepImmutableError("child '%s' is not allowed in an immutable directory" % (name,), name) + raise MustBeDeepImmutableError("child %s is not allowed in an immutable directory" % + quote_output(name, encoding='utf-8'), name) if has_aux: entry = children.get_aux(name) if not entry: @@ -284,6 +289,7 @@ class DirectoryNode: return plaintext def _create_and_validate_node(self, rw_uri, ro_uri, name): + # name is just for error reporting node = self._nodemaker.create_from_cap(rw_uri, ro_uri, deep_immutable=not self.is_mutable(), name=name) @@ -291,6 +297,7 @@ class DirectoryNode: return node def _create_readonly_node(self, node, name): + # name is just for error reporting if not node.is_unknown() and node.is_readonly(): return node return self._create_and_validate_node(None, node.get_readonly_uri(), name=name) @@ -299,7 +306,8 @@ class DirectoryNode: # the directory is serialized as a list of netstrings, one per child. # Each child is serialized as a list of four netstrings: (name, ro_uri, # rwcapdata, metadata), in which the name, ro_uri, metadata are in - # cleartext. The 'name' is UTF-8 encoded. The rwcapdata is formatted as: + # cleartext. The 'name' is UTF-8 encoded, and should be normalized to NFC. + # The rwcapdata is formatted as: # pack("16ss32s", iv, AES(H(writekey+iv), plaintext_rw_uri), mac) assert isinstance(data, str), (repr(data), type(data)) # an empty directory is serialized as an empty string @@ -312,10 +320,15 @@ class DirectoryNode: while position < len(data): entries, position = split_netstring(data, 1, position) entry = entries[0] - (name_utf8, ro_uri, rwcapdata, metadata_s), subpos = split_netstring(entry, 4) + (namex_utf8, ro_uri, rwcapdata, metadata_s), subpos = split_netstring(entry, 4) if not mutable and len(rwcapdata) > 0: raise ValueError("the rwcapdata field of a dirnode in an immutable directory was not empty") - name = name_utf8.decode("utf-8") + + # A name containing characters that are unassigned in one version of Unicode might + # not be normalized wrt a later version. Therefore we normalize names going both in + # and out of directories. + name = normalize(namex_utf8.decode("utf-8")) + rw_uri = "" if writeable: rw_uri = self._decrypt_rwcapdata(rwcapdata) @@ -338,12 +351,12 @@ class DirectoryNode: children[name] = (child, metadata) children.set_with_aux(name, (child, metadata), auxilliary=entry) else: - log.msg(format="mutable cap for child '%(name)s' unpacked from an immutable directory", - name=name_utf8, + log.msg(format="mutable cap for child %(name)s unpacked from an immutable directory", + name=quote_output(name, encoding='utf-8'), facility="tahoe.webish", level=log.UNUSUAL) except CapConstraintError, e: - log.msg(format="unmet constraint on cap for child '%(name)s' unpacked from a directory:\n" - "%(message)s", message=e.args[0], name=name_utf8, + log.msg(format="unmet constraint on cap for child %(name)s unpacked from a directory:\n" + "%(message)s", message=e.args[0], name=quote_output(name, encoding='utf-8'), facility="tahoe.webish", level=log.UNUSUAL) return children @@ -406,10 +419,10 @@ class DirectoryNode: name to a tuple of (IFilesystemNode, metadata).""" return self._read() - def has_child(self, name): + def has_child(self, namex): """I return a Deferred that fires with a boolean, True if there exists a child of the given name, False if not.""" - assert isinstance(name, unicode) + name = normalize(namex) d = self._read() d.addCallback(lambda children: children.has_key(name)) return d @@ -426,31 +439,31 @@ class DirectoryNode: raise NoSuchChildError(name) return child - def get(self, name): + def get(self, namex): """I return a Deferred that fires with the named child node, which is an IFilesystemNode.""" - assert isinstance(name, unicode) + name = normalize(namex) d = self._read() d.addCallback(self._get, name) return d - def get_child_and_metadata(self, name): + def get_child_and_metadata(self, namex): """I return a Deferred that fires with the (node, metadata) pair for the named child. The node is an IFilesystemNode, and the metadata is a dictionary.""" - assert isinstance(name, unicode) + name = normalize(namex) d = self._read() d.addCallback(self._get_with_metadata, name) return d - def get_metadata_for(self, name): - assert isinstance(name, unicode) + def get_metadata_for(self, namex): + name = normalize(namex) d = self._read() d.addCallback(lambda children: children[name][1]) return d - def set_metadata_for(self, name, metadata): - assert isinstance(name, unicode) + def set_metadata_for(self, namex, metadata): + name = normalize(namex) if self.is_readonly(): return defer.fail(NotWriteableError()) assert isinstance(metadata, dict) @@ -460,7 +473,7 @@ class DirectoryNode: d.addCallback(lambda res: self) return d - def get_child_at_path(self, path): + def get_child_at_path(self, pathx): """Transform a child path into an IFilesystemNode. I perform a recursive series of 'get' operations to find the named @@ -470,42 +483,41 @@ class DirectoryNode: The path can be either a single string (slash-separated) or a list of path-name elements. """ - d = self.get_child_and_metadata_at_path(path) + d = self.get_child_and_metadata_at_path(pathx) d.addCallback(lambda (node, metadata): node) return d - def get_child_and_metadata_at_path(self, path): + def get_child_and_metadata_at_path(self, pathx): """Transform a child path into an IFilesystemNode and a metadata dictionary from the last edge that was traversed. """ - if not path: + if not pathx: return defer.succeed((self, {})) - if isinstance(path, (list, tuple)): + if isinstance(pathx, (list, tuple)): pass else: - path = path.split("/") - for p in path: - assert isinstance(p, unicode) - childname = path[0] - remaining_path = path[1:] - if remaining_path: - d = self.get(childname) + pathx = pathx.split("/") + for p in pathx: + assert isinstance(p, unicode), p + childnamex = pathx[0] + remaining_pathx = pathx[1:] + if remaining_pathx: + d = self.get(childnamex) d.addCallback(lambda node: - node.get_child_and_metadata_at_path(remaining_path)) + node.get_child_and_metadata_at_path(remaining_pathx)) return d - d = self.get_child_and_metadata(childname) + d = self.get_child_and_metadata(childnamex) return d - def set_uri(self, name, writecap, readcap, metadata=None, overwrite=True): - precondition(isinstance(name, unicode), name) + def set_uri(self, namex, writecap, readcap, metadata=None, overwrite=True): precondition(isinstance(writecap, (str,type(None))), writecap) precondition(isinstance(readcap, (str,type(None))), readcap) # We now allow packing unknown nodes, provided they are valid # for this type of directory. - child_node = self._create_and_validate_node(writecap, readcap, name) - d = self.set_node(name, child_node, metadata, overwrite) + child_node = self._create_and_validate_node(writecap, readcap, namex) + d = self.set_node(namex, child_node, metadata, overwrite) d.addCallback(lambda res: child_node) return d @@ -513,8 +525,8 @@ class DirectoryNode: # this takes URIs a = Adder(self, overwrite=overwrite, create_readonly_node=self._create_readonly_node) - for (name, e) in entries.iteritems(): - assert isinstance(name, unicode) + for (namex, e) in entries.iteritems(): + assert isinstance(namex, unicode), namex if len(e) == 2: writecap, readcap = e metadata = None @@ -526,13 +538,13 @@ class DirectoryNode: # We now allow packing unknown nodes, provided they are valid # for this type of directory. - child_node = self._create_and_validate_node(writecap, readcap, name) - a.set_node(name, child_node, metadata) + child_node = self._create_and_validate_node(writecap, readcap, namex) + a.set_node(namex, child_node, metadata) d = self._node.modify(a.modify) d.addCallback(lambda ign: self) return d - def set_node(self, name, child, metadata=None, overwrite=True): + def set_node(self, namex, child, metadata=None, overwrite=True): """I add a child at the specific name. I return a Deferred that fires when the operation finishes. This Deferred will fire with the child node that was just added. I will replace any existing child of the @@ -545,11 +557,10 @@ class DirectoryNode: if self.is_readonly(): return defer.fail(NotWriteableError()) - assert isinstance(name, unicode) assert IFilesystemNode.providedBy(child), child a = Adder(self, overwrite=overwrite, create_readonly_node=self._create_readonly_node) - a.set_node(name, child, metadata) + a.set_node(namex, child, metadata) d = self._node.modify(a.modify) d.addCallback(lambda res: child) return d @@ -565,12 +576,12 @@ class DirectoryNode: return d - def add_file(self, name, uploadable, metadata=None, overwrite=True): + def add_file(self, namex, uploadable, metadata=None, overwrite=True): """I upload a file (using the given IUploadable), then attach the resulting FileNode to the directory at the given name. I return a Deferred that fires (with the IFileNode of the uploaded file) when the operation completes.""" - assert isinstance(name, unicode) + name = normalize(namex) if self.is_readonly(): return defer.fail(NotWriteableError()) d = self._uploader.upload(uploadable) @@ -580,21 +591,20 @@ class DirectoryNode: self.set_node(name, node, metadata, overwrite)) return d - def delete(self, name, must_exist=True, must_be_directory=False, must_be_file=False): + def delete(self, namex, must_exist=True, must_be_directory=False, must_be_file=False): """I remove the child at the specific name. I return a Deferred that fires (with the node just removed) when the operation finishes.""" - assert isinstance(name, unicode) if self.is_readonly(): return defer.fail(NotWriteableError()) - deleter = Deleter(self, name, must_exist=must_exist, + deleter = Deleter(self, namex, must_exist=must_exist, must_be_directory=must_be_directory, must_be_file=must_be_file) d = self._node.modify(deleter.modify) d.addCallback(lambda res: deleter.old_child) return d - def create_subdirectory(self, name, initial_children={}, overwrite=True, + def create_subdirectory(self, namex, initial_children={}, overwrite=True, mutable=True, metadata=None): - assert isinstance(name, unicode) + name = normalize(namex) if self.is_readonly(): return defer.fail(NotWriteableError()) if mutable: @@ -611,21 +621,22 @@ class DirectoryNode: d.addCallback(_created) return d - def move_child_to(self, current_child_name, new_parent, - new_child_name=None, overwrite=True): + def move_child_to(self, current_child_namex, new_parent, + new_child_namex=None, overwrite=True): """I take one of my children and move them to a new parent. The child is referenced by name. On the new parent, the child will live under 'new_child_name', which defaults to 'current_child_name'. I return a Deferred that fires when the operation finishes.""" - assert isinstance(current_child_name, unicode) + if self.is_readonly() or new_parent.is_readonly(): return defer.fail(NotWriteableError()) - if new_child_name is None: - new_child_name = current_child_name - assert isinstance(new_child_name, unicode) + + current_child_name = normalize(current_child_namex) + if new_child_namex is None: + new_child_namex = current_child_name d = self.get(current_child_name) def sn(child): - return new_parent.set_node(new_child_name, child, + return new_parent.set_node(new_child_namex, child, overwrite=overwrite) d.addCallback(sn) d.addCallback(lambda child: self.delete(current_child_name))