From: Brian Warner Date: Sat, 20 Jan 2007 06:22:39 +0000 (-0700) Subject: checkpont more filetree stuff X-Git-Tag: tahoe_v0.1.0-0-UNSTABLE~349 X-Git-Url: https://git.rkrishnan.org/pf/content/en/seg/-?a=commitdiff_plain;h=4a0f8bc110b2b8a74d3375d07226c5ab9470b73f;p=tahoe-lafs%2Ftahoe-lafs.git checkpont more filetree stuff --- diff --git a/src/allmydata/filetree/basenode.py b/src/allmydata/filetree/basenode.py new file mode 100644 index 00000000..5c73bbc4 --- /dev/null +++ b/src/allmydata/filetree/basenode.py @@ -0,0 +1,16 @@ + +from zope.interface import implements +from allmydata.filetree.interfaces import INode + +class BaseURINode(object): + implements(INode) + prefix = None # must be set by subclass + + def is_directory(self): + return False + def serialize_node(self): + return "%s:%s" % (self.prefix, self.uri) + def populate_node(self, data, node_maker): + assert data.startswith(self.prefix + ":") + self.uri = data[len(self.prefix)+1:] + diff --git a/src/allmydata/filetree/directory.py b/src/allmydata/filetree/directory.py index 096f98e3..fe1533f5 100644 --- a/src/allmydata/filetree/directory.py +++ b/src/allmydata/filetree/directory.py @@ -1,11 +1,11 @@ from zope.interface import implements -from allmydata.filetree.interfaces import (INode, - IDirectoryNode, - ISubTree, - ICHKDirectoryNode, ISSKDirectoryNode, - NoSuchChildError, - ) +from allmydata.filetree.interfaces import ( + INode, IDirectoryNode, ISubTree, + ICHKDirectoryNode, ISSKDirectoryNode, + NoSuchChildError, + ) +from allmydata.filetree.basenode import BaseURINode from allmydata import download from allmydata.util import bencode @@ -17,42 +17,42 @@ from allmydata.util import bencode # each time the vdrive changes, update the local drive to match, and # vice versa. - -def to_node(spec): - # TODO - pass -def to_spec(node): - # TODO - pass - +# from the itertools 'recipes' page +from itertools import izip, tee +def pairwise(iterable): + "s -> (s0,s1), (s1,s2), (s2, s3), ..." 
+ a, b = tee(iterable) + try: + b.next() + except StopIteration: + pass + return izip(a, b) class SubTreeNode: implements(INode, IDirectoryNode) def __init__(self, tree): self.enclosing_tree = tree - # subdirectory_node_children maps child name to another SubTreeNode - # instance. This is only for internal directory nodes. All other - # nodes are listed in child_specifications instead. - self.subdirectory_node_children = {} - # child_specifications maps child name to a specification tuple which - # describes how to obtain the actual child. For example, if "foo.jpg" - # in this node represents a CHK-encoded FILE with a uri of "fooURI", - # then self.child_specifications["foo.jpg"] = ("CHKFILE","fooURI") - self.child_specifications = {} + self.children = {} +# # subdirectory_node_children maps child name to another SubTreeNode +# # instance. This is only for internal directory nodes. All other +# # nodes are listed in child_specifications instead. +# self.subdirectory_node_children = {} +# # child_specifications maps child name to a specification tuple which +# # describes how to obtain the actual child. 
For example, if "foo.jpg" +# # in this node represents a CHK-encoded FILE with a uri of "fooURI", +# # then self.child_specifications["foo.jpg"] = ("CHKFILE","fooURI") +# self.child_specifications = {} def is_directory(self): return True def list(self): - return sorted(self.subdirectory_node_children.keys() + - self.child_specifications.keys()) + return sorted(self.children.keys()) def get(self, childname): - if childname in self.subdirectory_node_children: - return self.subdirectory_node_children[childname] - elif childname in self.child_specifications: - return to_node(self.child_specifications[childname]) + if childname in self.children: + return self.children[childname] else: raise NoSuchChildError("no child named '%s'" % (childname,)) @@ -61,28 +61,24 @@ class SubTreeNode: def delete(self, childname): assert self.enclosing_tree.is_mutable() - if childname in self.subdirectory_node_children: - del self.subdirectory_node_children[childname] - elif childname in self.child_specifications: - del self.child_specifications[childname] + if childname in self.children: + del self.children[childname] else: raise NoSuchChildError("no child named '%s'" % (childname,)) def add_subdir(self, childname): - assert childname not in self.subdirectory_node_children - assert childname not in self.child_specifications + assert childname not in self.children newnode = SubTreeNode(self.enclosing_tree) - self.subdirectory_node_children[childname] = newnode + self.children[childname] = newnode return newnode def add(self, childname, node): - assert childname not in self.subdirectory_node_children - assert childname not in self.child_specifications - spec = to_spec(node) - self.child_specifications[childname] = spec + assert childname not in self.children + assert INode(node) + self.children[childname] = node return self - def serialize_to_sexprs(self): + def serialize_node(self): # note: this is a one-pass recursive serialization that will result # in the whole file table being held in 
memory. This is only # appropriate for directories with fewer than, say, 10k nodes. If we @@ -90,31 +86,26 @@ class SubTreeNode: # generator instead, and write the serialized data directly to a # tempfile. # - # ["DIRECTORY", name1, child1, name2, child2..] + # [name1, child1, name2, child2..] + # + # child1 is either a list for subdirs, or a string for non-subdirs - data = ["DIRECTORY"] - for name in sorted(self.node_children.keys()): + data = [] + for name in sorted(self.children.keys()): data.append(name) - data.append(self.node_children[name].serialize()) - for name in sorted(self.child_specifications.keys()): - data.append(name) - data.append(self.child_specifications[name].serialize()) + data.append(self.children[name].serialize_node()) return data - def populate_from_sexprs(self, data): - assert data[0] == "DIRECTORY" - assert len(data) % 2 == 1 - for i in range(1, len(data), 2): - name = data[i] - child_data = data[i+1] - assert isinstance(child_data, (list, tuple)) - child_type = child_data[0] - if child_type == "DIRECTORY": + def populate_node(self, data, node_maker): + assert len(data) % 2 == 0 + for (name, child_data) in pairwise(data): + if isinstance(child_data, (list, tuple)): child = SubTreeNode(self.enclosing_tree) - child.populate_from_sexprs(child_data) - self.node_children[name] = child + child.populate_node(child_data) else: - self.child_specifications[name] = child_data + assert isinstance(child_data, str) + child = node_maker(child_data) + self.children[name] = child @@ -139,22 +130,22 @@ class _DirectorySubTree(object): self.root = SubTreeNode(self) self.mutable = True # sure, why not - def populate_from_specification(self, spec, parent_is_mutable, downloader): - return self.populate_from_node(to_node(spec), - parent_is_mutable, downloader) + def populate_from_node(self, node, parent_is_mutable, node_maker, downloader): + # self.populate_from_node must be defined by the subclass (CHK or + # SSK), since it controls how the spec is 
interpreted. It will + # probably use the contents of the node to figure out what to + # download from the mesh, then pass this downloaded serialized data + # to populate_from_data() + raise NotImplementedError - def populate_from_data(self, data): - self.root = SubTreeNode() - self.root.populate_from_sexprs(bencode.bdecode(data)) + def populate_from_data(self, data, node_maker): + self.root = SubTreeNode(self) + self.root.populate_node(bencode.bdecode(data), node_maker) return self - def serialize(self): - """Return a series of nested lists which describe my structure - in a form that can be bencoded.""" - return self.root.serialize_to_sexprs() - - def serialize_to_file(self, f): - f.write(bencode.bencode(self.serialize())) + def serialize_subtree_to_file(self, f): + sexprs = self.root.serialize_node() + bencode.bwrite(sexprs, f) def is_mutable(self): return self.mutable @@ -167,25 +158,35 @@ class _DirectorySubTree(object): node = self.root while remaining_path: name = remaining_path[0] - if name in node.node_children: - node = node.node_children[name] - assert isinstance(node, SubTreeNode) + try: + childnode = node.get(name) + except NoSuchChildError: + # The node *would* be in this subtree if it existed, but it + # doesn't. Leave found_path and remaining_path alone, and + # node points at the last parent node that was on the path. + break + if IDirectoryNode.providedBy(childnode): + # recurse + node = childnode found_path.append(name) remaining_path.pop(0) continue - if name in node.child_specifications: + else: # the path takes us out of this subtree and into another - next_subtree_spec = node.child_specifications[name] - node = to_node(next_subtree_spec) + node = childnode # next subtree node found_path.append(name) remaining_path.pop(0) break - # The node *would* be in this subtree if it existed, but it - # doesn't. Leave found_path and remaining_path alone, and node - # points at the last parent node that was on the path. 
- break return (found_path, node, remaining_path) +class CHKDirectorySubTreeNode(BaseURINode): + implements(ICHKDirectoryNode) + prefix = "CHKDirectory" + + def get_uri(self): + return self.uri + + class CHKDirectorySubTree(_DirectorySubTree): # maybe mutable, maybe not @@ -195,11 +196,11 @@ class CHKDirectorySubTree(_DirectorySubTree): def set_uri(self, uri): self.old_uri = uri - def populate_from_node(self, node, parent_is_mutable, downloader): - node = ICHKDirectoryNode(node) + def populate_from_node(self, node, parent_is_mutable, node_maker, downloader): + assert ICHKDirectoryNode(node) self.mutable = parent_is_mutable d = downloader.download(node.get_uri(), download.Data()) - d.addCallback(self.populate_from_data) + d.addCallback(self.populate_from_data, node_maker) return d def update(self, prepath, work_queue): @@ -219,6 +220,27 @@ class CHKDirectorySubTree(_DirectorySubTree): # this needs investigation. return boxname + +class SSKDirectorySubTreeNode(object): + implements(INode, ISSKDirectoryNode) + prefix = "SSKDirectory" + + def is_directory(self): + return False + def serialize_node(self): + data = (self.read_cap, self.write_cap) + return "%s:%s" % (self.prefix, bencode.bencode(data)) + def populate_node(self, data, node_maker): + assert data.startswith(self.prefix + ":") + capdata = data[len(self.prefix)+1:] + self.read_cap, self.write_cap = bencode.bdecode(capdata) + + def get_read_capability(self): + return self.read_cap + def get_write_capability(self): + return self.write_cap + + class SSKDirectorySubTree(_DirectorySubTree): def new(self): @@ -229,13 +251,13 @@ class SSKDirectorySubTree(_DirectorySubTree): def mutation_affects_parent(self): return False - def populate_from_node(self, node, parent_is_mutable, downloader): + def populate_from_node(self, node, parent_is_mutable, node_maker, downloader): node = ISSKDirectoryNode(node) self.read_capability = node.get_read_capability() self.write_capability = node.get_write_capability() self.mutable = 
bool(self.write_capability) d = downloader.download_ssk(self.read_capability, download.Data()) - d.addCallback(self.populate_from_data) + d.addCallback(self.populate_from_data, node_maker) return d def set_version(self, version): diff --git a/src/allmydata/filetree/file.py b/src/allmydata/filetree/file.py index fdc9dbe0..3eb440f2 100644 --- a/src/allmydata/filetree/file.py +++ b/src/allmydata/filetree/file.py @@ -1,28 +1,32 @@ from zope.interface import implements from allmydata.filetree.interfaces import INode, IFileNode +from allmydata.filetree.basenode import BaseURINode +from allmydata.util import bencode + +class CHKFileNode(BaseURINode): + implements(IFileNode) + prefix = "CHKFile" -class CHKFile(object): - implements(INode, IFileNode) - def __init__(self, uri): - self.uri = uri def get_uri(self): return self.uri -class MutableSSKFile(object): +class SSKFileNode(object): implements(INode, IFileNode) - def __init__(self, read_cap, write_cap): - self.read_cap = read_cap - self.write_cap = write_cap + prefix = "SSKFile" + + def is_directory(self): + return False + def serialize_node(self): + data = (self.read_cap, self.write_cap) + return "%s:%s" % (self.prefix, bencode.bencode(data)) + def populate_node(self, data, node_maker): + assert data.startswith(self.prefix + ":") + capdata = data[len(self.prefix)+1:] + self.read_cap, self.write_cap = bencode.bdecode(capdata) + def get_read_capability(self): return self.read_cap def get_write_capability(self): return self.write_cap -class ImmutableSSKFile(object): - implements(INode, IFileNode) - def __init__(self, read_cap): - self.read_cap = read_cap - def get_read_capability(self): - return self.read_cap - diff --git a/src/allmydata/filetree/interfaces.py b/src/allmydata/filetree/interfaces.py index f8744a79..f209e3c7 100644 --- a/src/allmydata/filetree/interfaces.py +++ b/src/allmydata/filetree/interfaces.py @@ -6,12 +6,27 @@ class INode(Interface): other I*Node interfaces also implement this one.""" def 
is_directory(): """Return True if this node is an internal directory node.""" + def serialize_node(): + """Return a data structure which contains enough information to build + this node again in the future (by calling vdrive.make_node()). For + IDirectoryNodes, this will be a list. For all other nodes this will + be a string.""" + def populate_node(data, node_maker): + """vdrive.make_node() will first use the prefix inside 'data' to + decide what kind of Node to create. It will then call this function + to populate the new Node from the data returned by serialize_node.""" class IFileNode(Interface): """This is a file which can be retrieved.""" + # TODO: not sure which of these to provide.. should URIs contain "CHK" or + # "SSK" in them? Or should that be a detail of IDownloader? def get_uri(): """Return the URI of the target file. This URI can be passed to an IDownloader to retrieve the data.""" + def download(downloader, target): + """Download the file to the given target (using the provided + downloader). Return a deferred that fires (with 'target') when the + download is complete.""" class IDirectoryNode(Interface): """This is a directory which can be listed.""" @@ -50,28 +65,30 @@ class ISubTree(Interface): a DirectoryNode, or it might be a FileNode. """ - def populate_from_specification(spec, parent_is_mutable, downloader): - """Given a specification tuple, arrange to populate this subtree by - pulling data from some source (possibly the mesh, or the queen, or an - HTTP server, or the local filesystem). Return a Deferred that will - fire (with self) when this subtree is ready for use (specifically - when it is ready for get() and add() calls). + def populate_from_node(node, parent_is_mutable, node_maker, downloader): + """Subtrees are created by opener.open() being called with an INode + which describes both the kind of subtree to be created and a way to + obtain its contents. 
open() uses the node to create a new instance of + the appropriate subtree type, then calls this populate_from_node() + method. + + Each subtree's populate_from_node() method is expected to use the + downloader to obtain a file with the subtree's serialized contents + (probably by pulling data from some source, like the mesh, the queen, + an HTTP server, or somewhere on the local filesystem), then + unserialize them and populate the subtree's state. + + Return a Deferred that will fire (with self) when this subtree is + ready for use (specifically when it is ready for get() and add() + calls). """ - def populate_from_node(node, parent_is_mutable, downloader): - """Like populate_from_specification.""" - def populate_from_data(data): - """Used internally by populate_from_specification. This is called - with a sequence of bytes that describes the contents of the subtree, + """Used internally by populate_from_node. This is called with a + sequence of bytes that describes the contents of the subtree, probably a bencoded tuple or s-expression. Returns self. """ - def unserialize(serialized_data): - """Populate all nodes from serialized_data, previously created by - calling my serialize() method. 'serialized_data' is a series of - nested lists (s-expressions), probably recorded in bencoded form.""" - def is_mutable(): """This returns True if we have the ability to modify this subtree. If this returns True, this reference may be adapted to @@ -109,6 +126,11 @@ class ISubTree(Interface): """ + def serialize_subtree_to_file(f): + """Create a string which describes my structure and write it to the + given filehandle (using only .write()). 
This string should be + suitable for uploading to the mesh or storing in a local file.""" + def update(prepath, workqueue): """Perform and schedule whatever work is necessary to record this subtree to persistent storage and update the parent at 'prepath' @@ -121,10 +143,6 @@ class ISubTree(Interface): some subtree is updated which does not require notifying the parent. """ - def serialize(): - """Return a series of nested lists which describe my structure - in a form that can be bencoded.""" - #class IMutableSubTree(Interface): # def mutation_affects_parent(): @@ -171,6 +189,16 @@ class IOpener(Interface): class IVirtualDrive(Interface): + def __init__(workqueue, downloader, root_node): + pass + + # internal methods + + def make_node(serialized): + """Given a string produced by original_node.serialize_node(), produce + an equivalent node. + """ + # commands to manipulate files def list(path): @@ -220,9 +248,13 @@ class IVirtualDrive(Interface): # TODO class ICHKDirectoryNode(Interface): - pass + def get_uri(): + pass class ISSKDirectoryNode(Interface): - pass + def get_read_capability(): + pass + def get_write_capability(): + pass diff --git a/src/allmydata/filetree/vdrive.py b/src/allmydata/filetree/vdrive.py index 160c1ce4..581b9f69 100644 --- a/src/allmydata/filetree/vdrive.py +++ b/src/allmydata/filetree/vdrive.py @@ -1,27 +1,37 @@ from zope.interface import implements -from allmydata.filetree import opener -from allmydata.filetree.interfaces import (IVirtualDrive, ISubTree, IFileNode, - IDirectoryNode, NoSuchDirectoryError, - NoSuchChildError, PathAlreadyExistsError, - PathDoesNotExistError, - ) +from allmydata.filetree import opener, directory, redirect +from allmydata.filetree.interfaces import ( + IVirtualDrive, INode, ISubTree, IFileNode, IDirectoryNode, + NoSuchDirectoryError, NoSuchChildError, PathAlreadyExistsError, + PathDoesNotExistError, + ) from allmydata.upload import IUploadable +all_node_types = [ + directory.CHKDirectorySubTreeNode, + 
directory.SSKDirectorySubTreeNode, + redirect.LocalFileRedirectionNode, + redirect.QueenRedirectionNode, + redirect.HTTPRedirectionNode, + redirect.QueenOrLocalFileRedirectionNode, +] + class VirtualDrive(object): implements(IVirtualDrive) - def __init__(self, workqueue, downloader, root_specification): + def __init__(self, workqueue, downloader, root_node): + assert INode(root_node) self.workqueue = workqueue workqueue.set_vdrive(self) # TODO: queen? self.opener = opener.Opener(self.queen, downloader) - self.root_specification = root_specification + self.root_node = root_node # these methods are used to walk through our subtrees def _get_root(self): - return self.opener.open(self.root_specification, False) + return self.opener.open(self.root_node, False) def _get_node(self, path): d = self._get_closest_node(path) @@ -114,6 +124,25 @@ class VirtualDrive(object): d.addCallback(_got_closest) return d + # these are called when loading and creating nodes + def make_node(self, serialized): + # this turns a string into an INode, which contains information about + # the file or directory (like a URI), but does not contain the actual + # contents. An IOpener can be used later to retrieve the contents + # (which means downloading the file if this is an IFileNode, or + # perhaps creating a new subtree from the contents) + + # maybe include parent_is_mutable? + assert isinstance(serialized, str) + colon = serialized.index(":") + prefix = serialized[:colon] + for node_class in all_node_types: + if prefix == node_class.prefix: + node = node_class() + node.populate_node(serialized, self.make_node) + return node + raise RuntimeError("unable to handle subtree type '%s'" % prefix) + # these are called by the workqueue def add(self, path, new_node):