4 from zope.interface import implements
5 from twisted.internet import defer
7 from allmydata.mutable.common import NotMutableError
8 from allmydata.mutable.node import MutableFileNode
9 from allmydata.interfaces import IMutableFileNode, IDirectoryNode,\
10 IURI, IFileNode, IMutableFileURI, IFilesystemNode, \
11 ExistingChildError, NoSuchChildError, ICheckable, IDeepCheckable
12 from allmydata.checker_results import DeepCheckResults, \
13 DeepCheckAndRepairResults
14 from allmydata.monitor import Monitor
15 from allmydata.util import hashutil, mathutil, base32, log
16 from allmydata.util.hashutil import netstring
17 from allmydata.util.limiter import ConcurrencyLimiter
18 from allmydata.util.netstring import split_netstring
19 from allmydata.uri import NewDirectoryURI
20 from pycryptopp.cipher.aes import AES
23 def __init__(self, node, name, must_exist=True):
26 self.must_exist = True
    def modify(self, old_contents):
        # Deferred-modify callback: decode the serialized directory, drop the
        # named child, remember it in self.old_child, and re-encode.
        children = self.node._unpack_contents(old_contents)
        if self.name not in children:
            # [lines elided from this excerpt — presumably the
            #  must_exist=False early-return path; confirm against the
            #  full source]
            raise NoSuchChildError(self.name)
        self.old_child, metadata = children[self.name]
        del children[self.name]
        new_contents = self.node._pack_contents(children)
        # NOTE(review): the trailing `return new_contents` is elided from
        # this excerpt; modify() must hand back the new serialized contents.
    def __init__(self, node, name, metadata):
        # Deferred-modify helper that replaces the metadata of child `name`.
        # [self.node / self.name assignments elided from this excerpt]
        self.metadata = metadata
    def modify(self, old_contents):
        """Swap in the new metadata for the named child, keeping its node."""
        children = self.node._unpack_contents(old_contents)
        if self.name not in children:
            raise NoSuchChildError(self.name)
        # keep the child node (index 0), replace only the metadata dict
        children[self.name] = (children[self.name][0], self.metadata)
        new_contents = self.node._pack_contents(children)
        # NOTE(review): `return new_contents` is elided from this excerpt.
    def __init__(self, node, entries=None, overwrite=True):
        # Deferred-modify helper that adds/overwrites a batch of children.
        # [self.node assignment and the entries-is-None normalization are
        #  elided from this excerpt]
        self.entries = entries
        self.overwrite = overwrite
62 def set_node(self, name, node, metadata):
63 self.entries.append( [name, node, metadata] )
    def modify(self, old_contents):
        # Deferred-modify callback: decode, apply all queued entries, re-encode.
        children = self.node._unpack_contents(old_contents)
        # ['now' timestamp assignment elided from this excerpt]
        for e in self.entries:
            # [entry-normalization lines elided — entries may carry 2- or
            #  3-element forms; confirm against the full source]
            name, child, new_metadata = e
            assert isinstance(name, unicode)
            # [existence check elided] — when the name is already present:
            if not self.overwrite:
                raise ExistingChildError("child '%s' already exists" % name)
            # reuse the existing child's metadata as the starting point
            metadata = children[name][1].copy()
            # [else branch elided] — fresh children start with timestamps:
            metadata = {"ctime": now,
            # [continuation of the metadata literal elided]
            if new_metadata is None:
                # caller supplied no metadata: just refresh the timestamps
                if "ctime" not in metadata:
                    metadata["ctime"] = now
                metadata["mtime"] = now
            # [else branch elided] — caller-supplied metadata wins:
                metadata = new_metadata.copy()
            children[name] = (child, metadata)
        new_contents = self.node._pack_contents(children)
        # NOTE(review): `return new_contents` is elided from this excerpt.
class NewDirectoryNode:
    """A directory node layered over a MutableFileNode.

    Children are serialized into the mutable file's contents (see
    _pack_contents/_unpack_contents); write-caps are encrypted so that
    read-only holders cannot recover them.
    """
    implements(IDirectoryNode, ICheckable, IDeepCheckable)
    filenode_class = MutableFileNode
99 def __init__(self, client):
100 self._client = client
101 self._most_recent_size = None
104 return "<%s %s %s>" % (self.__class__.__name__, self.is_readonly() and "RO" or "RW", hasattr(self, '_uri') and self._uri.abbrev())
    def init_from_uri(self, myuri):
        """Populate this dirnode from an existing directory URI."""
        self._uri = IURI(myuri)
        self._node = self.filenode_class(self._client)
        self._node.init_from_uri(self._uri.get_filenode_uri())
        # NOTE(review): a trailing `return self` appears to be elided from
        # this excerpt — confirm against the full source.
    def create(self, keypair_generator=None):
        """
        Returns a deferred that eventually fires with self once the directory
        has been created (distributed across a set of storage servers).
        """
        # first we create a MutableFileNode with empty_contents, then use its
        # URI to create our own.
        self._node = self.filenode_class(self._client)
        empty_contents = self._pack_contents({})
        d = self._node.create(empty_contents, keypair_generator)
        d.addCallback(self._filenode_created)
        # NOTE(review): `return d` is elided from this excerpt.
    def _filenode_created(self, res):
        # callback for create(): derive our dirnode URI from the
        # freshly-created mutable file's URI
        self._uri = NewDirectoryURI(IMutableFileURI(self._node.get_uri()))
        # NOTE(review): `return self` appears to be elided from this excerpt.
    # [def get_size(self): line elided from this excerpt]
        # return the size of our backing mutable file, in bytes, if we've
        # fetched it (otherwise presumably None — set by _set_size below)
        return self._most_recent_size
132 def _set_size(self, data):
133 self._most_recent_size = len(data)
    # [method def line elided from this excerpt — downloads the backing
    #  mutable file and decodes it into the children dict]
        d = self._node.download_best_version()
        d.addCallback(self._set_size)
        d.addCallback(self._unpack_contents)
        # [`return d` elided from this excerpt]
    def _encrypt_rwcap(self, rwcap):
        """Encrypt a write-cap; the result is IV + crypttext + 32-byte HMAC."""
        assert isinstance(rwcap, str)
        # [IV generation elided from this excerpt]
        # key is derived from the IV and the mutable file's writekey, so only
        # writekey holders can recover the rwcap
        key = hashutil.mutable_rwcap_key_hash(IV, self._node.get_writekey())
        # [AES cryptor construction elided from this excerpt]
        crypttext = cryptor.process(rwcap)
        mac = hashutil.hmac(key, IV + crypttext)
        assert len(mac) == 32
        return IV + crypttext + mac
    def _decrypt_rwcapdata(self, encwrcap):
        """Verify the embedded HMAC, then decrypt and yield the rwcap string."""
        # [IV extraction elided — layout is IV(16) + crypttext + mac(32),
        #  per the slice below]
        crypttext = encwrcap[16:-32]
        # [mac extraction elided from this excerpt]
        key = hashutil.mutable_rwcap_key_hash(IV, self._node.get_writekey())
        # check integrity before decrypting
        if mac != hashutil.hmac(key, IV+crypttext):
            raise hashutil.IntegrityCheckError("HMAC does not match, crypttext is corrupted")
        # [AES cryptor construction elided from this excerpt]
        plaintext = cryptor.process(crypttext)
        # [`return plaintext` elided from this excerpt]
163 def _create_node(self, child_uri):
164 return self._client.create_node_from_uri(child_uri)
    def _unpack_contents(self, data):
        # the directory is serialized as a list of netstrings, one per child.
        # Each child is serialized as a list of four netstrings: (name,
        # rocap, rwcap, metadata), in which the name,rocap,metadata are in
        # cleartext. The 'name' is UTF-8 encoded. The rwcap is formatted as:
        # pack("16ss32s", iv, AES(H(writekey+iv), plaintextrwcap), mac)
        assert isinstance(data, str)
        # an empty directory is serialized as an empty string
        # [children-dict initialization / early-return lines elided]
        writeable = not self.is_readonly()
        # [loop header elided — consumes `data` one netstring at a time]
        entry, data = split_netstring(data, 1, True)
        name, rocap, rwcapdata, metadata_s = split_netstring(entry, 4)
        name = name.decode("utf-8")
        # [writeable check elided] — with the writekey we can recover rwcaps:
        rwcap = self._decrypt_rwcapdata(rwcapdata)
        child = self._create_node(rwcap)
        # [else branch elided] — read-only holders fall back to the rocap:
        child = self._create_node(rocap)
        metadata = simplejson.loads(metadata_s)
        assert isinstance(metadata, dict)
        children[name] = (child, metadata)
        # [`return children` elided from this excerpt]
    def _pack_contents(self, children):
        # expects children in the same format as _unpack_contents
        assert isinstance(children, dict)
        # [entries-list initialization elided from this excerpt]
        # iterate in sorted order so serialization is deterministic
        for name in sorted(children.keys()):
            child, metadata = children[name]
            assert isinstance(name, unicode)
            assert (IFileNode.providedBy(child)
                    or IMutableFileNode.providedBy(child)
                    or IDirectoryNode.providedBy(child)), (name,child)
            assert isinstance(metadata, dict)
            rwcap = child.get_uri() # might be RO if the child is not writeable
            rocap = child.get_readonly_uri()
            entry = "".join([netstring(name.encode("utf-8")),
                             # [netstring(rocap) element elided from excerpt]
                             netstring(self._encrypt_rwcap(rwcap)),
                             netstring(simplejson.dumps(metadata))])
            entries.append(netstring(entry))
        return "".join(entries)
    # Thin delegations to the backing mutable-file node and to our URI.
    def is_readonly(self):
        return self._node.is_readonly()
    def is_mutable(self):
        return self._node.is_mutable()

    # [def get_uri(self): line elided from this excerpt]
        return self._uri.to_string()

    def get_readonly_uri(self):
        return self._uri.get_readonly().to_string()

    def get_verifier(self):
        return self._uri.get_verifier()

    def get_storage_index(self):
        # reaches into the URI's private filenode URI for its storage index
        return self._uri._filenode_uri.storage_index
    def check(self, monitor, verify=False):
        """Perform a file check. See IChecker.check for details."""
        return self._node.check(monitor, verify)
    def check_and_repair(self, monitor, verify=False):
        # check (and attempt repair of) the backing mutable file
        return self._node.check_and_repair(monitor, verify)
    # [def list(self): line elided from this excerpt]
        """I return a Deferred that fires with a dictionary mapping child
        name to a tuple of (IFileNode or IDirectoryNode, metadata)."""
        # [body elided from this excerpt]
    def has_child(self, name):
        """I return a Deferred that fires with a boolean, True if there
        exists a child of the given name, False if not."""
        assert isinstance(name, unicode)
        # [d = self.list() line elided from this excerpt]
        d.addCallback(lambda children: children.has_key(name))
        # [`return d` elided from this excerpt]
    def _get(self, children, name):
        # helper: pull one child node out of a children dict, or raise
        child = children.get(name)
        # [missing-child check (if child is None:) elided from excerpt]
        raise NoSuchChildError(name)
        # [`return` of the child node elided from this excerpt]

    def _get_with_metadata(self, children, name):
        # like _get, but yields the (node, metadata) pair
        child = children.get(name)
        # [missing-child check elided from this excerpt]
        raise NoSuchChildError(name)
        # [`return` of the (node, metadata) pair elided from this excerpt]
    # [def get(self, name): line elided from this excerpt]
        """I return a Deferred that fires with the named child node,
        which is either an IFileNode or an IDirectoryNode."""
        assert isinstance(name, unicode)
        # [d = self.list() line elided from this excerpt]
        d.addCallback(self._get, name)
        # [`return d` elided from this excerpt]
    def get_child_and_metadata(self, name):
        """I return a Deferred that fires with the (node, metadata) pair for
        the named child. The node is either an IFileNode or an
        IDirectoryNode, and the metadata is a dictionary."""
        assert isinstance(name, unicode)
        # [d = self.list() line elided from this excerpt]
        d.addCallback(self._get_with_metadata, name)
        # [`return d` elided from this excerpt]
    def get_metadata_for(self, name):
        """Deferred that fires with the metadata dict of the named child."""
        assert isinstance(name, unicode)
        # [d = self.list() line elided from this excerpt]
        d.addCallback(lambda children: children[name][1])
        # [`return d` elided from this excerpt]
    def set_metadata_for(self, name, metadata):
        """Replace the named child's metadata; the Deferred fires with self."""
        assert isinstance(name, unicode)
        if self.is_readonly():
            return defer.fail(NotMutableError())
        assert isinstance(metadata, dict)
        s = MetadataSetter(self, name, metadata)
        d = self._node.modify(s.modify)
        d.addCallback(lambda res: self)
        # [`return d` elided from this excerpt]
    def get_child_at_path(self, path):
        """Transform a child path into an IDirectoryNode or IFileNode.

        I perform a recursive series of 'get' operations to find the named
        descendant node. I return a Deferred that fires with the node, or
        errbacks with IndexError if the node could not be found.

        The path can be either a single string (slash-separated) or a list of
        path-name components.
        """
        d = self.get_child_and_metadata_at_path(path)
        d.addCallback(lambda (node, metadata): node)
        # [`return d` elided from this excerpt]
    def get_child_and_metadata_at_path(self, path):
        """Transform a child path into an IDirectoryNode or IFileNode and
        a metadata dictionary from the last edge that was traversed.
        """
        # [empty-path check elided] — an empty path refers to this node:
        return defer.succeed((self, {}))
        if isinstance(path, (list, tuple)):
            # [list/tuple pass-through branch elided from this excerpt]
        # [else branch elided] — a plain string is split on slashes:
            path = path.split("/")
        # [per-component loop header elided from this excerpt]
        assert isinstance(p, unicode)
        # [childname = path[0] elided from this excerpt]
        remaining_path = path[1:]
        # [if remaining_path: elided] — recurse through the child dirnode:
        d = self.get(childname)
        d.addCallback(lambda node:
                      node.get_child_and_metadata_at_path(remaining_path))
        # [inner `return d` elided] — last component: fetch node + metadata:
        d = self.get_child_and_metadata(childname)
        # [`return d` elided from this excerpt]
    def set_uri(self, name, child_uri, metadata=None, overwrite=True):
        """I add a child (by URI) at the specific name. I return a Deferred
        that fires with the child node when the operation finishes. I will
        replace any existing child of the same name.

        The child_uri could be for a file, or for a directory (either
        read-write or read-only, using a URI that came from get_uri() ).

        If this directory node is read-only, the Deferred will errback with a
        NotMutableError."""
        assert isinstance(name, unicode)
        child_node = self._create_node(child_uri)
        d = self.set_node(name, child_node, metadata, overwrite)
        d.addCallback(lambda res: child_node)
        # [`return d` elided from this excerpt]
    def set_children(self, entries, overwrite=True):
        # Add many children by URI in a single modify pass.
        # [read-only guard elided from this excerpt — compare set_nodes]
        a = Adder(self, overwrite=overwrite)
        # [entry-normalization loop header elided; entries apparently may
        #  carry 2- or 3-element forms — confirm against the full source]
            name, child_uri, metadata = e
            assert isinstance(name, unicode)
            a.set_node(name, self._create_node(child_uri), metadata)
        return self._node.modify(a.modify)
    def set_node(self, name, child, metadata=None, overwrite=True):
        """I add a child at the specific name. I return a Deferred that fires
        when the operation finishes. This Deferred will fire with the child
        node that was just added. I will replace any existing child of the
        same name.

        If this directory node is read-only, the Deferred will errback with a
        NotMutableError."""
        if self.is_readonly():
            return defer.fail(NotMutableError())
        assert isinstance(name, unicode)
        assert IFilesystemNode.providedBy(child), child
        a = Adder(self, overwrite=overwrite)
        a.set_node(name, child, metadata)
        d = self._node.modify(a.modify)
        d.addCallback(lambda res: child)
        # [`return d` elided from this excerpt]
    def set_nodes(self, entries, overwrite=True):
        # Add many (name, node, metadata) entries in one modify pass; the
        # Deferred fires with None.
        if self.is_readonly():
            return defer.fail(NotMutableError())
        a = Adder(self, entries, overwrite=overwrite)
        d = self._node.modify(a.modify)
        d.addCallback(lambda res: None)
        # [`return d` elided from this excerpt]
    def add_file(self, name, uploadable, metadata=None, overwrite=True):
        """I upload a file (using the given IUploadable), then attach the
        resulting FileNode to the directory at the given name. I return a
        Deferred that fires (with the IFileNode of the uploaded file) when
        the operation completes."""
        assert isinstance(name, unicode)
        if self.is_readonly():
            return defer.fail(NotMutableError())
        d = self._client.upload(uploadable)
        d.addCallback(lambda results: results.uri)
        d.addCallback(self._client.create_node_from_uri)
        d.addCallback(lambda node:
                      self.set_node(name, node, metadata, overwrite))
        # [`return d` elided from this excerpt]
    def delete(self, name):
        """I remove the child at the specific name. I return a Deferred that
        fires (with the node just removed) when the operation finishes."""
        assert isinstance(name, unicode)
        if self.is_readonly():
            return defer.fail(NotMutableError())
        deleter = Deleter(self, name)
        d = self._node.modify(deleter.modify)
        # deleter.modify stashed the removed node in deleter.old_child
        d.addCallback(lambda res: deleter.old_child)
        # [`return d` elided from this excerpt]
    def create_empty_directory(self, name, overwrite=True):
        """I create and attach an empty directory at the given name. I return
        a Deferred that fires (with the new directory node) when the
        operation finishes."""
        assert isinstance(name, unicode)
        if self.is_readonly():
            return defer.fail(NotMutableError())
        d = self._client.create_empty_dirnode()
        # [inner `def _created(child):` line elided from this excerpt —
        #  the next four lines apparently run inside it]
            entries = [(name, child, None)]
            a = Adder(self, entries, overwrite=overwrite)
            d = self._node.modify(a.modify)
            d.addCallback(lambda res: child)
            # [inner `return d` elided from this excerpt]
        d.addCallback(_created)
        # [`return d` elided from this excerpt]
    def move_child_to(self, current_child_name, new_parent,
                      new_child_name=None, overwrite=True):
        """I take one of my children and move them to a new parent. The child
        is referenced by name. On the new parent, the child will live under
        'new_child_name', which defaults to 'current_child_name'. I return a
        Deferred that fires when the operation finishes."""
        assert isinstance(current_child_name, unicode)
        if self.is_readonly() or new_parent.is_readonly():
            return defer.fail(NotMutableError())
        if new_child_name is None:
            new_child_name = current_child_name
        assert isinstance(new_child_name, unicode)
        d = self.get(current_child_name)
        # [inner callback `def` line elided — the set_node call below
        #  apparently runs inside it; its argument list is also truncated]
            return new_parent.set_node(new_child_name, child,
        # [addCallback attaching the inner callback elided]
        # add the child to the new parent first, then delete it from us
        d.addCallback(lambda child: self.delete(current_child_name))
        # [`return d` elided from this excerpt]
    def deep_traverse(self, walker):
        """Perform a recursive walk, using this dirnode as a root, notifying
        the 'walker' instance of everything I encounter.

        I call walker.enter_directory(parent, children) once for each dirnode
        I visit, immediately after retrieving the list of children. I pass in
        the parent dirnode and the dict of childname->(childnode,metadata).
        This function should *not* traverse the children: I will do that.
        enter_directory() is most useful for the deep-stats number that
        counts how large a directory is.

        I call walker.add_node(node, path) for each node (both files and
        directories) I can reach. Most work should be done here.

        I avoid loops by keeping track of verifier-caps and refusing to call
        each() or traverse a node that I've seen before.

        I return a Deferred that will fire with the value of walker.finish().
        """
        # this is just a tree-walker, except that following each edge
        # requires a Deferred. We use a ConcurrencyLimiter to make sure the
        # fan-out doesn't cause problems.
        # [monitor construction elided from this excerpt]
        walker.set_monitor(monitor)
        # 'found' de-duplicates by verifier cap so shared subtrees/cycles are
        # visited once
        found = set([self.get_verifier()])
        limiter = ConcurrencyLimiter(10)
        d = self._deep_traverse_dirnode(self, [],
                                        walker, monitor, found, limiter)
        d.addCallback(lambda ignored: walker.finish())
        d.addBoth(monitor.finish)
        d.addErrback(lambda f: None)
        # [trailing return elided — presumably `return monitor`; confirm]
    def _deep_traverse_dirnode(self, node, path,
                               walker, monitor, found, limiter):
        # process this directory, then walk its children
        monitor.raise_if_cancelled()
        d = limiter.add(walker.add_node, node, path)
        d.addCallback(lambda ignored: limiter.add(node.list))
        d.addCallback(self._deep_traverse_dirnode_children, node, path,
                      walker, monitor, found, limiter)
        # [`return d` elided from this excerpt]
    def _deep_traverse_dirnode_children(self, children, parent, path,
                                        walker, monitor, found, limiter):
        monitor.raise_if_cancelled()
        # notify the walker of this directory before recursing into children
        dl = [limiter.add(walker.enter_directory, parent, children)]
        for name, (child, metadata) in children.iteritems():
            verifier = child.get_verifier()
            if verifier in found:
                # [skip/record lines elided from this excerpt — presumably
                #  `continue` plus `found.add(verifier)`]
            childpath = path + [name]
            if IDirectoryNode.providedBy(child):
                # recurse into subdirectories
                dl.append(self._deep_traverse_dirnode(child, childpath,
                # [argument continuation lines elided from this excerpt]
            # [else branch elided] — leaf nodes just get add_node:
                dl.append(limiter.add(walker.add_node, child, childpath))
        return defer.DeferredList(dl, fireOnOneErrback=True, consumeErrors=True)
    def build_manifest(self):
        """Return a Monitor, with a ['status'] that will be a list of (path,
        cap) tuples, for all nodes (directories and files) reachable from
        this one."""
        walker = ManifestWalker(self)
        return self.deep_traverse(walker)
    def start_deep_stats(self):
        # Since deep_traverse tracks verifier caps, we avoid double-counting
        # children for which we've got both a write-cap and a read-cap
        return self.deep_traverse(DeepStats(self))

    def start_deep_check(self, verify=False):
        # non-repairing recursive check of every reachable node
        return self.deep_traverse(DeepChecker(self, verify, repair=False))

    def start_deep_check_and_repair(self, verify=False):
        # like start_deep_check, but also attempts repair (see DeepChecker)
        return self.deep_traverse(DeepChecker(self, verify, repair=True))
class ManifestWalker:
    """deep_traverse() walker that collects a (path, cap) tuple per node."""
    def __init__(self, origin):
        # [self.manifest / self.origin initialization elided from excerpt]

    def set_monitor(self, monitor):
        # publish the (growing) manifest list as the monitor's status
        self.monitor = monitor
        monitor.origin_si = self.origin.get_storage_index()
        monitor.set_status(self.manifest)

    def add_node(self, node, path):
        self.manifest.append( (tuple(path), node.get_uri()) )

    def enter_directory(self, parent, children):
        # [body elided from this excerpt]
    def __init__(self, origin):
        # [origin/self.stats/self.histograms initialization elided from
        #  this excerpt]
        # zero out every counter we aggregate during the walk
        for k in ["count-immutable-files",
                  "count-mutable-files",
                  "count-literal-files",
                  # [more counter keys elided from this excerpt]
                  "size-immutable-files",
                  #"size-mutable-files",
                  "size-literal-files",
                  # [more size/largest keys elided from this excerpt]
                  "largest-directory-children",
                  "largest-immutable-file",
                  #"largest-mutable-file",
                  # [list tail and `self.stats[k] = 0` elided from excerpt]
        for k in ["size-files-histogram"]:
            self.histograms[k] = {} # maps (min,max) to count
        # initial histogram buckets; which_bucket() extends these on demand
        self.buckets = [ (0,0), (1,3)]
        # bucket widths grow by sqrt(10): two buckets per decade
        self.root = math.sqrt(10)
    def set_monitor(self, monitor):
        # publish our stats dict as the monitor's live status
        self.monitor = monitor
        monitor.origin_si = self.origin.get_storage_index()
        monitor.set_status(self.stats)
    def add_node(self, node, childpath):
        """Classify one reachable node into the counters and histograms."""
        if IDirectoryNode.providedBy(node):
            self.add("count-directories")
        elif IMutableFileNode.providedBy(node):
            self.add("count-files")
            self.add("count-mutable-files")
            # TODO: update the servermap, compute a size, add it to
            # size-mutable-files, max it into "largest-mutable-file"
        elif IFileNode.providedBy(node): # CHK and LIT
            self.add("count-files")
            size = node.get_size()
            self.histogram("size-files-histogram", size)
            if node.get_uri().startswith("URI:LIT:"):
                self.add("count-literal-files")
                self.add("size-literal-files", size)
            # [else branch elided] — non-LIT immutable (CHK) files:
                self.add("count-immutable-files")
                self.add("size-immutable-files", size)
                self.max("largest-immutable-file", size)
604 def enter_directory(self, parent, children):
605 dirsize_bytes = parent.get_size()
606 dirsize_children = len(children)
607 self.add("size-directories", dirsize_bytes)
608 self.max("largest-directory", dirsize_bytes)
609 self.max("largest-directory-children", dirsize_children)
    def add(self, key, value=1):
        # bump a counter (key must already exist in self.stats)
        self.stats[key] += value

    def max(self, key, value):
        # running maximum, for the 'largest-*' stats
        self.stats[key] = max(self.stats[key], value)
    def which_bucket(self, size):
        # return (min,max) such that min <= size <= max
        # values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
        # (101,316), (317, 1000), etc: two per decade
        # [index initialization and enclosing loop header elided from this
        #  excerpt — the following lines apparently run inside that loop]
        if i >= len(self.buckets):
            # we ran off the end of the table: grow it by one bucket whose
            # upper bound is the next power of self.root (~sqrt(10))
            new_lower = self.buckets[i-1][1]+1
            new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
            self.buckets.append( (new_lower, new_upper) )
        maybe = self.buckets[i]
        if maybe[0] <= size <= maybe[1]:
            # [return-the-bucket / advance-index lines elided from excerpt]
    def histogram(self, key, size):
        # count one occurrence of 'size' into the bucketed histogram 'key'
        bucket = self.which_bucket(size)
        h = self.histograms[key]
        # [increment-or-initialize lines for h[bucket] elided from excerpt]
    def get_results(self):
        # Return a copy of the stats, with each histogram flattened to a
        # list of (min, max, count) entries.
        stats = self.stats.copy()
        for key in self.histograms:
            h = self.histograms[key]
            out = [ (bucket[0], bucket[1], h[bucket]) for bucket in h ]
            # [sorting of 'out' and assignment into stats elided]
        # [`return stats` elided from this excerpt]

    # [def finish(self): line elided from this excerpt]
        return self.get_results()
    def __init__(self, root, verify, repair):
        # deep_traverse() walker that checks (and optionally repairs) every
        # reachable node, while also gathering DeepStats
        root_si = root.get_storage_index()
        self._lp = log.msg(format="deep-check starting (%(si)s),"
                           " verify=%(verify)s, repair=%(repair)s",
                           si=base32.b2a(root_si), verify=verify, repair=repair)
        self._verify = verify
        self._repair = repair
        # [if repair: elided] — repair runs collect richer results:
        self._results = DeepCheckAndRepairResults(root_si)
        # [else branch elided]
        self._results = DeepCheckResults(root_si)
        self._stats = DeepStats(root)
    def set_monitor(self, monitor):
        # publish the (incrementally-filled) results as the monitor's status
        self.monitor = monitor
        monitor.set_status(self._results)
    def add_node(self, node, childpath):
        # [if self._repair: elided from this excerpt]
        d = node.check_and_repair(self.monitor, self._verify)
        d.addCallback(self._results.add_check_and_repair, childpath)
        # [else branch elided from this excerpt]
        d = node.check(self.monitor, self._verify)
        d.addCallback(self._results.add_check, childpath)
        # also feed every node into the embedded DeepStats
        d.addCallback(lambda ignored: self._stats.add_node(node, childpath))
        # [`return d` elided from this excerpt]
    def enter_directory(self, parent, children):
        # per-directory stats are delegated to the embedded DeepStats
        return self._stats.enter_directory(parent, children)

    # [def finish(self): line elided from this excerpt]
        log.msg("deep-check done", parent=self._lp)
        self._results.update_stats(self._stats.get_results())
        # [`return self._results` presumably elided — confirm]
691 # use client.create_dirnode() to make one of these