4 from zope.interface import implements
5 from twisted.internet import defer
6 from foolscap.api import fireEventually
8 from allmydata.mutable.common import NotWriteableError
9 from allmydata.mutable.filenode import MutableFileNode
10 from allmydata.unknown import UnknownNode, strip_prefix_for_ro
11 from allmydata.interfaces import IFilesystemNode, IDirectoryNode, IFileNode, \
12 IImmutableFileNode, IMutableFileNode, \
13 ExistingChildError, NoSuchChildError, ICheckable, IDeepCheckable, \
14 MustBeDeepImmutableError, CapConstraintError
15 from allmydata.check_results import DeepCheckResults, \
16 DeepCheckAndRepairResults
17 from allmydata.monitor import Monitor
18 from allmydata.util import hashutil, mathutil, base32, log
19 from allmydata.util.assertutil import precondition
20 from allmydata.util.netstring import netstring, split_netstring
21 from allmydata.util.consumer import download_to_data
22 from allmydata.uri import LiteralFileURI, from_string, wrap_dirnode_cap
23 from pycryptopp.cipher.aes import AES
24 from allmydata.util.dictutil import AuxValueDict
27 # TODO: {Deleter,MetadataSetter,Adder}.modify all start by unpacking the
28 # contents and end by repacking them. It might be better to apply them to
29 # the unpacked contents.
32 def __init__(self, node, name, must_exist=True):
35 self.must_exist = True
    def modify(self, old_contents, servermap, first_time):
        """Modifier for MutableFileNode.modify(): delete self.name.

        Unpacks old_contents, removes the named child (remembering it in
        self.old_child so the caller can return it), and repacks.
        NOTE(review): this copy appears truncated -- the missing-child
        branch has no body after the raise, and no value is returned after
        repacking; confirm against the complete source.
        """
        children = self.node._unpack_contents(old_contents)
        if self.name not in children:
            # only complain on the first attempt; a retry after a partial
            # write may legitimately find the child already gone
            if first_time and self.must_exist:
                raise NoSuchChildError(self.name)
        # remember the removed child so the caller can hand it back
        self.old_child, metadata = children[self.name]
        del children[self.name]
        new_contents = self.node._pack_contents(children)
    def __init__(self, node, name, metadata):
        # NOTE(review): assignments of self.node / self.name appear to be
        # elided in this copy; modify() below reads both.
        self.metadata = metadata
    def modify(self, old_contents, servermap, first_time):
        """Modifier: replace the metadata of child self.name, keeping its
        node unchanged.  NOTE(review): the trailing return of the repacked
        contents appears elided."""
        children = self.node._unpack_contents(old_contents)
        if self.name not in children:
            raise NoSuchChildError(self.name)
        # keep the existing child node, swap in the new metadata dict
        children[self.name] = (children[self.name][0], self.metadata)
        new_contents = self.node._pack_contents(children)
    def __init__(self, node, entries=None, overwrite=True):
        # NOTE(review): 'self.node = node' and the entries-is-None
        # defaulting appear elided in this copy; as written, the default
        # entries=None would fail the dict precondition below.
        precondition(isinstance(entries, dict), entries)
        self.entries = entries
        # overwrite may be True, False, or "only-files" (see modify())
        self.overwrite = overwrite
73 def set_node(self, name, node, metadata):
74 precondition(isinstance(name, unicode), name)
75 precondition(IFilesystemNode.providedBy(node), node)
76 self.entries[name] = (node, metadata)
    def modify(self, old_contents, servermap, first_time):
        """Modifier: merge self.entries into the unpacked children.

        NOTE(review): this copy appears heavily truncated -- the
        'if name in children:' / 'else:' split, the rest of the
        new-metadata dict literal, the 'now = time.time()' timestamp, and
        the final return are elided.  The surviving lines are annotated
        below; confirm structure against the complete source.
        """
        children = self.node._unpack_contents(old_contents)
        for (name, (child, new_metadata)) in self.entries.iteritems():
            precondition(isinstance(name, unicode), name)
            precondition(IFilesystemNode.providedBy(child), child)
            # Strictly speaking this is redundant because we would raise the
            # error again in pack_children.
                if not self.overwrite:
                    raise ExistingChildError("child '%s' already exists" % name)
                if self.overwrite == "only-files" and IDirectoryNode.providedBy(children[name][0]):
                    raise ExistingChildError("child '%s' already exists" % name)
                metadata = children[name][1].copy()
                metadata = {"ctime": now,
            if new_metadata is not None:
                # Overwrite all metadata.
                newmd = new_metadata.copy()
                # ...but preserve the system metadata under 'tahoe'
                if newmd.has_key('tahoe'):
                if metadata.has_key('tahoe'):
                    newmd['tahoe'] = metadata['tahoe']
                # For backwards compatibility with Tahoe < 1.4.0:
                if "ctime" not in metadata:
                    metadata["ctime"] = now
                metadata["mtime"] = now
            # 'tahoe' sub-dict holds system-maintained link timestamps
            sysmd = metadata.get('tahoe', {})
            if not 'linkcrtime' in sysmd:
                if "ctime" in metadata:
                    # In Tahoe < 1.4.0 we used the word "ctime" to mean what Tahoe >= 1.4.0
                    # calls "linkcrtime".
                    sysmd["linkcrtime"] = metadata["ctime"]
                sysmd["linkcrtime"] = now
            sysmd["linkmotime"] = now
            children[name] = (child, metadata)
        new_contents = self.node._pack_contents(children)
def _encrypt_rw_uri(filenode, rw_uri):
    """Encrypt a writecap string for embedding in a packed dirnode entry.

    Returns salt + AES ciphertext + HMAC, keyed from the filenode's
    writekey.  NOTE(review): the 'cryptor = AES(key)' construction appears
    elided in this copy; 'cryptor' is otherwise unbound here.
    """
    assert isinstance(rw_uri, str)
    writekey = filenode.get_writekey()
    # the salt is derived from the plaintext, so encryption is
    # deterministic for a given (writekey, rw_uri) pair
    salt = hashutil.mutable_rwcap_salt_hash(rw_uri)
    key = hashutil.mutable_rwcap_key_hash(salt, writekey)
    crypttext = cryptor.process(rw_uri)
    mac = hashutil.hmac(key, salt + crypttext)
    assert len(mac) == 32
    return salt + crypttext + mac
    # The MAC is not checked by readers in Tahoe >= 1.3.0, but we still
    # produce it for the sake of older readers.
def pack_children(filenode, children, deep_immutable=False):
    """Take a dict that maps:
         children[unicode_name] = (IFileSystemNode, metadata_dict)
    and pack it into a single string, for use as the contents of the backing
    file. This is the same format as is returned by _unpack_contents. I also
    accept an AuxValueDict, in which case I'll use the auxilliary cached data
    as the pre-packed entry, which is faster than re-packing everything each
    time.

    If deep_immutable is True, I will require that all my children are deeply
    immutable, and will raise a MustBeDeepImmutableError if not.

    NOTE(review): this copy appears truncated -- the 'entries = []'
    accumulator, the 'if has_aux:' fast path, and the rw/ro cap guard
    lines are elided.
    """
    has_aux = isinstance(children, AuxValueDict)
    for name in sorted(children.keys()):
        assert isinstance(name, unicode)
        (child, metadata) = children[name]
        if deep_immutable and not child.is_allowed_in_immutable_directory():
            raise MustBeDeepImmutableError("child '%s' is not allowed in an immutable directory" % (name,), name)
            # fast path: reuse the pre-packed entry cached by _unpack_contents
            entry = children.get_aux(name)
            assert IFilesystemNode.providedBy(child), (name,child)
            assert isinstance(metadata, dict)
            rw_uri = child.get_write_uri()
                assert isinstance(rw_uri, str), rw_uri
            # should be prevented by MustBeDeepImmutableError check above
            assert not (rw_uri and deep_immutable)
            ro_uri = child.get_readonly_uri()
                assert isinstance(ro_uri, str), ro_uri
            entry = "".join([netstring(name.encode("utf-8")),
                             netstring(strip_prefix_for_ro(ro_uri, deep_immutable)),
                             netstring(_encrypt_rw_uri(filenode, rw_uri)),
                             netstring(simplejson.dumps(metadata))])
        entries.append(netstring(entry))
    return "".join(entries)
    # NOTE(review): the enclosing 'class ...:' statement is not visible in
    # this copy; these are class-level declarations.
    implements(IDirectoryNode, ICheckable, IDeepCheckable)
    # class of the backing node created for new mutable directories
    filenode_class = MutableFileNode
202 def __init__(self, filenode, nodemaker, uploader):
203 assert IFileNode.providedBy(filenode), filenode
204 assert not IDirectoryNode.providedBy(filenode), filenode
205 self._node = filenode
206 filenode_cap = filenode.get_cap()
207 self._uri = wrap_dirnode_cap(filenode_cap)
208 self._nodemaker = nodemaker
209 self._uploader = uploader
        # NOTE(review): the 'def __repr__(self):' header appears elided.
        return "<%s %s-%s %s>" % (self.__class__.__name__,
                                  self.is_readonly() and "RO" or "RW",
                                  self.is_mutable() and "MUT" or "IMM",
                                  hasattr(self, '_uri') and self._uri.abbrev())
        # NOTE(review): the 'def get_size(self):' header appears elided.
        """Return the size of our backing mutable file, in bytes, if we've
        fetched it. Otherwise return None. This returns synchronously."""
        return self._node.get_size()
222 def get_current_size(self):
223 """Calculate the size of our backing mutable file, in bytes. Returns
224 a Deferred that fires with the result."""
225 return self._node.get_current_size()
        # NOTE(review): the enclosing method header (presumably
        # 'def _read(self):') and the else-branch for immutable nodes
        # appear elided here.
        if self._node.is_mutable():
            # use the IMutableFileNode API.
            d = self._node.download_best_version()
            d = download_to_data(self._node)
        d.addCallback(self._unpack_contents)
    def _decrypt_rwcapdata(self, encwrcap):
        """Decrypt one rwcapdata field (salt + ciphertext + 32-byte MAC).
        NOTE(review): the salt extraction and AES construction appear
        elided ('salt'/'cryptor' are unbound here), as is the return."""
        crypttext = encwrcap[16:-32]
        key = hashutil.mutable_rwcap_key_hash(salt, self._node.get_writekey())
        plaintext = cryptor.process(crypttext)
    def _create_and_validate_node(self, rw_uri, ro_uri, name):
        # Build a child node from caps, forcing deep-immutability when this
        # directory is immutable.  NOTE(review): the call and method tail
        # are truncated in this copy.
        node = self._nodemaker.create_from_cap(rw_uri, ro_uri,
                                               deep_immutable=not self.is_mutable(),
    def _unpack_contents(self, data):
        """Parse the packed directory string into an AuxValueDict mapping
        childname -> (node, metadata).

        NOTE(review): this copy appears truncated -- the empty-data check,
        'position'/'entry' initialization, the 'if writeable:' guard, the
        try/else structure, and the final return are elided.
        """
        # the directory is serialized as a list of netstrings, one per child.
        # Each child is serialized as a list of four netstrings: (name, ro_uri,
        # rwcapdata, metadata), in which the name, ro_uri, metadata are in
        # cleartext. The 'name' is UTF-8 encoded. The rwcapdata is formatted as:
        # pack("16ss32s", iv, AES(H(writekey+iv), plaintext_rw_uri), mac)
        assert isinstance(data, str), (repr(data), type(data))
        # an empty directory is serialized as an empty string
            return AuxValueDict()
        writeable = not self.is_readonly()
        mutable = self.is_mutable()
        children = AuxValueDict()
        while position < len(data):
            entries, position = split_netstring(data, 1, position)
            (name_utf8, ro_uri, rwcapdata, metadata_s), subpos = split_netstring(entry, 4)
            if not mutable and len(rwcapdata) > 0:
                raise ValueError("the rwcapdata field of a dirnode in an immutable directory was not empty")
            name = name_utf8.decode("utf-8")
                rw_uri = self._decrypt_rwcapdata(rwcapdata)
            # Since the encryption uses CTR mode, it currently leaks the length of the
            # plaintext rw_uri -- and therefore whether it is present, i.e. whether the
            # dirnode is writeable (ticket #925). By stripping trailing spaces in
            # Tahoe >= 1.6.0, we may make it easier for future versions to plug this leak.
            # ro_uri is treated in the same way for consistency.
            # rw_uri and ro_uri will be either None or a non-empty string.
            rw_uri = rw_uri.rstrip(' ') or None
            ro_uri = ro_uri.rstrip(' ') or None
                child = self._create_and_validate_node(rw_uri, ro_uri, name)
                if mutable or child.is_allowed_in_immutable_directory():
                    metadata = simplejson.loads(metadata_s)
                    assert isinstance(metadata, dict)
                    children[name] = (child, metadata)
                    children.set_with_aux(name, (child, metadata), auxilliary=entry)
                    log.msg(format="mutable cap for child '%(name)s' unpacked from an immutable directory",
                            facility="tahoe.webish", level=log.UNUSUAL)
            except CapConstraintError, e:
                log.msg(format="unmet constraint on cap for child '%(name)s' unpacked from a directory:\n"
                        "%(message)s", message=e.args[0], name=name_utf8,
                        facility="tahoe.webish", level=log.UNUSUAL)
304 def _pack_contents(self, children):
305 # expects children in the same format as _unpack_contents
306 return pack_children(self._node, children)
    def is_readonly(self):
        # delegated to the backing filenode
        return self._node.is_readonly()
    def is_mutable(self):
        return self._node.is_mutable()
    def is_unknown(self):
        # NOTE(review): body (presumably 'return False') appears elided.
    def is_allowed_in_immutable_directory(self):
        # only an immutable dirnode may live inside an immutable directory
        return not self._node.is_mutable()
    def raise_error(self):
        # NOTE(review): this method's body appears elided; the next line
        # belongs to the elided 'def get_uri(self):'.
        return self._uri.to_string()
    def get_write_uri(self):
        # NOTE(review): the readonly-branch body (presumably 'return None')
        # appears elided.
        if self.is_readonly():
        return self._uri.to_string()
    def get_readonly_uri(self):
        return self._uri.get_readonly().to_string()
    def get_readcap(self):
        # the read-cap URI object (not its string form)
        return self._uri.get_readonly()
    def get_verify_cap(self):
        return self._uri.get_verify_cap()
    def get_repair_cap(self):
        # NOTE(review): the writeable-case return appears elided.
        if self._node.is_readonly():
            return None # readonly (mutable) dirnodes are not yet repairable
    def get_storage_index(self):
        return self._uri._filenode_uri.storage_index
    def check(self, monitor, verify=False, add_lease=False):
        """Perform a file check. See IChecker.check for details."""
        return self._node.check(monitor, verify, add_lease)
    def check_and_repair(self, monitor, verify=False, add_lease=False):
        # check, then attempt repair of any damage found
        return self._node.check_and_repair(monitor, verify, add_lease)
        # NOTE(review): the 'def list(self):' header and body appear elided
        # around this docstring.
        """I return a Deferred that fires with a dictionary mapping child
        name to a tuple of (IFileNode or IDirectoryNode, metadata)."""
    def has_child(self, name):
        """I return a Deferred that fires with a boolean, True if there
        exists a child of the given name, False if not."""
        assert isinstance(name, unicode)
        # NOTE(review): 'd = self.list()' and 'return d' appear elided.
        d.addCallback(lambda children: children.has_key(name))
    def _get(self, children, name):
        # NOTE(review): the None-check guard and final return appear elided.
        child = children.get(name)
            raise NoSuchChildError(name)
    def _get_with_metadata(self, children, name):
        # like _get, but would yield the (node, metadata) pair
        # NOTE(review): guard and return elided as above.
        child = children.get(name)
            raise NoSuchChildError(name)
        # NOTE(review): the 'def get(self, name):' header appears elided.
        """I return a Deferred that fires with the named child node,
        which is either an IFileNode or an IDirectoryNode."""
        assert isinstance(name, unicode)
        d.addCallback(self._get, name)
    def get_child_and_metadata(self, name):
        """I return a Deferred that fires with the (node, metadata) pair for
        the named child. The node is either an IFileNode or an
        IDirectoryNode, and the metadata is a dictionary."""
        assert isinstance(name, unicode)
        # NOTE(review): 'd = self.list()' and 'return d' appear elided.
        d.addCallback(self._get_with_metadata, name)
    def get_metadata_for(self, name):
        """Fire with the metadata dict of the named child."""
        assert isinstance(name, unicode)
        # NOTE(review): 'd = self.list()' and 'return d' appear elided.
        d.addCallback(lambda children: children[name][1])
    def set_metadata_for(self, name, metadata):
        """Replace the named child's metadata wholesale; fires with self.
        NOTE(review): the trailing 'return d' appears elided."""
        assert isinstance(name, unicode)
        if self.is_readonly():
            return defer.fail(NotWriteableError())
        assert isinstance(metadata, dict)
        s = MetadataSetter(self, name, metadata)
        d = self._node.modify(s.modify)
        d.addCallback(lambda res: self)
    def get_child_at_path(self, path):
        """Transform a child path into an IDirectoryNode or IFileNode.

        I perform a recursive series of 'get' operations to find the named
        descendant node. I return a Deferred that fires with the node, or
        errbacks with IndexError if the node could not be found.

        The path can be either a single string (slash-separated) or a list of
        path-name elements.
        """
        # NOTE(review): 'return d' appears elided.
        d = self.get_child_and_metadata_at_path(path)
        d.addCallback(lambda (node, metadata): node)
    def get_child_and_metadata_at_path(self, path):
        """Transform a child path into an IDirectoryNode or IFileNode and
        a metadata dictionary from the last edge that was traversed.
        """
        # NOTE(review): the empty-path test, the string-split else-branch,
        # the per-element loop header, 'childname = path[0]', the
        # recursion-vs-leaf branch, and the returns appear elided below.
            return defer.succeed((self, {}))
        if isinstance(path, (list, tuple)):
            path = path.split("/")
            assert isinstance(p, unicode)
        remaining_path = path[1:]
            d = self.get(childname)
            d.addCallback(lambda node:
                          node.get_child_and_metadata_at_path(remaining_path))
        d = self.get_child_and_metadata(childname)
    def set_uri(self, name, writecap, readcap, metadata=None, overwrite=True):
        """Add a child from raw cap strings; fires with the new child node.
        NOTE(review): the readonly guard and 'return d' appear elided."""
        precondition(isinstance(name, unicode), name)
        precondition(isinstance(writecap, (str,type(None))), writecap)
        precondition(isinstance(readcap, (str,type(None))), readcap)
        # We now allow packing unknown nodes, provided they are valid
        # for this type of directory.
        child_node = self._create_and_validate_node(writecap, readcap, name)
        d = self.set_node(name, child_node, metadata, overwrite)
        d.addCallback(lambda res: child_node)
    def set_children(self, entries, overwrite=True):
        # Batch set_uri: entries maps name -> (writecap, readcap[, metadata])
        # and everything is added in a single backing-file mutation.
        # NOTE(review): the readonly guard, the 2-vs-3 tuple branch, and
        # 'return d' appear elided.
        a = Adder(self, overwrite=overwrite)
        for (name, e) in entries.iteritems():
            assert isinstance(name, unicode)
                writecap, readcap = e
                writecap, readcap, metadata = e
            precondition(isinstance(writecap, (str,type(None))), writecap)
            precondition(isinstance(readcap, (str,type(None))), readcap)
            # We now allow packing unknown nodes, provided they are valid
            # for this type of directory.
            child_node = self._create_and_validate_node(writecap, readcap, name)
            a.set_node(name, child_node, metadata)
        d = self._node.modify(a.modify)
        d.addCallback(lambda ign: self)
    def set_node(self, name, child, metadata=None, overwrite=True):
        """I add a child at the specific name. I return a Deferred that fires
        when the operation finishes. This Deferred will fire with the child
        node that was just added. I will replace any existing child of the
        same name.

        If this directory node is read-only, the Deferred will errback with a
        NotWriteableError."""
        # NOTE(review): part of the leading precondition block and the
        # trailing 'return d' appear elided.
        precondition(IFilesystemNode.providedBy(child), child)
        if self.is_readonly():
            return defer.fail(NotWriteableError())
        assert isinstance(name, unicode)
        assert IFilesystemNode.providedBy(child), child
        a = Adder(self, overwrite=overwrite)
        a.set_node(name, child, metadata)
        d = self._node.modify(a.modify)
        d.addCallback(lambda res: child)
    def set_nodes(self, entries, overwrite=True):
        # batch version of set_node; fires with self
        # NOTE(review): the trailing 'return d' appears elided.
        precondition(isinstance(entries, dict), entries)
        if self.is_readonly():
            return defer.fail(NotWriteableError())
        a = Adder(self, entries, overwrite=overwrite)
        d = self._node.modify(a.modify)
        d.addCallback(lambda res: self)
    def add_file(self, name, uploadable, metadata=None, overwrite=True):
        """I upload a file (using the given IUploadable), then attach the
        resulting FileNode to the directory at the given name. I return a
        Deferred that fires (with the IFileNode of the uploaded file) when
        the operation completes."""
        # NOTE(review): the trailing 'return d' appears elided.
        assert isinstance(name, unicode)
        if self.is_readonly():
            return defer.fail(NotWriteableError())
        d = self._uploader.upload(uploadable)
        d.addCallback(lambda results:
                      self._create_and_validate_node(results.uri, None, name))
        d.addCallback(lambda node:
                      self.set_node(name, node, metadata, overwrite))
    def delete(self, name):
        """I remove the child at the specific name. I return a Deferred that
        fires (with the node just removed) when the operation finishes."""
        # NOTE(review): the trailing 'return d' appears elided.
        assert isinstance(name, unicode)
        if self.is_readonly():
            return defer.fail(NotWriteableError())
        deleter = Deleter(self, name)
        d = self._node.modify(deleter.modify)
        # hand the removed node back to the caller
        d.addCallback(lambda res: deleter.old_child)
    def create_subdirectory(self, name, initial_children={}, overwrite=True,
        # NOTE(review): the parameter list is truncated (no closing paren),
        # and the mutable/immutable branch plus the '_created(child)' helper
        # header appear elided.  Also note the mutable default argument {}
        # is shared across calls -- confirm intent against full source.
        assert isinstance(name, unicode)
        if self.is_readonly():
            return defer.fail(NotWriteableError())
            d = self._nodemaker.create_new_mutable_directory(initial_children)
            d = self._nodemaker.create_immutable_directory(initial_children)
            entries = {name: (child, None)}
            a = Adder(self, entries, overwrite=overwrite)
            d = self._node.modify(a.modify)
            d.addCallback(lambda res: child)
        d.addCallback(_created)
    def move_child_to(self, current_child_name, new_parent,
                      new_child_name=None, overwrite=True):
        """I take one of my children and move them to a new parent. The child
        is referenced by name. On the new parent, the child will live under
        'new_child_name', which defaults to 'current_child_name'. I return a
        Deferred that fires when the operation finishes."""
        # NOTE(review): the helper wrapping the set_node call, the rest of
        # that call's arguments, and the trailing 'return d' appear elided.
        assert isinstance(current_child_name, unicode)
        if self.is_readonly() or new_parent.is_readonly():
            return defer.fail(NotWriteableError())
        if new_child_name is None:
            # default to the same name as in the old parent
            new_child_name = current_child_name
        assert isinstance(new_child_name, unicode)
        d = self.get(current_child_name)
            return new_parent.set_node(new_child_name, child,
        d.addCallback(lambda child: self.delete(current_child_name))
    def deep_traverse(self, walker):
        """Perform a recursive walk, using this dirnode as a root, notifying
        the 'walker' instance of everything I encounter.

        I call walker.enter_directory(parent, children) once for each dirnode
        I visit, immediately after retrieving the list of children. I pass in
        the parent dirnode and the dict of childname->(childnode,metadata).
        This function should *not* traverse the children: I will do that.
        enter_directory() is most useful for the deep-stats number that
        counts how large a directory is.

        I call walker.add_node(node, path) for each node (both files and
        directories) I can reach. Most work should be done here.

        I avoid loops by keeping track of verifier-caps and refusing to call
        walker.add_node() or traverse a node that I've seen before. This
        means that any file or directory will only be given to the walker
        once. If files or directories are referenced multiple times by a
        directory structure, this may appear to under-count or miss some of
        them.

        I return a Monitor which can be used to wait for the operation to
        finish, learn about its progress, or cancel the operation.
        """
        # NOTE(review): the Monitor construction and 'return monitor'
        # appear elided below.
        # this is just a tree-walker, except that following each edge
        # requires a Deferred. We used to use a ConcurrencyLimiter to limit
        # fanout to 10 simultaneous operations, but the memory load of the
        # queued operations was excessive (in one case, with 330k dirnodes,
        # it caused the process to run into the 3.0GB-ish per-process 32bit
        # linux memory limit, and crashed). So we use a single big Deferred
        # chain, and do a strict depth-first traversal, one node at a time.
        # This can be slower, because we aren't pipelining directory reads,
        # but it brought the memory footprint down by roughly 50%.
        walker.set_monitor(monitor)
        # seed the seen-set with our own verify-cap to avoid revisiting root
        found = set([self.get_verify_cap()])
        d = self._deep_traverse_dirnode(self, [], walker, monitor, found)
        d.addCallback(lambda ignored: walker.finish())
        d.addBoth(monitor.finish)
        d.addErrback(lambda f: None)
    def _deep_traverse_dirnode(self, node, path, walker, monitor, found):
        # process this directory, then walk its children
        # NOTE(review): the trailing 'return d' appears elided.
        monitor.raise_if_cancelled()
        d = defer.maybeDeferred(walker.add_node, node, path)
        d.addCallback(lambda ignored: node.list())
        d.addCallback(self._deep_traverse_dirnode_children, node, path,
                      walker, monitor, found)
    def _deep_traverse_dirnode_children(self, children, parent, path,
                                        walker, monitor, found):
        # NOTE(review): several lines appear elided in this copy: the
        # dirkids/filekids list initialization, the 'found.add(verifier)'
        # and 'continue' bookkeeping, the else-branch appending to
        # filekids, the turn-break condition ('if i % 100 == 99:'-style),
        # and the trailing recursion/return.
        monitor.raise_if_cancelled()
        d = defer.maybeDeferred(walker.enter_directory, parent, children)
        # we process file-like children first, so we can drop their FileNode
        # objects as quickly as possible. Tests suggest that a FileNode (held
        # in the client's nodecache) consumes about 2440 bytes. dirnodes (not
        # in the nodecache) seem to consume about 2000 bytes.
        for name, (child, metadata) in sorted(children.iteritems()):
            childpath = path + [name]
            if isinstance(child, UnknownNode):
                walker.add_node(child, childpath)
            verifier = child.get_verify_cap()
            # allow LIT files (for which verifier==None) to be processed
            if (verifier is not None) and (verifier in found):
            if IDirectoryNode.providedBy(child):
                dirkids.append( (child, childpath) )
                filekids.append( (child, childpath) )
        for i, (child, childpath) in enumerate(filekids):
            d.addCallback(lambda ignored, child=child, childpath=childpath:
                          walker.add_node(child, childpath))
            # to work around the Deferred tail-recursion problem
            # (specifically the defer.succeed flavor) requires us to avoid
            # doing more than 158 LIT files in a row. We insert a turn break
            # once every 100 files (LIT or CHK) to preserve some stack space
            # for other code. This is a different expression of the same
            # Twisted problem as in #237.
                d.addCallback(lambda ignored: fireEventually())
        for (child, childpath) in dirkids:
            d.addCallback(lambda ignored, child=child, childpath=childpath:
                          self._deep_traverse_dirnode(child, childpath,
    def build_manifest(self):
        """Return a Monitor, with a ['status'] that will be a list of (path,
        cap) tuples, for all nodes (directories and files) reachable from
        this one."""
        walker = ManifestWalker(self)
        return self.deep_traverse(walker)
687 def start_deep_stats(self):
688 # Since deep_traverse tracks verifier caps, we avoid double-counting
689 # children for which we've got both a write-cap and a read-cap
690 return self.deep_traverse(DeepStats(self))
692 def start_deep_check(self, verify=False, add_lease=False):
693 return self.deep_traverse(DeepChecker(self, verify, repair=False, add_lease=add_lease))
695 def start_deep_check_and_repair(self, verify=False, add_lease=False):
696 return self.deep_traverse(DeepChecker(self, verify, repair=True, add_lease=add_lease))
    def __init__(self, origin):
        # NOTE(review): heavily elided in this copy -- the class header,
        # the self.origin / self.stats initialization, several counter
        # names, and the closing of the first list literal are missing.
        for k in ["count-immutable-files",
                  "count-mutable-files",
                  "count-literal-files",
                  "size-immutable-files",
                  #"size-mutable-files",
                  "size-literal-files",
                  "largest-directory-children",
                  "largest-immutable-file",
                  #"largest-mutable-file",
        for k in ["size-files-histogram"]:
            self.histograms[k] = {} # maps (min,max) to count
        # two buckets per decade: see which_bucket() below
        self.buckets = [ (0,0), (1,3)]
        self.root = math.sqrt(10)
726 def set_monitor(self, monitor):
727 self.monitor = monitor
728 monitor.origin_si = self.origin.get_storage_index()
729 monitor.set_status(self.get_results())
    def add_node(self, node, childpath):
        """Classify one visited node and bump the matching counters.
        NOTE(review): the 'else:' before the count-immutable-files lines
        appears elided, so those lines dangle under the LiteralFileURI
        branch in this copy."""
        if isinstance(node, UnknownNode):
            self.add("count-unknown")
        elif IDirectoryNode.providedBy(node):
            self.add("count-directories")
        elif IMutableFileNode.providedBy(node):
            self.add("count-files")
            self.add("count-mutable-files")
            # TODO: update the servermap, compute a size, add it to
            # size-mutable-files, max it into "largest-mutable-file"
        elif IImmutableFileNode.providedBy(node): # CHK and LIT
            self.add("count-files")
            size = node.get_size()
            self.histogram("size-files-histogram", size)
            theuri = from_string(node.get_uri())
            if isinstance(theuri, LiteralFileURI):
                self.add("count-literal-files")
                self.add("size-literal-files", size)
                self.add("count-immutable-files")
                self.add("size-immutable-files", size)
                self.max("largest-immutable-file", size)
754 def enter_directory(self, parent, children):
755 dirsize_bytes = parent.get_size()
756 if dirsize_bytes is not None:
757 self.add("size-directories", dirsize_bytes)
758 self.max("largest-directory", dirsize_bytes)
759 dirsize_children = len(children)
760 self.max("largest-directory-children", dirsize_children)
762 def add(self, key, value=1):
763 self.stats[key] += value
765 def max(self, key, value):
766 self.stats[key] = max(self.stats[key], value)
    def which_bucket(self, size):
        # return (min,max) such that min <= size <= max
        # values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
        # (101,316), (317, 1000), etc: two per decade
        # NOTE(review): the loop header ('i'/'while' lines), the found
        # return, and the index increment appear elided in this copy.
            if i >= len(self.buckets):
                # extend the bucket list on demand, two buckets per decade
                new_lower = self.buckets[i-1][1]+1
                new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
                self.buckets.append( (new_lower, new_upper) )
            maybe = self.buckets[i]
            if maybe[0] <= size <= maybe[1]:
    def histogram(self, key, size):
        # NOTE(review): the increment/insert of the bucket count appears
        # elided.
        bucket = self.which_bucket(size)
        h = self.histograms[key]
    def get_results(self):
        # NOTE(review): the sort of 'out', its merge into 'stats', and the
        # return appear elided; the final line below belongs to the elided
        # 'def finish(self):'.
        stats = self.stats.copy()
        for key in self.histograms:
            h = self.histograms[key]
            out = [ (bucket[0], bucket[1], h[bucket]) for bucket in h ]
        return self.get_results()
class ManifestWalker(DeepStats):
    # Walker that records (path, cap) for every reachable node, plus the
    # sets of verify-caps and storage-index strings.
    def __init__(self, origin):
        # NOTE(review): 'self.manifest = []' appears elided in this copy;
        # add_node() below appends to it.
        DeepStats.__init__(self, origin)
        self.storage_index_strings = set()
        self.verifycaps = set()
    def add_node(self, node, path):
        # NOTE(review): the 'if si:' / 'if v:' guards around the two adds
        # appear elided (LIT nodes have neither).
        self.manifest.append( (tuple(path), node.get_uri()) )
        si = node.get_storage_index()
            self.storage_index_strings.add(base32.b2a(si))
        v = node.get_verify_cap()
            self.verifycaps.add(v.to_string())
        return DeepStats.add_node(self, node, path)
    def get_results(self):
        # NOTE(review): the returned dict literal is truncated (no 'stats'
        # entry / closing brace visible).
        stats = DeepStats.get_results(self)
        return {"manifest": self.manifest,
                "verifycaps": self.verifycaps,
                "storage-index": self.storage_index_strings,
    def __init__(self, root, verify, repair, add_lease):
        # NOTE(review): the class header and the 'if repair: ... else: ...'
        # branch selecting between the two results objects appear elided.
        root_si = root.get_storage_index()
        self._lp = log.msg(format="deep-check starting (%(si)s),"
                           " verify=%(verify)s, repair=%(repair)s",
                           si=base32.b2a(root_si), verify=verify, repair=repair)
        self._verify = verify
        self._repair = repair
        self._add_lease = add_lease
            self._results = DeepCheckAndRepairResults(root_si)
            self._results = DeepCheckResults(root_si)
        # embedded DeepStats accumulates counts alongside the check results
        self._stats = DeepStats(root)
845 def set_monitor(self, monitor):
846 self.monitor = monitor
847 monitor.set_status(self._results)
    def add_node(self, node, childpath):
        # NOTE(review): the 'if self._repair: ... else: ...' structure and
        # the trailing 'return d' appear elided in this copy.
            d = node.check_and_repair(self.monitor, self._verify, self._add_lease)
            d.addCallback(self._results.add_check_and_repair, childpath)
            d = node.check(self.monitor, self._verify, self._add_lease)
            d.addCallback(self._results.add_check, childpath)
        # in both cases, also feed the node into the stats accumulator
        d.addCallback(lambda ignored: self._stats.add_node(node, childpath))
859 def enter_directory(self, parent, children):
860 return self._stats.enter_directory(parent, children)
        # NOTE(review): the 'def finish(self):' header and the final
        # 'return self._results' appear elided around these lines.
        log.msg("deep-check done", parent=self._lp)
        self._results.update_stats(self._stats.get_results())
868 # use client.create_dirnode() to make one of these