2 import time, math, unicodedata
4 from zope.interface import implements
5 from twisted.internet import defer
6 from foolscap.api import fireEventually
8 from allmydata.mutable.common import NotWriteableError
9 from allmydata.mutable.filenode import MutableFileNode
10 from allmydata.unknown import UnknownNode, strip_prefix_for_ro
11 from allmydata.interfaces import IFilesystemNode, IDirectoryNode, IFileNode, \
12 IImmutableFileNode, IMutableFileNode, \
13 ExistingChildError, NoSuchChildError, ICheckable, IDeepCheckable, \
14 MustBeDeepImmutableError, CapConstraintError, ChildOfWrongTypeError
15 from allmydata.check_results import DeepCheckResults, \
16 DeepCheckAndRepairResults
17 from allmydata.monitor import Monitor
18 from allmydata.util import hashutil, mathutil, base32, log
19 from allmydata.util.encodingutil import quote_output
20 from allmydata.util.assertutil import precondition
21 from allmydata.util.netstring import netstring, split_netstring
22 from allmydata.util.consumer import download_to_data
23 from allmydata.uri import LiteralFileURI, from_string, wrap_dirnode_cap
24 from pycryptopp.cipher.aes import AES
25 from allmydata.util.dictutil import AuxValueDict
# Merge 'new_metadata' into 'metadata' and refresh the tahoe-managed
# timestamps ('tahoe:linkcrtime' / 'tahoe:linkmotime').
# NOTE(review): this listing is elided (original lines 31-35, 38, 42-45,
# 48-51, 59, 61 and the trailing return are missing) — presumably the
# None-handling for 'metadata', the 'old_ctime' initialization, the
# else-branches and 'return metadata'; confirm against the full file.
28 def update_metadata(metadata, new_metadata, now):
29 """Updates 'metadata' in-place with the information in 'new_metadata'.
30 Timestamps are set according to the time 'now'."""
# Remember any legacy 'ctime' so it can seed 'linkcrtime' below.
36 if 'ctime' in metadata:
37 old_ctime = metadata['ctime']
39 if new_metadata is not None:
40 # Overwrite all metadata.
41 newmd = new_metadata.copy()
# The 'tahoe' sub-dict is system-managed: the caller may not replace it.
46 if 'tahoe' in metadata:
47 newmd['tahoe'] = metadata['tahoe']
52 sysmd = metadata.get('tahoe', {})
53 if 'linkcrtime' not in sysmd:
54 # In Tahoe < 1.4.0 we used the word 'ctime' to mean what Tahoe >= 1.4.0
55 # calls 'linkcrtime'. This field is only used if it was in the old metadata,
56 # and 'tahoe:linkcrtime' was not.
57 if old_ctime is not None:
58 sysmd['linkcrtime'] = old_ctime
60 sysmd['linkcrtime'] = now
# 'linkmotime' is unconditionally bumped to the current time.
62 sysmd['linkmotime'] = now
63 metadata['tahoe'] = sysmd
# NOTE(review): the 'def normalize(namex):' line (and its docstring,
# original lines 69-71) is elided from this listing; the line below is
# its body — NFC-normalize a possibly-unnormalized Unicode name.
68 # 'x' at the end of a variable name indicates that it holds a Unicode string that may not
72 return unicodedata.normalize('NFC', namex)
74 # TODO: {Deleter,MetadataSetter,Adder}.modify all start by unpacking the
75 # contents and end by repacking them. It might be better to apply them to
76 # the unpacked contents.
# Deleter: a modifier object passed to MutableFileNode.modify() to remove
# one child from a packed directory.
# NOTE(review): the 'class Deleter:' header and several body lines
# (original 78, 80, 85, 91-92, 94, 100, 103-105 incl. the return of
# new_contents) are elided from this listing; confirm against the full file.
79 def __init__(self, node, namex, must_exist=True, must_be_directory=False, must_be_file=False):
# Names are normalized to NFC both going in and coming out of dirnodes.
81 self.name = normalize(namex)
82 self.must_exist = must_exist
83 self.must_be_directory = must_be_directory
84 self.must_be_file = must_be_file
86 def modify(self, old_contents, servermap, first_time):
87 children = self.node._unpack_contents(old_contents)
88 if self.name not in children:
89 if first_time and self.must_exist:
90 raise NoSuchChildError(self.name)
# Stash the removed child so callers can retrieve it after the modify.
93 self.old_child, metadata = children[self.name]
95 # Unknown children can be removed regardless of must_be_directory or must_be_file.
96 if self.must_be_directory and IFileNode.providedBy(self.old_child):
97 raise ChildOfWrongTypeError("delete required a directory, not a file")
98 if self.must_be_file and IDirectoryNode.providedBy(self.old_child):
99 raise ChildOfWrongTypeError("delete required a file, not a directory")
101 del children[self.name]
102 new_contents = self.node._pack_contents(children)
# MetadataSetter: modifier that replaces the metadata of one existing child.
# NOTE(review): several lines are elided from this listing (original 108
# 'self.node = node', 115 binding of 'name', 121 binding of 'now', 125,
# and the trailing return) — the bare 'name'/'now' uses below depend on
# those missing bindings; confirm against the full file.
106 class MetadataSetter:
107 def __init__(self, node, namex, metadata, create_readonly_node=None):
109 self.name = normalize(namex)
110 self.metadata = metadata
111 self.create_readonly_node = create_readonly_node
113 def modify(self, old_contents, servermap, first_time):
114 children = self.node._unpack_contents(old_contents)
116 if name not in children:
117 raise NoSuchChildError(name)
120 child = children[name][0]
# Merge the caller's metadata into a copy of the existing entry's metadata.
122 metadata = update_metadata(children[name][1].copy(), self.metadata, now)
123 if self.create_readonly_node and metadata.get('no-write', False):
# 'no-write' metadata demotes the stored child to its read-only form.
124 child = self.create_readonly_node(child, name)
126 children[name] = (child, metadata)
127 new_contents = self.node._pack_contents(children)
# Adder: modifier that adds or overwrites one or more children.
# NOTE(review): the 'class Adder:' header and several lines are elided
# from this listing (original 131, 133-135 incl. 'self.node = node',
# 141, 145, 148 binding of 'now', 152, 155-158 the 'if name in children:'
# guard, 161, 165, 169, 172 the return) — confirm against the full file.
132 def __init__(self, node, entries=None, overwrite=True, create_readonly_node=None):
136 precondition(isinstance(entries, dict), entries)
137 # keys of 'entries' may not be normalized.
138 self.entries = entries
139 self.overwrite = overwrite
140 self.create_readonly_node = create_readonly_node
142 def set_node(self, namex, node, metadata):
143 precondition(IFilesystemNode.providedBy(node), node)
144 self.entries[namex] = (node, metadata)
146 def modify(self, old_contents, servermap, first_time):
147 children = self.node._unpack_contents(old_contents)
149 for (namex, (child, new_metadata)) in self.entries.iteritems():
150 name = normalize(namex)
151 precondition(IFilesystemNode.providedBy(child), child)
153 # Strictly speaking this is redundant because we would raise the
154 # error again in _pack_normalized_children.
# overwrite may be False (never replace), True (always replace), or
# "only-files" (replace files but refuse to replace directories).
159 if not self.overwrite:
160 raise ExistingChildError("child %s already exists" % quote_output(name, encoding='utf-8'))
162 if self.overwrite == "only-files" and IDirectoryNode.providedBy(children[name][0]):
163 raise ExistingChildError("child %s already exists" % quote_output(name, encoding='utf-8'))
164 metadata = children[name][1].copy()
166 metadata = update_metadata(metadata, new_metadata, now)
167 if self.create_readonly_node and metadata.get('no-write', False):
168 child = self.create_readonly_node(child, name)
170 children[name] = (child, metadata)
171 new_contents = self.node._pack_contents(children)
# Superencrypt a child's write-cap with this directory's writekey.
# Returns salt + AES-CTR ciphertext + HMAC (layout: 16-byte salt,
# variable-length crypttext, 32-byte MAC).
# NOTE(review): original lines 177 and 180 ('cryptor = AES(key)') are
# elided from this listing — 'cryptor' below depends on the missing
# binding; confirm against the full file.
174 def _encrypt_rw_uri(writekey, rw_uri):
175 precondition(isinstance(rw_uri, str), rw_uri)
176 precondition(isinstance(writekey, str), writekey)
# Per-entry salt derived from the rw_uri itself; key derived from salt+writekey.
178 salt = hashutil.mutable_rwcap_salt_hash(rw_uri)
179 key = hashutil.mutable_rwcap_key_hash(salt, writekey)
181 crypttext = cryptor.process(rw_uri)
182 mac = hashutil.hmac(key, salt + crypttext)
183 assert len(mac) == 32
184 return salt + crypttext + mac
185 # The MAC is not checked by readers in Tahoe >= 1.3.0, but we still
186 # produce it for the sake of older readers.
# Normalize child names to NFC and pack the children dict into the
# serialized directory representation.
# NOTE(review): original lines 190 (presumably 'children = {}') and 195
# are elided from this listing; confirm against the full file.
188 def pack_children(childrenx, writekey, deep_immutable=False):
189 # initial_children must have metadata (i.e. {} instead of None)
191 for (namex, (node, metadata)) in childrenx.iteritems():
192 precondition(isinstance(metadata, dict),
193 "directory creation requires metadata to be a dict, not None", metadata)
194 children[normalize(namex)] = (node, metadata)
196 return _pack_normalized_children(children, writekey=writekey, deep_immutable=deep_immutable)
# Netstring encoding of the empty string, used as the writecap field when
# no writekey is supplied.
199 ZERO_LEN_NETSTR=netstring('')
# NOTE(review): this listing is elided (original lines 207-211, 214, 216,
# 218 presumably 'entries = []', 221, 223, 227, 229 the has_aux/entry
# check, 233-234, 236, 239, 241-242, 246, 250) — confirm against the
# full file before relying on the exact control flow.
200 def _pack_normalized_children(children, writekey, deep_immutable=False):
201 """Take a dict that maps:
202 children[unicode_nfc_name] = (IFileSystemNode, metadata_dict)
203 and pack it into a single string, for use as the contents of the backing
204 file. This is the same format as is returned by _unpack_contents. I also
205 accept an AuxValueDict, in which case I'll use the auxilliary cached data
206 as the pre-packed entry, which is faster than re-packing everything each
209 If writekey is provided then I will superencrypt the child's writecap with
212 If deep_immutable is True, I will require that all my children are deeply
213 immutable, and will raise a MustBeDeepImmutableError if not.
215 precondition((writekey is None) or isinstance(writekey, str), writekey)
# AuxValueDict carries the original packed entry per name, reused as-is.
217 has_aux = isinstance(children, AuxValueDict)
# Deterministic ordering: entries are always packed in sorted-name order.
219 for name in sorted(children.keys()):
220 assert isinstance(name, unicode)
222 (child, metadata) = children[name]
224 if deep_immutable and not child.is_allowed_in_immutable_directory():
225 raise MustBeDeepImmutableError("child %s is not allowed in an immutable directory" %
226 quote_output(name, encoding='utf-8'), name)
228 entry = children.get_aux(name)
230 assert IFilesystemNode.providedBy(child), (name,child)
231 assert isinstance(metadata, dict)
232 rw_uri = child.get_write_uri()
235 assert isinstance(rw_uri, str), rw_uri
237 # should be prevented by MustBeDeepImmutableError check above
238 assert not (rw_uri and deep_immutable)
240 ro_uri = child.get_readonly_uri()
243 assert isinstance(ro_uri, str), ro_uri
244 if writekey is not None:
245 writecap = netstring(_encrypt_rw_uri(writekey, rw_uri))
247 writecap = ZERO_LEN_NETSTR
# Each entry is four netstrings: name, ro_uri, writecap, JSON metadata.
248 entry = "".join([netstring(name.encode("utf-8")),
249 netstring(strip_prefix_for_ro(ro_uri, deep_immutable)),
251 netstring(simplejson.dumps(metadata))])
252 entries.append(netstring(entry))
253 return "".join(entries)
# DirectoryNode preamble, constructor, and repr.
# NOTE(review): the 'class DirectoryNode:' header (original line 255) and
# the 'def __repr__(self):' line (original 268) are elided from this
# listing; confirm against the full file.
256 implements(IDirectoryNode, ICheckable, IDeepCheckable)
257 filenode_class = MutableFileNode
259 def __init__(self, filenode, nodemaker, uploader):
# A dirnode wraps a (mutable or immutable) filenode holding packed children.
260 assert IFileNode.providedBy(filenode), filenode
261 assert not IDirectoryNode.providedBy(filenode), filenode
262 self._node = filenode
263 filenode_cap = filenode.get_cap()
# The dirnode's own cap is the filenode's cap wrapped as a directory cap.
264 self._uri = wrap_dirnode_cap(filenode_cap)
265 self._nodemaker = nodemaker
266 self._uploader = uploader
# __repr__ body: e.g. "<DirectoryNode RO-MUT abbrev>".
269 return "<%s %s-%s %s>" % (self.__class__.__name__,
270 self.is_readonly() and "RO" or "RW",
271 self.is_mutable() and "MUT" or "IMM",
272 hasattr(self, '_uri') and self._uri.abbrev())
# Size accessors and the internal _read helper.
# NOTE(review): the 'def get_size(self):' and 'def _read(self):' lines
# (and the trailing 'return d') are elided from this listing; the lines
# below are their bodies — confirm against the full file.
275 """Return the size of our backing mutable file, in bytes, if we've
276 fetched it. Otherwise return None. This returns synchronously."""
277 return self._node.get_size()
279 def get_current_size(self):
280 """Calculate the size of our backing mutable file, in bytes. Returns
281 a Deferred that fires with the result."""
282 return self._node.get_current_size()
# _read: fetch the packed directory contents and unpack them into a
# children dict.
285 if self._node.is_mutable():
286 # use the IMutableFileNode API.
287 d = self._node.download_best_version()
289 d = download_to_data(self._node)
290 d.addCallback(self._unpack_contents)
# Decrypt a child's superencrypted writecap (inverse of _encrypt_rw_uri:
# encwrcap is 16-byte salt + crypttext + 32-byte MAC).
# NOTE(review): original lines 294 (binding 'salt'), 297 (building
# 'cryptor'), 299 (return), 305-308 and 312 are elided from this
# listing; confirm against the full file.
293 def _decrypt_rwcapdata(self, encwrcap):
295 crypttext = encwrcap[16:-32]
296 key = hashutil.mutable_rwcap_key_hash(salt, self._node.get_writekey())
298 plaintext = cryptor.process(crypttext)
301 def _create_and_validate_node(self, rw_uri, ro_uri, name):
302 # name is just for error reporting
# Children of an immutable dirnode must themselves be deeply immutable.
303 node = self._nodemaker.create_from_cap(rw_uri, ro_uri,
304 deep_immutable=not self.is_mutable(),
309 def _create_readonly_node(self, node, name):
310 # name is just for error reporting
311 if not node.is_unknown() and node.is_readonly():
# Re-create the node from its read-only URI so the write-cap is dropped.
313 return self._create_and_validate_node(None, node.get_readonly_uri(), name=name)
# Parse the packed directory string into an AuxValueDict mapping
# NFC-normalized unicode name -> (child node, metadata dict), caching the
# raw packed entry as the aux value for fast repacking.
# NOTE(review): several lines are elided from this listing (original 324
# the empty-data guard, 329 'position = 0', 332 binding 'entry', 336,
# 342-344 the rw_uri initialization/writeable guard, 346, 353, 356-357,
# 364 the else, 372-374 incl. the return) — confirm against the full file.
315 def _unpack_contents(self, data):
316 # the directory is serialized as a list of netstrings, one per child.
317 # Each child is serialized as a list of four netstrings: (name, ro_uri,
318 # rwcapdata, metadata), in which the name, ro_uri, metadata are in
319 # cleartext. The 'name' is UTF-8 encoded, and should be normalized to NFC.
320 # The rwcapdata is formatted as:
321 # pack("16ss32s", iv, AES(H(writekey+iv), plaintext_rw_uri), mac)
322 assert isinstance(data, str), (repr(data), type(data))
323 # an empty directory is serialized as an empty string
325 return AuxValueDict()
326 writeable = not self.is_readonly()
327 mutable = self.is_mutable()
328 children = AuxValueDict()
330 while position < len(data):
331 entries, position = split_netstring(data, 1, position)
333 (namex_utf8, ro_uri, rwcapdata, metadata_s), subpos = split_netstring(entry, 4)
334 if not mutable and len(rwcapdata) > 0:
335 raise ValueError("the rwcapdata field of a dirnode in an immutable directory was not empty")
337 # A name containing characters that are unassigned in one version of Unicode might
338 # not be normalized wrt a later version. See the note in section 'Normalization Stability'
339 # at <http://unicode.org/policies/stability_policy.html>.
340 # Therefore we normalize names going both in and out of directories.
341 name = normalize(namex_utf8.decode("utf-8"))
345 rw_uri = self._decrypt_rwcapdata(rwcapdata)
347 # Since the encryption uses CTR mode, it currently leaks the length of the
348 # plaintext rw_uri -- and therefore whether it is present, i.e. whether the
349 # dirnode is writeable (ticket #925). By stripping trailing spaces in
350 # Tahoe >= 1.6.0, we may make it easier for future versions to plug this leak.
351 # ro_uri is treated in the same way for consistency.
352 # rw_uri and ro_uri will be either None or a non-empty string.
354 rw_uri = rw_uri.rstrip(' ') or None
355 ro_uri = ro_uri.rstrip(' ') or None
358 child = self._create_and_validate_node(rw_uri, ro_uri, name)
359 if mutable or child.is_allowed_in_immutable_directory():
360 metadata = simplejson.loads(metadata_s)
361 assert isinstance(metadata, dict)
362 children[name] = (child, metadata)
# set_with_aux stores the raw packed entry so repacking can reuse it.
363 children.set_with_aux(name, (child, metadata), auxilliary=entry)
365 log.msg(format="mutable cap for child %(name)s unpacked from an immutable directory",
366 name=quote_output(name, encoding='utf-8'),
367 facility="tahoe.webish", level=log.UNUSUAL)
# Bad caps are logged and skipped rather than failing the whole directory.
368 except CapConstraintError, e:
369 log.msg(format="unmet constraint on cap for child %(name)s unpacked from a directory:\n"
370 "%(message)s", message=e.args[0], name=quote_output(name, encoding='utf-8'),
371 facility="tahoe.webish", level=log.UNUSUAL)
# Packing helper plus simple delegating accessors for caps and check
# operations. Most of these forward to the wrapped filenode or to the
# directory URI object.
# NOTE(review): several one-line bodies/def lines are elided from this
# listing (original 381, 384, 386-387, 390, 392-394, 396, 399, 404-407,
# 410, 413, 417-418, 421, 427-428) — confirm against the full file.
375 def _pack_contents(self, children):
376 # expects children in the same format as _unpack_contents returns
377 return _pack_normalized_children(children, self._node.get_writekey())
379 def is_readonly(self):
380 return self._node.is_readonly()
382 def is_mutable(self):
383 return self._node.is_mutable()
385 def is_unknown(self):
388 def is_allowed_in_immutable_directory(self):
# A dirnode may live in an immutable directory only if it is immutable itself.
389 return not self._node.is_mutable()
391 def raise_error(self):
395 return self._uri.to_string()
397 def get_write_uri(self):
398 if self.is_readonly():
400 return self._uri.to_string()
402 def get_readonly_uri(self):
403 return self._uri.get_readonly().to_string()
408 def get_readcap(self):
409 return self._uri.get_readonly()
411 def get_verify_cap(self):
412 return self._uri.get_verify_cap()
414 def get_repair_cap(self):
415 if self._node.is_readonly():
416 return None # readonly (mutable) dirnodes are not yet repairable
419 def get_storage_index(self):
420 return self._uri.get_storage_index()
422 def check(self, monitor, verify=False, add_lease=False):
423 """Perform a file check. See IChecker.check for details."""
424 return self._node.check(monitor, verify, add_lease)
425 def check_and_repair(self, monitor, verify=False, add_lease=False):
426 return self._node.check_and_repair(monitor, verify, add_lease)
# Child lookup and metadata-update methods. Names arriving from callers
# ('namex') may be unnormalized; each method normalizes to NFC first.
# NOTE(review): this listing is elided (original 428 'def list(self):',
# 431-432, 437, 439-440, 443, 445-446, 449, 451-452, 457, 459-460, 464,
# 466, 468-469, 472, 474-475, 485) — including the 'd = self.list()'
# bindings and returns; confirm against the full file.
429 """I return a Deferred that fires with a dictionary mapping child
430 name to a tuple of (IFilesystemNode, metadata)."""
433 def has_child(self, namex):
434 """I return a Deferred that fires with a boolean, True if there
435 exists a child of the given name, False if not."""
436 name = normalize(namex)
438 d.addCallback(lambda children: children.has_key(name))
441 def _get(self, children, name):
442 child = children.get(name)
444 raise NoSuchChildError(name)
447 def _get_with_metadata(self, children, name):
448 child = children.get(name)
450 raise NoSuchChildError(name)
453 def get(self, namex):
454 """I return a Deferred that fires with the named child node,
455 which is an IFilesystemNode."""
456 name = normalize(namex)
458 d.addCallback(self._get, name)
461 def get_child_and_metadata(self, namex):
462 """I return a Deferred that fires with the (node, metadata) pair for
463 the named child. The node is an IFilesystemNode, and the metadata
465 name = normalize(namex)
467 d.addCallback(self._get_with_metadata, name)
470 def get_metadata_for(self, namex):
471 name = normalize(namex)
473 d.addCallback(lambda children: children[name][1])
476 def set_metadata_for(self, namex, metadata):
477 name = normalize(namex)
# Writes fail fast with NotWriteableError on read-only dirnodes.
478 if self.is_readonly():
479 return defer.fail(NotWriteableError())
480 assert isinstance(metadata, dict)
481 s = MetadataSetter(self, name, metadata,
482 create_readonly_node=self._create_readonly_node)
483 d = self._node.modify(s.modify)
484 d.addCallback(lambda res: self)
# Recursive path traversal: resolve a slash-separated or list path to a
# descendant node (optionally with the last edge's metadata).
# NOTE(review): original lines 489, 493, 495-496, 499-500, 504-506, 509-510,
# 512, 516, 520, 522 (incl. the returns and the empty-path/list handling)
# are elided from this listing; confirm against the full file.
487 def get_child_at_path(self, pathx):
488 """Transform a child path into an IFilesystemNode.
490 I perform a recursive series of 'get' operations to find the named
491 descendant node. I return a Deferred that fires with the node, or
492 errbacks with IndexError if the node could not be found.
494 The path can be either a single string (slash-separated) or a list of
# Delegate to the metadata-returning variant and drop the metadata.
497 d = self.get_child_and_metadata_at_path(pathx)
498 d.addCallback(lambda (node, metadata): node)
501 def get_child_and_metadata_at_path(self, pathx):
502 """Transform a child path into an IFilesystemNode and
503 a metadata dictionary from the last edge that was traversed.
# An empty path resolves to this dirnode itself, with empty metadata.
507 return defer.succeed((self, {}))
508 if isinstance(pathx, (list, tuple)):
511 pathx = pathx.split("/")
513 assert isinstance(p, unicode), p
514 childnamex = pathx[0]
515 remaining_pathx = pathx[1:]
# Recurse on the first component; the final component also fetches metadata.
517 d = self.get(childnamex)
518 d.addCallback(lambda node:
519 node.get_child_and_metadata_at_path(remaining_pathx))
521 d = self.get_child_and_metadata(childnamex)
# Add children by cap string (set_uri) or in bulk (set_children).
# NOTE(review): original lines 527, 533-534, 536, 541, 543-545 (the
# 2-tuple vs 3-tuple entry handling), 549, 556-557 are elided from this
# listing; confirm against the full file.
524 def set_uri(self, namex, writecap, readcap, metadata=None, overwrite=True):
525 precondition(isinstance(writecap, (str,type(None))), writecap)
526 precondition(isinstance(readcap, (str,type(None))), readcap)
528 # We now allow packing unknown nodes, provided they are valid
529 # for this type of directory.
530 child_node = self._create_and_validate_node(writecap, readcap, namex)
531 d = self.set_node(namex, child_node, metadata, overwrite)
# Fire with the newly-attached child node.
532 d.addCallback(lambda res: child_node)
535 def set_children(self, entries, overwrite=True):
# A single Adder accumulates all entries, then one modify() commits them.
537 a = Adder(self, overwrite=overwrite,
538 create_readonly_node=self._create_readonly_node)
539 for (namex, e) in entries.iteritems():
540 assert isinstance(namex, unicode), namex
# Entries may be (writecap, readcap) or (writecap, readcap, metadata).
542 writecap, readcap = e
546 writecap, readcap, metadata = e
547 precondition(isinstance(writecap, (str,type(None))), writecap)
548 precondition(isinstance(readcap, (str,type(None))), readcap)
550 # We now allow packing unknown nodes, provided they are valid
551 # for this type of directory.
552 child_node = self._create_and_validate_node(writecap, readcap, namex)
553 a.set_node(namex, child_node, metadata)
554 d = self._node.modify(a.modify)
555 d.addCallback(lambda ign: self)
# Attach existing IFilesystemNode children, singly (set_node) or in bulk
# (set_nodes).
# NOTE(review): original lines 562-563, 566, 568, 577-578, 587-588 (incl.
# the 'return d' lines) are elided from this listing; confirm against the
# full file.
558 def set_node(self, namex, child, metadata=None, overwrite=True):
559 """I add a child at the specific name. I return a Deferred that fires
560 when the operation finishes. This Deferred will fire with the child
561 node that was just added. I will replace any existing child of the
564 If this directory node is read-only, the Deferred will errback with a
565 NotWriteableError."""
567 precondition(IFilesystemNode.providedBy(child), child)
569 if self.is_readonly():
570 return defer.fail(NotWriteableError())
571 assert IFilesystemNode.providedBy(child), child
572 a = Adder(self, overwrite=overwrite,
573 create_readonly_node=self._create_readonly_node)
574 a.set_node(namex, child, metadata)
575 d = self._node.modify(a.modify)
576 d.addCallback(lambda res: child)
579 def set_nodes(self, entries, overwrite=True):
580 precondition(isinstance(entries, dict), entries)
581 if self.is_readonly():
582 return defer.fail(NotWriteableError())
# All entries are applied in a single Adder.modify pass.
583 a = Adder(self, entries, overwrite=overwrite,
584 create_readonly_node=self._create_readonly_node)
585 d = self._node.modify(a.modify)
586 d.addCallback(lambda res: self)
# Upload-and-attach (add_file) and child removal (delete).
# NOTE(review): original lines 601, 604-605, 615 (incl. the 'return d'
# lines and the name= kwarg to _create_and_validate_node) are elided from
# this listing; confirm against the full file.
590 def add_file(self, namex, uploadable, metadata=None, overwrite=True):
591 """I upload a file (using the given IUploadable), then attach the
592 resulting FileNode to the directory at the given name. I return a
593 Deferred that fires (with the IFileNode of the uploaded file) when
594 the operation completes."""
595 name = normalize(namex)
596 if self.is_readonly():
597 return defer.fail(NotWriteableError())
598 d = self._uploader.upload(uploadable)
# Turn the upload results into a validated child node, then link it in.
599 d.addCallback(lambda results:
600 self._create_and_validate_node(results.get_uri(), None,
602 d.addCallback(lambda node:
603 self.set_node(name, node, metadata, overwrite))
606 def delete(self, namex, must_exist=True, must_be_directory=False, must_be_file=False):
607 """I remove the child at the specific name. I return a Deferred that
608 fires (with the node just removed) when the operation finishes."""
609 if self.is_readonly():
610 return defer.fail(NotWriteableError())
611 deleter = Deleter(self, namex, must_exist=must_exist,
612 must_be_directory=must_be_directory, must_be_file=must_be_file)
613 d = self._node.modify(deleter.modify)
# Deleter stashes the removed child in old_child for us to return.
614 d.addCallback(lambda res: deleter.old_child)
# Create a new (mutable or immutable) subdirectory and link it in under
# 'name'.
# NOTE(review): original lines 623-624 (the 'if mutable:' branch headers),
# 627, 629, 633 ('def _created(child):'), 639, 641 are elided from this
# listing; confirm against the full file.
617 # XXX: Too many arguments? Worthwhile to break into mutable/immutable?
618 def create_subdirectory(self, namex, initial_children={}, overwrite=True,
619 mutable=True, mutable_version=None, metadata=None):
620 name = normalize(namex)
621 if self.is_readonly():
622 return defer.fail(NotWriteableError())
# mutable_version selects the mutable-file format for the new directory.
625 d = self._nodemaker.create_new_mutable_directory(initial_children,
626 version=mutable_version)
628 d = self._nodemaker.create_new_mutable_directory(initial_children)
630 # mutable version doesn't make sense for immmutable directories.
631 assert mutable_version is None
632 d = self._nodemaker.create_immutable_directory(initial_children)
# _created body: link the freshly created child into this directory.
634 entries = {name: (child, metadata)}
635 a = Adder(self, entries, overwrite=overwrite,
636 create_readonly_node=self._create_readonly_node)
637 d = self._node.modify(a.modify)
638 d.addCallback(lambda res: child)
640 d.addCallback(_created)
# Move (link-then-unlink) a child into another directory.
# NOTE(review): original lines 649, 652, 657 (the callback def wrapping
# the set_node call), 659-660, 662-663 are elided from this listing;
# confirm against the full file.
643 def move_child_to(self, current_child_namex, new_parent,
644 new_child_namex=None, overwrite=True):
645 """I take one of my children and move them to a new parent. The child
646 is referenced by name. On the new parent, the child will live under
647 'new_child_name', which defaults to 'current_child_name'. I return a
648 Deferred that fires when the operation finishes."""
# Both the source and destination must be writeable.
650 if self.is_readonly() or new_parent.is_readonly():
651 return defer.fail(NotWriteableError())
653 current_child_name = normalize(current_child_namex)
654 if new_child_namex is None:
655 new_child_namex = current_child_name
656 d = self.get(current_child_name)
658 return new_parent.set_node(new_child_namex, child,
# Only after the new link is in place is the old link removed.
661 d.addCallback(lambda child: self.delete(current_child_name))
# Recursive walk of the directory graph rooted at this dirnode, driving a
# 'walker' visitor. Returns a Monitor.
# NOTE(review): original lines 668, 675, 678, 684-685, 688-689, 699-700
# ('monitor = Monitor()'), 702, 708-709 ('return monitor') are elided
# from this listing; confirm against the full file.
665 def deep_traverse(self, walker):
666 """Perform a recursive walk, using this dirnode as a root, notifying
667 the 'walker' instance of everything I encounter.
669 I call walker.enter_directory(parent, children) once for each dirnode
670 I visit, immediately after retrieving the list of children. I pass in
671 the parent dirnode and the dict of childname->(childnode,metadata).
672 This function should *not* traverse the children: I will do that.
673 enter_directory() is most useful for the deep-stats number that
674 counts how large a directory is.
676 I call walker.add_node(node, path) for each node (both files and
677 directories) I can reach. Most work should be done here.
679 I avoid loops by keeping track of verifier-caps and refusing to call
680 walker.add_node() or traverse a node that I've seen before. This
681 means that any file or directory will only be given to the walker
682 once. If files or directories are referenced multiple times by a
683 directory structure, this may appear to under-count or miss some of
686 I return a Monitor which can be used to wait for the operation to
687 finish, learn about its progress, or cancel the operation.
690 # this is just a tree-walker, except that following each edge
691 # requires a Deferred. We used to use a ConcurrencyLimiter to limit
692 # fanout to 10 simultaneous operations, but the memory load of the
693 # queued operations was excessive (in one case, with 330k dirnodes,
694 # it caused the process to run into the 3.0GB-ish per-process 32bit
695 # linux memory limit, and crashed). So we use a single big Deferred
696 # chain, and do a strict depth-first traversal, one node at a time.
697 # This can be slower, because we aren't pipelining directory reads,
698 # but it brought the memory footprint down by roughly 50%.
701 walker.set_monitor(monitor)
# 'found' seeds the visited set with the root's own verify-cap.
703 found = set([self.get_verify_cap()])
704 d = self._deep_traverse_dirnode(self, [], walker, monitor, found)
705 d.addCallback(lambda ignored: walker.finish())
706 d.addBoth(monitor.finish)
# Errors are reported through the monitor, not the returned Deferred.
707 d.addErrback(lambda f: None)
# Depth-first traversal helpers: visit one dirnode, then recurse into its
# children (files first, directories after).
# NOTE(review): original lines 718-719, 728-729 (dirkids/filekids
# initialization), 734, 738-739 (the 'found.add(verifier)' / continue),
# 742, 753 (the every-100-files condition), 758-761 are elided from this
# listing; confirm against the full file.
711 def _deep_traverse_dirnode(self, node, path, walker, monitor, found):
712 # process this directory, then walk its children
713 monitor.raise_if_cancelled()
714 d = defer.maybeDeferred(walker.add_node, node, path)
715 d.addCallback(lambda ignored: node.list())
716 d.addCallback(self._deep_traverse_dirnode_children, node, path,
717 walker, monitor, found)
720 def _deep_traverse_dirnode_children(self, children, parent, path,
721 walker, monitor, found):
722 monitor.raise_if_cancelled()
723 d = defer.maybeDeferred(walker.enter_directory, parent, children)
724 # we process file-like children first, so we can drop their FileNode
725 # objects as quickly as possible. Tests suggest that a FileNode (held
726 # in the client's nodecache) consumes about 2440 bytes. dirnodes (not
727 # in the nodecache) seem to consume about 2000 bytes.
730 for name, (child, metadata) in sorted(children.iteritems()):
731 childpath = path + [name]
732 if isinstance(child, UnknownNode):
733 walker.add_node(child, childpath)
735 verifier = child.get_verify_cap()
736 # allow LIT files (for which verifier==None) to be processed
737 if (verifier is not None) and (verifier in found):
# Sort children into directories vs files for the two passes below.
740 if IDirectoryNode.providedBy(child):
741 dirkids.append( (child, childpath) )
743 filekids.append( (child, childpath) )
744 for i, (child, childpath) in enumerate(filekids):
# Default-argument binding pins child/childpath per iteration (late-binding
# closure pitfall).
745 d.addCallback(lambda ignored, child=child, childpath=childpath:
746 walker.add_node(child, childpath))
747 # to work around the Deferred tail-recursion problem
748 # (specifically the defer.succeed flavor) requires us to avoid
749 # doing more than 158 LIT files in a row. We insert a turn break
750 # once every 100 files (LIT or CHK) to preserve some stack space
751 # for other code. This is a different expression of the same
752 # Twisted problem as in #237.
754 d.addCallback(lambda ignored: fireEventually())
755 for (child, childpath) in dirkids:
756 d.addCallback(lambda ignored, child=child, childpath=childpath:
757 self._deep_traverse_dirnode(child, childpath,
# Public entry points that run deep_traverse with specific walkers.
# NOTE(review): original lines 766, 769, 774, 777, 780-781 are elided
# from this listing; confirm against the full file.
763 def build_manifest(self):
764 """Return a Monitor, with a ['status'] that will be a list of (path,
765 cap) tuples, for all nodes (directories and files) reachable from
767 walker = ManifestWalker(self)
768 return self.deep_traverse(walker)
770 def start_deep_stats(self):
771 # Since deep_traverse tracks verifier caps, we avoid double-counting
772 # children for which we've got both a write-cap and a read-cap
773 return self.deep_traverse(DeepStats(self))
775 def start_deep_check(self, verify=False, add_lease=False):
776 return self.deep_traverse(DeepChecker(self, verify, repair=False, add_lease=add_lease))
778 def start_deep_check_and_repair(self, verify=False, add_lease=False):
779 return self.deep_traverse(DeepChecker(self, verify, repair=True, add_lease=add_lease))
# DeepStats: a walker that accumulates counters, maxima, and a size
# histogram over everything reachable from 'origin'.
# NOTE(review): the 'class DeepStats:' header and many body lines are
# elided from this listing (original 783, 785-786 incl.
# 'self.origin = origin'/'self.stats = {}', 790-792, 796-797, 801-803,
# 808, 813, 832, 836, 844, 847, 850, 855-857, 859, 865-867, 871-874,
# 880-884, 886 incl. several returns and the bucket-growth loop) —
# confirm against the full file.
784 def __init__(self, origin):
# Pre-seed every counter key at zero so add()/max() never KeyError.
787 for k in ["count-immutable-files",
788 "count-mutable-files",
789 "count-literal-files",
793 "size-immutable-files",
794 #"size-mutable-files",
795 "size-literal-files",
798 "largest-directory-children",
799 "largest-immutable-file",
800 #"largest-mutable-file",
804 for k in ["size-files-histogram"]:
805 self.histograms[k] = {} # maps (min,max) to count
# Histogram buckets grow lazily, two per decade (sqrt(10) ratio).
806 self.buckets = [ (0,0), (1,3)]
807 self.root = math.sqrt(10)
809 def set_monitor(self, monitor):
810 self.monitor = monitor
811 monitor.origin_si = self.origin.get_storage_index()
812 monitor.set_status(self.get_results())
814 def add_node(self, node, childpath):
# Classify the node and bump the matching counters.
815 if isinstance(node, UnknownNode):
816 self.add("count-unknown")
817 elif IDirectoryNode.providedBy(node):
818 self.add("count-directories")
819 elif IMutableFileNode.providedBy(node):
820 self.add("count-files")
821 self.add("count-mutable-files")
822 # TODO: update the servermap, compute a size, add it to
823 # size-mutable-files, max it into "largest-mutable-file"
824 elif IImmutableFileNode.providedBy(node): # CHK and LIT
825 self.add("count-files")
826 size = node.get_size()
827 self.histogram("size-files-histogram", size)
828 theuri = from_string(node.get_uri())
829 if isinstance(theuri, LiteralFileURI):
830 self.add("count-literal-files")
831 self.add("size-literal-files", size)
833 self.add("count-immutable-files")
834 self.add("size-immutable-files", size)
835 self.max("largest-immutable-file", size)
837 def enter_directory(self, parent, children):
838 dirsize_bytes = parent.get_size()
# get_size() may return None for an unfetched mutable backing file.
839 if dirsize_bytes is not None:
840 self.add("size-directories", dirsize_bytes)
841 self.max("largest-directory", dirsize_bytes)
842 dirsize_children = len(children)
843 self.max("largest-directory-children", dirsize_children)
845 def add(self, key, value=1):
846 self.stats[key] += value
848 def max(self, key, value):
849 self.stats[key] = max(self.stats[key], value)
851 def which_bucket(self, size):
852 # return (min,max) such that min <= size <= max
853 # values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
854 # (101,316), (317, 1000), etc: two per decade
# Extend self.buckets on demand until one covers 'size'.
858 if i >= len(self.buckets):
860 new_lower = self.buckets[i-1][1]+1
861 new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
862 self.buckets.append( (new_lower, new_upper) )
863 maybe = self.buckets[i]
864 if maybe[0] <= size <= maybe[1]:
868 def histogram(self, key, size):
869 bucket = self.which_bucket(size)
870 h = self.histograms[key]
875 def get_results(self):
# Return a copy of the counters plus flattened histogram triples.
876 stats = self.stats.copy()
877 for key in self.histograms:
878 h = self.histograms[key]
879 out = [ (bucket[0], bucket[1], h[bucket]) for bucket in h ]
885 return self.get_results()
# ManifestWalker: DeepStats subclass that additionally records (path, cap)
# pairs, verify-caps, and storage-index strings for every node visited.
# NOTE(review): original lines 890 ('self.manifest = []'), 893, 897
# (presumably the 'if si:' guard), 900, 903, 909-910 are elided from this
# listing; confirm against the full file.
887 class ManifestWalker(DeepStats):
888 def __init__(self, origin):
889 DeepStats.__init__(self, origin)
891 self.storage_index_strings = set()
892 self.verifycaps = set()
894 def add_node(self, node, path):
895 self.manifest.append( (tuple(path), node.get_uri()) )
896 si = node.get_storage_index()
898 self.storage_index_strings.add(base32.b2a(si))
899 v = node.get_verify_cap()
901 self.verifycaps.add(v.to_string())
# Still delegate to DeepStats so the counters are maintained too.
902 return DeepStats.add_node(self, node, path)
904 def get_results(self):
905 stats = DeepStats.get_results(self)
906 return {"manifest": self.manifest,
907 "verifycaps": self.verifycaps,
908 "storage-index": self.storage_index_strings,
# DeepChecker: walker that runs check() or check_and_repair() on every
# reachable node, accumulating results plus DeepStats.
# NOTE(review): the 'class DeepChecker:' header and several lines are
# elided from this listing (original 913, 916, 918-919, 926 the
# 'if repair:' branch, 928, 931, 935, 937 the repair test, 940, 944-945
# incl. 'return d', 948-949, 952-954) — confirm against the full file.
914 def __init__(self, root, verify, repair, add_lease):
915 root_si = root.get_storage_index()
917 root_si_base32 = base32.b2a(root_si)
920 self._lp = log.msg(format="deep-check starting (%(si)s),"
921 " verify=%(verify)s, repair=%(repair)s",
922 si=root_si_base32, verify=verify, repair=repair)
923 self._verify = verify
924 self._repair = repair
925 self._add_lease = add_lease
# Repair mode uses the richer DeepCheckAndRepairResults container.
927 self._results = DeepCheckAndRepairResults(root_si)
929 self._results = DeepCheckResults(root_si)
930 self._stats = DeepStats(root)
932 def set_monitor(self, monitor):
933 self.monitor = monitor
934 monitor.set_status(self._results)
936 def add_node(self, node, childpath):
938 d = node.check_and_repair(self.monitor, self._verify, self._add_lease)
939 d.addCallback(self._results.add_check_and_repair, childpath)
941 d = node.check(self.monitor, self._verify, self._add_lease)
942 d.addCallback(self._results.add_check, childpath)
# Feed the same node into the stats accumulator after checking it.
943 d.addCallback(lambda ignored: self._stats.add_node(node, childpath))
946 def enter_directory(self, parent, children):
947 return self._stats.enter_directory(parent, children)
# finish(): log completion and fold the stats into the results object.
950 log.msg("deep-check done", parent=self._lp)
951 self._results.update_stats(self._stats.get_results())
955 # use client.create_dirnode() to make one of these