import time, math, unicodedata

import simplejson
from zope.interface import implements
from twisted.internet import defer
from foolscap.api import fireEventually
from pycryptopp.cipher.aes import AES

from allmydata.mutable.common import NotWriteableError
from allmydata.mutable.filenode import MutableFileNode
from allmydata.unknown import UnknownNode, strip_prefix_for_ro
from allmydata.interfaces import IFilesystemNode, IDirectoryNode, IFileNode, \
     IImmutableFileNode, IMutableFileNode, \
     ExistingChildError, NoSuchChildError, ICheckable, IDeepCheckable, \
     MustBeDeepImmutableError, CapConstraintError, ChildOfWrongTypeError
from allmydata.check_results import DeepCheckResults, \
     DeepCheckAndRepairResults
from allmydata.monitor import Monitor
from allmydata.util import hashutil, mathutil, base32, log
from allmydata.util.encodingutil import quote_output
from allmydata.util.assertutil import precondition
from allmydata.util.netstring import netstring, split_netstring
from allmydata.util.consumer import download_to_data
from allmydata.uri import LiteralFileURI, from_string, wrap_dirnode_cap
from allmydata.util.dictutil import AuxValueDict
def update_metadata(metadata, new_metadata, now):
    """Updates 'metadata' in-place with the information in 'new_metadata'.

    Timestamps are set according to the time 'now'. Returns the updated
    metadata dict (a fresh dict when 'metadata' was None or replaced by
    'new_metadata').
    """
    if metadata is None:
        metadata = {}

    old_ctime = None
    if 'ctime' in metadata:
        old_ctime = metadata['ctime']

    if new_metadata is not None:
        # Overwrite all metadata, except the 'tahoe' sub-dict, which is
        # reserved for system-maintained timestamps and always carried over
        # from the old metadata.
        newmd = new_metadata.copy()
        if 'tahoe' in newmd:
            del newmd['tahoe']
        if 'tahoe' in metadata:
            newmd['tahoe'] = metadata['tahoe']
        metadata = newmd

    # update timestamps
    sysmd = metadata.get('tahoe', {})
    if 'linkcrtime' not in sysmd:
        # In Tahoe < 1.4.0 we used the word 'ctime' to mean what Tahoe >= 1.4.0
        # calls 'linkcrtime'. This field is only used if it was in the old metadata,
        # and 'tahoe:linkcrtime' was not.
        if old_ctime is not None:
            sysmd['linkcrtime'] = old_ctime
        else:
            sysmd['linkcrtime'] = now

    sysmd['linkmotime'] = now
    metadata['tahoe'] = sysmd

    return metadata
# 'x' at the end of a variable name indicates that it holds a Unicode string that may not
# be NFC-normalized.

def normalize(namex):
    """Return the NFC-normalized form of the (possibly unnormalized) name."""
    return unicodedata.normalize('NFC', namex)
74 # TODO: {Deleter,MetadataSetter,Adder}.modify all start by unpacking the
75 # contents and end by repacking them. It might be better to apply them to
76 # the unpacked contents.
def __init__(self, node, namex, must_exist=True, must_be_directory=False, must_be_file=False):
    """Prepare to delete the child named 'namex' (not necessarily
    normalized) from dirnode 'node'. The must_* flags restrict what kind
    of child may be deleted; see modify()."""
    self.node = node  # the dirnode whose contents will be modified
    self.name = normalize(namex)
    self.must_exist = must_exist
    self.must_be_directory = must_be_directory
    self.must_be_file = must_be_file
def modify(self, old_contents, servermap, first_time):
    """Modifier callback: remove self.name from the packed contents and
    return the repacked contents. Records the removed node in
    self.old_child (None if the child was already absent)."""
    children = self.node._unpack_contents(old_contents)
    if self.name not in children:
        if first_time and self.must_exist:
            raise NoSuchChildError(self.name)
        # nothing to delete: leave the contents untouched
        self.old_child = None
        return None
    self.old_child, metadata = children[self.name]

    # Unknown children can be removed regardless of must_be_directory or must_be_file.
    if self.must_be_directory and IFileNode.providedBy(self.old_child):
        raise ChildOfWrongTypeError("delete required a directory, not a file")
    if self.must_be_file and IDirectoryNode.providedBy(self.old_child):
        raise ChildOfWrongTypeError("delete required a file, not a directory")

    del children[self.name]
    new_contents = self.node._pack_contents(children)
    return new_contents
class MetadataSetter:
    """Modifier that replaces the metadata of one child of a dirnode."""
    def __init__(self, node, namex, metadata, create_readonly_node=None):
        self.node = node
        self.name = normalize(namex)
        self.metadata = metadata
        self.create_readonly_node = create_readonly_node

    def modify(self, old_contents, servermap, first_time):
        children = self.node._unpack_contents(old_contents)
        name = self.name
        if name not in children:
            raise NoSuchChildError(name)

        now = time.time()
        child = children[name][0]

        metadata = update_metadata(children[name][1].copy(), self.metadata, now)
        # if the new metadata marks this child 'no-write', degrade the
        # stored node to its read-only form
        if self.create_readonly_node and metadata.get('no-write', False):
            child = self.create_readonly_node(child, name)

        children[name] = (child, metadata)
        new_contents = self.node._pack_contents(children)
        return new_contents
def __init__(self, node, entries=None, overwrite=True, create_readonly_node=None):
    """Prepare to add 'entries' (dict of name -> (node, metadata)) to
    dirnode 'node'. 'overwrite' may be True, False, or "only-files"."""
    self.node = node
    if entries is None:
        entries = {}
    precondition(isinstance(entries, dict), entries)
    precondition(overwrite in (True, False, "only-files"), overwrite)
    # keys of 'entries' may not be normalized.
    self.entries = entries
    self.overwrite = overwrite
    self.create_readonly_node = create_readonly_node
def set_node(self, namex, node, metadata):
    """Queue one child (under the possibly-unnormalized name 'namex') to
    be added when modify() runs."""
    precondition(IFilesystemNode.providedBy(node), node)
    entry = (node, metadata)
    self.entries[namex] = entry
def modify(self, old_contents, servermap, first_time):
    """Modifier callback: merge self.entries into the directory contents,
    honoring the overwrite policy, and return the repacked contents."""
    children = self.node._unpack_contents(old_contents)
    now = time.time()
    for (namex, (child, new_metadata)) in self.entries.iteritems():
        name = normalize(namex)
        precondition(IFilesystemNode.providedBy(child), child)

        # Strictly speaking this is redundant because we would raise the
        # error again in _pack_normalized_children.
        child.raise_error()

        metadata = None
        if name in children:
            if not self.overwrite:
                raise ExistingChildError("child %s already exists" % quote_output(name, encoding='utf-8'))

            if self.overwrite == "only-files" and IDirectoryNode.providedBy(children[name][0]):
                raise ExistingChildError("child %s already exists as a directory" % quote_output(name, encoding='utf-8'))
            metadata = children[name][1].copy()

        metadata = update_metadata(metadata, new_metadata, now)
        # degrade to read-only form when the metadata forbids writing
        if self.create_readonly_node and metadata.get('no-write', False):
            child = self.create_readonly_node(child, name)

        children[name] = (child, metadata)
    new_contents = self.node._pack_contents(children)
    return new_contents
def _encrypt_rw_uri(writekey, rw_uri):
    """Superencrypt a child's write-cap with the parent dirnode's writekey.
    Returns salt + AES ciphertext + HMAC."""
    precondition(isinstance(rw_uri, str), rw_uri)
    precondition(isinstance(writekey, str), writekey)

    salt = hashutil.mutable_rwcap_salt_hash(rw_uri)
    key = hashutil.mutable_rwcap_key_hash(salt, writekey)
    cryptor = AES(key)
    crypttext = cryptor.process(rw_uri)
    mac = hashutil.hmac(key, salt + crypttext)
    assert len(mac) == 32
    return salt + crypttext + mac
    # The MAC is not checked by readers in Tahoe >= 1.3.0, but we still
    # produce it for the sake of older readers.
def pack_children(childrenx, writekey, deep_immutable=False):
    """Normalize the child names in 'childrenx' and pack them into a
    directory-contents string."""
    # initial_children must have metadata (i.e. {} instead of None)
    children = {}
    for (namex, (node, metadata)) in childrenx.iteritems():
        precondition(isinstance(metadata, dict),
                     "directory creation requires metadata to be a dict, not None", metadata)
        children[normalize(namex)] = (node, metadata)

    return _pack_normalized_children(children, writekey=writekey, deep_immutable=deep_immutable)
ZERO_LEN_NETSTR=netstring('') # cached netstring encoding of the empty string (used when a child has no writecap)
def _pack_normalized_children(children, writekey, deep_immutable=False):
    """Take a dict that maps:
         children[unicode_nfc_name] = (IFileSystemNode, metadata_dict)
    and pack it into a single string, for use as the contents of the backing
    file. This is the same format as is returned by _unpack_contents. I also
    accept an AuxValueDict, in which case I'll use the auxilliary cached data
    as the pre-packed entry, which is faster than re-packing everything each
    time.

    If writekey is provided then I will superencrypt the child's writecap with
    writekey.

    If deep_immutable is True, I will require that all my children are deeply
    immutable, and will raise a MustBeDeepImmutableError if not.
    """
    precondition((writekey is None) or isinstance(writekey, str), writekey)

    has_aux = isinstance(children, AuxValueDict)
    entries = []
    for name in sorted(children.keys()):
        assert isinstance(name, unicode)
        entry = None
        (child, metadata) = children[name]
        child.raise_error()
        if deep_immutable and not child.is_allowed_in_immutable_directory():
            raise MustBeDeepImmutableError("child %s is not allowed in an immutable directory" %
                                           quote_output(name, encoding='utf-8'), name)
        if has_aux:
            # reuse the pre-packed entry cached at unpack time
            entry = children.get_aux(name)
        if entry is None:
            assert IFilesystemNode.providedBy(child), (name,child)
            assert isinstance(metadata, dict)
            rw_uri = child.get_write_uri()
            if rw_uri is None:
                rw_uri = ""
            assert isinstance(rw_uri, str), rw_uri

            # should be prevented by MustBeDeepImmutableError check above
            assert not (rw_uri and deep_immutable)

            ro_uri = child.get_readonly_uri()
            if ro_uri is None:
                ro_uri = ""
            assert isinstance(ro_uri, str), ro_uri
            if writekey is not None:
                writecap = netstring(_encrypt_rw_uri(writekey, rw_uri))
            else:
                writecap = ZERO_LEN_NETSTR
            entry = "".join([netstring(name.encode("utf-8")),
                             netstring(strip_prefix_for_ro(ro_uri, deep_immutable)),
                             writecap,
                             netstring(simplejson.dumps(metadata))])
        entries.append(netstring(entry))
    return "".join(entries)
# zope.interface declarations for this class
implements(IDirectoryNode, ICheckable, IDeepCheckable)
# the kind of filenode used as our mutable backing store
filenode_class = MutableFileNode
def __init__(self, filenode, nodemaker, uploader):
    """Wrap the backing filenode as a directory node."""
    assert IFileNode.providedBy(filenode), filenode
    assert not IDirectoryNode.providedBy(filenode), filenode
    self._node = filenode
    self._uri = wrap_dirnode_cap(filenode.get_cap())
    self._nodemaker = nodemaker
    self._uploader = uploader
def __repr__(self):
    """Debug representation, e.g. <DirectoryNode RO-MUT <abbrev-uri>>."""
    return "<%s %s-%s %s>" % (self.__class__.__name__,
                              self.is_readonly() and "RO" or "RW",
                              self.is_mutable() and "MUT" or "IMM",
                              hasattr(self, '_uri') and self._uri.abbrev())
def get_size(self):
    """Return the size of our backing mutable file, in bytes, if we've
    fetched it. Otherwise return None. This returns synchronously."""
    return self._node.get_size()
def get_current_size(self):
    """Calculate the size of our backing mutable file, in bytes. Returns
    a Deferred that fires with the result."""
    backing = self._node
    return backing.get_current_size()
def _read(self):
    """Fetch and unpack our contents; returns a Deferred firing with the
    children dict."""
    if self._node.is_mutable():
        # use the IMutableFileNode API.
        d = self._node.download_best_version()
    else:
        d = download_to_data(self._node)
    d.addCallback(self._unpack_contents)
    return d
def _decrypt_rwcapdata(self, encwrcap):
    """Decrypt a packed rwcapdata field (salt + ciphertext + mac) into the
    plaintext rw_uri. The trailing 32-byte MAC is not verified (see
    _encrypt_rw_uri)."""
    salt = encwrcap[:16]
    crypttext = encwrcap[16:-32]
    key = hashutil.mutable_rwcap_key_hash(salt, self._node.get_writekey())
    cryptor = AES(key)
    plaintext = cryptor.process(crypttext)
    return plaintext
def _create_and_validate_node(self, rw_uri, ro_uri, name):
    # name is just for error reporting
    node = self._nodemaker.create_from_cap(rw_uri, ro_uri,
                                           deep_immutable=not self.is_mutable(),
                                           name=name)
    node.raise_error()
    return node
def _create_readonly_node(self, node, name):
    # name is just for error reporting
    if not node.is_unknown() and node.is_readonly():
        # already read-only: use it as-is
        return node
    return self._create_and_validate_node(None, node.get_readonly_uri(), name=name)
def _unpack_contents(self, data):
    # the directory is serialized as a list of netstrings, one per child.
    # Each child is serialized as a list of four netstrings: (name, ro_uri,
    # rwcapdata, metadata), in which the name, ro_uri, metadata are in
    # cleartext. The 'name' is UTF-8 encoded, and should be normalized to NFC.
    # The rwcapdata is formatted as:
    #  pack("16ss32s", iv, AES(H(writekey+iv), plaintext_rw_uri), mac)
    assert isinstance(data, str), (repr(data), type(data))
    # an empty directory is serialized as an empty string
    if data == "":
        return AuxValueDict()
    writeable = not self.is_readonly()
    mutable = self.is_mutable()
    children = AuxValueDict()
    position = 0
    while position < len(data):
        entries, position = split_netstring(data, 1, position)
        entry = entries[0]
        (namex_utf8, ro_uri, rwcapdata, metadata_s), subpos = split_netstring(entry, 4)
        if not mutable and len(rwcapdata) > 0:
            raise ValueError("the rwcapdata field of a dirnode in an immutable directory was not empty")

        # A name containing characters that are unassigned in one version of Unicode might
        # not be normalized wrt a later version. See the note in section 'Normalization Stability'
        # at <http://unicode.org/policies/stability_policy.html>.
        # Therefore we normalize names going both in and out of directories.
        name = normalize(namex_utf8.decode("utf-8"))

        rw_uri = ""
        if writeable:
            rw_uri = self._decrypt_rwcapdata(rwcapdata)

        # Since the encryption uses CTR mode, it currently leaks the length of the
        # plaintext rw_uri -- and therefore whether it is present, i.e. whether the
        # dirnode is writeable (ticket #925). By stripping trailing spaces in
        # Tahoe >= 1.6.0, we may make it easier for future versions to plug this leak.
        # ro_uri is treated in the same way for consistency.
        # rw_uri and ro_uri will be either None or a non-empty string.
        rw_uri = rw_uri.rstrip(' ') or None
        ro_uri = ro_uri.rstrip(' ') or None

        try:
            child = self._create_and_validate_node(rw_uri, ro_uri, name)
            if mutable or child.is_allowed_in_immutable_directory():
                metadata = simplejson.loads(metadata_s)
                assert isinstance(metadata, dict)
                children[name] = (child, metadata)
                # also remember the packed entry, so repacking is cheap
                children.set_with_aux(name, (child, metadata), auxilliary=entry)
            else:
                log.msg(format="mutable cap for child %(name)s unpacked from an immutable directory",
                        name=quote_output(name, encoding='utf-8'),
                        facility="tahoe.webish", level=log.UNUSUAL)
        except CapConstraintError as e:
            # skip children whose caps violate this directory's constraints
            log.msg(format="unmet constraint on cap for child %(name)s unpacked from a directory:\n"
                    "%(message)s", message=e.args[0], name=quote_output(name, encoding='utf-8'),
                    facility="tahoe.webish", level=log.UNUSUAL)
    return children
def _pack_contents(self, children):
    # expects children in the same format as _unpack_contents returns
    writekey = self._node.get_writekey()
    return _pack_normalized_children(children, writekey)
def is_readonly(self):
    """True if we lack the write-cap for our backing mutable file."""
    node = self._node
    return node.is_readonly()
def is_mutable(self):
    """True if our backing file is mutable."""
    node = self._node
    return node.is_mutable()
def is_unknown(self):
    """A DirectoryNode is always a known node type, so this is False."""
    return False
def is_allowed_in_immutable_directory(self):
    """Only directories backed by an immutable file may live inside an
    immutable directory."""
    mutable = self._node.is_mutable()
    return not mutable
def raise_error(self):
    # IFilesystemNode API; presumably a no-op because a DirectoryNode never
    # wraps an error placeholder -- TODO confirm against the interface spec
    pass
def get_uri(self):
    """Return our directory URI (the cap we were constructed with) as a string."""
    return self._uri.to_string()
def get_write_uri(self):
    """Return our write URI as a string, or None if we are read-only."""
    if self.is_readonly():
        return None
    return self._uri.to_string()
def get_readonly_uri(self):
    """Return the read-only form of our URI as a string."""
    ro = self._uri.get_readonly()
    return ro.to_string()
def get_readcap(self):
    """Return our read-cap (a cap object, not a string)."""
    cap = self._uri
    return cap.get_readonly()
def get_verify_cap(self):
    """Return the verify-cap derived from our URI."""
    cap = self._uri
    return cap.get_verify_cap()
def get_repair_cap(self):
    """Return a cap sufficient to repair this directory, or None if we
    cannot repair it."""
    if self._node.is_readonly():
        return None # readonly (mutable) dirnodes are not yet repairable
    return self._uri
def get_storage_index(self):
    """Return the storage index of our URI."""
    cap = self._uri
    return cap.get_storage_index()
def check(self, monitor, verify=False, add_lease=False):
    """Perform a file check. See IChecker.check for details."""
    backing = self._node
    return backing.check(monitor, verify, add_lease)
def check_and_repair(self, monitor, verify=False, add_lease=False):
    """Check and, if necessary, repair our backing file."""
    backing = self._node
    return backing.check_and_repair(monitor, verify, add_lease)
def list(self):
    """I return a Deferred that fires with a dictionary mapping child
    name to a tuple of (IFilesystemNode, metadata)."""
    return self._read()
def has_child(self, namex):
    """I return a Deferred that fires with a boolean, True if there
    exists a child of the given name, False if not."""
    name = normalize(namex)
    d = self.list()
    # 'in' instead of the deprecated dict.has_key()
    d.addCallback(lambda children: name in children)
    return d
def _get(self, children, name):
    """Return the node for 'name' from an unpacked children dict, raising
    NoSuchChildError if it is absent."""
    child = children.get(name)
    if child is None:
        raise NoSuchChildError(name)
    return child[0]
def _get_with_metadata(self, children, name):
    """Return the (node, metadata) pair for 'name', raising
    NoSuchChildError if it is absent."""
    child = children.get(name)
    if child is None:
        raise NoSuchChildError(name)
    return child
def get(self, namex):
    """I return a Deferred that fires with the named child node,
    which is an IFilesystemNode."""
    name = normalize(namex)
    d = self.list()
    d.addCallback(self._get, name)
    return d
def get_child_and_metadata(self, namex):
    """I return a Deferred that fires with the (node, metadata) pair for
    the named child. The node is an IFilesystemNode, and the metadata
    is a dictionary."""
    name = normalize(namex)
    d = self.list()
    d.addCallback(self._get_with_metadata, name)
    return d
def get_metadata_for(self, namex):
    """I return a Deferred that fires with the metadata dict for the
    named child; errbacks with KeyError if the child is absent."""
    name = normalize(namex)
    d = self.list()
    d.addCallback(lambda children: children[name][1])
    return d
def set_metadata_for(self, namex, metadata):
    """Replace the named child's metadata. Returns a Deferred that fires
    with this dirnode; errbacks with NotWriteableError if we are
    read-only."""
    name = normalize(namex)
    if self.is_readonly():
        return defer.fail(NotWriteableError())
    assert isinstance(metadata, dict)
    s = MetadataSetter(self, name, metadata,
                       create_readonly_node=self._create_readonly_node)
    d = self._node.modify(s.modify)
    d.addCallback(lambda res: self)
    return d
def get_child_at_path(self, pathx):
    """Transform a child path into an IFilesystemNode.

    I perform a recursive series of 'get' operations to find the named
    descendant node. I return a Deferred that fires with the node, or
    errbacks with IndexError if the node could not be found.

    The path can be either a single string (slash-separated) or a list of
    path-name elements. All elements must be unicode strings.
    """
    d = self.get_child_and_metadata_at_path(pathx)
    # drop the metadata; the caller only wants the node
    # (indexing instead of a py2-only tuple-parameter lambda)
    d.addCallback(lambda node_and_metadata: node_and_metadata[0])
    return d
def get_child_and_metadata_at_path(self, pathx):
    """Transform a child path into an IFilesystemNode and
    a metadata dictionary from the last edge that was traversed.
    """

    if not pathx:
        # an empty path refers to this dirnode itself, with empty metadata
        return defer.succeed((self, {}))
    if isinstance(pathx, (list, tuple)):
        pass
    else:
        pathx = pathx.split("/")
    for p in pathx:
        assert isinstance(p, unicode), p
    childnamex = pathx[0]
    remaining_pathx = pathx[1:]
    if remaining_pathx:
        # recurse into the child directory for the rest of the path
        d = self.get(childnamex)
        d.addCallback(lambda node:
                      node.get_child_and_metadata_at_path(remaining_pathx))
        return d
    d = self.get_child_and_metadata(childnamex)
    return d
def set_uri(self, namex, writecap, readcap, metadata=None, overwrite=True):
    """Attach a child (specified by its caps) at the given name. Returns
    a Deferred that fires with the new child node."""
    precondition(isinstance(writecap, (str,type(None))), writecap)
    precondition(isinstance(readcap, (str,type(None))), readcap)

    # We now allow packing unknown nodes, provided they are valid
    # for this type of directory.
    child_node = self._create_and_validate_node(writecap, readcap, namex)
    d = self.set_node(namex, child_node, metadata, overwrite)
    d.addCallback(lambda res: child_node)
    return d
def set_children(self, entries, overwrite=True):
    """Add multiple children at once; each entry is (writecap, readcap)
    or (writecap, readcap, metadata). Fires with self when done."""
    a = Adder(self, overwrite=overwrite,
              create_readonly_node=self._create_readonly_node)
    for (namex, e) in entries.iteritems():
        assert isinstance(namex, unicode), namex
        if len(e) == 2:
            writecap, readcap = e
            metadata = None
        else:
            assert len(e) == 3
            writecap, readcap, metadata = e
        precondition(isinstance(writecap, (str,type(None))), writecap)
        precondition(isinstance(readcap, (str,type(None))), readcap)

        # We now allow packing unknown nodes, provided they are valid
        # for this type of directory.
        child_node = self._create_and_validate_node(writecap, readcap, namex)
        a.set_node(namex, child_node, metadata)
    d = self._node.modify(a.modify)
    d.addCallback(lambda ign: self)
    return d
def set_node(self, namex, child, metadata=None, overwrite=True):
    """I add a child at the specific name. I return a Deferred that fires
    when the operation finishes. This Deferred will fire with the child
    node that was just added. I will replace any existing child of the
    same name.

    If this directory node is read-only, the Deferred will errback with a
    NotWriteableError."""

    precondition(IFilesystemNode.providedBy(child), child)

    if self.is_readonly():
        return defer.fail(NotWriteableError())
    # (a second, redundant assert of the same precondition was removed)
    a = Adder(self, overwrite=overwrite,
              create_readonly_node=self._create_readonly_node)
    a.set_node(namex, child, metadata)
    d = self._node.modify(a.modify)
    d.addCallback(lambda res: child)
    return d
def set_nodes(self, entries, overwrite=True):
    """Add multiple (node, metadata) children at once; fires with self.
    Errbacks with NotWriteableError if we are read-only."""
    precondition(isinstance(entries, dict), entries)
    if self.is_readonly():
        return defer.fail(NotWriteableError())
    a = Adder(self, entries, overwrite=overwrite,
              create_readonly_node=self._create_readonly_node)
    d = self._node.modify(a.modify)
    d.addCallback(lambda res: self)
    return d
def add_file(self, namex, uploadable, metadata=None, overwrite=True):
    """I upload a file (using the given IUploadable), then attach the
    resulting FileNode to the directory at the given name. I return a
    Deferred that fires (with the IFileNode of the uploaded file) when
    the operation completes."""
    name = normalize(namex)
    if self.is_readonly():
        return defer.fail(NotWriteableError())
    d = self._uploader.upload(uploadable)
    d.addCallback(lambda results:
                  self._create_and_validate_node(results.get_uri(), None,
                                                 name=name))
    d.addCallback(lambda node:
                  self.set_node(name, node, metadata, overwrite))
    return d
def delete(self, namex, must_exist=True, must_be_directory=False, must_be_file=False):
    """I remove the child at the specific name. I return a Deferred that
    fires (with the node just removed) when the operation finishes."""
    if self.is_readonly():
        return defer.fail(NotWriteableError())
    deleter = Deleter(self, namex, must_exist=must_exist,
                      must_be_directory=must_be_directory, must_be_file=must_be_file)
    d = self._node.modify(deleter.modify)
    d.addCallback(lambda res: deleter.old_child)
    return d
# XXX: Too many arguments? Worthwhile to break into mutable/immutable?
def create_subdirectory(self, namex, initial_children={}, overwrite=True,
                        mutable=True, mutable_version=None, metadata=None):
    """Create a new (mutable or immutable) directory and attach it at the
    given name. Fires with the new child dirnode. NOTE: the {} default is
    safe here because initial_children is never mutated."""
    name = normalize(namex)
    if self.is_readonly():
        return defer.fail(NotWriteableError())
    if mutable:
        if mutable_version:
            d = self._nodemaker.create_new_mutable_directory(initial_children,
                                                             version=mutable_version)
        else:
            d = self._nodemaker.create_new_mutable_directory(initial_children)
    else:
        # mutable version doesn't make sense for immmutable directories.
        assert mutable_version is None
        d = self._nodemaker.create_immutable_directory(initial_children)
    def _created(child):
        entries = {name: (child, metadata)}
        a = Adder(self, entries, overwrite=overwrite,
                  create_readonly_node=self._create_readonly_node)
        d2 = self._node.modify(a.modify)
        d2.addCallback(lambda res: child)
        return d2
    d.addCallback(_created)
    return d
def move_child_to(self, current_child_namex, new_parent,
                  new_child_namex=None, overwrite=True):
    """
    I take one of my child links and move it to a new parent. The child
    link is referenced by name. In the new parent, the child link will live
    at 'new_child_namex', which defaults to 'current_child_namex'. I return
    a Deferred that fires when the operation finishes.
    'new_child_namex' and 'current_child_namex' need not be normalized.

    The overwrite parameter may be True (overwrite any existing child),
    False (error if the new child link already exists), or "only-files"
    (error if the new child link exists and points to a directory).
    """
    if self.is_readonly() or new_parent.is_readonly():
        return defer.fail(NotWriteableError())

    current_child_name = normalize(current_child_namex)
    if new_child_namex is None:
        new_child_name = current_child_name
    else:
        new_child_name = normalize(new_child_namex)

    from_uri = self.get_write_uri()
    if new_parent.get_write_uri() == from_uri and new_child_name == current_child_name:
        # needed for correctness, otherwise we would delete the child
        return defer.succeed("redundant rename/relink")

    d = self.get_child_and_metadata(current_child_name)
    def _got_child(child_and_metadata):
        # (indexing instead of a py2-only tuple parameter)
        child, metadata = child_and_metadata
        return new_parent.set_node(new_child_name, child, metadata,
                                   overwrite=overwrite)
    d.addCallback(_got_child)
    d.addCallback(lambda child: self.delete(current_child_name))
    return d
def deep_traverse(self, walker):
    """Perform a recursive walk, using this dirnode as a root, notifying
    the 'walker' instance of everything I encounter.

    I call walker.enter_directory(parent, children) once for each dirnode
    I visit, immediately after retrieving the list of children. I pass in
    the parent dirnode and the dict of childname->(childnode,metadata).
    This function should *not* traverse the children: I will do that.
    enter_directory() is most useful for the deep-stats number that
    counts how large a directory is.

    I call walker.add_node(node, path) for each node (both files and
    directories) I can reach. Most work should be done here.

    I avoid loops by keeping track of verifier-caps and refusing to call
    walker.add_node() or traverse a node that I've seen before. This
    means that any file or directory will only be given to the walker
    once. If files or directories are referenced multiple times by a
    directory structure, this may appear to under-count or miss some of
    them.

    I return a Monitor which can be used to wait for the operation to
    finish, learn about its progress, or cancel the operation.
    """

    # this is just a tree-walker, except that following each edge
    # requires a Deferred. We used to use a ConcurrencyLimiter to limit
    # fanout to 10 simultaneous operations, but the memory load of the
    # queued operations was excessive (in one case, with 330k dirnodes,
    # it caused the process to run into the 3.0GB-ish per-process 32bit
    # linux memory limit, and crashed). So we use a single big Deferred
    # chain, and do a strict depth-first traversal, one node at a time.
    # This can be slower, because we aren't pipelining directory reads,
    # but it brought the memory footprint down by roughly 50%.
    monitor = Monitor()
    walker.set_monitor(monitor)

    found = set([self.get_verify_cap()])
    d = self._deep_traverse_dirnode(self, [], walker, monitor, found)
    d.addCallback(lambda ignored: walker.finish())
    d.addBoth(monitor.finish)
    # swallow the error after monitor.finish has recorded it
    d.addErrback(lambda f: None)

    return monitor
def _deep_traverse_dirnode(self, node, path, walker, monitor, found):
    # process this directory, then walk its children
    monitor.raise_if_cancelled()
    d = defer.maybeDeferred(walker.add_node, node, path)
    d.addCallback(lambda ignored: node.list())
    d.addCallback(self._deep_traverse_dirnode_children, node, path,
                  walker, monitor, found)
    return d
def _deep_traverse_dirnode_children(self, children, parent, path,
                                    walker, monitor, found):
    monitor.raise_if_cancelled()
    d = defer.maybeDeferred(walker.enter_directory, parent, children)
    # we process file-like children first, so we can drop their FileNode
    # objects as quickly as possible. Tests suggest that a FileNode (held
    # in the client's nodecache) consumes about 2440 bytes. dirnodes (not
    # in the nodecache) seem to consume about 2000 bytes.
    dirkids = []
    filekids = []
    for name, (child, metadata) in sorted(children.iteritems()):
        childpath = path + [name]
        if isinstance(child, UnknownNode):
            walker.add_node(child, childpath)
            continue
        verifier = child.get_verify_cap()
        # allow LIT files (for which verifier==None) to be processed
        if (verifier is not None) and (verifier in found):
            continue
        found.add(verifier)
        if IDirectoryNode.providedBy(child):
            dirkids.append( (child, childpath) )
        else:
            filekids.append( (child, childpath) )
    for i, (child, childpath) in enumerate(filekids):
        d.addCallback(lambda ignored, child=child, childpath=childpath:
                      walker.add_node(child, childpath))
        # to work around the Deferred tail-recursion problem
        # (specifically the defer.succeed flavor) requires us to avoid
        # doing more than 158 LIT files in a row. We insert a turn break
        # once every 100 files (LIT or CHK) to preserve some stack space
        # for other code. This is a different expression of the same
        # Twisted problem as in #237.
        if i % 100 == 99:
            d.addCallback(lambda ignored: fireEventually())
    for (child, childpath) in dirkids:
        d.addCallback(lambda ignored, child=child, childpath=childpath:
                      self._deep_traverse_dirnode(child, childpath,
                                                  walker, monitor,
                                                  found))
    return d
def build_manifest(self):
    """Return a Monitor, with a ['status'] that will be a list of (path,
    cap) tuples, for all nodes (directories and files) reachable from
    this one."""
    walker = ManifestWalker(self)
    return self.deep_traverse(walker)
def start_deep_stats(self):
    # Since deep_traverse tracks verifier caps, we avoid double-counting
    # children for which we've got both a write-cap and a read-cap
    collector = DeepStats(self)
    return self.deep_traverse(collector)
def start_deep_check(self, verify=False, add_lease=False):
    """Start a recursive check (without repair) of everything reachable
    from this dirnode."""
    checker = DeepChecker(self, verify, repair=False, add_lease=add_lease)
    return self.deep_traverse(checker)
def start_deep_check_and_repair(self, verify=False, add_lease=False):
    """Start a recursive check-and-repair of everything reachable from
    this dirnode."""
    checker = DeepChecker(self, verify, repair=True, add_lease=add_lease)
    return self.deep_traverse(checker)
def __init__(self, origin):
    """Accumulate statistics for a deep-traversal rooted at 'origin'."""
    self.origin = origin
    self.stats = {}  # counter name -> int
    for k in ["count-immutable-files",
              "count-mutable-files",
              "count-literal-files",
              "count-files",
              "count-directories",
              "count-unknown",
              "size-immutable-files",
              #"size-mutable-files",
              "size-literal-files",
              "size-directories",
              "largest-directory",
              "largest-directory-children",
              "largest-immutable-file",
              #"largest-mutable-file",
              ]:
        self.stats[k] = 0
    self.histograms = {}
    for k in ["size-files-histogram"]:
        self.histograms[k] = {} # maps (min,max) to count
    self.buckets = [ (0,0), (1,3)]
    self.root = math.sqrt(10)  # two histogram buckets per decade
def set_monitor(self, monitor):
    """Attach the Monitor that reports our progress and results."""
    self.monitor = monitor
    monitor.origin_si = self.origin.get_storage_index()
    monitor.set_status(self.get_results())
def add_node(self, node, childpath):
    """Classify one traversed node and update the counters/histograms."""
    if isinstance(node, UnknownNode):
        self.add("count-unknown")
    elif IDirectoryNode.providedBy(node):
        self.add("count-directories")
    elif IMutableFileNode.providedBy(node):
        self.add("count-files")
        self.add("count-mutable-files")
        # TODO: update the servermap, compute a size, add it to
        # size-mutable-files, max it into "largest-mutable-file"
    elif IImmutableFileNode.providedBy(node): # CHK and LIT
        self.add("count-files")
        size = node.get_size()
        self.histogram("size-files-histogram", size)
        theuri = from_string(node.get_uri())
        if isinstance(theuri, LiteralFileURI):
            self.add("count-literal-files")
            self.add("size-literal-files", size)
        else:
            self.add("count-immutable-files")
            self.add("size-immutable-files", size)
            self.max("largest-immutable-file", size)
def enter_directory(self, parent, children):
    """Record size statistics for one directory and its child count."""
    dirsize_bytes = parent.get_size()
    if dirsize_bytes is not None:
        self.add("size-directories", dirsize_bytes)
        self.max("largest-directory", dirsize_bytes)
    self.max("largest-directory-children", len(children))
def add(self, key, value=1):
    """Increment the named counter by 'value' (default 1)."""
    self.stats[key] = self.stats[key] + value
def max(self, key, value):
    """Raise the named stat to 'value' if it exceeds the current value."""
    if value > self.stats[key]:
        self.stats[key] = value
def which_bucket(self, size):
    # return (min,max) such that min <= size <= max
    # values are from the set (0,0), (1,3), (4,10), (11,31), (32,100),
    # (101,316), (317, 1000), etc: two per decade
    i = 0
    while True:
        if i >= len(self.buckets):
            # extend the bucket list on demand: next bucket starts just
            # past the previous one and ends at the next power of root
            new_lower = self.buckets[i-1][1]+1
            new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
            self.buckets.append( (new_lower, new_upper) )
        maybe = self.buckets[i]
        if maybe[0] <= size <= maybe[1]:
            return maybe
        i += 1
def histogram(self, key, size):
    """Count 'size' in the named histogram's (min,max) bucket."""
    bucket = self.which_bucket(size)
    h = self.histograms[key]
    if bucket in h:
        h[bucket] += 1
    else:
        h[bucket] = 1
def get_results(self):
    """Return a copy of the stats dict, with each histogram flattened
    into a sorted list of (min, max, count) tuples."""
    stats = self.stats.copy()
    for key in self.histograms:
        h = self.histograms[key]
        out = [ (bucket[0], bucket[1], h[bucket]) for bucket in h ]
        out.sort()
        stats[key] = out
    return stats

def finish(self):
    """Deep-traversal walker API: our final status is just the stats."""
    return self.get_results()
class ManifestWalker(DeepStats):
    """DeepStats subclass that also collects a (path, cap) manifest plus
    the sets of storage-index strings and verify-caps encountered."""
    def __init__(self, origin):
        DeepStats.__init__(self, origin)
        self.manifest = []
        self.storage_index_strings = set()
        self.verifycaps = set()

    def add_node(self, node, path):
        self.manifest.append( (tuple(path), node.get_uri()) )
        si = node.get_storage_index()
        if si:
            self.storage_index_strings.add(base32.b2a(si))
        v = node.get_verify_cap()
        if v:
            self.verifycaps.add(v.to_string())
        return DeepStats.add_node(self, node, path)

    def get_results(self):
        stats = DeepStats.get_results(self)
        return {"manifest": self.manifest,
                "verifycaps": self.verifycaps,
                "storage-index": self.storage_index_strings,
                "stats": stats,
                }
def __init__(self, root, verify, repair, add_lease):
    """Walker that checks (and optionally repairs) every node reachable
    from 'root'."""
    root_si = root.get_storage_index()
    if root_si:
        root_si_base32 = base32.b2a(root_si)
    else:
        # LIT caps have no storage index -- TODO confirm the placeholder
        root_si_base32 = "<LIT>"
    self._lp = log.msg(format="deep-check starting (%(si)s),"
                       " verify=%(verify)s, repair=%(repair)s",
                       si=root_si_base32, verify=verify, repair=repair)
    self._verify = verify
    self._repair = repair
    self._add_lease = add_lease
    if repair:
        self._results = DeepCheckAndRepairResults(root_si)
    else:
        self._results = DeepCheckResults(root_si)
    self._stats = DeepStats(root)
def set_monitor(self, monitor):
    """Attach the Monitor and publish our results object as its status."""
    self.monitor = monitor
    monitor.set_status(self._results)
def add_node(self, node, childpath):
    """Check (or check-and-repair) one node, record the outcome, and
    update the deep-stats. Returns a Deferred."""
    if self._repair:
        d = node.check_and_repair(self.monitor, self._verify, self._add_lease)
        d.addCallback(self._results.add_check_and_repair, childpath)
    else:
        d = node.check(self.monitor, self._verify, self._add_lease)
        d.addCallback(self._results.add_check, childpath)
    d.addCallback(lambda ignored: self._stats.add_node(node, childpath))
    return d
def enter_directory(self, parent, children):
    """Delegate per-directory accounting to the stats collector."""
    return self._stats.enter_directory(parent, children)

def finish(self):
    """Log completion, fold the stats into the results, and return them."""
    log.msg("deep-check done", parent=self._lp)
    self._results.update_stats(self._stats.get_results())
    return self._results
970 # use client.create_dirnode() to make one of these