expansion or encoding overhead into account. Later versions of the code may
improve this estimate upwards.
+GET $URL?t=deep-stats
+
+ Return a JSON-encoded dictionary that lists interesting statistics about
+ the set of all files and directories reachable from the given directory:
+
+ count-immutable-files: count of how many CHK files are in the set
+ count-mutable-files: same, for mutable files (does not include directories)
+ count-literal-files: same, for LIT files (data contained inside the URI)
+ count-files: sum of the above three
+ count-directories: count of directories
+ size-immutable-files: total bytes for all CHK files in the set (the same
+ value that t=deep-size reports)
+ size-mutable-files (TODO): same, for current version of all mutable files
+ size-literal-files: same, for LIT files
+ size-directories: size of the mutable files used by directories (this
+ includes size-literal-files, since LIT data is stored inside child URIs)
+ largest-directory: number of bytes in the largest directory
+ largest-directory-children: number of children in the largest directory
+ largest-immutable-file: number of bytes in the largest CHK file
+
+ size-mutable-files is not implemented, because it would require an extra
+ query to each mutable file to learn its size. This may be implemented in
+ the future.
+
+ Assuming no sharing, the basic space consumed by everything reachable from
+ a single root directory is the sum of size-immutable-files,
+ size-mutable-files, and size-directories (see the example below). The
+ actual disk space used by the shares is larger, because of the following
+ sources of overhead:
+
+ integrity data
+ expansion due to erasure coding
+ share management data (leases)
+ backend (ext3) minimum block size
+
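+ As an illustrative sketch (it assumes a node listening on the usual
+ http://127.0.0.1:8123 port, and $DIRCAP is a placeholder for the
+ directory's read- or write-cap), a client could fetch these statistics
+ and compute the basic space consumed like this:
+
+   import urllib, simplejson
+
+   url = "http://127.0.0.1:8123/uri/$DIRCAP?t=deep-stats"
+   stats = simplejson.loads(urllib.urlopen(url).read())
+   # size-mutable-files is not reported yet, so only the other two terms
+   # are available to sum
+   basic = stats["size-immutable-files"] + stats["size-directories"]
+   print "basic space:", basic, "bytes"
+   print "largest CHK file:", stats["largest-immutable-file"], "bytes"
+
+ Remember that erasure-coding expansion and the other overheads listed
+ above are not included in these numbers.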
+
6. XMLRPC (coming soon)
http://127.0.0.1:8123/xmlrpc
def __init__(self, client):
self._client = client
+ self._most_recent_size = None
+
def __repr__(self):
return "<%s %s %s>" % (self.__class__.__name__, self.is_readonly() and "RO" or "RW", hasattr(self, '_uri') and self._uri.abbrev())
def init_from_uri(self, myuri):
self._uri = NewDirectoryURI(IMutableFileURI(self._node.get_uri()))
return self
+ def get_size(self):
+ # return the size of our backing mutable file, in bytes, if we've
+ # fetched it.
+ return self._most_recent_size
+
+ def _set_size(self, data):
+ self._most_recent_size = len(data)
+ return data
+
def _read(self):
d = self._node.download_best_version()
+ d.addCallback(self._set_size)
d.addCallback(self._unpack_contents)
return d
d.addCallback(_got_list)
return d
+ def deep_stats(self):
+ stats = dict([ (k,0) for k in ["count-immutable-files",
+ "count-mutable-files",
+ "count-literal-files",
+ "count-files",
+ "count-directories",
+ "size-immutable-files",
+ #"size-mutable-files",
+ "size-literal-files",
+ "size-directories",
+ "largest-directory",
+ "largest-directory-children",
+ "largest-immutable-file",
+ #"largest-mutable-file",
+ ]])
+ # we track verifier caps, to avoid double-counting children for which
+ # we've got both a write-cap and a read-cap
+ found = set()
+ found.add(self.get_verifier())
+
+ limiter = ConcurrencyLimiter(10)
+
+ d = self._add_deepstats_from_node(self, found, stats, limiter)
+ d.addCallback(lambda res: stats)
+ return d
+
+ def _add_deepstats_from_node(self, node, found, stats, limiter):
+ # list the children of this directory (through the limiter, to bound
+ # the number of simultaneous queries), accumulate its contribution to
+ # 'stats', and recurse into any children we have not seen before
+ d = limiter.add(node.list)
+ def _got_list(children):
+ dl = []
+ dirsize_bytes = node.get_size()
+ dirsize_children = len(children)
+ stats["count-directories"] += 1
+ stats["size-directories"] += dirsize_bytes
+ stats["largest-directory"] = max(stats["largest-directory"],
+ dirsize_bytes)
+ stats["largest-directory-children"] = max(stats["largest-directory-children"],
+ dirsize_children)
+ for name, (child, metadata) in children.iteritems():
+ verifier = child.get_verifier()
+ if verifier in found:
+ continue
+ found.add(verifier)
+ if IDirectoryNode.providedBy(child):
+ dl.append(self._add_deepstats_from_node(child, found,
+ stats, limiter))
+ elif IMutableFileNode.providedBy(child):
+ stats["count-files"] += 1
+ stats["count-mutable-files"] += 1
+ # TODO: update the servermap, compute a size, add it to
+ # stats["size-mutable-files"], max it into
+ # stats["largest-mutable-file"]
+ elif IFileNode.providedBy(child): # CHK and LIT
+ stats["count-files"] += 1
+ size = child.get_size()
+ if child.get_uri().startswith("URI:LIT:"):
+ stats["count-literal-files"] += 1
+ stats["size-literal-files"] += size
+ else:
+ stats["count-immutable-files"] += 1
+ stats["size-immutable-files"] += size
+ stats["largest-immutable-file"] = max(
+ stats["largest-immutable-file"], size)
+ if dl:
+ return defer.DeferredList(dl)
+ d.addCallback(_got_list)
+ return d
+
+
+
# use client.create_dirnode() to make one of these
operation finishes. The child name must be a unicode string."""
def build_manifest():
- """Return a frozenset of verifier-capability strings for all nodes
- (directories and files) reachable from this one."""
+ """Return a Deferred that fires with a frozenset of
+ verifier-capability strings for all nodes (directories and files)
+ reachable from this one."""
+
+ def deep_stats():
+ """Return a Deferred that fires with a dictionary of statistics
+ computed by examining all nodes (directories and files) reachable
+ from this one, with the following keys::
+
+ count-immutable-files: count of how many CHK files are in the set
+ count-mutable-files: same, for mutable files (does not include
+ directories)
+ count-literal-files: same, for LIT files
+ count-files: sum of the above three
+
+ count-directories: count of directories
+
+ size-immutable-files: total bytes for all CHK files in the set
+ size-mutable-files (TODO): same, for current version of all mutable
+ files (does not include directories)
+ size-literal-files: same, for LIT files
+ size-directories: size of mutable files used by directories
+
+ largest-directory: number of bytes in the largest directory
+ largest-directory-children: number of children in the largest
+ directory
+ largest-immutable-file: number of bytes in the largest CHK file
+
+ size-mutable-files is not yet implemented, because computing it would
+ require an additional query to each mutable file, on top of the queries
+ deep_stats already performs.
+
+ This operation will visit every directory node underneath this one,
+ and can take a long time to run. On a typical workstation with good
+ bandwidth, this can examine roughly 15 directories per second (and
+ takes several minutes of 100% CPU for ~1700 directories).
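+
+ For example, a caller holding a directory node might gather and report
+ these statistics as follows (an illustrative sketch only; 'rootnode'
+ stands for any object that provides IDirectoryNode)::
+
+   def _display(stats):
+       print "files:", stats["count-files"]
+       print "directories:", stats["count-directories"]
+       print "immutable bytes:", stats["size-immutable-files"]
+   d = rootnode.deep_stats()
+   d.addCallback(_display)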
+ """
class ICodecEncoder(Interface):
def set_params(data_size, required_shares, max_shares):
sorted(self.expected_manifest))
d.addCallback(_check_manifest)
+ d.addCallback(lambda res: n.deep_stats())
+ def _check_deepstats(stats):
+ self.failUnless(isinstance(stats, dict))
+ expected = {"count-immutable-files": 0,
+ "count-mutable-files": 1,
+ "count-literal-files": 0,
+ "count-files": 1,
+ "count-directories": 2,
+ "size-immutable-files": 0,
+ "size-literal-files": 0,
+ #"size-directories": 616, # varies
+ #"largest-directory": 616,
+ "largest-directory-children": 2,
+ "largest-immutable-file": 0,
+ }
+ for k,v in expected.iteritems():
+ self.failUnlessEqual(stats[k], v,
+ "stats[%s] was %s, not %s" %
+ (k, stats[k], v))
+ self.failUnless(stats["size-directories"] > 600)
+ self.failUnless(stats["largest-directory"] > 600)
+ d.addCallback(_check_deepstats)
+
def _add_subsubdir(res):
return self.subdir.create_empty_directory(u"subsubdir")
d.addCallback(_add_subsubdir)
d.addCallback(_got)
return d
+ def test_GET_DIRURL_deepstats(self):
+ d = self.GET(self.public_url + "/foo?t=deep-stats", followRedirect=True)
+ def _got(stats_json):
+ stats = simplejson.loads(stats_json)
+ expected = {"count-immutable-files": 3,
+ "count-mutable-files": 0,
+ "count-literal-files": 0,
+ "count-files": 3,
+ "count-directories": 3,
+ "size-immutable-files": 57,
+ "size-literal-files": 0,
+ #"size-directories": 1912, # varies
+ #"largest-directory": 1590,
+ "largest-directory-children": 5,
+ "largest-immutable-file": 19,
+ }
+ for k,v in expected.iteritems():
+ self.failUnlessEqual(stats[k], v,
+ "stats[%s] was %s, not %s" %
+ (k, stats[k], v))
+ d.addCallback(_got)
+ return d
+
def test_GET_DIRURL_uri(self):
d = self.GET(self.public_url + "/foo?t=uri")
def _check(res):
d.addCallback(_measure_size)
return d
+class DeepStats(rend.Page):
+
+ def __init__(self, dirnode, dirpath):
+ self._dirnode = dirnode
+ self._dirpath = dirpath
+
+ def renderHTTP(self, ctx):
+ inevow.IRequest(ctx).setHeader("content-type", "text/plain")
+ d = self._dirnode.deep_stats()
+ d.addCallback(simplejson.dumps, indent=1)
+ return d
+
class ChildError:
implements(inevow.IResource)
def renderHTTP(self, ctx):
return Manifest(node, path), ()
elif t == "deep-size":
return DeepSize(node, path), ()
+ elif t == "deep-stats":
+ return DeepStats(node, path), ()
elif t == 'rename-form':
return RenameForm(self.name, node, path), ()
else: