From 5a60086dbc24fdcdf70d0cd0bf25ff4408a8f4ee Mon Sep 17 00:00:00 2001 From: Brian Warner Date: Thu, 13 Nov 2008 19:43:50 -0700 Subject: [PATCH] CLI: add 'tahoe stats', to run start-deep-stats and print the results --- docs/CLI.txt | 6 +++ src/allmydata/scripts/cli.py | 16 ++++++++ src/allmydata/scripts/tahoe_manifest.py | 51 +++++++++++++++++++++++-- src/allmydata/test/test_system.py | 18 +++++++++ 4 files changed, 87 insertions(+), 4 deletions(-) diff --git a/docs/CLI.txt b/docs/CLI.txt index 5a8a6815..93ab4f87 100644 --- a/docs/CLI.txt +++ b/docs/CLI.txt @@ -341,6 +341,12 @@ tahoe manifest --storage-index tahoe: storage index. This (string) value is useful to determine which share files (on the server) are associated with this directory tree. +tahoe stats tahoe: + + This performs a recursive walk of the given directory, visiting every file + and directory that can be reached from that point. It gathers statistics on + the sizes of the objects it encounters, and prints a summary to stdout. + == Debugging == diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index c108c4cd..ee3395b1 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -209,6 +209,15 @@ class ManifestOptions(VDriveOptions): longdesc = """Print a list of all files/directories reachable from the given starting point.""" +class StatsOptions(VDriveOptions): + def parseArgs(self, where=''): + self.where = where + + def getSynopsis(self): + return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) + + longdesc = """Print statistics about of all files/directories reachable from the given starting point.""" + subCommands = [ ["mkdir", None, MakeDirectoryOptions, "Create a new directory"], ["add-alias", None, AddAliasOptions, "Add a new alias cap"], @@ -223,6 +232,7 @@ subCommands = [ ["ln", None, LnOptions, "Make an additional link to an existing file."], ["webopen", None, WebopenOptions, "Open a webbrowser to the root_dir"], ["manifest", None, ManifestOptions, "List all files/dirs in a subtree"], + ["stats", None, StatsOptions, "Print statistics about all files/dirs in a subtree"], ] def mkdir(options): @@ -299,6 +309,11 @@ def manifest(options): rc = tahoe_manifest.manifest(options) return rc +def stats(options): + from allmydata.scripts import tahoe_manifest + rc = tahoe_manifest.stats(options) + return rc + dispatch = { "mkdir": mkdir, "add-alias": add_alias, @@ -313,5 +328,6 @@ dispatch = { "ln": ln, "webopen": webopen, "manifest": manifest, + "stats": stats, } diff --git a/src/allmydata/scripts/tahoe_manifest.py b/src/allmydata/scripts/tahoe_manifest.py index 8428cfce..7772a80d 100644 --- a/src/allmydata/scripts/tahoe_manifest.py +++ b/src/allmydata/scripts/tahoe_manifest.py @@ -7,7 +7,8 @@ from allmydata import uri import urllib import simplejson -class ManifestGrabber: +class SlowOperationRunner: + def run(self, options): stderr = options.stderr self.options = options @@ -24,7 +25,7 @@ class ManifestGrabber: if path: url += "/" + escape_path(path) # todo: should it end with a slash? - url += "?t=start-manifest&ophandle=" + ophandle + url += "?t=%s&ophandle=%s" % (self.operation, ophandle) resp = do_http("POST", url) if resp.status not in (200, 302): print >>stderr, "ERROR", resp.status, resp.reason, resp.read() @@ -68,6 +69,9 @@ class ManifestGrabber: self.write_results(data) return True +class ManifestGrabber(SlowOperationRunner): + operation = "start-manifest" + def write_results(self, data): stdout = self.options.stdout stderr = self.options.stderr @@ -85,7 +89,46 @@ class ManifestGrabber: print >>stdout, cap, "/".join([p.encode("utf-8") for p in path]) - - def manifest(options): return ManifestGrabber().run(options) + +class StatsGrabber(SlowOperationRunner): + operation = "start-deep-stats" + + def write_results(self, data): + stdout = self.options.stdout + stderr = self.options.stderr + keys = ("count-immutable-files", + "count-mutable-files", + "count-literal-files", + "count-files", + "count-directories", + "size-immutable-files", + "size-mutable-files", + "size-literal-files", + "size-directories", + "largest-directory", + "largest-immutable-files", + ) + width = max([len(k) for k in keys]) + print >>stdout, "Counts and Total Sizes:" + for k in keys: + fmt = "%" + str(width) + "s: %d" + if k in data: + print >>stdout, fmt % (k, data[k]) + print >>stdout, "Size Histogram:" + prevmax = None + maxlen = max([len(str(maxsize)) + for (minsize, maxsize, count) + in data["size-files-histogram"]]) + minfmt = "%" + str(maxlen) + "d" + maxfmt = "%-" + str(maxlen) + "d" + linefmt = minfmt + "-" + maxfmt + " : %d" + for (minsize, maxsize, count) in data["size-files-histogram"]: + if prevmax is not None and minsize != prevmax+1: + print >>stdout, " "*(maxlen-1) + "..." + prevmax = maxsize + print >>stdout, linefmt % (minsize, maxsize, count) + +def stats(options): + return StatsGrabber().run(options) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index c631532d..e933629c 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -2393,6 +2393,24 @@ class DeepCheckWebGood(DeepCheckBase, unittest.TestCase): self.failUnless(base32.b2a(self.mutable.get_storage_index()) in lines) self.failUnless(base32.b2a(self.large.get_storage_index()) in lines) d.addCallback(_check2) + + d.addCallback(lambda res: + self._run_cli(["stats", + "--node-directory", basedir, + self.root_uri])) + def _check3((out,err)): + lines = [l.strip() for l in out.split("\n") if l] + self.failUnless("count-immutable-files: 1" in lines) + self.failUnless("count-mutable-files: 1" in lines) + self.failUnless("count-literal-files: 1" in lines) + self.failUnless("count-files: 3" in lines) + self.failUnless("count-directories: 1" in lines) + self.failUnless("size-immutable-files: 13000" in lines) + self.failUnless("size-literal-files: 22" in lines) + self.failUnless(" 11-31 : 1".strip() in lines) + self.failUnless("10001-31622 : 1".strip() in lines) + d.addCallback(_check3) + return d -- 2.37.2