From 5a60086dbc24fdcdf70d0cd0bf25ff4408a8f4ee Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@allmydata.com>
Date: Thu, 13 Nov 2008 19:43:50 -0700
Subject: [PATCH] CLI: add 'tahoe stats', to run start-deep-stats and print the
 results

---
 docs/CLI.txt                            |  6 +++
 src/allmydata/scripts/cli.py            | 16 ++++++++
 src/allmydata/scripts/tahoe_manifest.py | 51 +++++++++++++++++++++++--
 src/allmydata/test/test_system.py       | 18 +++++++++
 4 files changed, 87 insertions(+), 4 deletions(-)

diff --git a/docs/CLI.txt b/docs/CLI.txt
index 5a8a6815..93ab4f87 100644
--- a/docs/CLI.txt
+++ b/docs/CLI.txt
@@ -341,6 +341,12 @@ tahoe manifest --storage-index tahoe:
  storage index. This (string) value is useful to determine which share files
  (on the server) are associated with this directory tree.
 
+tahoe stats tahoe:
+
+ This performs a recursive walk of the given directory, visiting every file
+ and directory that can be reached from that point. It gathers statistics on
+ the sizes of the objects it encounters, and prints a summary to stdout.
+
 
 == Debugging ==
 
diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py
index c108c4cd..ee3395b1 100644
--- a/src/allmydata/scripts/cli.py
+++ b/src/allmydata/scripts/cli.py
@@ -209,6 +209,15 @@ class ManifestOptions(VDriveOptions):
 
     longdesc = """Print a list of all files/directories reachable from the given starting point."""
 
+class StatsOptions(VDriveOptions):
+    def parseArgs(self, where=''):
+        self.where = where  # optional ALIAS:PATH argument; defaults to ''
+
+    def getSynopsis(self):
+        return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
+
+    longdesc = """Print statistics about all files/directories reachable from the given starting point."""
+
 subCommands = [
     ["mkdir", None, MakeDirectoryOptions, "Create a new directory"],
     ["add-alias", None, AddAliasOptions, "Add a new alias cap"],
@@ -223,6 +232,7 @@ subCommands = [
     ["ln", None, LnOptions, "Make an additional link to an existing file."],
     ["webopen", None, WebopenOptions, "Open a webbrowser to the root_dir"],
     ["manifest", None, ManifestOptions, "List all files/dirs in a subtree"],
+    ["stats", None, StatsOptions, "Print statistics about all files/dirs in a subtree"],
     ]
 
 def mkdir(options):
@@ -299,6 +309,11 @@ def manifest(options):
     rc = tahoe_manifest.manifest(options)
     return rc
 
+def stats(options):
+    from allmydata.scripts import tahoe_manifest  # imported only when the command runs
+    rc = tahoe_manifest.stats(options)
+    return rc
+
 dispatch = {
     "mkdir": mkdir,
     "add-alias": add_alias,
@@ -313,5 +328,6 @@ dispatch = {
     "ln": ln,
     "webopen": webopen,
     "manifest": manifest,
+    "stats": stats,
     }
 
diff --git a/src/allmydata/scripts/tahoe_manifest.py b/src/allmydata/scripts/tahoe_manifest.py
index 8428cfce..7772a80d 100644
--- a/src/allmydata/scripts/tahoe_manifest.py
+++ b/src/allmydata/scripts/tahoe_manifest.py
@@ -7,7 +7,8 @@ from allmydata import uri
 import urllib
 import simplejson
 
-class ManifestGrabber:
+class SlowOperationRunner:
+
     def run(self, options):
         stderr = options.stderr
         self.options = options
@@ -24,7 +25,7 @@ class ManifestGrabber:
         if path:
             url += "/" + escape_path(path)
         # todo: should it end with a slash?
-        url += "?t=start-manifest&ophandle=" + ophandle
+        url += "?t=%s&ophandle=%s" % (self.operation, ophandle)
         resp = do_http("POST", url)
         if resp.status not in (200, 302):
             print >>stderr, "ERROR", resp.status, resp.reason, resp.read()
@@ -68,6 +69,9 @@ class ManifestGrabber:
         self.write_results(data)
         return True
 
+class ManifestGrabber(SlowOperationRunner):
+    operation = "start-manifest"
+
     def write_results(self, data):
         stdout = self.options.stdout
         stderr = self.options.stderr
@@ -85,7 +89,46 @@ class ManifestGrabber:
                     print >>stdout, cap, "/".join([p.encode("utf-8")
                                                    for p in path])
 
-
-
 def manifest(options):
     return ManifestGrabber().run(options)
+
+class StatsGrabber(SlowOperationRunner):
+    # Runs the "start-deep-stats" operation and prints its results to stdout.
+    operation = "start-deep-stats"
+
+    def write_results(self, data):
+        """Print the counts/sizes found in 'data', then the file-size histogram."""
+        stdout = self.options.stdout
+        keys = ("count-immutable-files",
+                "count-mutable-files",
+                "count-literal-files",
+                "count-files",
+                "count-directories",
+                "size-immutable-files",
+                "size-mutable-files",
+                "size-literal-files",
+                "size-directories",
+                "largest-directory",
+                "largest-immutable-file",
+                )
+        width = max([len(k) for k in keys])
+        fmt = "%" + str(width) + "s: %d"
+        print >>stdout, "Counts and Total Sizes:"
+        for k in keys:
+            if k in data:
+                print >>stdout, fmt % (k, data[k])
+        print >>stdout, "Size Histogram:"
+        histogram = data["size-files-histogram"]
+        maxlen = max([len(str(maxsize))
+                      for (minsize, maxsize, count) in histogram] or [0])
+        linefmt = "%" + str(maxlen) + "d-%-" + str(maxlen) + "d : %d"
+        prevmax = None
+        for (minsize, maxsize, count) in histogram:
+            # mark a gap where one or more buckets are missing from the list
+            if prevmax is not None and minsize != prevmax+1:
+                print >>stdout, " "*(maxlen-1) + "..."
+            prevmax = maxsize
+            print >>stdout, linefmt % (minsize, maxsize, count)
+
+def stats(options):
+    return StatsGrabber().run(options)  # called by the 'tahoe stats' CLI command
diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py
index c631532d..e933629c 100644
--- a/src/allmydata/test/test_system.py
+++ b/src/allmydata/test/test_system.py
@@ -2393,6 +2393,24 @@ class DeepCheckWebGood(DeepCheckBase, unittest.TestCase):
             self.failUnless(base32.b2a(self.mutable.get_storage_index()) in lines)
             self.failUnless(base32.b2a(self.large.get_storage_index()) in lines)
         d.addCallback(_check2)
+
+        d.addCallback(lambda res:
+                      self._run_cli(["stats",
+                                     "--node-directory", basedir,
+                                     self.root_uri]))
+        def _check3((out,err)):
+            lines = [l.strip() for l in out.split("\n") if l]  # drop the alignment padding
+            self.failUnless("count-immutable-files: 1" in lines)
+            self.failUnless("count-mutable-files: 1" in lines)
+            self.failUnless("count-literal-files: 1" in lines)
+            self.failUnless("count-files: 3" in lines)
+            self.failUnless("count-directories: 1" in lines)
+            self.failUnless("size-immutable-files: 13000" in lines)
+            self.failUnless("size-literal-files: 22" in lines)
+            self.failUnless("   11-31    : 1".strip() in lines)  # size-histogram rows
+            self.failUnless("10001-31622 : 1".strip() in lines)
+        d.addCallback(_check3)
+
         return d
 
 
-- 
2.45.2