From 0cdf526b50fb1fdea1809b06196da08ee84587a0 Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@allmydata.com>
Date: Tue, 18 Nov 2008 19:12:10 -0700
Subject: [PATCH] cli: add 'tahoe check' and 'tahoe deep-check' commands, with
 primitive reporting code

---
 src/allmydata/scripts/cli.py         | 42 ++++++++++++++
 src/allmydata/scripts/tahoe_check.py | 84 ++++++++++++++++++++++++++++
 src/allmydata/web/checker_results.py | 22 +++++---
 3 files changed, 139 insertions(+), 9 deletions(-)
 create mode 100644 src/allmydata/scripts/tahoe_check.py

diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py
index bd578694..6a0d6030 100644
--- a/src/allmydata/scripts/cli.py
+++ b/src/allmydata/scripts/cli.py
@@ -221,6 +221,34 @@ class StatsOptions(VDriveOptions):
 
     longdesc = """Print statistics about of all files/directories reachable from the given starting point."""
 
+class CheckOptions(VDriveOptions):
+    optFlags = [
+        ("raw", None, "Display raw JSON data instead of parsed"),  # no short form: "-r" is used by --repair
+        ("verify", "v", "Verify all hashes, instead of merely querying share presence"),
+        ("repair", "r", "Automatically repair any problems found"),
+        ]
+    def parseArgs(self, where=''):  # 'where' is the optional [ALIAS:PATH] argument
+        self.where = where
+
+    def getSynopsis(self):
+        return "%s check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
+
+    longdesc = """Check a single file or directory: count how many shares are available, verify their hashes. Optionally repair the file if any problems were found."""
+
+class DeepCheckOptions(VDriveOptions):
+    optFlags = [
+        ("raw", None, "Display raw JSON data instead of parsed"),  # no short form: "-r" is used by --repair
+        ("verify", "v", "Verify all hashes, instead of merely querying share presence"),
+        ("repair", "r", "Automatically repair any problems found"),
+        ]
+    def parseArgs(self, where=''):  # 'where' is the optional [ALIAS:PATH] starting point
+        self.where = where
+
+    def getSynopsis(self):
+        return "%s deep-check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
+
+    longdesc = """Check all files/directories reachable from the given starting point (which must be a directory), like 'tahoe check' but for multiple files. Optionally repair any problems found."""
+
 subCommands = [
     ["mkdir", None, MakeDirectoryOptions, "Create a new directory"],
     ["add-alias", None, AddAliasOptions, "Add a new alias cap"],
@@ -236,6 +264,8 @@ subCommands = [
     ["webopen", None, WebopenOptions, "Open a webbrowser to the root_dir"],
     ["manifest", None, ManifestOptions, "List all files/dirs in a subtree"],
     ["stats", None, StatsOptions, "Print statistics about all files/dirs in a subtree"],
+    ["check", None, CheckOptions, "Check a single file or directory"],
+    ["deep-check", None, DeepCheckOptions, "Check all files/directories reachable from a starting point"],
     ]
 
 def mkdir(options):
@@ -317,6 +347,16 @@ def stats(options):
     rc = tahoe_manifest.stats(options)
     return rc
 
+def check(options):
+    # The implementation module is imported at call time, not at module load.
+    from allmydata.scripts import tahoe_check as impl
+    return impl.check(options)
+
+def deepcheck(options):
+    # The implementation module is imported at call time, not at module load.
+    from allmydata.scripts import tahoe_check as impl
+    return impl.deepcheck(options)
+
 dispatch = {
     "mkdir": mkdir,
     "add-alias": add_alias,
@@ -332,5 +372,7 @@ dispatch = {
     "webopen": webopen,
     "manifest": manifest,
     "stats": stats,
+    "check": check,
+    "deep-check": deepcheck,
     }
 
diff --git a/src/allmydata/scripts/tahoe_check.py b/src/allmydata/scripts/tahoe_check.py
new file mode 100644
index 00000000..ed366f11
--- /dev/null
+++ b/src/allmydata/scripts/tahoe_check.py
@@ -0,0 +1,84 @@
+
+from pprint import pprint
+import urllib
+import simplejson
+from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path
+from allmydata.scripts.common_http import do_http
+from allmydata.scripts.slow_operation import SlowOperationRunner
+
+class Checker:  # placeholder: no attributes or methods are defined on it
+    pass
+
+def check(options):
+    """POST a t=check request for options.where to the node and print the reply.
+
+    Honors the "verify", "repair" and "raw" flags. Returns 0 on success, or 1
+    (after reporting the response on stderr) when the HTTP status is not 200.
+    """
+    stdout = options.stdout
+    stderr = options.stderr
+    nodeurl = options['node-url']
+    if not nodeurl.endswith("/"):
+        nodeurl += "/"
+    rootcap, path = get_alias(options.aliases, options.where, DEFAULT_ALIAS)
+    if path == '/':
+        path = ''
+    url = nodeurl + "uri/%s" % urllib.quote(rootcap)
+    if path:
+        url += "/" + escape_path(path)
+    # todo: should it end with a slash?
+    url += "?t=check&output=JSON"
+    if options["verify"]:
+        url += "&verify=true"
+    if options["repair"]:
+        url += "&repair=true"
+
+    resp = do_http("POST", url)
+    if resp.status != 200:
+        print >>stderr, "ERROR", resp.status, resp.reason, resp.read()
+        return 1
+    jdata = resp.read()
+    if options.get("raw"):
+        # emit the body verbatim: pprint() would print the repr of the string
+        stdout.write(jdata)
+        return 0
+
+    # todo: make this prettier, and show repair status when repairing
+    pprint(simplejson.loads(jdata), stream=stdout)
+    return 0
+
+
+class DeepChecker(SlowOperationRunner):
+    """SlowOperationRunner that starts a deep-check and prints its results."""
+    def make_url(self, base, ophandle):
+        url = base + "?t=start-deep-check&ophandle=" + ophandle
+        if self.options["verify"]:
+            url += "&verify=true"
+        if self.options["repair"]:
+            url += "&repair=true"
+        return url
+
+    def write_results(self, data):
+        """Print the deep-check results in 'data' to self.options.stdout."""
+        out = self.options.stdout
+        if self.options["repair"]:
+            # todo: make this prettier
+            pprint(data, stream=out)
+        else:
+            print >>out, "Objects Checked: %d" % data["count-objects-checked"]
+            print >>out, "Objects Healthy: %d" % data["count-objects-healthy"]
+            print >>out, "Objects Unhealthy: %d" % data["count-objects-unhealthy"]
+            print >>out
+            if data["list-unhealthy-files"]:
+                print >>out, "Unhealthy Files:"  # same stream as the rest of the report
+                for (pathname, cr) in data["list-unhealthy-files"]:
+                    if pathname:
+                        path_s = "/".join(pathname)
+                    else:
+                        path_s = "<root>"
+                    print >>out, path_s, ":", cr["summary"]
+
+
+def deepcheck(options):  # delegates to DeepChecker.run and returns its result
+    return DeepChecker().run(options)
+
diff --git a/src/allmydata/web/checker_results.py b/src/allmydata/web/checker_results.py
index d59344eb..ab4dacae 100644
--- a/src/allmydata/web/checker_results.py
+++ b/src/allmydata/web/checker_results.py
@@ -450,16 +450,20 @@ class DeepCheckAndRepairResults(rend.Page, ResultsBase, ReloadMixin):
                                          shnum)
                                         for (serverid, storage_index, shnum)
                                         in res.get_corrupt_shares() ]
-        data["list-remaining-corrupt-shares"] = [ (idlib.nodeid_b2a(serverid),
-                                                   base32.b2a(storage_index),
-                                                   shnum)
-                                                  for (serverid, storage_index, shnum)
-                                                  in res.get_remaining_corrupt_shares() ]
 
-        data["list-unhealthy-files"] = [ (path_t, self._json_check_results(r))
-                                         for (path_t, r)
-                                         in res.get_all_results().items()
-                                         if not r.get_pre_repair_results().is_healthy() ]
+        remaining_corrupt = [ (idlib.nodeid_b2a(serverid),
+                               base32.b2a(storage_index),
+                               shnum)
+                              for (serverid, storage_index, shnum)
+                              in res.get_remaining_corrupt_shares() ]
+        data["list-remaining-corrupt-shares"] = remaining_corrupt
+
+        unhealthy = [ (path_t,
+                       self._json_check_results(crr.get_pre_repair_results()))
+                      for (path_t, crr)
+                      in res.get_all_results().items()
+                      if not crr.get_pre_repair_results().is_healthy() ]
+        data["list-unhealthy-files"] = unhealthy
         data["stats"] = res.get_stats()
         return simplejson.dumps(data, indent=1) + "\n"
 
-- 
2.45.2