From fd4ceb6a8762924c2248a89d547c226b41f0485e Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@allmydata.com>
Date: Tue, 24 Feb 2009 23:13:35 -0700
Subject: [PATCH] webapi: modify streaming deep-manifest/deep-checker to emit
 an ERROR: line if they encounter an unrecoverable+untraversable directory.
 For #590.

---
 docs/frontends/webapi.txt            | 43 +++++++++++++++---
 src/allmydata/test/test_deepcheck.py | 48 +++++++++++++++++---
 src/allmydata/test/test_web.py       | 65 ++++++++++++++++++++++++----
 src/allmydata/web/directory.py       | 12 +++++
 4 files changed, 145 insertions(+), 23 deletions(-)

diff --git a/docs/frontends/webapi.txt b/docs/frontends/webapi.txt
index 2024addc..5d7f46d9 100644
--- a/docs/frontends/webapi.txt
+++ b/docs/frontends/webapi.txt
@@ -896,8 +896,9 @@ POST $URL?t=stream-deep-check
  This initiates a recursive walk of all files and directories reachable from
  the target, performing a check on each one just like t=check. For each
  unique object (duplicates are skipped), a single line of JSON is emitted to
- the HTTP response channel. When the walk is complete, a final line of JSON
- is emitted which contains the accumulated file-size/count "deep-stats" data.
+ the HTTP response channel (or an error indication, see below). When the walk
+ is complete, a final line of JSON is emitted which contains the accumulated
+ file-size/count "deep-stats" data.
 
  This command takes the same arguments as t=start-deep-check.
 
@@ -929,6 +930,18 @@ POST $URL?t=stream-deep-check
  The last unit in the stream will have a type of "stats", and will contain
  the keys described in the "start-deep-stats" operation, below.
 
+ If any errors occur during the traversal (specifically if a directory is
+ unrecoverable, such that further traversal is not possible), an error
+ indication is written to the response body, instead of the usual line of
+ JSON. This error indication line will begin with the string "ERROR:" (in all
+ caps), and contain a summary of the error on the rest of the line. The
+ remaining lines of the response body will be a python exception. The client
+ application should look for the ERROR: and stop processing JSON as soon as
+ it is seen. Note that neither a file being unrecoverable nor a directory
+ merely being unhealthy will cause traversal to stop. The line just before
+ the ERROR: will describe the directory that was untraversable, since the
+ unit is emitted to the HTTP response body before the child is traversed.
+
 
 POST $URL?t=check&repair=true
 
@@ -1017,9 +1030,9 @@ POST $URL?t=stream-deep-check&repair=true
 
  This triggers a recursive walk of all files and directories, performing a
  t=check&repair=true on each one. For each unique object (duplicates are
- skipped), a single line of JSON is emitted to the HTTP response channel.
- When the walk is complete, a final line of JSON is emitted which contains
- the accumulated file-size/count "deep-stats" data.
+ skipped), a single line of JSON is emitted to the HTTP response channel (or
+ an error indication). When the walk is complete, a final line of JSON is
+ emitted which contains the accumulated file-size/count "deep-stats" data.
 
  This emits the same data as t=stream-deep-check (without the repair=true),
  except that the "check-results" field is replaced with a
@@ -1031,6 +1044,11 @@ POST $URL?t=stream-deep-check&repair=true
  receiving client is expected to calculate those values itself from the
  stream of per-object check-and-repair-results.
 
+ Note that the "ERROR:" indication will only be emitted if traversal stops,
+ which will only occur if an unrecoverable directory is encountered. If a
+ file or directory repair fails, the traversal will continue, and the repair
+ failure will be indicated in the JSON data (in the "repair-successful" key).
+
 POST $DIRURL?t=start-manifest    (must add &ophandle=XYZ)
 
   This operation generates a "manfest" of the given directory tree, mostly
@@ -1123,8 +1141,9 @@ POST $URL?t=stream-manifest
  This operation performs a recursive walk of all files and directories
  reachable from the given starting point. For each such unique object
  (duplicates are skipped), a single line of JSON is emitted to the HTTP
- response channel. When the walk is complete, a final line of JSON is emitted
- which contains the accumulated file-size/count "deep-stats" data.
+ response channel (or an error indication, see below). When the walk is
+ complete, a final line of JSON is emitted which contains the accumulated
+ file-size/count "deep-stats" data.
 
  A CLI tool can split the response stream on newlines into "response units",
  and parse each response unit as JSON. Each such parsed unit will be a
@@ -1148,6 +1167,16 @@ POST $URL?t=stream-manifest
  The last unit in the stream will have a type of "stats", and will contain
  the keys described in the "start-deep-stats" operation, below.
 
+ If any errors occur during the traversal (specifically if a directory is
+ unrecoverable, such that further traversal is not possible), an error
+ indication is written to the response body, instead of the usual line of
+ JSON. This error indication line will begin with the string "ERROR:" (in all
+ caps), and contain a summary of the error on the rest of the line. The
+ remaining lines of the response body will be a python exception. The client
+ application should look for the ERROR: and stop processing JSON as soon as
+ it is seen. The line just before the ERROR: will describe the directory that
+ was untraversable, since the manifest entry is emitted to the HTTP response
+ body before the child is traversed.
 
 == Other Useful Pages ==
 
diff --git a/src/allmydata/test/test_deepcheck.py b/src/allmydata/test/test_deepcheck.py
index b7cff2db..9b2318a2 100644
--- a/src/allmydata/test/test_deepcheck.py
+++ b/src/allmydata/test/test_deepcheck.py
@@ -5,6 +5,7 @@ from twisted.trial import unittest
 from twisted.internet import defer
 from twisted.internet import threads # CLI tests use deferToThread
 from allmydata.immutable import upload
+from allmydata.mutable.common import UnrecoverableFileError
 from allmydata.util import idlib
 from allmydata.util import base32
 from allmydata.scripts import runner
@@ -13,7 +14,8 @@ from allmydata.interfaces import ICheckResults, ICheckAndRepairResults, \
 from allmydata.monitor import Monitor, OperationCancelledError
 from twisted.web.client import getPage
 
-from allmydata.test.common import ErrorMixin, _corrupt_mutable_share_data
+from allmydata.test.common import ErrorMixin, _corrupt_mutable_share_data, \
+     ShouldFailMixin
 from allmydata.test.common_util import StallMixin
 from allmydata.test.no_network import GridTestMixin
 
@@ -123,7 +125,7 @@ class MutableChecker(GridTestMixin, unittest.TestCase, ErrorMixin):
         return d
 
 
-class DeepCheckBase(GridTestMixin, ErrorMixin, StallMixin):
+class DeepCheckBase(GridTestMixin, ErrorMixin, StallMixin, ShouldFailMixin):
 
     def web_json(self, n, **kwargs):
         kwargs["output"] = "json"
@@ -835,6 +837,7 @@ class DeepCheckWebBad(DeepCheckBase, unittest.TestCase):
         d = self.set_up_damaged_tree()
         d.addCallback(self.do_check)
         d.addCallback(self.do_deepcheck)
+        d.addCallback(self.do_deepcheck_broken)
         d.addCallback(self.do_test_web_bad)
         d.addErrback(self.explain_web_error)
         d.addErrback(self.explain_error)
@@ -854,6 +857,12 @@ class DeepCheckWebBad(DeepCheckBase, unittest.TestCase):
         #   large-missing-shares
         #   large-corrupt-shares
         #   large-unrecoverable
+        # broken
+        #   large1-good
+        #   subdir-good
+        #     large2-good
+        #   subdir-unrecoverable
+        #     large3-good
 
         self.nodes = {}
 
@@ -871,9 +880,27 @@ class DeepCheckWebBad(DeepCheckBase, unittest.TestCase):
         d.addCallback(self.create_mangled, "large-missing-shares")
         d.addCallback(self.create_mangled, "large-corrupt-shares")
         d.addCallback(self.create_mangled, "large-unrecoverable")
-
+        d.addCallback(lambda ignored: c0.create_empty_dirnode())
+        d.addCallback(self._stash_node, "broken")
+        large1 = upload.Data("Lots of data\n" * 1000 + "large1" + "\n", None)
+        d.addCallback(lambda ignored:
+                      self.nodes["broken"].add_file(u"large1", large1))
+        d.addCallback(lambda ignored:
+                      self.nodes["broken"].create_empty_directory(u"subdir-good"))
+        large2 = upload.Data("Lots of data\n" * 1000 + "large2" + "\n", None)
+        d.addCallback(lambda subdir: subdir.add_file(u"large2-good", large2))
+        d.addCallback(lambda ignored:
+                      self.nodes["broken"].create_empty_directory(u"subdir-unrecoverable"))
+        d.addCallback(self._stash_node, "subdir-unrecoverable")
+        large3 = upload.Data("Lots of data\n" * 1000 + "large3" + "\n", None)
+        d.addCallback(lambda subdir: subdir.add_file(u"large3-good", large3))
+        d.addCallback(lambda ignored:
+                      self._delete_most_shares(self.nodes["broken"]))
         return d
 
+    def _stash_node(self, node, name):
+        self.nodes[name] = node
+        return node
 
     def create_mangled(self, ignored, name):
         nodetype, mangletype = name.split("-", 1)
@@ -887,10 +914,7 @@ class DeepCheckWebBad(DeepCheckBase, unittest.TestCase):
             small = upload.Data("Small enough for a LIT", None)
             d = self.root.add_file(unicode(name), small)
 
-        def _stash_node(node):
-            self.nodes[name] = node
-            return node
-        d.addCallback(_stash_node)
+        d.addCallback(self._stash_node, name)
 
         if mangletype == "good":
             pass
@@ -1042,6 +1066,16 @@ class DeepCheckWebBad(DeepCheckBase, unittest.TestCase):
 
         return d
 
+    def do_deepcheck_broken(self, ignored):
+        # deep-check on the broken directory should fail, because of the
+        # untraversable subdir
+        def _do_deep_check():
+            return self.nodes["broken"].start_deep_check().when_done()
+        d = self.shouldFail(UnrecoverableFileError, "do_deep_check",
+                            "no recoverable versions",
+                            _do_deep_check)
+        return d
+
     def json_is_healthy(self, data, where):
         r = data["results"]
         self.failUnless(r["healthy"], where)
diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py
index a5b28bd4..492e406a 100644
--- a/src/allmydata/test/test_web.py
+++ b/src/allmydata/test/test_web.py
@@ -19,7 +19,6 @@ from allmydata.test.common import FakeDirectoryNode, FakeCHKFileNode, \
 from allmydata.interfaces import IURI, INewDirectoryURI, \
      IReadonlyNewDirectoryURI, IFileURI, IMutableFileURI, IMutableFileNode
 from allmydata.mutable import servermap, publish, retrieve
-from allmydata.mutable.common import UnrecoverableFileError
 import common_util as testutil
 from allmydata.test.no_network import GridTestMixin
 
@@ -2860,15 +2859,63 @@ class Grid(GridTestMixin, WebErrorMixin, unittest.TestCase, ShouldFailMixin):
 
         d.addCallback(lambda ign:
                       self.delete_shares_numbered(self.uris["subdir"],
-                                                  range(10)))
+                                                  range(1, 10)))
 
-        ## argh! how should a streaming-JSON API indicate fatal error?
-        ## answer: emit ERROR: instead of a JSON string
-        #d.addCallback(lambda ign:
-        #              self.shouldFail(UnrecoverableFileError, 'check-subdir',
-        #                              "no recoverable versions",
-        #                              self.CHECK, "ignored",
-        #                              "root", "t=stream-deep-check"))
+        # root
+        # root/good
+        # root/small
+        # root/sick
+        # root/subdir [unrecoverable]
+        # root/subdir/grandchild
+
+        # how should a streaming-JSON API indicate fatal error?
+        # answer: emit ERROR: instead of a JSON string
+
+        d.addCallback(self.CHECK, "root", "t=stream-manifest")
+        def _check_broken_manifest(res):
+            lines = res.splitlines()
+            error_lines = [i
+                           for (i,line) in enumerate(lines)
+                           if line.startswith("ERROR:")]
+            if not error_lines:
+                self.fail("no ERROR: in output: %s" % (res,))
+            first_error = error_lines[0]
+            error_line = lines[first_error]
+            error_msg = lines[first_error+1:]
+            error_msg_s = "\n".join(error_msg) + "\n"
+            self.failUnlessIn("ERROR: UnrecoverableFileError", error_line)
+            self.failUnlessIn("no recoverable versions", error_line)
+            self.failUnless(len(error_msg) > 2, error_msg_s) # some traceback
+            units = [simplejson.loads(line) for line in lines[:first_error]]
+            self.failUnlessEqual(len(units), 5) # includes subdir
+            last_unit = units[-1]
+            self.failUnlessEqual(last_unit["path"], ["subdir"])
+        d.addCallback(_check_broken_manifest)
+
+        d.addCallback(self.CHECK, "root", "t=stream-deep-check")
+        def _check_broken_deepcheck(res):
+            lines = res.splitlines()
+            error_lines = [i
+                           for (i,line) in enumerate(lines)
+                           if line.startswith("ERROR:")]
+            if not error_lines:
+                self.fail("no ERROR: in output: %s" % (res,))
+            first_error = error_lines[0]
+            error_line = lines[first_error]
+            error_msg = lines[first_error+1:]
+            error_msg_s = "\n".join(error_msg) + "\n"
+            self.failUnlessIn("ERROR: UnrecoverableFileError", error_line)
+            self.failUnlessIn("no recoverable versions", error_line)
+            self.failUnless(len(error_msg) > 2, error_msg_s) # some traceback
+            units = [simplejson.loads(line) for line in lines[:first_error]]
+            self.failUnlessEqual(len(units), 5) # includes subdir
+            last_unit = units[-1]
+            self.failUnlessEqual(last_unit["path"], ["subdir"])
+            r = last_unit["check-results"]["results"]
+            self.failUnlessEqual(r["count-recoverable-versions"], 0)
+            self.failUnlessEqual(r["count-shares-good"], 1)
+            self.failUnlessEqual(r["recoverable"], False)
+        d.addCallback(_check_broken_deepcheck)
 
         d.addErrback(self.explain_web_error)
         return d
diff --git a/src/allmydata/web/directory.py b/src/allmydata/web/directory.py
index d4798665..9b49c32e 100644
--- a/src/allmydata/web/directory.py
+++ b/src/allmydata/web/directory.py
@@ -404,6 +404,12 @@ class DirectoryNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
             f.trap(OperationCancelledError)
             return "Operation Cancelled"
         d.addErrback(_cancelled)
+        def _error(f):
+            # signal the error as a non-JSON "ERROR:" line, plus exception
+            msg = "ERROR: %s\n" % repr(f.value)
+            msg += str(f)
+            return msg
+        d.addErrback(_error)
         return d
 
     def _POST_start_manifest(self, ctx):
@@ -442,6 +448,12 @@ class DirectoryNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
             f.trap(OperationCancelledError)
             return "Operation Cancelled"
         d.addErrback(_cancelled)
+        def _error(f):
+            # signal the error as a non-JSON "ERROR:" line, plus exception
+            msg = "ERROR: %s\n" % repr(f.value)
+            msg += str(f)
+            return msg
+        d.addErrback(_error)
         return d
 
     def _POST_set_children(self, req):
-- 
2.45.2