From: Brian Warner Date: Wed, 25 Feb 2009 06:13:35 +0000 (-0700) Subject: webapi: modify streaming deep-manifest/deep-checker to emit an ERROR: line if they... X-Git-Url: https://git.rkrishnan.org/frontends/reliability?a=commitdiff_plain;h=fd4ceb6a8762924c2248a89d547c226b41f0485e;p=tahoe-lafs%2Ftahoe-lafs.git webapi: modify streaming deep-manifest/deep-checker to emit an ERROR: line if they encounter an unrecoverable+untraversable directory. For #590. --- diff --git a/docs/frontends/webapi.txt b/docs/frontends/webapi.txt index 2024addc..5d7f46d9 100644 --- a/docs/frontends/webapi.txt +++ b/docs/frontends/webapi.txt @@ -896,8 +896,9 @@ POST $URL?t=stream-deep-check This initiates a recursive walk of all files and directories reachable from the target, performing a check on each one just like t=check. For each unique object (duplicates are skipped), a single line of JSON is emitted to - the HTTP response channel. When the walk is complete, a final line of JSON - is emitted which contains the accumulated file-size/count "deep-stats" data. + the HTTP response channel (or an error indication, see below). When the walk + is complete, a final line of JSON is emitted which contains the accumulated + file-size/count "deep-stats" data. This command takes the same arguments as t=start-deep-check. @@ -929,6 +930,18 @@ POST $URL?t=stream-deep-check The last unit in the stream will have a type of "stats", and will contain the keys described in the "start-deep-stats" operation, below. + If any errors occur during the traversal (specifically if a directory is + unrecoverable, such that further traversal is not possible), an error + indication is written to the response body, instead of the usual line of + JSON. This error indication line will begin with the string "ERROR:" (in all + caps), and contain a summary of the error on the rest of the line. The + remaining lines of the response body will be a python exception. The client + application should look for the ERROR: and stop processing JSON as soon as + it is seen. Note that neither a file being unrecoverable nor a directory + merely being unhealthy will cause traversal to stop. The line just before + the ERROR: will describe the directory that was untraversable, since the + unit is emitted to the HTTP response body before the child is traversed. + POST $URL?t=check&repair=true @@ -1017,9 +1030,9 @@ POST $URL?t=stream-deep-check&repair=true This triggers a recursive walk of all files and directories, performing a t=check&repair=true on each one. For each unique object (duplicates are - skipped), a single line of JSON is emitted to the HTTP response channel. - When the walk is complete, a final line of JSON is emitted which contains - the accumulated file-size/count "deep-stats" data. + skipped), a single line of JSON is emitted to the HTTP response channel (or + an error indication). When the walk is complete, a final line of JSON is + emitted which contains the accumulated file-size/count "deep-stats" data. This emits the same data as t=stream-deep-check (without the repair=true), except that the "check-results" field is replaced with a @@ -1031,6 +1044,11 @@ POST $URL?t=stream-deep-check&repair=true receiving client is expected to calculate those values itself from the stream of per-object check-and-repair-results. + Note that the "ERROR:" indication will only be emitted if traversal stops, + which will only occur if an unrecoverable directory is encountered. If a + file or directory repair fails, the traversal will continue, and the repair + failure will be indicated in the JSON data (in the "repair-successful" key). + POST $DIRURL?t=start-manifest (must add &ophandle=XYZ) This operation generates a "manfest" of the given directory tree, mostly @@ -1123,8 +1141,9 @@ POST $URL?t=stream-manifest This operation performs a recursive walk of all files and directories reachable from the given starting point. For each such unique object (duplicates are skipped), a single line of JSON is emitted to the HTTP - response channel. When the walk is complete, a final line of JSON is emitted - which contains the accumulated file-size/count "deep-stats" data. + response channel (or an error indication, see below). When the walk is + complete, a final line of JSON is emitted which contains the accumulated + file-size/count "deep-stats" data. A CLI tool can split the response stream on newlines into "response units", and parse each response unit as JSON. Each such parsed unit will be a @@ -1148,6 +1167,16 @@ POST $URL?t=stream-manifest The last unit in the stream will have a type of "stats", and will contain the keys described in the "start-deep-stats" operation, below. + If any errors occur during the traversal (specifically if a directory is + unrecoverable, such that further traversal is not possible), an error + indication is written to the response body, instead of the usual line of + JSON. This error indication line will begin with the string "ERROR:" (in all + caps), and contain a summary of the error on the rest of the line. The + remaining lines of the response body will be a python exception. The client + application should look for the ERROR: and stop processing JSON as soon as + it is seen. The line just before the ERROR: will describe the directory that + was untraversable, since the manifest entry is emitted to the HTTP response + body before the child is traversed. == Other Useful Pages == diff --git a/src/allmydata/test/test_deepcheck.py b/src/allmydata/test/test_deepcheck.py index b7cff2db..9b2318a2 100644 --- a/src/allmydata/test/test_deepcheck.py +++ b/src/allmydata/test/test_deepcheck.py @@ -5,6 +5,7 @@ from twisted.trial import unittest from twisted.internet import defer from twisted.internet import threads # CLI tests use deferToThread from allmydata.immutable import upload +from allmydata.mutable.common import UnrecoverableFileError from allmydata.util import idlib from allmydata.util import base32 from allmydata.scripts import runner @@ -13,7 +14,8 @@ from allmydata.interfaces import ICheckResults, ICheckAndRepairResults, \ from allmydata.monitor import Monitor, OperationCancelledError from twisted.web.client import getPage -from allmydata.test.common import ErrorMixin, _corrupt_mutable_share_data +from allmydata.test.common import ErrorMixin, _corrupt_mutable_share_data, \ + ShouldFailMixin from allmydata.test.common_util import StallMixin from allmydata.test.no_network import GridTestMixin @@ -123,7 +125,7 @@ class MutableChecker(GridTestMixin, unittest.TestCase, ErrorMixin): return d -class DeepCheckBase(GridTestMixin, ErrorMixin, StallMixin): +class DeepCheckBase(GridTestMixin, ErrorMixin, StallMixin, ShouldFailMixin): def web_json(self, n, **kwargs): kwargs["output"] = "json" @@ -835,6 +837,7 @@ class DeepCheckWebBad(DeepCheckBase, unittest.TestCase): d = self.set_up_damaged_tree() d.addCallback(self.do_check) d.addCallback(self.do_deepcheck) + d.addCallback(self.do_deepcheck_broken) d.addCallback(self.do_test_web_bad) d.addErrback(self.explain_web_error) d.addErrback(self.explain_error) @@ -854,6 +857,12 @@ class DeepCheckWebBad(DeepCheckBase, unittest.TestCase): # large-missing-shares # large-corrupt-shares # large-unrecoverable + # broken + # large1-good + # subdir-good + # large2-good + # subdir-unrecoverable + # large3-good self.nodes = {} @@ -871,9 +880,27 @@ class DeepCheckWebBad(DeepCheckBase, unittest.TestCase): d.addCallback(self.create_mangled, "large-missing-shares") d.addCallback(self.create_mangled, "large-corrupt-shares") d.addCallback(self.create_mangled, "large-unrecoverable") - + d.addCallback(lambda ignored: c0.create_empty_dirnode()) + d.addCallback(self._stash_node, "broken") + large1 = upload.Data("Lots of data\n" * 1000 + "large1" + "\n", None) + d.addCallback(lambda ignored: + self.nodes["broken"].add_file(u"large1", large1)) + d.addCallback(lambda ignored: + self.nodes["broken"].create_empty_directory(u"subdir-good")) + large2 = upload.Data("Lots of data\n" * 1000 + "large2" + "\n", None) + d.addCallback(lambda subdir: subdir.add_file(u"large2-good", large2)) + d.addCallback(lambda ignored: + self.nodes["broken"].create_empty_directory(u"subdir-unrecoverable")) + d.addCallback(self._stash_node, "subdir-unrecoverable") + large3 = upload.Data("Lots of data\n" * 1000 + "large3" + "\n", None) + d.addCallback(lambda subdir: subdir.add_file(u"large3-good", large3)) + d.addCallback(lambda ignored: + self._delete_most_shares(self.nodes["broken"])) return d + def _stash_node(self, node, name): + self.nodes[name] = node + return node def create_mangled(self, ignored, name): nodetype, mangletype = name.split("-", 1) @@ -887,10 +914,7 @@ class DeepCheckWebBad(DeepCheckBase, unittest.TestCase): small = upload.Data("Small enough for a LIT", None) d = self.root.add_file(unicode(name), small) - def _stash_node(node): - self.nodes[name] = node - return node - d.addCallback(_stash_node) + d.addCallback(self._stash_node, name) if mangletype == "good": pass @@ -1042,6 +1066,16 @@ class DeepCheckWebBad(DeepCheckBase, unittest.TestCase): return d + def do_deepcheck_broken(self, ignored): + # deep-check on the broken directory should fail, because of the + # untraversable subdir + def _do_deep_check(): + return self.nodes["broken"].start_deep_check().when_done() + d = self.shouldFail(UnrecoverableFileError, "do_deep_check", + "no recoverable versions", + _do_deep_check) + return d + def json_is_healthy(self, data, where): r = data["results"] self.failUnless(r["healthy"], where) diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index a5b28bd4..492e406a 100644 --- a/src/allmydata/test/test_web.py +++ b/src/allmydata/test/test_web.py @@ -19,7 +19,6 @@ from allmydata.test.common import FakeDirectoryNode, FakeCHKFileNode, \ from allmydata.interfaces import IURI, INewDirectoryURI, \ IReadonlyNewDirectoryURI, IFileURI, IMutableFileURI, IMutableFileNode from allmydata.mutable import servermap, publish, retrieve -from allmydata.mutable.common import UnrecoverableFileError import common_util as testutil from allmydata.test.no_network import GridTestMixin @@ -2860,15 +2859,63 @@ class Grid(GridTestMixin, WebErrorMixin, unittest.TestCase, ShouldFailMixin): d.addCallback(lambda ign: self.delete_shares_numbered(self.uris["subdir"], - range(10))) + range(1, 10))) - ## argh! how should a streaming-JSON API indicate fatal error? - ## answer: emit ERROR: instead of a JSON string - #d.addCallback(lambda ign: - # self.shouldFail(UnrecoverableFileError, 'check-subdir', - # "no recoverable versions", - # self.CHECK, "ignored", - # "root", "t=stream-deep-check")) + # root + # root/good + # root/small + # root/sick + # root/subdir [unrecoverable] + # root/subdir/grandchild + + # how should a streaming-JSON API indicate fatal error? + # answer: emit ERROR: instead of a JSON string + + d.addCallback(self.CHECK, "root", "t=stream-manifest") + def _check_broken_manifest(res): + lines = res.splitlines() + error_lines = [i + for (i,line) in enumerate(lines) + if line.startswith("ERROR:")] + if not error_lines: + self.fail("no ERROR: in output: %s" % (res,)) + first_error = error_lines[0] + error_line = lines[first_error] + error_msg = lines[first_error+1:] + error_msg_s = "\n".join(error_msg) + "\n" + self.failUnlessIn("ERROR: UnrecoverableFileError", error_line) + self.failUnlessIn("no recoverable versions", error_line) + self.failUnless(len(error_msg) > 2, error_msg_s) # some traceback + units = [simplejson.loads(line) for line in lines[:first_error]] + self.failUnlessEqual(len(units), 5) # includes subdir + last_unit = units[-1] + self.failUnlessEqual(last_unit["path"], ["subdir"]) + d.addCallback(_check_broken_manifest) + + d.addCallback(self.CHECK, "root", "t=stream-deep-check") + def _check_broken_deepcheck(res): + lines = res.splitlines() + error_lines = [i + for (i,line) in enumerate(lines) + if line.startswith("ERROR:")] + if not error_lines: + self.fail("no ERROR: in output: %s" % (res,)) + first_error = error_lines[0] + error_line = lines[first_error] + error_msg = lines[first_error+1:] + error_msg_s = "\n".join(error_msg) + "\n" + self.failUnlessIn("ERROR: UnrecoverableFileError", error_line) + self.failUnlessIn("no recoverable versions", error_line) + self.failUnless(len(error_msg) > 2, error_msg_s) # some traceback + units = [simplejson.loads(line) for line in lines[:first_error]] + self.failUnlessEqual(len(units), 5) # includes subdir + last_unit = units[-1] + self.failUnlessEqual(last_unit["path"], ["subdir"]) + r = last_unit["check-results"]["results"] + self.failUnlessEqual(r["count-recoverable-versions"], 0) + self.failUnlessEqual(r["count-shares-good"], 1) + self.failUnlessEqual(r["recoverable"], False) + d.addCallback(_check_broken_deepcheck) d.addErrback(self.explain_web_error) return d diff --git a/src/allmydata/web/directory.py b/src/allmydata/web/directory.py index d4798665..9b49c32e 100644 --- a/src/allmydata/web/directory.py +++ b/src/allmydata/web/directory.py @@ -404,6 +404,12 @@ class DirectoryNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin): f.trap(OperationCancelledError) return "Operation Cancelled" d.addErrback(_cancelled) + def _error(f): + # signal the error as a non-JSON "ERROR:" line, plus exception + msg = "ERROR: %s\n" % repr(f.value) + msg += str(f) + return msg + d.addErrback(_error) return d def _POST_start_manifest(self, ctx): @@ -442,6 +448,12 @@ class DirectoryNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin): f.trap(OperationCancelledError) return "Operation Cancelled" d.addErrback(_cancelled) + def _error(f): + # signal the error as a non-JSON "ERROR:" line, plus exception + msg = "ERROR: %s\n" % repr(f.value) + msg += str(f) + return msg + d.addErrback(_error) return d def _POST_set_children(self, req):