From: Brian Warner Date: Fri, 7 Nov 2008 05:35:47 +0000 (-0700) Subject: checker: add is_recoverable() to checker results, make our stub immutable-verifier... X-Git-Url: https://git.rkrishnan.org/pf/content/en/footer/reliability?a=commitdiff_plain;h=dfa240815747d6995c5cdc597bf8c85b54f052fd;p=tahoe-lafs%2Ftahoe-lafs.git checker: add is_recoverable() to checker results, make our stub immutable-verifier not throw an exception on unrecoverable files, add tests --- diff --git a/src/allmydata/checker_results.py b/src/allmydata/checker_results.py index 803b6f82..7031ff3a 100644 --- a/src/allmydata/checker_results.py +++ b/src/allmydata/checker_results.py @@ -20,6 +20,8 @@ class CheckerResults: def set_healthy(self, healthy): self.healthy = bool(healthy) + def set_recoverable(self, recoverable): + self.recoverable = recoverable def set_needs_rebalancing(self, needs_rebalancing): self.needs_rebalancing_p = bool(needs_rebalancing) def set_data(self, data): @@ -45,6 +47,8 @@ class CheckerResults: def is_healthy(self): return self.healthy + def is_recoverable(self): + return self.recoverable def needs_rebalancing(self): return self.needs_rebalancing_p @@ -93,6 +97,7 @@ class DeepResultsBase: self.objects_checked = 0 self.objects_healthy = 0 self.objects_unhealthy = 0 + self.objects_unrecoverable = 0 self.corrupt_shares = [] self.all_results = {} self.all_results_by_storage_index = {} @@ -130,6 +135,8 @@ class DeepCheckResults(DeepResultsBase): self.objects_healthy += 1 else: self.objects_unhealthy += 1 + if not r.is_recoverable(): + self.objects_unrecoverable += 1 self.all_results[tuple(path)] = r self.all_results_by_storage_index[r.get_storage_index()] = r self.corrupt_shares.extend(r.get_data()["list-corrupt-shares"]) @@ -138,6 +145,7 @@ class DeepCheckResults(DeepResultsBase): return {"count-objects-checked": self.objects_checked, "count-objects-healthy": self.objects_healthy, "count-objects-unhealthy": self.objects_unhealthy, + "count-objects-unrecoverable": self.objects_unrecoverable, "count-corrupt-shares": len(self.corrupt_shares), } @@ -149,8 +157,7 @@ class DeepCheckAndRepairResults(DeepResultsBase): DeepResultsBase.__init__(self, root_storage_index) self.objects_healthy_post_repair = 0 self.objects_unhealthy_post_repair = 0 - self.objects_healthy_post_repair = 0 - self.objects_healthy_post_repair = 0 + self.objects_unrecoverable_post_repair = 0 self.repairs_attempted = 0 self.repairs_successful = 0 self.repairs_unsuccessful = 0 @@ -168,6 +175,8 @@ class DeepCheckAndRepairResults(DeepResultsBase): self.objects_healthy += 1 else: self.objects_unhealthy += 1 + if not pre_repair.is_recoverable(): + self.objects_unrecoverable += 1 self.corrupt_shares.extend(pre_repair.get_data()["list-corrupt-shares"]) if r.get_repair_attempted(): self.repairs_attempted += 1 @@ -179,6 +188,8 @@ class DeepCheckAndRepairResults(DeepResultsBase): self.objects_healthy_post_repair += 1 else: self.objects_unhealthy_post_repair += 1 + if not post_repair.is_recoverable(): + self.objects_unrecoverable_post_repair += 1 self.all_results[tuple(path)] = r self.all_results_by_storage_index[r.get_storage_index()] = r self.corrupt_shares_post_repair.extend(post_repair.get_data()["list-corrupt-shares"]) @@ -187,8 +198,10 @@ class DeepCheckAndRepairResults(DeepResultsBase): return {"count-objects-checked": self.objects_checked, "count-objects-healthy-pre-repair": self.objects_healthy, "count-objects-unhealthy-pre-repair": self.objects_unhealthy, + "count-objects-unrecoverable-pre-repair": self.objects_unrecoverable, "count-objects-healthy-post-repair": self.objects_healthy_post_repair, "count-objects-unhealthy-post-repair": self.objects_unhealthy_post_repair, + "count-objects-unrecoverable-post-repair": self.objects_unrecoverable_post_repair, "count-repairs-attempted": self.repairs_attempted, "count-repairs-successful": self.repairs_successful, "count-repairs-unsuccessful": self.repairs_unsuccessful, diff --git a/src/allmydata/immutable/checker.py b/src/allmydata/immutable/checker.py index c5808007..91bced99 100644 --- a/src/allmydata/immutable/checker.py +++ b/src/allmydata/immutable/checker.py @@ -72,12 +72,14 @@ class SimpleCHKFileChecker: report = [] healthy = bool(len(self.found_shares) >= self.total_shares) r.set_healthy(healthy) + recoverable = bool(len(self.found_shares) >= self.needed_shares) + r.set_recoverable(recoverable) data = {"count-shares-good": len(self.found_shares), "count-shares-needed": self.needed_shares, "count-shares-expected": self.total_shares, "count-wrong-shares": 0, } - if healthy: + if recoverable: data["count-recoverable-versions"] = 1 data["count-unrecoverable-versions"] = 0 else: @@ -120,6 +122,7 @@ class VerifyingOutput: self._opened = False self._results = results results.set_healthy(False) + results.set_recoverable(False) def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree): self._crypttext_hash_tree = crypttext_hashtree @@ -141,6 +144,7 @@ class VerifyingOutput: def finish(self): self._results.set_healthy(True) + self._results.set_recoverable(True) # the return value of finish() is passed out of FileDownloader._done, # but SimpleCHKFileVerifier overrides this with the CheckerResults # instance instead. @@ -222,7 +226,7 @@ class SimpleCHKFileVerifier(download.FileDownloader): # once we know that, we can download blocks from everybody d.addCallback(self._download_all_segments) d.addCallback(self._done) - d.addCallback(self._verify_done) + d.addCallbacks(self._verify_done, self._verify_failed) return d def _verify_done(self, ignored): @@ -244,3 +248,23 @@ class SimpleCHKFileVerifier(download.FileDownloader): } self._check_results.set_data(data) return self._check_results + + def _verify_failed(self, ignored): + # TODO: The following results are just stubs, and need to be replaced + # with actual values. These exist to make things like deep-check not + # fail. + self._check_results.set_needs_rebalancing(False) + N = self._total_shares + data = { + "count-shares-good": 0, + "count-good-share-hosts": 0, + "count-corrupt-shares": 0, + "list-corrupt-shares": [], + "servers-responding": [], + "sharemap": {}, + "count-wrong-shares": 0, + "count-recoverable-versions": 0, + "count-unrecoverable-versions": 1, + } + self._check_results.set_data(data) + return self._check_results diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index ce1adae0..c678b07f 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -1615,6 +1615,11 @@ class ICheckerResults(Interface): it is damaged in any way. Non-distributed LIT files always return True.""" + def is_recoverable(): + """Return a boolean, True if the file/dir can be recovered, False if + not. Unrecoverable files are obviously unhealthy. Non-distributed LIT + files always return True.""" + def needs_rebalancing(): """Return a boolean, True if the file/dir's reliability could be improved by moving shares to new servers. Non-distributed LIT files @@ -1728,6 +1733,7 @@ class IDeepCheckResults(Interface): count-objects-healthy: how many of those objects were completely healthy count-objects-unhealthy: how many were damaged in some way + count-objects-unrecoverable: how many were unrecoverable count-corrupt-shares: how many shares were found to have corruption, summed over all objects examined @@ -1770,11 +1776,14 @@ class IDeepCheckAndRepairResults(Interface): repair) count-objects-unhealthy-pre-repair: how many were damaged in some way + count-objects-unrecoverable-pre-repair: how many were unrecoverable count-objects-healthy-post-repair: how many of those objects were completely healthy (after any repair) count-objects-unhealthy-post-repair: how many were damaged in some way + count-objects-unrecoverable-post-repair: how many were + unrecoverable count-repairs-attempted: repairs were attempted on this many objects. The count-repairs- keys will always be provided, however unless diff --git a/src/allmydata/mutable/checker.py b/src/allmydata/mutable/checker.py index 8aaaedf9..1a64163b 100644 --- a/src/allmydata/mutable/checker.py +++ b/src/allmydata/mutable/checker.py @@ -265,6 +265,7 @@ class MutableChecker: data["servers-responding"] = list(smap.reachable_peers) r.set_healthy(healthy) + r.set_recoverable(bool(recoverable)) r.set_needs_rebalancing(needs_rebalancing) r.set_data(data) if healthy: diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index a7dbacaf..63ebf2dd 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -66,11 +66,13 @@ class FakeCHKFileNode: data["count-unrecoverable-versions"] = 0 if is_bad: r.set_healthy(False) + r.set_recoverable(True) data["count-shares-good"] = 9 data["list-corrupt-shares"] = [(nodeid, self.storage_index, 0)] r.problems = failure.Failure(CorruptShareError(is_bad)) else: r.set_healthy(True) + r.set_recoverable(True) data["count-shares-good"] = 10 r.problems = [] r.set_data(data) @@ -198,12 +200,14 @@ class FakeMutableFileNode: data["count-unrecoverable-versions"] = 0 if is_bad: r.set_healthy(False) + r.set_recoverable(True) data["count-shares-good"] = 9 r.problems = failure.Failure(CorruptShareError("peerid", 0, # shnum is_bad)) else: r.set_healthy(True) + r.set_recoverable(True) data["count-shares-good"] = 10 r.problems = [] r.set_data(data) @@ -947,6 +951,11 @@ class WebErrorMixin: f.trap(WebError) print "Web Error:", f.value, ":", f.value.response return f +class ErrorMixin(WebErrorMixin): + def explain_error(self, f): + if f.check(defer.FirstError): + print "First Error:", f.value.subFailure + return f class MemoryConsumer: implements(IConsumer) diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py index 42b9b26f..3cf94087 100644 --- a/src/allmydata/test/test_dirnode.py +++ b/src/allmydata/test/test_dirnode.py @@ -39,6 +39,7 @@ class Marker: def check(self, monitor, verify=False): r = CheckerResults("", None) r.set_healthy(True) + r.set_recoverable(True) return defer.succeed(r) def check_and_repair(self, monitor, verify=False): @@ -168,6 +169,7 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin): {"count-objects-checked": 3, "count-objects-healthy": 3, "count-objects-unhealthy": 0, + "count-objects-unrecoverable": 0, "count-corrupt-shares": 0, }) self.failIf(r.get_corrupt_shares()) @@ -186,9 +188,11 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin): {"count-objects-checked": 3, "count-objects-healthy-pre-repair": 3, "count-objects-unhealthy-pre-repair": 0, + "count-objects-unrecoverable-pre-repair": 0, "count-corrupt-shares-pre-repair": 0, "count-objects-healthy-post-repair": 3, "count-objects-unhealthy-post-repair": 0, + "count-objects-unrecoverable-post-repair": 0, "count-corrupt-shares-post-repair": 0, "count-repairs-attempted": 0, "count-repairs-successful": 0, @@ -215,6 +219,7 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin): {"count-objects-checked": 3, "count-objects-healthy": 2, "count-objects-unhealthy": 1, + "count-objects-unrecoverable": 0, "count-corrupt-shares": 0, }) #self.failUnlessEqual(len(r.get_problems()), 1) # TODO diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 618d4af7..da0d620e 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -24,7 +24,7 @@ from twisted.python.failure import Failure from twisted.web.client import getPage from twisted.web.error import Error -from allmydata.test.common import SystemTestMixin, WebErrorMixin, \ +from allmydata.test.common import SystemTestMixin, ErrorMixin, \ MemoryConsumer, download_to_data LARGE_DATA = """ @@ -1760,7 +1760,7 @@ class SystemTest(SystemTestMixin, unittest.TestCase): return d -class MutableChecker(SystemTestMixin, unittest.TestCase, WebErrorMixin): +class MutableChecker(SystemTestMixin, unittest.TestCase, ErrorMixin): def _run_cli(self, argv): stdout, stderr = StringIO(), StringIO() @@ -1784,7 +1784,7 @@ class MutableChecker(SystemTestMixin, unittest.TestCase, WebErrorMixin): return getPage(url, method="POST") d.addCallback(_do_check) def _got_results(out): - self.failUnless("Healthy!" in out, out) + self.failUnless("Healthy : Healthy" in out, out) self.failUnless("Recoverable Versions: 10*seq1-" in out, out) self.failIf("Not Healthy!" in out, out) self.failIf("Unhealthy" in out, out) @@ -1895,13 +1895,85 @@ class MutableChecker(SystemTestMixin, unittest.TestCase, WebErrorMixin): return d -class DeepCheckWeb(SystemTestMixin, unittest.TestCase, WebErrorMixin): + +class DeepCheckBase(SystemTestMixin, ErrorMixin): + + def web_json(self, n, **kwargs): + kwargs["output"] = "json" + d = self.web(n, "POST", **kwargs) + d.addCallback(self.decode_json) + return d + + def decode_json(self, (s,url)): + try: + data = simplejson.loads(s) + except ValueError: + self.fail("%s: not JSON: '%s'" % (url, s)) + return data + + def web(self, n, method="GET", **kwargs): + # returns (data, url) + url = (self.webish_url + "uri/%s" % urllib.quote(n.get_uri()) + + "?" + "&".join(["%s=%s" % (k,v) for (k,v) in kwargs.items()])) + d = getPage(url, method=method) + d.addCallback(lambda data: (data,url)) + return d + + def wait_for_operation(self, ignored, ophandle): + url = self.webish_url + "operations/" + ophandle + url += "?t=status&output=JSON" + d = getPage(url) + def _got(res): + try: + data = simplejson.loads(res) + except ValueError: + self.fail("%s: not JSON: '%s'" % (url, res)) + if not data["finished"]: + d = self.stall(delay=1.0) + d.addCallback(self.wait_for_operation, ophandle) + return d + return data + d.addCallback(_got) + return d + + def get_operation_results(self, ignored, ophandle, output=None): + url = self.webish_url + "operations/" + ophandle + url += "?t=status" + if output: + url += "&output=" + output + d = getPage(url) + def _got(res): + if output and output.lower() == "json": + try: + return simplejson.loads(res) + except ValueError: + self.fail("%s: not JSON: '%s'" % (url, res)) + return res + d.addCallback(_got) + return d + + def slow_web(self, n, output=None, **kwargs): + # use ophandle= + handle = base32.b2a(os.urandom(4)) + d = self.web(n, "POST", ophandle=handle, **kwargs) + d.addCallback(self.wait_for_operation, handle) + d.addCallback(self.get_operation_results, handle, output=output) + return d + + +class DeepCheckWebGood(DeepCheckBase, unittest.TestCase): # construct a small directory tree (with one dir, one immutable file, one # mutable file, one LIT file, and a loop), and then check/examine it in # various ways. def set_up_tree(self, ignored): # 2.9s + + # root + # mutable + # large + # small + # loop -> root c0 = self.clients[0] d = c0.create_empty_dirnode() def _created_root(n): @@ -1994,18 +2066,19 @@ class DeepCheckWeb(SystemTestMixin, unittest.TestCase, WebErrorMixin): d = self.set_up_nodes() d.addCallback(self.set_up_tree) d.addCallback(self.do_stats) - d.addCallback(self.do_test_good) - d.addCallback(self.do_test_web) + d.addCallback(self.do_test_check_good) + d.addCallback(self.do_test_web_good) d.addErrback(self.explain_web_error) + d.addErrback(self.explain_error) return d def do_stats(self, ignored): d = defer.succeed(None) d.addCallback(lambda ign: self.root.start_deep_stats().when_done()) - d.addCallback(self.check_stats) + d.addCallback(self.check_stats_good) return d - def check_stats(self, s): + def check_stats_good(self, s): self.failUnlessEqual(s["count-directories"], 1) self.failUnlessEqual(s["count-files"], 3) self.failUnlessEqual(s["count-immutable-files"], 1) @@ -2028,7 +2101,7 @@ class DeepCheckWeb(SystemTestMixin, unittest.TestCase, WebErrorMixin): self.failUnlessEqual(s["size-immutable-files"], 13000) self.failUnlessEqual(s["size-literal-files"], 22) - def do_test_good(self, ignored): + def do_test_check_good(self, ignored): d = defer.succeed(None) # check the individual items d.addCallback(lambda ign: self.root.check(Monitor())) @@ -2106,68 +2179,6 @@ class DeepCheckWeb(SystemTestMixin, unittest.TestCase, WebErrorMixin): return d - def web_json(self, n, **kwargs): - kwargs["output"] = "json" - d = self.web(n, "POST", **kwargs) - d.addCallback(self.decode_json) - return d - - def decode_json(self, (s,url)): - try: - data = simplejson.loads(s) - except ValueError: - self.fail("%s: not JSON: '%s'" % (url, s)) - return data - - def web(self, n, method="GET", **kwargs): - # returns (data, url) - url = (self.webish_url + "uri/%s" % urllib.quote(n.get_uri()) - + "?" + "&".join(["%s=%s" % (k,v) for (k,v) in kwargs.items()])) - d = getPage(url, method=method) - d.addCallback(lambda data: (data,url)) - return d - - def wait_for_operation(self, ignored, ophandle): - url = self.webish_url + "operations/" + ophandle - url += "?t=status&output=JSON" - d = getPage(url) - def _got(res): - try: - data = simplejson.loads(res) - except ValueError: - self.fail("%s: not JSON: '%s'" % (url, res)) - if not data["finished"]: - d = self.stall(delay=1.0) - d.addCallback(self.wait_for_operation, ophandle) - return d - return data - d.addCallback(_got) - return d - - def get_operation_results(self, ignored, ophandle, output=None): - url = self.webish_url + "operations/" + ophandle - url += "?t=status" - if output: - url += "&output=" + output - d = getPage(url) - def _got(res): - if output and output.lower() == "json": - try: - return simplejson.loads(res) - except ValueError: - self.fail("%s: not JSON: '%s'" % (url, res)) - return res - d.addCallback(_got) - return d - - def slow_web(self, n, output=None, **kwargs): - # use ophandle= - handle = base32.b2a(os.urandom(4)) - d = self.web(n, "POST", ophandle=handle, **kwargs) - d.addCallback(self.wait_for_operation, handle) - d.addCallback(self.get_operation_results, handle, output=output) - return d - def json_check_is_healthy(self, data, n, where, incomplete=False): self.failUnlessEqual(data["storage-index"], @@ -2217,7 +2228,7 @@ class DeepCheckWeb(SystemTestMixin, unittest.TestCase, WebErrorMixin): self.failUnlessEqual(data["count-corrupt-shares"], 0, where) self.failUnlessEqual(data["list-corrupt-shares"], [], where) self.failUnlessEqual(data["list-unhealthy-files"], [], where) - self.json_check_stats(data["stats"], where) + self.json_check_stats_good(data["stats"], where) def json_full_deepcheck_and_repair_is_healthy(self, data, n, where): self.failUnlessEqual(data["root-storage-index"], @@ -2245,17 +2256,17 @@ class DeepCheckWeb(SystemTestMixin, unittest.TestCase, WebErrorMixin): self.failUnlessEqual(data["storage-index"], "", where) self.failUnlessEqual(data["results"]["healthy"], True, where) - def json_check_stats(self, data, where): - self.check_stats(data) + def json_check_stats_good(self, data, where): + self.check_stats_good(data) - def do_test_web(self, ignored): + def do_test_web_good(self, ignored): d = defer.succeed(None) # stats d.addCallback(lambda ign: self.slow_web(self.root, t="start-deep-stats", output="json")) - d.addCallback(self.json_check_stats, "deep-stats") + d.addCallback(self.json_check_stats_good, "deep-stats") # check, no verify d.addCallback(lambda ign: self.web_json(self.root, t="check")) @@ -2333,3 +2344,314 @@ class DeepCheckWeb(SystemTestMixin, unittest.TestCase, WebErrorMixin): d.addCallback(lambda ign: self.web(self.small, t="info")) return d + +class DeepCheckWebBad(DeepCheckBase, unittest.TestCase): + + def test_bad(self): + self.basedir = self.mktemp() + d = self.set_up_nodes() + d.addCallback(self.set_up_damaged_tree) + d.addCallback(self.do_test_check_bad) + d.addCallback(self.do_test_deepcheck_bad) + d.addCallback(self.do_test_web_bad) + d.addErrback(self.explain_web_error) + d.addErrback(self.explain_error) + return d + + + + def set_up_damaged_tree(self, ignored): + # 6.4s + + # root + # mutable-good + # mutable-missing-shares + # mutable-corrupt-shares + # mutable-unrecoverable + # large-good + # large-missing-shares + # large-corrupt-shares + # large-unrecoverable + + self.nodes = {} + + c0 = self.clients[0] + d = c0.create_empty_dirnode() + def _created_root(n): + self.root = n + self.root_uri = n.get_uri() + d.addCallback(_created_root) + d.addCallback(self.create_mangled, "mutable-good") + d.addCallback(self.create_mangled, "mutable-missing-shares") + d.addCallback(self.create_mangled, "mutable-corrupt-shares") + d.addCallback(self.create_mangled, "mutable-unrecoverable") + d.addCallback(self.create_mangled, "large-good") + d.addCallback(self.create_mangled, "large-missing-shares") + d.addCallback(self.create_mangled, "large-corrupt-shares") + d.addCallback(self.create_mangled, "large-unrecoverable") + + return d + + + def create_mangled(self, ignored, name): + nodetype, mangletype = name.split("-", 1) + if nodetype == "mutable": + d = self.clients[0].create_mutable_file("mutable file contents") + d.addCallback(lambda n: self.root.set_node(unicode(name), n)) + elif nodetype == "large": + large = upload.Data("Lots of data\n" * 1000 + name + "\n", None) + d = self.root.add_file(unicode(name), large) + elif nodetype == "small": + small = upload.Data("Small enough for a LIT", None) + d = self.root.add_file(unicode(name), small) + + def _stash_node(node): + self.nodes[name] = node + return node + d.addCallback(_stash_node) + + if mangletype == "good": + pass + elif mangletype == "missing-shares": + d.addCallback(self._delete_some_shares) + elif mangletype == "corrupt-shares": + d.addCallback(self._corrupt_some_shares) + else: + assert mangletype == "unrecoverable" + d.addCallback(self._delete_most_shares) + + return d + + def _run_cli(self, argv): + stdout, stderr = StringIO(), StringIO() + runner.runner(argv, run_by_human=False, stdout=stdout, stderr=stderr) + return stdout.getvalue() + + def _find_shares(self, node): + si = node.get_storage_index() + out = self._run_cli(["debug", "find-shares", base32.b2a(si)] + + [c.basedir for c in self.clients]) + files = out.split("\n") + return [f for f in files if f] + + def _delete_some_shares(self, node): + shares = self._find_shares(node) + os.unlink(shares[0]) + os.unlink(shares[1]) + + def _corrupt_some_shares(self, node): + shares = self._find_shares(node) + self._run_cli(["debug", "corrupt-share", shares[0]]) + self._run_cli(["debug", "corrupt-share", shares[1]]) + + def _delete_most_shares(self, node): + shares = self._find_shares(node) + for share in shares[1:]: + os.unlink(share) + + + def check_is_healthy(self, cr, where): + self.failUnless(ICheckerResults.providedBy(cr), where) + self.failUnless(cr.is_healthy(), where) + self.failUnless(cr.is_recoverable(), where) + d = cr.get_data() + self.failUnlessEqual(d["count-recoverable-versions"], 1, where) + self.failUnlessEqual(d["count-unrecoverable-versions"], 0, where) + return cr + + def check_is_missing_shares(self, cr, where): + self.failUnless(ICheckerResults.providedBy(cr), where) + self.failIf(cr.is_healthy(), where) + self.failUnless(cr.is_recoverable(), where) + d = cr.get_data() + self.failUnlessEqual(d["count-recoverable-versions"], 1, where) + self.failUnlessEqual(d["count-unrecoverable-versions"], 0, where) + return cr + + def check_has_corrupt_shares(self, cr, where): + # by "corrupt-shares" we mean the file is still recoverable + self.failUnless(ICheckerResults.providedBy(cr), where) + d = cr.get_data() + self.failIf(cr.is_healthy(), where) + self.failUnless(cr.is_recoverable(), where) + d = cr.get_data() + self.failUnless(d["count-shares-good"] < 10, where) + self.failUnless(d["count-corrupt-shares"], where) + self.failUnless(d["list-corrupt-shares"], where) + return cr + + def check_is_unrecoverable(self, cr, where): + self.failUnless(ICheckerResults.providedBy(cr), where) + d = cr.get_data() + self.failIf(cr.is_healthy(), where) + self.failIf(cr.is_recoverable(), where) + self.failUnless(d["count-shares-good"] < d["count-shares-needed"], + where) + self.failUnlessEqual(d["count-recoverable-versions"], 0, where) + self.failUnlessEqual(d["count-unrecoverable-versions"], 1, where) + return cr + + def do_test_check_bad(self, ignored): + d = defer.succeed(None) + + # check the individual items, without verification. This will not + # detect corrupt shares. + def _check(which, checker): + d = self.nodes[which].check(Monitor()) + d.addCallback(checker, which + "--check") + return d + + d.addCallback(lambda ign: _check("mutable-good", self.check_is_healthy)) + d.addCallback(lambda ign: _check("mutable-missing-shares", + self.check_is_missing_shares)) + d.addCallback(lambda ign: _check("mutable-corrupt-shares", + self.check_is_healthy)) + d.addCallback(lambda ign: _check("mutable-unrecoverable", + self.check_is_unrecoverable)) + d.addCallback(lambda ign: _check("large-good", self.check_is_healthy)) + d.addCallback(lambda ign: _check("large-missing-shares", + self.check_is_missing_shares)) + d.addCallback(lambda ign: _check("large-corrupt-shares", + self.check_is_healthy)) + d.addCallback(lambda ign: _check("large-unrecoverable", + self.check_is_unrecoverable)) + + # and again with verify=True, which *does* detect corrupt shares. + def _checkv(which, checker): + d = self.nodes[which].check(Monitor(), verify=True) + d.addCallback(checker, which + "--check-and-verify") + return d + + d.addCallback(lambda ign: _checkv("mutable-good", self.check_is_healthy)) + d.addCallback(lambda ign: _checkv("mutable-missing-shares", + self.check_is_missing_shares)) + d.addCallback(lambda ign: _checkv("mutable-corrupt-shares", + self.check_has_corrupt_shares)) + d.addCallback(lambda ign: _checkv("mutable-unrecoverable", + self.check_is_unrecoverable)) + d.addCallback(lambda ign: _checkv("large-good", self.check_is_healthy)) + # disabled pending immutable verifier + #d.addCallback(lambda ign: _checkv("large-missing-shares", + # self.check_is_missing_shares)) + #d.addCallback(lambda ign: _checkv("large-corrupt-shares", + # self.check_has_corrupt_shares)) + d.addCallback(lambda ign: _checkv("large-unrecoverable", + self.check_is_unrecoverable)) + + return d + + def do_test_deepcheck_bad(self, ignored): + d = defer.succeed(None) + + # now deep-check the root, with various verify= and repair= options + d.addCallback(lambda ign: + self.root.start_deep_check().when_done()) + def _check1(cr): + self.failUnless(IDeepCheckResults.providedBy(cr)) + c = cr.get_counters() + self.failUnlessEqual(c["count-objects-checked"], 9) + self.failUnlessEqual(c["count-objects-healthy"], 5) + self.failUnlessEqual(c["count-objects-unhealthy"], 4) + self.failUnlessEqual(c["count-objects-unrecoverable"], 2) + d.addCallback(_check1) + + d.addCallback(lambda ign: + self.root.start_deep_check(verify=True).when_done()) + def _check2(cr): + self.failUnless(IDeepCheckResults.providedBy(cr)) + c = cr.get_counters() + self.failUnlessEqual(c["count-objects-checked"], 9) + # until we have a real immutable verifier, these counts will be + # off + #self.failUnlessEqual(c["count-objects-healthy"], 3) + #self.failUnlessEqual(c["count-objects-unhealthy"], 6) + self.failUnlessEqual(c["count-objects-healthy"], 5) # todo + self.failUnlessEqual(c["count-objects-unhealthy"], 4) + self.failUnlessEqual(c["count-objects-unrecoverable"], 2) + d.addCallback(_check2) + + return d + + def json_is_healthy(self, data, where): + r = data["results"] + self.failUnless(r["healthy"], where) + self.failUnless(r["recoverable"], where) + self.failUnlessEqual(r["count-recoverable-versions"], 1, where) + self.failUnlessEqual(r["count-unrecoverable-versions"], 0, where) + + def json_is_missing_shares(self, data, where): + r = data["results"] + self.failIf(r["healthy"], where) + self.failUnless(r["recoverable"], where) + self.failUnlessEqual(r["count-recoverable-versions"], 1, where) + self.failUnlessEqual(r["count-unrecoverable-versions"], 0, where) + + def json_has_corrupt_shares(self, data, where): + # by "corrupt-shares" we mean the file is still recoverable + r = data["results"] + self.failIf(r["healthy"], where) + self.failUnless(r["recoverable"], where) + self.failUnless(r["count-shares-good"] < 10, where) + self.failUnless(r["count-corrupt-shares"], where) + self.failUnless(r["list-corrupt-shares"], where) + + def json_is_unrecoverable(self, data, where): + r = data["results"] + self.failIf(r["healthy"], where) + self.failIf(r["recoverable"], where) + self.failUnless(r["count-shares-good"] < r["count-shares-needed"], + where) + self.failUnlessEqual(r["count-recoverable-versions"], 0, where) + self.failUnlessEqual(r["count-unrecoverable-versions"], 1, where) + + def do_test_web_bad(self, ignored): + d = defer.succeed(None) + + # check, no verify + def _check(which, checker): + d = self.web_json(self.nodes[which], t="check") + d.addCallback(checker, which + "--webcheck") + return d + + d.addCallback(lambda ign: _check("mutable-good", + self.json_is_healthy)) + d.addCallback(lambda ign: _check("mutable-missing-shares", + self.json_is_missing_shares)) + d.addCallback(lambda ign: _check("mutable-corrupt-shares", + self.json_is_healthy)) + d.addCallback(lambda ign: _check("mutable-unrecoverable", + self.json_is_unrecoverable)) + d.addCallback(lambda ign: _check("large-good", + self.json_is_healthy)) + d.addCallback(lambda ign: _check("large-missing-shares", + self.json_is_missing_shares)) + d.addCallback(lambda ign: _check("large-corrupt-shares", + self.json_is_healthy)) + d.addCallback(lambda ign: _check("large-unrecoverable", + self.json_is_unrecoverable)) + + # check and verify + def _checkv(which, checker): + d = self.web_json(self.nodes[which], t="check", verify="true") + d.addCallback(checker, which + "--webcheck-and-verify") + return d + + d.addCallback(lambda ign: _checkv("mutable-good", + self.json_is_healthy)) + d.addCallback(lambda ign: _checkv("mutable-missing-shares", + self.json_is_missing_shares)) + d.addCallback(lambda ign: _checkv("mutable-corrupt-shares", + self.json_has_corrupt_shares)) + d.addCallback(lambda ign: _checkv("mutable-unrecoverable", + self.json_is_unrecoverable)) + d.addCallback(lambda ign: _checkv("large-good", + self.json_is_healthy)) + # disabled pending immutable verifier + #d.addCallback(lambda ign: _checkv("large-missing-shares", + # self.json_is_missing_shares)) + #d.addCallback(lambda ign: _checkv("large-corrupt-shares", + # self.json_has_corrupt_shares)) + d.addCallback(lambda ign: _checkv("large-unrecoverable", + self.json_is_unrecoverable)) + + return d diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index 684ac844..39e59d1b 100644 --- a/src/allmydata/test/test_web.py +++ b/src/allmydata/test/test_web.py @@ -1545,7 +1545,7 @@ class Web(WebMixin, testutil.StallMixin, unittest.TestCase): bar_url = self.public_url + "/foo/bar.txt" d = self.POST(bar_url, t="check") def _check(res): - self.failUnless("Healthy!" in res) + self.failUnless("Healthy :" in res) d.addCallback(_check) redir_url = "http://allmydata.org/TARGET" def _check2(statuscode, target): @@ -1560,7 +1560,7 @@ class Web(WebMixin, testutil.StallMixin, unittest.TestCase): d.addCallback(lambda res: self.POST(bar_url, t="check", return_to=redir_url)) def _check3(res): - self.failUnless("Healthy!" in res) + self.failUnless("Healthy :" in res) self.failUnless("Return to parent directory" in res) self.failUnless(redir_url in res) d.addCallback(_check3) @@ -1579,7 +1579,7 @@ class Web(WebMixin, testutil.StallMixin, unittest.TestCase): bar_url = self.public_url + "/foo/bar.txt" d = self.POST(bar_url, t="check", repair="true") def _check(res): - self.failUnless("Healthy!" in res) + self.failUnless("Healthy :" in res) d.addCallback(_check) redir_url = "http://allmydata.org/TARGET" def _check2(statuscode, target): @@ -1594,7 +1594,7 @@ class Web(WebMixin, testutil.StallMixin, unittest.TestCase): d.addCallback(lambda res: self.POST(bar_url, t="check", return_to=redir_url)) def _check3(res): - self.failUnless("Healthy!" in res) + self.failUnless("Healthy :" in res) self.failUnless("Return to parent directory" in res) self.failUnless(redir_url in res) d.addCallback(_check3) @@ -1604,7 +1604,7 @@ class Web(WebMixin, testutil.StallMixin, unittest.TestCase): foo_url = self.public_url + "/foo/" d = self.POST(foo_url, t="check") def _check(res): - self.failUnless("Healthy!" in res) + self.failUnless("Healthy :" in res, res) d.addCallback(_check) redir_url = "http://allmydata.org/TARGET" def _check2(statuscode, target): @@ -1619,7 +1619,7 @@ class Web(WebMixin, testutil.StallMixin, unittest.TestCase): d.addCallback(lambda res: self.POST(foo_url, t="check", return_to=redir_url)) def _check3(res): - self.failUnless("Healthy!" in res) + self.failUnless("Healthy :" in res, res) self.failUnless("Return to parent directory" in res) self.failUnless(redir_url in res) d.addCallback(_check3) @@ -1638,7 +1638,7 @@ class Web(WebMixin, testutil.StallMixin, unittest.TestCase): foo_url = self.public_url + "/foo/" d = self.POST(foo_url, t="check", repair="true") def _check(res): - self.failUnless("Healthy!" in res) + self.failUnless("Healthy :" in res, res) d.addCallback(_check) redir_url = "http://allmydata.org/TARGET" def _check2(statuscode, target): @@ -1653,7 +1653,7 @@ class Web(WebMixin, testutil.StallMixin, unittest.TestCase): d.addCallback(lambda res: self.POST(foo_url, t="check", return_to=redir_url)) def _check3(res): - self.failUnless("Healthy!" in res) + self.failUnless("Healthy :" in res) self.failUnless("Return to parent directory" in res) self.failUnless(redir_url in res) d.addCallback(_check3) diff --git a/src/allmydata/web/check-and-repair-results.xhtml b/src/allmydata/web/check-and-repair-results.xhtml index ce2785fe..c9b536e4 100644 --- a/src/allmydata/web/check-and-repair-results.xhtml +++ b/src/allmydata/web/check-and-repair-results.xhtml @@ -10,7 +10,7 @@

File Check Results for SI=

-
+
diff --git a/src/allmydata/web/checker-results.xhtml b/src/allmydata/web/checker-results.xhtml index 6fef7986..0f43b9d7 100644 --- a/src/allmydata/web/checker-results.xhtml +++ b/src/allmydata/web/checker-results.xhtml @@ -11,8 +11,7 @@

File Check Results for SI=

- - +
diff --git a/src/allmydata/web/checker_results.py b/src/allmydata/web/checker_results.py index a3eabff7..ff19aa08 100644 --- a/src/allmydata/web/checker_results.py +++ b/src/allmydata/web/checker_results.py @@ -85,6 +85,7 @@ class ResultsBase: data["results"] = self._json_check_counts(r.get_data()) data["results"]["needs-rebalancing"] = r.needs_rebalancing() data["results"]["healthy"] = r.is_healthy() + data["results"]["recoverable"] = r.is_recoverable() return data def _json_check_counts(self, d): @@ -178,10 +179,17 @@ class CheckerResults(CheckerBase, rend.Page, ResultsBase): data = self._json_check_results(self.r) return simplejson.dumps(data, indent=1) + "\n" - def render_healthy(self, ctx, data): + def render_summary(self, ctx, data): + results = [] if self.r.is_healthy(): - return ctx.tag["Healthy!"] - return ctx.tag["Not Healthy!: ", self._html(self.r.get_summary())] + results.append("Healthy") + elif self.r.is_recoverable(): + results.append("Not Healthy!") + else: + results.append("Not Recoverable!") + results.append(" : ") + results.append(self._html(self.r.get_summary())) + return ctx.tag[results] def render_repair(self, ctx, data): if self.r.is_healthy(): @@ -215,11 +223,18 @@ class CheckAndRepairResults(CheckerBase, rend.Page, ResultsBase): data = self._json_check_and_repair_results(self.r) return simplejson.dumps(data, indent=1) + "\n" - def render_healthy(self, ctx, data): + def render_summary(self, ctx, data): cr = self.r.get_post_repair_results() + results = [] if cr.is_healthy(): - return ctx.tag["Healthy!"] - return ctx.tag["Not Healthy!: ", self._html(cr.get_summary())] + results.append("Healthy") + elif cr.is_recoverable(): + results.append("Not Healthy!") + else: + results.append("Not Recoverable!") + results.append(" : ") + results.append(self._html(cr.get_summary())) + return ctx.tag[results] def render_repair_results(self, ctx, data): if self.r.get_repair_attempted(): @@ -296,6 +311,8 @@ class DeepCheckResults(rend.Page, ResultsBase, ReloadMixin): return self.monitor.get_status().get_counters()["count-objects-healthy"] def data_objects_unhealthy(self, ctx, data): return self.monitor.get_status().get_counters()["count-objects-unhealthy"] + def data_objects_unrecoverable(self, ctx, data): + return self.monitor.get_status().get_counters()["count-objects-unrecoverable"] def data_count_corrupt_shares(self, ctx, data): return self.monitor.get_status().get_counters()["count-corrupt-shares"] @@ -382,6 +399,7 @@ class DeepCheckResults(rend.Page, ResultsBase, ReloadMixin): pathstring = "" ctx.fillSlots("path", pathstring) ctx.fillSlots("healthy", str(r.is_healthy())) + ctx.fillSlots("recoverable", str(r.is_recoverable())) storage_index = r.get_storage_index() ctx.fillSlots("storage_index", self._render_si_link(ctx, storage_index)) ctx.fillSlots("summary", self._html(r.get_summary())) diff --git a/src/allmydata/web/deep-check-results.xhtml b/src/allmydata/web/deep-check-results.xhtml index 9ac8fb55..18f41cac 100644 --- a/src/allmydata/web/deep-check-results.xhtml +++ b/src/allmydata/web/deep-check-results.xhtml @@ -18,6 +18,7 @@
  • Objects Checked:
  • Objects Healthy:
  • Objects Unhealthy:
  • +
  • Objects Unrecoverable:
  • Corrupt Shares:
  • @@ -67,12 +68,14 @@ Relative Path Healthy + Recoverable Storage Index Summary +