From: Brian Warner Date: Sun, 7 Sep 2008 19:44:56 +0000 (-0700) Subject: checker: overhaul checker results, split check/check_and_repair into separate methods... X-Git-Url: https://git.rkrishnan.org/simplejson/components/com_hotproperty/...?a=commitdiff_plain;h=3408d552cdf6da80cbed971c1bfc797e4abafd63;p=tahoe-lafs%2Ftahoe-lafs.git checker: overhaul checker results, split check/check_and_repair into separate methods, improve web displays --- diff --git a/src/allmydata/checker_results.py b/src/allmydata/checker_results.py new file mode 100644 index 00000000..b1255941 --- /dev/null +++ b/src/allmydata/checker_results.py @@ -0,0 +1,181 @@ + +from zope.interface import implements +from allmydata.interfaces import ICheckerResults, ICheckAndRepairResults, \ + IDeepCheckResults, IDeepCheckAndRepairResults +from allmydata.util import base32 + +class CheckerResults: + implements(ICheckerResults) + + def __init__(self, storage_index): + self.storage_index = storage_index + self.problems = [] + self.data = {"count-corrupt-shares": 0, + "list-corrupt-shares": [], + } + self.summary = "" + self.report = [] + + def set_healthy(self, healthy): + self.healthy = bool(healthy) + def set_needs_rebalancing(self, needs_rebalancing): + self.needs_rebalancing_p = bool(needs_rebalancing) + def set_data(self, data): + self.data.update(data) + def set_summary(self, summary): + assert isinstance(summary, str) # should be a single string + self.summary = summary + def set_report(self, report): + assert not isinstance(report, str) # should be list of strings + self.report = report + + def set_servermap(self, smap): + # mutable only + self.servermap = smap + + + def get_storage_index(self): + return self.storage_index + def get_storage_index_string(self): + return base32.b2a(self.storage_index) + + def is_healthy(self): + return self.healthy + + def needs_rebalancing(self): + return self.needs_rebalancing_p + def get_data(self): + return self.data + + def get_summary(self): + return self.summary 
+ def get_report(self): + return self.report + def get_servermap(self): + return self.servermap + +class CheckAndRepairResults: + implements(ICheckAndRepairResults) + + def __init__(self, storage_index): + self.storage_index = storage_index + self.repair_attempted = False + + def get_storage_index(self): + return self.storage_index + def get_storage_index_string(self): + return base32.b2a(self.storage_index) + def get_repair_attempted(self): + return self.repair_attempted + def get_repair_successful(self): + return self.repair_successful + def get_pre_repair_results(self): + return self.pre_repair_results + def get_post_repair_results(self): + return self.post_repair_results + + +class DeepResultsBase: + + def __init__(self, root_storage_index): + self.root_storage_index = root_storage_index + if root_storage_index is None: + self.root_storage_index_s = "" + else: + self.root_storage_index_s = base32.b2a(root_storage_index)[:6] + + self.objects_checked = 0 + self.objects_healthy = 0 + self.objects_unhealthy = 0 + self.corrupt_shares = [] + self.all_results = {} + + def get_root_storage_index_string(self): + return self.root_storage_index_s + + def get_corrupt_shares(self): + return self.corrupt_shares + + def get_all_results(self): + return self.all_results + + +class DeepCheckResults(DeepResultsBase): + implements(IDeepCheckResults) + + def add_check(self, r, path): + if not r: + return # non-distributed object, i.e. 
LIT file + r = ICheckerResults(r) + assert isinstance(path, (list, tuple)) + self.objects_checked += 1 + if r.is_healthy(): + self.objects_healthy += 1 + else: + self.objects_unhealthy += 1 + self.all_results[tuple(path)] = r + self.corrupt_shares.extend(r.get_data()["list-corrupt-shares"]) + + def get_counters(self): + return {"count-objects-checked": self.objects_checked, + "count-objects-healthy": self.objects_healthy, + "count-objects-unhealthy": self.objects_unhealthy, + "count-corrupt-shares": len(self.corrupt_shares), + } + + +class DeepCheckAndRepairResults(DeepResultsBase): + implements(IDeepCheckAndRepairResults) + + def __init__(self, root_storage_index): + DeepResultsBase.__init__(self, root_storage_index) + self.objects_healthy_post_repair = 0 + self.objects_unhealthy_post_repair = 0 + self.objects_healthy_post_repair = 0 + self.objects_healthy_post_repair = 0 + self.repairs_attempted = 0 + self.repairs_successful = 0 + self.repairs_unsuccessful = 0 + self.corrupt_shares_post_repair = [] + + def add_check_and_repair(self, r, path): + if not r: + return # non-distributed object, i.e. 
LIT file + r = ICheckAndRepairResults(r) + assert isinstance(path, (list, tuple)) + pre_repair = r.get_pre_repair_results() + post_repair = r.get_post_repair_results() + self.objects_checked += 1 + if pre_repair.is_healthy(): + self.objects_healthy += 1 + else: + self.objects_unhealthy += 1 + self.corrupt_shares.extend(pre_repair.get_data()["list-corrupt-shares"]) + if r.get_repair_attempted(): + self.repairs_attempted += 1 + if r.get_repair_successful(): + self.repairs_successful += 1 + else: + self.repairs_unsuccessful += 1 + if post_repair.is_healthy(): + self.objects_healthy_post_repair += 1 + else: + self.objects_unhealthy_post_repair += 1 + self.all_results[tuple(path)] = r + self.corrupt_shares_post_repair.extend(post_repair.get_data()["list-corrupt-shares"]) + + def get_counters(self): + return {"count-objects-checked": self.objects_checked, + "count-objects-healthy-pre-repair": self.objects_healthy, + "count-objects-unhealthy-pre-repair": self.objects_unhealthy, + "count-objects-healthy-post-repair": self.objects_healthy_post_repair, + "count-objects-unhealthy-post-repair": self.objects_unhealthy_post_repair, + "count-repairs-attempted": self.repairs_attempted, + "count-repairs-successful": self.repairs_successful, + "count-repairs-unsuccessful": self.repairs_unsuccessful, + "count-corrupt-shares-pre-repair": len(self.corrupt_shares), + "count-corrupt-shares-post-repair": len(self.corrupt_shares_post_repair), + } + + def get_remaining_corrupt_shares(self): + return self.corrupt_shares_post_repair diff --git a/src/allmydata/dirnode.py b/src/allmydata/dirnode.py index a1b3ee93..71aa61d7 100644 --- a/src/allmydata/dirnode.py +++ b/src/allmydata/dirnode.py @@ -9,7 +9,8 @@ from allmydata.mutable.node import MutableFileNode from allmydata.interfaces import IMutableFileNode, IDirectoryNode,\ IURI, IFileNode, IMutableFileURI, IVerifierURI, IFilesystemNode, \ ExistingChildError, ICheckable -from allmydata.immutable.checker import DeepCheckResults +from 
allmydata.checker_results import DeepCheckResults, \ + DeepCheckAndRepairResults from allmydata.util import hashutil, mathutil, base32, log from allmydata.util.hashutil import netstring from allmydata.util.limiter import ConcurrencyLimiter @@ -246,9 +247,11 @@ class NewDirectoryNode: def get_storage_index(self): return self._uri._filenode_uri.storage_index - def check(self, verify=False, repair=False): + def check(self, verify=False): """Perform a file check. See IChecker.check for details.""" - return self._node.check(verify, repair) + return self._node.check(verify) + def check_and_repair(self, verify=False): + return self._node.check_and_repair(verify) def list(self): """I return a Deferred that fires with a dictionary mapping child @@ -537,17 +540,25 @@ class NewDirectoryNode: d.addCallback(_got_list) return d - def deep_check(self, verify=False, repair=False): + def deep_check(self, verify=False): + return self.deep_check_base(verify, False) + def deep_check_and_repair(self, verify=False): + return self.deep_check_base(verify, True) + + def deep_check_base(self, verify, repair): # shallow-check each object first, then traverse children root_si = self._node.get_storage_index() self._lp = log.msg(format="deep-check starting (%(si)s)," " verify=%(verify)s, repair=%(repair)s", si=base32.b2a(root_si), verify=verify, repair=repair) - results = DeepCheckResults(root_si) + if repair: + results = DeepCheckAndRepairResults(root_si) + else: + results = DeepCheckResults(root_si) found = set() limiter = ConcurrencyLimiter(10) - d = self._add_deepcheck_from_node(self, results, found, limiter, + d = self._add_deepcheck_from_node([], self, results, found, limiter, verify, repair) def _done(res): log.msg("deep-check done", parent=self._lp) @@ -555,7 +566,7 @@ class NewDirectoryNode: d.addCallback(_done) return d - def _add_deepcheck_from_node(self, node, results, found, limiter, + def _add_deepcheck_from_node(self, path, node, results, found, limiter, verify, repair): verifier 
= node.get_verifier() if verifier in found: @@ -563,15 +574,25 @@ class NewDirectoryNode: return None found.add(verifier) - d = limiter.add(node.check, verify, repair) - d.addCallback(results.add_check) + if repair: + d = limiter.add(node.check_and_repair, verify) + d.addCallback(results.add_check_and_repair, path) + else: + d = limiter.add(node.check, verify) + d.addCallback(results.add_check, path) + + # TODO: stats: split the DeepStats.foo calls out of + # _add_deepstats_from_node into a separate non-recursing method, call + # it from both here and _add_deepstats_from_node. if IDirectoryNode.providedBy(node): d.addCallback(lambda res: node.list()) def _got_children(children): dl = [] for name, (child, metadata) in children.iteritems(): - d2 = self._add_deepcheck_from_node(child, results, + childpath = path + [name] + d2 = self._add_deepcheck_from_node(childpath, child, + results, found, limiter, verify, repair) if d2: diff --git a/src/allmydata/immutable/checker.py b/src/allmydata/immutable/checker.py index dfb912c1..71b8e9dc 100644 --- a/src/allmydata/immutable/checker.py +++ b/src/allmydata/immutable/checker.py @@ -6,99 +6,12 @@ This does no verification of the shares whatsoever. If the peer claims to have the share, we believe them. 
""" -from zope.interface import implements from twisted.internet import defer from twisted.python import log from allmydata import storage -from allmydata.interfaces import ICheckerResults, IDeepCheckResults +from allmydata.checker_results import CheckerResults from allmydata.immutable import download -from allmydata.util import hashutil, base32 - -class Results: - implements(ICheckerResults) - - def __init__(self, storage_index): - # storage_index might be None for, say, LIT files - self.storage_index = storage_index - if storage_index is None: - self.storage_index_s = "" - else: - self.storage_index_s = base32.b2a(storage_index)[:6] - self.status_report = "[not generated yet]" # string - - def is_healthy(self): - return self.healthy - - def get_storage_index(self): - return self.storage_index - - def get_storage_index_string(self): - return self.storage_index_s - - def get_mutability_string(self): - if self.storage_index: - return "immutable" - return "literal" - - def to_string(self): - s = "" - if self.healthy: - s += "Healthy!\n" - else: - s += "Not Healthy!\n" - s += "\n" - s += self.status_report - s += "\n" - return s - -class DeepCheckResults: - implements(IDeepCheckResults) - - def __init__(self, root_storage_index): - self.root_storage_index = root_storage_index - if root_storage_index is None: - self.root_storage_index_s = "" - else: - self.root_storage_index_s = base32.b2a(root_storage_index)[:6] - - self.objects_checked = 0 - self.objects_healthy = 0 - self.repairs_attempted = 0 - self.repairs_successful = 0 - self.problems = [] - self.all_results = {} - self.server_problems = {} - - def get_root_storage_index_string(self): - return self.root_storage_index_s - - def add_check(self, r): - self.objects_checked += 1 - if r.is_healthy(): - self.objects_healthy += 1 - else: - self.problems.append(r) - self.all_results[r.get_storage_index()] = r - - def add_repair(self, is_successful): - self.repairs_attempted += 1 - if is_successful: - 
self.repairs_successful += 1 - - def count_objects_checked(self): - return self.objects_checked - def count_objects_healthy(self): - return self.objects_healthy - def count_repairs_attempted(self): - return self.repairs_attempted - def count_repairs_successful(self): - return self.repairs_successful - def get_server_problems(self): - return self.server_problems - def get_problems(self): - return self.problems - def get_all_results(self): - return self.all_results +from allmydata.util import hashutil class SimpleCHKFileChecker: """Return a list of (needed, total, found, sharemap), where sharemap maps @@ -152,18 +65,25 @@ class SimpleCHKFileChecker: pass def _done(self, res): - r = Results(self.storage_index) + r = CheckerResults(self.storage_index) report = [] - r.healthy = bool(len(self.found_shares) >= self.total_shares) - r.stuff = (self.needed_shares, self.total_shares, - len(self.found_shares), self.sharemap) + r.set_healthy(bool(len(self.found_shares) >= self.total_shares)) + data = {"count-shares-good": len(self.found_shares), + "count-shares-needed": self.needed_shares, + "count-shares-expected": self.total_shares, + } + # TODO: count-good-shares-hosts, count-corrupt-shares, + # list-corrupt-shares, servers-responding, sharemap + #r.stuff = (self.needed_shares, self.total_shares, + # len(self.found_shares), self.sharemap) if len(self.found_shares) < self.total_shares: wanted = set(range(self.total_shares)) missing = wanted - self.found_shares report.append("Missing shares: %s" % ",".join(["sh%d" % shnum for shnum in sorted(missing)])) - r.status_report = "\n".join(report) + "\n" + r.set_report(report) + # TODO: r.set_summary(summary) return r class VerifyingOutput: @@ -175,7 +95,7 @@ class VerifyingOutput: self._crypttext_hash_tree = None self._opened = False self._results = results - results.healthy = False + results.set_healthy(False) def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree): self._crypttext_hash_tree = crypttext_hashtree @@ -196,8 
+116,10 @@ class VerifyingOutput: self.crypttext_hash = self._crypttext_hasher.digest() def finish(self): - self._results.healthy = True - return self._results + self._results.set_healthy(True) + # the return value of finish() is passed out of FileDownloader._done, + # but SimpleCHKFileVerifier overrides this with the CheckerResults + # instance instead. class SimpleCHKFileVerifier(download.FileDownloader): @@ -218,7 +140,7 @@ class SimpleCHKFileVerifier(download.FileDownloader): self._si_s = storage.si_b2a(self._storage_index) self.init_logging() - r = Results(self._storage_index) + self._check_results = r = CheckerResults(self._storage_index) self._output = VerifyingOutput(self._size, r) self._paused = False self._stopped = False @@ -265,5 +187,6 @@ class SimpleCHKFileVerifier(download.FileDownloader): # once we know that, we can download blocks from everybody d.addCallback(self._download_all_segments) d.addCallback(self._done) + d.addCallback(lambda ignored: self._check_results) return d diff --git a/src/allmydata/immutable/filenode.py b/src/allmydata/immutable/filenode.py index 54e883b0..8f98920b 100644 --- a/src/allmydata/immutable/filenode.py +++ b/src/allmydata/immutable/filenode.py @@ -3,8 +3,10 @@ from zope.interface import implements from twisted.internet import defer from allmydata.interfaces import IFileNode, IFileURI, IURI, ICheckable from allmydata import uri -from allmydata.immutable.checker import Results, DeepCheckResults, \ - SimpleCHKFileChecker, SimpleCHKFileVerifier +from allmydata.immutable.checker import SimpleCHKFileChecker, \ + SimpleCHKFileVerifier +from allmydata.checker_results import DeepCheckResults, \ + DeepCheckAndRepairResults class FileNode: implements(IFileNode, ICheckable) @@ -47,8 +49,7 @@ class FileNode: def get_storage_index(self): return self.u.storage_index - def check(self, verify=False, repair=False): - assert repair is False # not implemented yet + def check(self, verify=False): storage_index = self.u.storage_index k = 
self.u.needed_shares N = self.u.total_shares @@ -61,11 +62,23 @@ class FileNode: v = self.checker_class(self._client, storage_index, k, N) return v.start() - def deep_check(self, verify=False, repair=False): - d = self.check(verify, repair) + def check_and_repair(self, verify=False): + raise NotImplementedError("not implemented yet") + + def deep_check(self, verify=False): + d = self.check(verify) def _done(r): dr = DeepCheckResults(self.get_verifier().storage_index) - dr.add_check(r) + dr.add_check(r, []) + return dr + d.addCallback(_done) + return d + + def deep_check_and_repair(self, verify=False): + d = self.check_and_repair(verify) + def _done(r): + dr = DeepCheckAndRepairResults(self.get_verifier().storage_index) + dr.add_check_and_repair(r, []) return dr d.addCallback(_done) return d @@ -120,20 +133,13 @@ class LiteralFileNode: return None def check(self, verify=False, repair=False): - # neither verify= nor repair= affect LIT files - r = Results(None) - r.healthy = True - r.problems = [] - return defer.succeed(r) + # neither verify= nor repair= affect LIT files, and the check returns + # no results. + return defer.succeed(None) def deep_check(self, verify=False, repair=False): - d = self.check(verify, repair) - def _done(r): - dr = DeepCheckResults(None) - dr.add_check(r) - return dr - d.addCallback(_done) - return d + dr = DeepCheckResults(None) + return defer.succeed(dr) def download(self, target): # note that this does not update the stats_provider diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index 5900537a..e37f0b3a 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -1430,16 +1430,18 @@ class IUploader(Interface): """TODO: how should this work?""" class ICheckable(Interface): - def check(verify=False, repair=False): + def check(verify=False): """Check upon my health, optionally repairing any problems. This returns a Deferred that fires with an instance that provides - ICheckerResults. 
+ ICheckerResults, or None if the object is non-distributed (i.e. LIT + files). Filenodes and dirnodes (which provide IFilesystemNode) are also checkable. Instances that represent verifier-caps will be checkable but not downloadable. Some objects (like LIT files) do not actually - live in the grid, and their checkers indicate a healthy result. + live in the grid, and their checkers return None (non-distributed + files are always healthy). If verify=False, a relatively lightweight check will be performed: I will ask all servers if they have a share for me, and I will believe @@ -1470,7 +1472,19 @@ class ICheckable(Interface): taken. """ - def deep_check(verify=False, repair=False): + def check_and_repair(verify=False): + """Like check(), but if the file/directory is not healthy, attempt to + repair the damage. + + This returns a Deferred which fires with a tuple of (pre, post), each + is either None or an ICheckerResults instance. For non-distributed + files (i.e. a LIT file) both are None. Otherwise, 'pre' is an + ICheckerResults representing the state of the object before any + repair attempt is made. If the file was unhealthy and repair was + attempted, 'post' will be another ICheckerResults instance with the + state of the object after repair.""" + + def deep_check(verify=False): """Check upon the health of me and everything I can reach. This is a recursive form of check(), useable on dirnodes. (it can be @@ -1479,40 +1493,118 @@ class ICheckable(Interface): I return a Deferred that fires with an IDeepCheckResults object. """ + def deep_check_and_repair(verify=False): + """Check upon the health of me and everything I can reach. Repair + anything that isn't healthy. + + This is a recursive form of check_and_repair(), useable on dirnodes. + (it can be called safely on filenodes too, but only checks/repairs + the one object). + + I return a Deferred that fires with an IDeepCheckAndRepairResults + object. 
+ """ + class ICheckerResults(Interface): + """I contain the detailed results of a check/verify operation. + """ + + def get_storage_index(): + """Return a string with the (binary) storage index.""" + def get_storage_index_string(): + """Return a string with the (printable) abbreviated storage index.""" + + def is_healthy(): + """Return a boolean, True if the file/dir is fully healthy, False if + it is damaged in any way. Non-distributed LIT files always return + True.""" + + def needs_rebalancing(): + """Return a boolean, True if the file/dir's reliability could be + improved by moving shares to new servers. Non-distributed LIT files + always return False.""" + + + def get_data(): + """Return a dictionary that describes the state of the file/dir. + Non-distributed LIT files always return an empty dictionary. Normal + files and directories return a dictionary with the following keys + (note that these use base32-encoded strings rather than binary ones) + (also note that for mutable files, these counts are for the 'best' + version):: + + count-shares-good: the number of distinct good shares that were found + count-shares-needed: 'k', the number of shares required for recovery + count-shares-expected: 'N', the number of total shares generated + count-good-share-hosts: the number of distinct storage servers with + good shares. If this number is less than + count-shares-good, then some shares are + doubled up, increasing the correlation of + failures. This indicates that one or more + shares should be moved to an otherwise unused + server, if one is available. + count-corrupt-shares: the number of shares with integrity failures + list-corrupt-shares: a list of 'share locators', one for each share + that was found to be corrupt. Each share + locator is a list of (serverid, storage_index, + sharenum). + servers-responding: list of base32-encoded storage server identifiers, + one for each server which responded to the share + query. 
+ sharemap: dict mapping share identifier to list of serverids + (base32-encoded strings). This indicates which servers are + holding which shares. For immutable files, the shareid is + an integer (the share number, from 0 to N-1). For + mutable files, it is a string of the form + 'seq%d-%s-sh%d', containing the sequence number, the + roothash, and the share number. + + Mutable files will add the following keys:: + + count-wrong-shares: the number of shares for versions other than + the 'best' one (highest sequence number, highest + roothash). These are either old ... + + count-recoverable-versions: the number of recoverable versions of + the file. For a healthy file, this will + equal 1. + + count-unrecoverable-versions: the number of unrecoverable versions + of the file. For a healthy file, this + will be 0. + + """ + + def get_summary(): + """Return a string with a brief (one-line) summary of the results.""" + + def get_report(): + """Return a list of strings with more detailed results.""" + +class ICheckAndRepairResults(Interface): """I contain the detailed results of a check/verify/repair operation. The IFilesystemNode.check()/verify()/repair() methods all return - instances that provide ICheckerResults. + instances that provide ICheckAndRepairResults. 
""" - def is_healthy(): - """Return a bool, True if the file is fully healthy, False if it is - damaged in any way.""" - def get_storage_index(): """Return a string with the (binary) storage index.""" def get_storage_index_string(): """Return a string with the (printable) abbreviated storage index.""" - def get_mutability_string(): - """Return a string with 'mutable' or 'immutable'.""" + def get_repair_attempted(): + """Return a boolean, True if a repair was attempted.""" + def get_repair_successful(): + """Return a boolean, True if repair was attempted and the file/dir + was fully healthy afterwards.""" + def get_pre_repair_results(): + """Return an ICheckerResults instance that describes the state of the + file/dir before any repair was attempted.""" + def get_post_repair_results(): + """Return an ICheckerResults instance that describes the state of the + file/dir after any repair was attempted. If no repair was attempted, + the pre-repair and post-repair results will be identical.""" - def to_string(): - """Return a string that describes the detailed results of the - check/verify operation. This string will be displayed on a page all - by itself.""" - - # The old checker results (for only immutable files) were described - # with this: - # For filenodes, this fires with a tuple of (needed_shares, - # total_shares, found_shares, sharemap). The first three are ints. The - # basic health of the file is found_shares / needed_shares: if less - # than 1.0, the file is unrecoverable. - # - # The sharemap has a key for each sharenum. The value is a list of - # (binary) nodeids who hold that share. If two shares are kept on the - # same nodeid, they will fail as a pair, and overall reliability is - # decreased. class IDeepCheckResults(Interface): """I contain the results of a deep-check operation. 
@@ -1523,24 +1615,86 @@ class IDeepCheckResults(Interface): def get_root_storage_index_string(): """Return the storage index (abbreviated human-readable string) of the first object checked.""" - def count_objects_checked(): - """Return the number of objects that were checked.""" - def count_objects_healthy(): - """Return the number of objects that were fully healthy.""" - def count_repairs_attempted(): - """Return the number of repair operations that were attempted.""" - def count_repairs_successful(): - """Return the number of repair operations that succeeded in bringing - the object back up to full health.""" - def get_server_problems(): - """Return a dict, mapping server nodeid to a count of how many - problems involved that server.""" - def get_problems(): - """Return a list of ICheckerResults, one for each object that - was not fully healthy.""" + def get_counters(): + """Return a dictionary with the following keys:: + + count-objects-checked: count of how many objects were checked + count-objects-healthy: how many of those objects were completely + healthy + count-objects-unhealthy: how many were damaged in some way + count-corrupt-shares: how many shares were found to have + corruption, summed over all objects + examined + """ + + def get_corrupt_shares(): + """Return a set of (serverid, storage_index, sharenum) for all shares + that were found to be corrupt. Both serverid and storage_index are + binary. + """ + def get_all_results(): + """Return a dictionary mapping pathname (a tuple of strings, ready to + be slash-joined) to an ICheckerResults instance, one for each object + that was checked.""" + +class IDeepCheckAndRepairResults(Interface): + """I contain the results of a deep-check-and-repair operation. + + This is returned by a call to ICheckable.deep_check_and_repair(). 
+ """ + + def get_root_storage_index_string(): + """Return the storage index (abbreviated human-readable string) of + the first object checked.""" + def get_counters(): + """Return a dictionary with the following keys:: + + count-objects-checked: count of how many objects were checked + count-objects-healthy-pre-repair: how many of those objects were + completely healthy (before any + repair) + count-objects-unhealthy-pre-repair: how many were damaged in + some way + count-objects-healthy-post-repair: how many of those objects were + completely healthy (after any + repair) + count-objects-unhealthy-post-repair: how many were damaged in + some way + count-repairs-attempted: repairs were attempted on this many + objects. The count-repairs- keys will + always be provided, however unless + repair=true is present, they will all + be zero. + count-repairs-successful: how many repairs resulted in healthy + objects + count-repairs-unsuccessful: how many repairs did not + result in completely healthy objects + count-corrupt-shares-pre-repair: how many shares were found to + have corruption, summed over all + objects examined (before any + repair) + count-corrupt-shares-post-repair: how many shares were found to + have corruption, summed over all + objects examined (after any + repair) + """ + + def get_corrupt_shares(): + """Return a set of (serverid, storage_index, sharenum) for all shares + that were found to be corrupt before any repair was attempted. Both + serverid and storage_index are binary. + """ + def get_remaining_corrupt_shares(): + """Return a set of (serverid, storage_index, sharenum) for all shares + that were found to be corrupt after any repair was completed. Both + serverid and storage_index are binary. These are shares that need + manual inspection and probably deletion. 
+ """ def get_all_results(): - """Return a dict mapping storage_index (a binary string) to an - ICheckerResults instance, one for each object that was checked.""" + """Return a dictionary mapping pathname (a tuple of strings, ready to + be slash-joined) to an ICheckAndRepairResults instance, one for each + object that was checked.""" + class IRepairable(Interface): def repair(checker_results): @@ -1551,7 +1705,16 @@ class IRepairable(Interface): proof that you have actually discovered a problem with this file. I will use the data in the checker results to guide the repair process, such as which servers provided bad data and should therefore be - avoided. + avoided. The ICheckerResults object is inside the + ICheckAndRepairResults object, which is returned by the + ICheckable.check() method:: + + d = filenode.check(repair=False) + def _got_results(check_and_repair_results): + check_results = check_and_repair_results.get_pre_repair_results() + return filenode.repair(check_results) + d.addCallback(_got_results) + return d """ class IRepairResults(Interface): diff --git a/src/allmydata/mutable/checker.py b/src/allmydata/mutable/checker.py index 11ddeef0..379beaf8 100644 --- a/src/allmydata/mutable/checker.py +++ b/src/allmydata/mutable/checker.py @@ -1,10 +1,9 @@ -from zope.interface import implements from twisted.internet import defer from twisted.python import failure from allmydata import hashtree from allmydata.util import hashutil, base32, idlib, log -from allmydata.interfaces import ICheckerResults +from allmydata.checker_results import CheckAndRepairResults, CheckerResults from common import MODE_CHECK, CorruptShareError from servermap import ServerMap, ServermapUpdater @@ -16,21 +15,19 @@ class MutableChecker: self._node = node self.bad_shares = [] # list of (nodeid,shnum,failure) self._storage_index = self._node.get_storage_index() - self.results = Results(self._storage_index) + self.results = CheckerResults(self._storage_index) self.need_repair = False - def 
check(self, verify=False, repair=False): + def check(self, verify=False): servermap = ServerMap() - self.results.servermap = servermap u = ServermapUpdater(self._node, servermap, MODE_CHECK) d = u.update() d.addCallback(self._got_mapupdate_results) if verify: d.addCallback(self._verify_all_shares) - d.addCallback(self._generate_results) - if repair: - d.addCallback(self._maybe_do_repair) - d.addCallback(self._return_results) + d.addCallback(lambda res: servermap) + d.addCallback(self._fill_checker_results, self.results) + d.addCallback(lambda res: self.results) return d def _got_mapupdate_results(self, servermap): @@ -68,7 +65,7 @@ class MutableChecker: for (shnum, peerid, timestamp) in shares: ss = servermap.connections[peerid] d = self._do_read(ss, peerid, self._storage_index, [shnum], readv) - d.addCallback(self._got_answer, peerid) + d.addCallback(self._got_answer, peerid, servermap) dl.append(d) return defer.DeferredList(dl, fireOnOneErrback=True) @@ -78,7 +75,7 @@ class MutableChecker: d = ss.callRemote("slot_readv", storage_index, shnums, readv) return d - def _got_answer(self, datavs, peerid): + def _got_answer(self, datavs, peerid, servermap): for shnum,datav in datavs.items(): data = datav[0] try: @@ -88,7 +85,7 @@ class MutableChecker: self.need_repair = True self.bad_shares.append( (peerid, shnum, f) ) prefix = data[:SIGNED_PREFIX_LENGTH] - self.results.servermap.mark_bad_share(peerid, shnum, prefix) + servermap.mark_bad_share(peerid, shnum, prefix) def check_prefix(self, peerid, shnum, data): (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, @@ -134,13 +131,34 @@ class MutableChecker: if alleged_writekey != self._node.get_writekey(): raise CorruptShareError(peerid, shnum, "invalid privkey") - def _generate_results(self, res): - self.results.healthy = True - smap = self.results.servermap + def _count_shares(self, smap, version): + available_shares = smap.shares_available() + (num_distinct_shares, k, N) = available_shares[version] + counters = 
{} + counters["count-shares-good"] = num_distinct_shares + counters["count-shares-needed"] = k + counters["count-shares-expected"] = N + good_hosts = smap.all_peers_for_version(version) + counters["count-good-share-hosts"] = good_hosts + vmap = smap.make_versionmap() + counters["count-wrong-shares"] = sum([len(shares) + for verinfo,shares in vmap.items() + if verinfo != version]) + + return counters + + def _fill_checker_results(self, smap, r): + r.set_servermap(smap.copy()) + healthy = True + data = {} report = [] + summary = [] vmap = smap.make_versionmap() recoverable = smap.recoverable_versions() unrecoverable = smap.unrecoverable_versions() + data["count-recoverable-versions"] = len(recoverable) + data["count-unrecoverable-versions"] = len(unrecoverable) + if recoverable: report.append("Recoverable Versions: " + "/".join(["%d*%s" % (len(vmap[v]), @@ -152,34 +170,65 @@ class MutableChecker: smap.summarize_version(v)) for v in unrecoverable])) if smap.unrecoverable_versions(): - self.results.healthy = False + healthy = False + summary.append("some versions are unrecoverable") report.append("Unhealthy: some versions are unrecoverable") if len(recoverable) == 0: - self.results.healthy = False + healthy = False + summary.append("no versions are recoverable") report.append("Unhealthy: no versions are recoverable") if len(recoverable) > 1: - self.results.healthy = False + healthy = False + summary.append("multiple versions are recoverable") report.append("Unhealthy: there are multiple recoverable versions") - if self.best_version: + + if recoverable: + best_version = smap.best_recoverable_version() report.append("Best Recoverable Version: " + - smap.summarize_version(self.best_version)) - available_shares = smap.shares_available() - (num_distinct_shares, k, N) = available_shares[self.best_version] - if num_distinct_shares < N: - self.results.healthy = False - report.append("Unhealthy: best recoverable version has only %d shares (encoding is %d-of-%d)" - % 
(num_distinct_shares, k, N)) + smap.summarize_version(best_version)) + counters = self._count_shares(smap, best_version) + data.update(counters) + if counters["count-shares-good"] < counters["count-shares-expected"]: + healthy = False + report.append("Unhealthy: best version has only %d shares " + "(encoding is %d-of-%d)" + % (counters["count-shares-good"], + counters["count-shares-needed"], + counters["count-shares-expected"])) + summary.append("%d shares (enc %d-of-%d)" + % (counters["count-shares-good"], + counters["count-shares-needed"], + counters["count-shares-expected"])) + elif unrecoverable: + healthy = False + # find a k and N from somewhere + first = list(unrecoverable)[0] + # not exactly the best version, but that doesn't matter too much + data.update(self._count_shares(smap, first)) + else: + # couldn't find anything at all + data["count-shares-good"] = 0 + data["count-shares-needed"] = 3 # arbitrary defaults + data["count-shares-expected"] = 10 + data["count-good-share-hosts"] = 0 + data["count-wrong-shares"] = 0 + if self.bad_shares: + data["count-corrupt-shares"] = len(self.bad_shares) + data["list-corrupt-shares"] = locators = [] report.append("Corrupt Shares:") + summary.append("Corrupt Shares:") for (peerid, shnum, f) in sorted(self.bad_shares): + locators.append( (peerid, self._storage_index, shnum) ) s = "%s-sh%d" % (idlib.shortnodeid_b2a(peerid), shnum) if f.check(CorruptShareError): ft = f.value.reason else: ft = str(f) report.append(" %s: %s" % (s, ft)) + summary.append(s) p = (peerid, self._storage_index, shnum, f) - self.results.problems.append(p) + r.problems.append(p) msg = ("CorruptShareError during mutable verify, " "peerid=%(peerid)s, si=%(si)s, shnum=%(shnum)d, " "where=%(where)s") @@ -188,68 +237,52 @@ class MutableChecker: shnum=shnum, where=ft, level=log.WEIRD, umid="EkK8QA") + else: + data["count-corrupt-shares"] = 0 + data["list-corrupt-shares"] = [] + + # TODO: servers-responding, sharemap + + r.set_healthy(healthy) + 
r.set_needs_rebalancing(False) # TODO + r.set_data(data) + if healthy: + r.set_summary("Healthy") + else: + r.set_summary("Unhealthy: " + " ".join(summary)) + r.set_report(report) - self.results.status_report = "\n".join(report) + "\n" - def _maybe_do_repair(self, res): +class MutableCheckAndRepairer(MutableChecker): + def __init__(self, node): + MutableChecker.__init__(self, node) + self.cr_results = CheckAndRepairResults(self._storage_index) + self.cr_results.pre_repair_results = self.results + self.need_repair = False + + def check(self, verify=False): + d = MutableChecker.check(self, verify) + d.addCallback(self._maybe_repair) + d.addCallback(lambda res: self.cr_results) + return d + + def _maybe_repair(self, res): if not self.need_repair: + self.cr_results.post_repair_results = self.results return - self.results.repair_attempted = True + self.cr_results.repair_attempted = True d = self._node.repair(self.results) def _repair_finished(repair_results): - self.results.repair_succeeded = True - self.results.repair_results = repair_results + self.cr_results.repair_successful = True + r = CheckerResults(self._storage_index) + self.cr_results.post_repair_results = r + self._fill_checker_results(repair_results.servermap, r) + self.cr_results.repair_results = repair_results # TODO? def _repair_error(f): # I'm not sure if I want to pass through a failure or not. - self.results.repair_succeeded = False - self.results.repair_failure = f + self.cr_results.repair_successful = False + self.cr_results.repair_failure = f # TODO? + #self.cr_results.post_repair_results = ?? 
return f d.addCallbacks(_repair_finished, _repair_error) return d - - def _return_results(self, res): - return self.results - - -class Results: - implements(ICheckerResults) - - def __init__(self, storage_index): - self.storage_index = storage_index - self.storage_index_s = base32.b2a(storage_index)[:6] - self.repair_attempted = False - self.status_report = "[not generated yet]" # string - self.repair_report = None - self.problems = [] # list of (peerid, storage_index, shnum, failure) - - def is_healthy(self): - return self.healthy - - def get_storage_index(self): - return self.storage_index - def get_storage_index_string(self): - return self.storage_index_s - - def get_mutability_string(self): - return "mutable" - - def to_string(self): - s = "" - if self.healthy: - s += "Healthy!\n" - else: - s += "Not Healthy!\n" - s += "\n" - s += self.status_report - s += "\n" - if self.repair_attempted: - s += "Repair attempted " - if self.repair_succeeded: - s += "and successful\n" - else: - s += "and failed\n" - s += "\n" - s += self.repair_results.to_string() - s += "\n" - return s - diff --git a/src/allmydata/mutable/node.py b/src/allmydata/mutable/node.py index b5ee826d..3ad4fd56 100644 --- a/src/allmydata/mutable/node.py +++ b/src/allmydata/mutable/node.py @@ -12,7 +12,8 @@ from allmydata.util import hashutil from allmydata.util.assertutil import precondition from allmydata.uri import WriteableSSKFileURI from allmydata.immutable.encode import NotEnoughSharesError -from allmydata.immutable.checker import DeepCheckResults +from allmydata.checker_results import DeepCheckResults, \ + DeepCheckAndRepairResults from pycryptopp.publickey import rsa from pycryptopp.cipher.aes import AES @@ -21,7 +22,7 @@ from common import MODE_READ, MODE_WRITE, UnrecoverableFileError, \ ResponseCache, UncoordinatedWriteError from servermap import ServerMap, ServermapUpdater from retrieve import Retrieve -from checker import MutableChecker +from checker import MutableChecker, 
MutableCheckAndRepairer from repair import Repairer @@ -54,6 +55,7 @@ class MutableFileNode: SIGNATURE_KEY_SIZE = 2048 DEFAULT_ENCODING = (3, 10) checker_class = MutableChecker + check_and_repairer_class = MutableCheckAndRepairer def __init__(self, client): self._client = client @@ -243,15 +245,29 @@ class MutableFileNode: ################################# # ICheckable - def check(self, verify=False, repair=False): + def check(self, verify=False): checker = self.checker_class(self) - return checker.check(verify, repair) + return checker.check(verify) - def deep_check(self, verify=False, repair=False): - d = self.check(verify, repair) + def check_and_repair(self, verify=False): + checker = self.check_and_repairer_class(self) + return checker.check(verify) + + def deep_check(self, verify=False): + # deep-check on a filenode only gets one result + d = self.check(verify) def _done(r): dr = DeepCheckResults(self.get_storage_index()) - dr.add_check(r) + dr.add_check(r, []) + return dr + d.addCallback(_done) + return d + + def deep_check_and_repair(self, verify=False): + d = self.check_and_repair(verify) + def _done(r): + dr = DeepCheckAndRepairResults(self.get_storage_index()) + dr.add_check_and_repair(r, []) return dr d.addCallback(_done) return d diff --git a/src/allmydata/mutable/repair.py b/src/allmydata/mutable/repair.py index 82b8e4c7..f3ae1ce6 100644 --- a/src/allmydata/mutable/repair.py +++ b/src/allmydata/mutable/repair.py @@ -1,10 +1,13 @@ from zope.interface import implements -from allmydata.interfaces import IRepairResults +from allmydata.interfaces import IRepairResults, ICheckerResults class RepairResults: implements(IRepairResults) + def __init__(self, smap): + self.servermap = smap + def to_string(self): return "" @@ -14,7 +17,7 @@ class MustForceRepairError(Exception): class Repairer: def __init__(self, node, checker_results): self.node = node - self.checker_results = checker_results + self.checker_results = ICheckerResults(checker_results) assert 
checker_results.storage_index == self.node.get_storage_index() def start(self, force=False): @@ -44,7 +47,7 @@ class Repairer: # old shares: replace old shares with the latest version # bogus shares (bad sigs): replace the bad one with a good one - smap = self.checker_results.servermap + smap = self.checker_results.get_servermap() if smap.unrecoverable_newer_versions(): if not force: @@ -88,8 +91,8 @@ class Repairer: best_version = smap.best_recoverable_version() d = self.node.download_version(smap, best_version, fetch_privkey=True) d.addCallback(self.node.upload, smap) - d.addCallback(self.get_results) + d.addCallback(self.get_results, smap) return d - def get_results(self, res): - return RepairResults() + def get_results(self, res, smap): + return RepairResults(smap) diff --git a/src/allmydata/mutable/servermap.py b/src/allmydata/mutable/servermap.py index 3f18b12b..4dbbd4e3 100644 --- a/src/allmydata/mutable/servermap.py +++ b/src/allmydata/mutable/servermap.py @@ -121,6 +121,17 @@ class ServerMap: self.last_update_mode = None self.last_update_time = 0 + def copy(self): + s = ServerMap() + s.servermap = self.servermap.copy() # tuple->tuple + s.connections = self.connections.copy() # str->RemoteReference + s.unreachable_peers = set(self.unreachable_peers) + s.problems = self.problems[:] + s.bad_shares = self.bad_shares.copy() # tuple->str + s.last_update_mode = self.last_update_mode + s.last_update_time = self.last_update_time + return s + def mark_bad_share(self, peerid, shnum, checkstring): """This share was found to be bad, either in the checkstring or signature (detected during mapupdate), or deeper in the share @@ -162,6 +173,13 @@ class ServerMap: for (peerid, shnum) in self.servermap]) + def all_peers_for_version(self, verinfo): + """Return a set of peerids that hold shares for the given version.""" + return set([peerid + for ( (peerid, shnum), (verinfo2, timestamp) ) + in self.servermap.items() + if verinfo == verinfo2]) + def make_sharemap(self): 
"""Return a dict that maps shnum to a set of peerds that hold it.""" sharemap = DictOfSets() diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index e3af2eee..14eef8f4 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -10,9 +10,9 @@ from allmydata import uri, dirnode, client from allmydata.introducer.server import IntroducerNode from allmydata.interfaces import IURI, IMutableFileNode, IFileNode, \ FileTooLargeError, ICheckable -from allmydata.immutable import checker from allmydata.immutable.encode import NotEnoughSharesError -from allmydata.mutable.checker import Results as MutableCheckerResults +from allmydata.checker_results import CheckerResults, CheckAndRepairResults, \ + DeepCheckResults, DeepCheckAndRepairResults from allmydata.mutable.common import CorruptShareError from allmydata.util import log, testutil, fileutil from allmydata.stats import PickleStatsGatherer @@ -44,16 +44,27 @@ class FakeCHKFileNode: return self.my_uri def get_verifier(self): return IURI(self.my_uri).get_verifier() - def check(self, verify=False, repair=False): - r = checker.Results(None) + def check(self, verify=False): + r = CheckerResults(self.storage_index) is_bad = self.bad_shares.get(self.storage_index, None) + data = {} if is_bad: - r.healthy = False + r.set_healthy(False) r.problems = failure.Failure(CorruptShareError(is_bad)) else: - r.healthy = True + r.set_healthy(True) r.problems = [] + r.set_data(data) return defer.succeed(r) + def check_and_repair(self, verify=False): + d = self.check(verify) + def _got(cr): + r = CheckAndRepairResults(self.storage_index) + r.pre_repair_results = r.post_repair_results = cr + return r + d.addCallback(_got) + return d + def is_mutable(self): return False def is_readonly(self): @@ -136,24 +147,45 @@ class FakeMutableFileNode: def get_storage_index(self): return self.storage_index - def check(self, verify=False, repair=False): - r = MutableCheckerResults(self.storage_index) + def 
check(self, verify=False): + r = CheckerResults(self.storage_index) is_bad = self.bad_shares.get(self.storage_index, None) + data = {} + data["list-corrupt-shares"] = [] if is_bad: - r.healthy = False + r.set_healthy(False) r.problems = failure.Failure(CorruptShareError("peerid", 0, # shnum is_bad)) else: - r.healthy = True + r.set_healthy(True) r.problems = [] + r.set_data(data) return defer.succeed(r) - def deep_check(self, verify=False, repair=False): - d = self.check(verify, repair) + def check_and_repair(self, verify=False): + d = self.check(verify) + def _got(cr): + r = CheckAndRepairResults(self.storage_index) + r.pre_repair_results = r.post_repair_results = cr + return r + d.addCallback(_got) + return d + + def deep_check(self, verify=False): + d = self.check(verify) + def _done(r): + dr = DeepCheckResults(self.storage_index) + dr.add_check(r, []) + return dr + d.addCallback(_done) + return d + + def deep_check_and_repair(self, verify=False): + d = self.check_and_repair(verify) def _done(r): - dr = checker.DeepCheckResults(self.storage_index) - dr.add_check(r) + dr = DeepCheckAndRepairResults(self.storage_index) + dr.add_check(r, []) return dr d.addCallback(_done) return d diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py index 4cd3163e..517bd6b4 100644 --- a/src/allmydata/test/test_dirnode.py +++ b/src/allmydata/test/test_dirnode.py @@ -4,12 +4,14 @@ from zope.interface import implements from twisted.trial import unittest from twisted.internet import defer from allmydata import uri, dirnode -from allmydata.immutable import upload, checker +from allmydata.immutable import upload from allmydata.interfaces import IURI, IClient, IMutableFileNode, \ - INewDirectoryURI, IReadonlyNewDirectoryURI, IFileNode, ExistingChildError + INewDirectoryURI, IReadonlyNewDirectoryURI, IFileNode, \ + ExistingChildError, IDeepCheckResults, IDeepCheckAndRepairResults from allmydata.util import hashutil, testutil from allmydata.test.common import 
make_chk_file_uri, make_mutable_file_uri, \ FakeDirectoryNode, create_chk_filenode +from allmydata.checker_results import CheckerResults, CheckAndRepairResults # to test dirnode.py, we want to construct a tree of real DirectoryNodes that # contain pointers to fake files. We start with a fake MutableFileNode that @@ -32,12 +34,20 @@ class Marker: def get_verifier(self): return self.verifieruri - def check(self, verify=False, repair=False): - r = checker.Results(None) - r.healthy = True - r.problems = [] + def check(self, verify=False): + r = CheckerResults(None) + r.set_healthy(True) return defer.succeed(r) + def check_and_repair(self, verify=False): + d = self.check(verify) + def _got(cr): + r = CheckAndRepairResults(None) + r.pre_repair_results = r.post_repair_results = cr + return r + d.addCallback(_got) + return d + # dirnode requires three methods from the client: upload(), # create_node_from_uri(), and create_empty_dirnode(). Of these, upload() is # only used by the convenience composite method add_file(). 
@@ -150,12 +160,40 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin): d = self._test_deepcheck_create() d.addCallback(lambda rootnode: rootnode.deep_check()) def _check_results(r): - self.failUnlessEqual(r.count_objects_checked(), 3) - self.failUnlessEqual(r.count_objects_healthy(), 3) - self.failUnlessEqual(r.count_repairs_attempted(), 0) - self.failUnlessEqual(r.count_repairs_successful(), 0) - self.failUnlessEqual(len(r.get_server_problems()), 0) - self.failUnlessEqual(len(r.get_problems()), 0) + self.failUnless(IDeepCheckResults.providedBy(r)) + c = r.get_counters() + self.failUnlessEqual(c, + {"count-objects-checked": 3, + "count-objects-healthy": 3, + "count-objects-unhealthy": 0, + "count-corrupt-shares": 0, + }) + self.failIf(r.get_corrupt_shares()) + self.failUnlessEqual(len(r.get_all_results()), 3) + d.addCallback(_check_results) + return d + + def test_deepcheck_and_repair(self): + d = self._test_deepcheck_create() + d.addCallback(lambda rootnode: rootnode.deep_check_and_repair()) + def _check_results(r): + self.failUnless(IDeepCheckAndRepairResults.providedBy(r)) + c = r.get_counters() + self.failUnlessEqual(c, + {"count-objects-checked": 3, + "count-objects-healthy-pre-repair": 3, + "count-objects-unhealthy-pre-repair": 0, + "count-corrupt-shares-pre-repair": 0, + "count-objects-healthy-post-repair": 3, + "count-objects-unhealthy-post-repair": 0, + "count-corrupt-shares-post-repair": 0, + "count-repairs-attempted": 0, + "count-repairs-successful": 0, + "count-repairs-unsuccessful": 0, + }) + self.failIf(r.get_corrupt_shares()) + self.failIf(r.get_remaining_corrupt_shares()) + self.failUnlessEqual(len(r.get_all_results()), 3) d.addCallback(_check_results) return d @@ -169,12 +207,14 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin): d.addCallback(lambda rootnode: self._mark_file_bad(rootnode)) d.addCallback(lambda rootnode: rootnode.deep_check()) def _check_results(r): - 
self.failUnlessEqual(r.count_objects_checked(), 3) - self.failUnlessEqual(r.count_objects_healthy(), 2) - self.failUnlessEqual(r.count_repairs_attempted(), 0) - self.failUnlessEqual(r.count_repairs_successful(), 0) - self.failUnlessEqual(len(r.get_server_problems()), 0) - self.failUnlessEqual(len(r.get_problems()), 1) + c = r.get_counters() + self.failUnlessEqual(c, + {"count-objects-checked": 3, + "count-objects-healthy": 2, + "count-objects-unhealthy": 1, + "count-corrupt-shares": 0, + }) + #self.failUnlessEqual(len(r.get_problems()), 1) # TODO d.addCallback(_check_results) return d diff --git a/src/allmydata/test/test_filenode.py b/src/allmydata/test/test_filenode.py index 04cc68d1..8ccdbd71 100644 --- a/src/allmydata/test/test_filenode.py +++ b/src/allmydata/test/test_filenode.py @@ -2,7 +2,8 @@ from twisted.trial import unittest from twisted.internet import defer from allmydata import uri -from allmydata.immutable import filenode, download, checker +from allmydata.immutable import filenode, download +from allmydata.checker_results import CheckerResults, CheckAndRepairResults from allmydata.mutable.node import MutableFileNode from allmydata.util import hashutil @@ -131,10 +132,21 @@ class Checker(unittest.TestCase): d.addCallback(lambda res: fn1.check(verify=True)) d.addCallback(_check_checker_results) + # TODO: check-and-repair + d.addCallback(lambda res: fn1.deep_check()) def _check_deepcheck_results(dcr): - self.failIf(dcr.get_problems()) + c = dcr.get_counters() + self.failUnlessEqual(c["count-objects-checked"], 1) + self.failUnlessEqual(c["count-objects-healthy"], 1) + self.failUnlessEqual(c["count-objects-unhealthy"], 0) + self.failUnlessEqual(c["count-corrupt-shares"], 0) + self.failIf(dcr.get_corrupt_shares()) + d.addCallback(_check_deepcheck_results) + + d.addCallback(lambda res: fn1.deep_check(verify=True)) d.addCallback(_check_deepcheck_results) + return d def test_literal_filenode(self): @@ -145,7 +157,7 @@ class Checker(unittest.TestCase): d = 
fn1.check() def _check_checker_results(cr): - self.failUnless(cr.is_healthy()) + self.failUnlessEqual(cr, None) d.addCallback(_check_checker_results) d.addCallback(lambda res: fn1.check(verify=True)) @@ -153,7 +165,15 @@ class Checker(unittest.TestCase): d.addCallback(lambda res: fn1.deep_check()) def _check_deepcheck_results(dcr): - self.failIf(dcr.get_problems()) + c = dcr.get_counters() + self.failUnlessEqual(c["count-objects-checked"], 0) + self.failUnlessEqual(c["count-objects-healthy"], 0) + self.failUnlessEqual(c["count-objects-unhealthy"], 0) + self.failUnlessEqual(c["count-corrupt-shares"], 0) + self.failIf(dcr.get_corrupt_shares()) + d.addCallback(_check_deepcheck_results) + + d.addCallback(lambda res: fn1.deep_check(verify=True)) d.addCallback(_check_deepcheck_results) return d @@ -169,6 +189,7 @@ class Checker(unittest.TestCase): n = MutableFileNode(client).init_from_uri(u) n.checker_class = FakeMutableChecker + n.check_and_repairer_class = FakeMutableCheckAndRepairer d = n.check() def _check_checker_results(cr): @@ -180,24 +201,41 @@ class Checker(unittest.TestCase): d.addCallback(lambda res: n.deep_check()) def _check_deepcheck_results(dcr): - self.failIf(dcr.get_problems()) + c = dcr.get_counters() + self.failUnlessEqual(c["count-objects-checked"], 1) + self.failUnlessEqual(c["count-objects-healthy"], 1) + self.failUnlessEqual(c["count-objects-unhealthy"], 0) + self.failUnlessEqual(c["count-corrupt-shares"], 0) + self.failIf(dcr.get_corrupt_shares()) d.addCallback(_check_deepcheck_results) + + d.addCallback(lambda res: n.deep_check(verify=True)) + d.addCallback(_check_deepcheck_results) + return d class FakeMutableChecker: def __init__(self, node): - self.r = checker.Results(node.get_storage_index()) - self.r.healthy = True - self.r.problems = [] + self.r = CheckerResults(node.get_storage_index()) + self.r.set_healthy(True) + + def check(self, verify): + return defer.succeed(self.r) + +class FakeMutableCheckAndRepairer: + def __init__(self, node): + 
cr = CheckerResults(node.get_storage_index()) + cr.set_healthy(True) + self.r = CheckAndRepairResults(node.get_storage_index()) + self.r.pre_repair_results = self.r.post_repair_results = cr - def check(self, verify, repair): + def check(self, verify): return defer.succeed(self.r) class FakeImmutableChecker: def __init__(self, client, storage_index, needed_shares, total_shares): - self.r = checker.Results(storage_index) - self.r.healthy = True - self.r.problems = [] + self.r = CheckerResults(storage_index) + self.r.set_healthy(True) def start(self): return defer.succeed(self.r) diff --git a/src/allmydata/test/test_mutable.py b/src/allmydata/test/test_mutable.py index 3b05a866..6cb5a4b7 100644 --- a/src/allmydata/test/test_mutable.py +++ b/src/allmydata/test/test_mutable.py @@ -1179,12 +1179,11 @@ class Roundtrip(unittest.TestCase, testutil.ShouldFailMixin, PublishMixin): class CheckerMixin: def check_good(self, r, where): - self.failUnless(r.healthy, where) - self.failIf(r.problems, where) + self.failUnless(r.is_healthy(), where) return r def check_bad(self, r, where): - self.failIf(r.healthy, where) + self.failIf(r.is_healthy(), where) return r def check_expected_failure(self, r, expected_exception, substring, where): diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 90016cf9..f2803b20 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -1683,11 +1683,11 @@ class SystemTest(SystemTestMixin, unittest.TestCase): def _got_lit_filenode(n): self.failUnless(isinstance(n, filenode.LiteralFileNode)) d = n.check() - def _check_filenode_results(r): - self.failUnless(r.is_healthy()) - d.addCallback(_check_filenode_results) + def _check_lit_filenode_results(r): + self.failUnlessEqual(r, None) + d.addCallback(_check_lit_filenode_results) d.addCallback(lambda res: n.check(verify=True)) - d.addCallback(_check_filenode_results) + d.addCallback(_check_lit_filenode_results) return d 
d.addCallback(_got_lit_filenode) return d @@ -1776,7 +1776,7 @@ class ImmutableChecker(ShareManglingMixin, unittest.TestCase): def _check1(filenode): before_check_reads = self._count_reads() - d2 = filenode.check(verify=False, repair=False) + d2 = filenode.check(verify=False) def _after_check(checkresults): after_check_reads = self._count_reads() self.failIf(after_check_reads - before_check_reads > 0, after_check_reads - before_check_reads) @@ -1789,7 +1789,7 @@ class ImmutableChecker(ShareManglingMixin, unittest.TestCase): d.addCallback(self._corrupt_a_share) def _check2(ignored): before_check_reads = self._count_reads() - d2 = self.filenode.check(verify=False, repair=False) + d2 = self.filenode.check(verify=False) def _after_check(checkresults): after_check_reads = self._count_reads() @@ -1803,7 +1803,7 @@ class ImmutableChecker(ShareManglingMixin, unittest.TestCase): d.addCallback(lambda ignore: self.replace_shares({})) def _check3(ignored): before_check_reads = self._count_reads() - d2 = self.filenode.check(verify=False, repair=False) + d2 = self.filenode.check(verify=False) def _after_check(checkresults): after_check_reads = self._count_reads() @@ -1824,7 +1824,7 @@ class ImmutableChecker(ShareManglingMixin, unittest.TestCase): def _check1(filenode): before_check_reads = self._count_reads() - d2 = filenode.check(verify=True, repair=False) + d2 = filenode.check(verify=True) def _after_check(checkresults): after_check_reads = self._count_reads() # print "delta was ", after_check_reads - before_check_reads @@ -1838,7 +1838,7 @@ class ImmutableChecker(ShareManglingMixin, unittest.TestCase): d.addCallback(self._corrupt_a_share) def _check2(ignored): before_check_reads = self._count_reads() - d2 = self.filenode.check(verify=True, repair=False) + d2 = self.filenode.check(verify=True) def _after_check(checkresults): after_check_reads = self._count_reads() @@ -1876,7 +1876,8 @@ class MutableChecker(SystemTestMixin, unittest.TestCase): return getPage(url, method="POST") 
d.addCallback(_do_check) def _got_results(out): - self.failUnless("
Healthy!" in out, out)
+            self.failUnless("
Healthy!
" in out, out) + self.failUnless("Recoverable Versions: 10*seq1-" in out, out) self.failIf("Not Healthy!" in out, out) self.failIf("Unhealthy" in out, out) self.failIf("Corrupt Shares" in out, out) @@ -1911,10 +1912,8 @@ class MutableChecker(SystemTestMixin, unittest.TestCase): d.addCallback(_do_check) def _got_results(out): self.failUnless("Not Healthy!" in out, out) - self.failUnless("Unhealthy: best recoverable version has only 9 shares (encoding is 3-of-10)" in out, out) - shid_re = (r"Corrupt Shares:\s+%s: block hash tree failure" % - self.corrupt_shareid) - self.failUnless(re.search(shid_re, out), out) + self.failUnless("Unhealthy: best version has only 9 shares (encoding is 3-of-10)" in out, out) + self.failUnless("Corrupt Shares:" in out, out) d.addCallback(_got_results) # now make sure the webapi repairer can fix it @@ -1925,12 +1924,12 @@ class MutableChecker(SystemTestMixin, unittest.TestCase): return getPage(url, method="POST") d.addCallback(_do_repair) def _got_repair_results(out): - self.failUnless("Repair attempted and successful" in out) + self.failUnless("
Repair successful
" in out, out) d.addCallback(_got_repair_results) d.addCallback(_do_check) def _got_postrepair_results(out): self.failIf("Not Healthy!" in out, out) - self.failUnless("Recoverable Versions: 10*seq" in out) + self.failUnless("Recoverable Versions: 10*seq" in out, out) d.addCallback(_got_postrepair_results) return d @@ -1963,7 +1962,7 @@ class MutableChecker(SystemTestMixin, unittest.TestCase): d.addCallback(_do_check) def _got_results(out): self.failUnless("Not Healthy!" in out, out) - self.failUnless("Unhealthy: best recoverable version has only 9 shares (encoding is 3-of-10)" in out, out) + self.failUnless("Unhealthy: best version has only 9 shares (encoding is 3-of-10)" in out, out) self.failIf("Corrupt Shares" in out, out) d.addCallback(_got_results) @@ -1975,7 +1974,7 @@ class MutableChecker(SystemTestMixin, unittest.TestCase): return getPage(url, method="POST") d.addCallback(_do_repair) def _got_repair_results(out): - self.failUnless("Repair attempted and successful" in out) + self.failUnless("Repair successful" in out) d.addCallback(_got_repair_results) d.addCallback(_do_check) def _got_postrepair_results(out): diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index eed62d1e..3c4d0cc6 100644 --- a/src/allmydata/test/test_web.py +++ b/src/allmydata/test/test_web.py @@ -1451,6 +1451,31 @@ class Web(WebMixin, unittest.TestCase): d.addCallback(_check3) return d + def test_POST_FILEURL_check_and_repair(self): + bar_url = self.public_url + "/foo/bar.txt" + d = self.POST(bar_url, t="check", repair="true") + def _check(res): + self.failUnless("Healthy!" 
in res) + d.addCallback(_check) + redir_url = "http://allmydata.org/TARGET" + def _check2(statuscode, target): + self.failUnlessEqual(statuscode, str(http.FOUND)) + self.failUnlessEqual(target, redir_url) + d.addCallback(lambda res: + self.shouldRedirect2("test_POST_FILEURL_check_and_repair", + _check2, + self.POST, bar_url, + t="check", repair="true", + when_done=redir_url)) + d.addCallback(lambda res: + self.POST(bar_url, t="check", return_to=redir_url)) + def _check3(res): + self.failUnless("Healthy!" in res) + self.failUnless("Return to parent directory" in res) + self.failUnless(redir_url in res) + d.addCallback(_check3) + return d + def test_POST_DIRURL_check(self): foo_url = self.public_url + "/foo/" d = self.POST(foo_url, t="check") @@ -1476,13 +1501,72 @@ class Web(WebMixin, unittest.TestCase): d.addCallback(_check3) return d + def test_POST_DIRURL_check_and_repair(self): + foo_url = self.public_url + "/foo/" + d = self.POST(foo_url, t="check", repair="true") + def _check(res): + self.failUnless("Healthy!" in res) + d.addCallback(_check) + redir_url = "http://allmydata.org/TARGET" + def _check2(statuscode, target): + self.failUnlessEqual(statuscode, str(http.FOUND)) + self.failUnlessEqual(target, redir_url) + d.addCallback(lambda res: + self.shouldRedirect2("test_POST_DIRURL_check_and_repair", + _check2, + self.POST, foo_url, + t="check", repair="true", + when_done=redir_url)) + d.addCallback(lambda res: + self.POST(foo_url, t="check", return_to=redir_url)) + def _check3(res): + self.failUnless("Healthy!" 
in res) + self.failUnless("Return to parent directory" in res) + self.failUnless(redir_url in res) + d.addCallback(_check3) + return d + def test_POST_DIRURL_deepcheck(self): d = self.POST(self.public_url, t="deep-check") def _check(res): self.failUnless("Objects Checked: 8" in res) self.failUnless("Objects Healthy: 8" in res) + d.addCallback(_check) + redir_url = "http://allmydata.org/TARGET" + def _check2(statuscode, target): + self.failUnlessEqual(statuscode, str(http.FOUND)) + self.failUnlessEqual(target, redir_url) + d.addCallback(lambda res: + self.shouldRedirect2("test_POST_DIRURL_check", + _check2, + self.POST, self.public_url, + t="deep-check", + when_done=redir_url)) + d.addCallback(lambda res: + self.POST(self.public_url, t="deep-check", + return_to=redir_url)) + def _check3(res): + self.failUnless("Return to parent directory" in res) + self.failUnless(redir_url in res) + d.addCallback(_check3) + return d + + def test_POST_DIRURL_deepcheck_and_repair(self): + d = self.POST(self.public_url, t="deep-check", repair="true") + def _check(res): + self.failUnless("Objects Checked: 8" in res) + + self.failUnless("Objects Healthy (before repair): 8" in res) + self.failUnless("Objects Unhealthy (before repair): 0" in res) + self.failUnless("Corrupt Shares (before repair): 0" in res) + self.failUnless("Repairs Attempted: 0" in res) self.failUnless("Repairs Successful: 0" in res) + self.failUnless("Repairs Unsuccessful: 0" in res) + + self.failUnless("Objects Healthy (after repair): 8" in res) + self.failUnless("Objects Unhealthy (after repair): 0" in res) + self.failUnless("Corrupt Shares (after repair): 0" in res) d.addCallback(_check) redir_url = "http://allmydata.org/TARGET" def _check2(statuscode, target): diff --git a/src/allmydata/web/check-and-repair-results.xhtml b/src/allmydata/web/check-and-repair-results.xhtml new file mode 100644 index 00000000..ce2785fe --- /dev/null +++ b/src/allmydata/web/check-and-repair-results.xhtml @@ -0,0 +1,24 @@ + + + 
AllMyData - Tahoe - Check Results + + + + + + +

File Check Results for SI=

+ +
+ +
+ +
+ +
+ +
+ + + diff --git a/src/allmydata/web/checker-results.xhtml b/src/allmydata/web/checker-results.xhtml index 01fa8fc8..c6eb79e9 100644 --- a/src/allmydata/web/checker-results.xhtml +++ b/src/allmydata/web/checker-results.xhtml @@ -8,9 +8,11 @@ -

File Check Results for SI= ()

+

File Check Results for SI=

-
+
+ +
diff --git a/src/allmydata/web/checker_results.py b/src/allmydata/web/checker_results.py index 370605fa..30c25ad7 100644 --- a/src/allmydata/web/checker_results.py +++ b/src/allmydata/web/checker_results.py @@ -1,24 +1,39 @@ import time from nevow import rend, inevow, tags as T -from allmydata.web.common import getxmlfile, get_arg -from allmydata.interfaces import ICheckerResults, IDeepCheckResults +from twisted.web import html +from allmydata.web.common import getxmlfile, get_arg, IClient +from allmydata.interfaces import ICheckAndRepairResults, ICheckerResults, \ + IDeepCheckResults, IDeepCheckAndRepairResults +from allmydata.util import base32, idlib -class CheckerResults(rend.Page): +class ResultsBase: + def _render_results(self, cr): + assert ICheckerResults(cr) + return T.pre["\n".join(self._html(cr.get_report()))] # TODO: more + def _html(self, s): + if isinstance(s, (str, unicode)): + return html.escape(s) + assert isinstance(s, (list, tuple)) + return [html.escape(w) for w in s] + +class CheckerResults(rend.Page, ResultsBase): docFactory = getxmlfile("checker-results.xhtml") def __init__(self, results): - assert ICheckerResults(results) - self.r = results + self.r = ICheckerResults(results) def render_storage_index(self, ctx, data): return self.r.get_storage_index_string() - def render_mutability(self, ctx, data): - return self.r.get_mutability_string() + def render_healthy(self, ctx, data): + if self.r.is_healthy(): + return ctx.tag["Healthy!"] + return ctx.tag["Not Healthy!:", self._html(self.r.get_summary())] def render_results(self, ctx, data): - return ctx.tag[self.r.to_string()] + cr = self._render_results(self.r) + return ctx.tag[cr] def render_return(self, ctx, data): req = inevow.IRequest(ctx) @@ -27,7 +42,47 @@ class CheckerResults(rend.Page): return T.div[T.a(href=return_to)["Return to parent directory"]] return "" -class DeepCheckResults(rend.Page): +class CheckAndRepairResults(rend.Page, ResultsBase): + docFactory = 
getxmlfile("check-and-repair-results.xhtml") + + def __init__(self, results): + self.r = ICheckAndRepairResults(results) + + def render_storage_index(self, ctx, data): + return self.r.get_storage_index_string() + + def render_healthy(self, ctx, data): + cr = self.r.get_post_repair_results() + if cr.is_healthy(): + return ctx.tag["Healthy!"] + return ctx.tag["Not Healthy!:", self._html(cr.get_summary())] + + def render_repair_results(self, ctx, data): + if self.r.get_repair_attempted(): + if self.r.get_repair_successful(): + return ctx.tag["Repair successful"] + else: + return ctx.tag["Repair unsuccessful"] + return ctx.tag["No repair necessary"] + + def render_post_repair_results(self, ctx, data): + cr = self._render_results(self.r.get_post_repair_results()) + return ctx.tag[cr] + + def render_maybe_pre_repair_results(self, ctx, data): + if self.r.get_repair_attempted(): + cr = self._render_results(self.r.get_pre_repair_results()) + return ctx.tag[T.div["Pre-Repair Checker Results:"], cr] + return "" + + def render_return(self, ctx, data): + req = inevow.IRequest(ctx) + return_to = get_arg(req, "return_to", None) + if return_to: + return T.div[T.a(href=return_to)["Return to parent directory"]] + return "" + +class DeepCheckResults(rend.Page, ResultsBase): docFactory = getxmlfile("deep-check-results.xhtml") def __init__(self, results): @@ -38,35 +93,191 @@ class DeepCheckResults(rend.Page): return self.r.get_root_storage_index_string() def data_objects_checked(self, ctx, data): - return self.r.count_objects_checked() + return self.r.get_counters()["count-objects-checked"] def data_objects_healthy(self, ctx, data): - return self.r.count_objects_healthy() - def data_repairs_attempted(self, ctx, data): - return self.r.count_repairs_attempted() - def data_repairs_successful(self, ctx, data): - return self.r.count_repairs_successful() + return self.r.get_counters()["count-objects-healthy"] + def data_objects_unhealthy(self, ctx, data): + return 
self.r.get_counters()["count-objects-unhealthy"] + + def data_count_corrupt_shares(self, ctx, data): + return self.r.get_counters()["count-corrupt-shares"] + + def render_problems_p(self, ctx, data): + c = self.r.get_counters() + if c["count-objects-unhealthy"]: + return ctx.tag + return "" def data_problems(self, ctx, data): - for cr in self.r.get_problems(): - yield cr + all_objects = self.r.get_all_results() + for path in sorted(all_objects.keys()): + cr = all_objects[path] + assert ICheckerResults.providedBy(cr) + if not cr.is_healthy(): + yield path, cr + def render_problem(self, ctx, data): - cr = data - text = cr.get_storage_index_string() - text += ": " - text += cr.status_report - return ctx.tag[text] + path, cr = data + summary_text = "" + summary = cr.get_summary() + if summary: + summary_text = ": " + summary + summary_text += " [SI: %s]" % cr.get_storage_index_string() + return ctx.tag["/".join(self._html(path)), self._html(summary_text)] + + + def render_servers_with_corrupt_shares_p(self, ctx, data): + if self.r.get_counters()["count-corrupt-shares"]: + return ctx.tag + return "" + + def data_servers_with_corrupt_shares(self, ctx, data): + servers = [serverid + for (serverid, storage_index, sharenum) + in self.r.get_corrupt_shares()] + servers.sort() + return servers + + def render_server_problem(self, ctx, data): + serverid = data + data = [idlib.shortnodeid_b2a(serverid)] + c = IClient(ctx) + nickname = c.get_nickname_for_peerid(serverid) + if nickname: + data.append(" (%s)" % self._html(nickname)) + return ctx.tag[data] + + + def render_corrupt_shares_p(self, ctx, data): + if self.r.get_counters()["count-corrupt-shares"]: + return ctx.tag + return "" + def data_corrupt_shares(self, ctx, data): + return self.r.get_corrupt_shares() + def render_share_problem(self, ctx, data): + serverid, storage_index, sharenum = data + nickname = IClient(ctx).get_nickname_for_peerid(serverid) + ctx.fillSlots("serverid", idlib.shortnodeid_b2a(serverid)) + if 
nickname: + ctx.fillSlots("nickname", self._html(nickname)) + ctx.fillSlots("si", base32.b2a(storage_index)) + ctx.fillSlots("shnum", str(sharenum)) + return ctx.tag + + def render_return(self, ctx, data): + req = inevow.IRequest(ctx) + return_to = get_arg(req, "return_to", None) + if return_to: + return T.div[T.a(href=return_to)["Return to parent directory"]] + return "" def data_all_objects(self, ctx, data): r = self.r.get_all_results() - for storage_index in sorted(r.keys()): - yield r[storage_index] + for path in sorted(r.keys()): + yield (path, r[path]) def render_object(self, ctx, data): - r = data - ctx.fillSlots("storage_index", r.get_storage_index_string()) + path, r = data + ctx.fillSlots("path", "/".join(self._html(path))) ctx.fillSlots("healthy", str(r.is_healthy())) + ctx.fillSlots("summary", self._html(r.get_summary())) return ctx.tag + def render_runtime(self, ctx, data): + req = inevow.IRequest(ctx) + runtime = time.time() - req.processing_started_timestamp + return ctx.tag["runtime: %s seconds" % runtime] + +class DeepCheckAndRepairResults(rend.Page, ResultsBase): + docFactory = getxmlfile("deep-check-and-repair-results.xhtml") + + def __init__(self, results): + assert IDeepCheckAndRepairResults(results) + self.r = results + + def render_root_storage_index(self, ctx, data): + return self.r.get_root_storage_index_string() + + def data_objects_checked(self, ctx, data): + return self.r.get_counters()["count-objects-checked"] + + def data_objects_healthy(self, ctx, data): + return self.r.get_counters()["count-objects-healthy-pre-repair"] + def data_objects_unhealthy(self, ctx, data): + return self.r.get_counters()["count-objects-unhealthy-pre-repair"] + def data_corrupt_shares(self, ctx, data): + return self.r.get_counters()["count-corrupt-shares-pre-repair"] + + def data_repairs_attempted(self, ctx, data): + return self.r.get_counters()["count-repairs-attempted"] + def data_repairs_successful(self, ctx, data): + return 
self.r.get_counters()["count-repairs-successful"] + def data_repairs_unsuccessful(self, ctx, data): + return self.r.get_counters()["count-repairs-unsuccessful"] + + def data_objects_healthy_post(self, ctx, data): + return self.r.get_counters()["count-objects-healthy-post-repair"] + def data_objects_unhealthy_post(self, ctx, data): + return self.r.get_counters()["count-objects-unhealthy-post-repair"] + def data_corrupt_shares_post(self, ctx, data): + return self.r.get_counters()["count-corrupt-shares-post-repair"] + + def render_pre_repair_problems_p(self, ctx, data): + c = self.r.get_counters() + if c["count-objects-unhealthy-pre-repair"]: + return ctx.tag + return "" + + def data_pre_repair_problems(self, ctx, data): + all_objects = self.r.get_all_results() + for path in sorted(all_objects.keys()): + r = all_objects[path] + assert ICheckAndRepairResults.providedBy(r) + cr = r.get_pre_repair_results() + if not cr.is_healthy(): + yield path, cr + + def render_problem(self, ctx, data): + path, cr = data + return ["/".join(self._html(path)), ": ", self._html(cr.get_summary())] + + def render_post_repair_problems_p(self, ctx, data): + c = self.r.get_counters() + if (c["count-objects-unhealthy-post-repair"] + or c["count-corrupt-shares-post-repair"]): + return ctx.tag + return "" + + def data_post_repair_problems(self, ctx, data): + all_objects = self.r.get_all_results() + for path in sorted(all_objects.keys()): + r = all_objects[path] + assert ICheckAndRepairResults.providedBy(r) + cr = r.get_post_repair_results() + if not cr.is_healthy(): + yield path, cr + + def render_servers_with_corrupt_shares_p(self, ctx, data): + if self.r.get_counters()["count-corrupt-shares-pre-repair"]: + return ctx.tag + return "" + def data_servers_with_corrupt_shares(self, ctx, data): + return [] # TODO + def render_server_problem(self, ctx, data): + pass + + + def render_remaining_corrupt_shares_p(self, ctx, data): + if self.r.get_counters()["count-corrupt-shares-post-repair"]: + return 
ctx.tag + return "" + def data_post_repair_corrupt_shares(self, ctx, data): + return [] # TODO + + def render_share_problem(self, ctx, data): + pass + + def render_return(self, ctx, data): req = inevow.IRequest(ctx) return_to = get_arg(req, "return_to", None) @@ -74,6 +285,22 @@ class DeepCheckResults(rend.Page): return T.div[T.a(href=return_to)["Return to parent directory"]] return "" + def data_all_objects(self, ctx, data): + r = self.r.get_all_results() + for path in sorted(r.keys()): + yield (path, r[path]) + + def render_object(self, ctx, data): + path, r = data + ctx.fillSlots("path", "/".join(self._html(path))) + ctx.fillSlots("healthy_pre_repair", + str(r.get_pre_repair_results().is_healthy())) + ctx.fillSlots("healthy_post_repair", + str(r.get_post_repair_results().is_healthy())) + ctx.fillSlots("summary", + self._html(r.get_pre_repair_results().get_summary())) + return ctx.tag + def render_runtime(self, ctx, data): req = inevow.IRequest(ctx) runtime = time.time() - req.processing_started_timestamp diff --git a/src/allmydata/web/deep-check-and-repair-results.xhtml b/src/allmydata/web/deep-check-and-repair-results.xhtml new file mode 100644 index 00000000..36afd272 --- /dev/null +++ b/src/allmydata/web/deep-check-and-repair-results.xhtml @@ -0,0 +1,89 @@ + + + AllMyData - Tahoe - Deep Check Results + + + + + + +

Deep-Check-And-Repair Results for root + SI=

+ +

Counters:

+
    +
  • Objects Checked:
  • + +
  • Objects Healthy (before repair):
  • +
  • Objects Unhealthy (before repair):
  • +
  • Corrupt Shares (before repair):
  • + +
  • Repairs Attempted:
  • +
  • Repairs Successful:
  • +
  • Repairs Unsuccessful:
  • + +
  • Objects Healthy (after repair):
  • +
  • Objects Unhealthy (after repair):
  • +
  • Corrupt Shares (after repair):
  • + +
+ +
+

Files/Directories That Had Problems:

+ +
    +
  • +
  • None
  • +
+
+ + +
+

Files/Directories That Still Have Problems:

+
    +
  • +
  • None
  • +
+
+ +
+

Servers on which corrupt shares were found

+
    +
  • +
  • None
  • +
+
+ +
+

Remaining Corrupt Shares

+

These shares need to be manually inspected and removed.

+
    +
  • +
  • None
  • +
+
+ +
+ +
+ + + + + + + + + + + + + +
Relative PathHealthyPost-RepairSummary
+
+ +
+ + + diff --git a/src/allmydata/web/deep-check-results.xhtml b/src/allmydata/web/deep-check-results.xhtml index d4b040bc..69ec37e4 100644 --- a/src/allmydata/web/deep-check-results.xhtml +++ b/src/allmydata/web/deep-check-results.xhtml @@ -10,44 +10,70 @@

Deep-Check Results for root SI=

+

Counters:

  • Objects Checked:
  • Objects Healthy:
  • +
  • Objects Unhealthy:
  • +
  • Corrupt Shares:
  • +
-

Problems:

+
+

Files/Directories That Had Problems:

  • None
+
-

Repair Results:

-
    -
  • Repairs Attempted:
  • -
  • Repairs Successful:
  • + +
    +

    Servers on which corrupt shares were found

    +
      +
    • +
    • None
    +
    + +
    +

    Corrupt Shares

    +

    If repair fails, these shares need to be manually inspected and removed.

    + + + + + + + + + + + + + +
    ServerServer NicknameStorage IndexShare Number
    +
    + +
    -

    Objects Checked

    +

    All Results

    - - + + + - + + - - -
    Storage IndexHealthy?Relative PathHealthySummary
    no objects?
    - -
    -
    diff --git a/src/allmydata/web/directory.py b/src/allmydata/web/directory.py index 3b62b4a0..0e9096d2 100644 --- a/src/allmydata/web/directory.py +++ b/src/allmydata/web/directory.py @@ -21,7 +21,8 @@ from allmydata.web.common import text_plain, WebError, IClient, \ getxmlfile, RenderMixin from allmydata.web.filenode import ReplaceMeMixin, \ FileNodeHandler, PlaceHolderNodeHandler -from allmydata.web.checker_results import CheckerResults, DeepCheckResults +from allmydata.web.checker_results import CheckerResults, DeepCheckResults, \ + DeepCheckAndRepairResults class BlockingFileError(Exception): # TODO: catch and transform @@ -340,8 +341,12 @@ class DirectoryNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin): # check this directory and everything reachable from it verify = boolean_of_arg(get_arg(req, "verify", "false")) repair = boolean_of_arg(get_arg(req, "repair", "false")) - d = self.node.deep_check(verify, repair) - d.addCallback(lambda res: DeepCheckResults(res)) + if repair: + d = self.node.deep_check_and_repair(verify) + d.addCallback(lambda res: DeepCheckAndRepairResults(res)) + else: + d = self.node.deep_check(verify) + d.addCallback(lambda res: DeepCheckResults(res)) return d def _POST_set_children(self, req): diff --git a/src/allmydata/web/filenode.py b/src/allmydata/web/filenode.py index 7777132e..eef13928 100644 --- a/src/allmydata/web/filenode.py +++ b/src/allmydata/web/filenode.py @@ -14,7 +14,7 @@ from allmydata.util import log from allmydata.web.common import text_plain, WebError, IClient, RenderMixin, \ boolean_of_arg, get_arg, should_create_intermediate_directories -from allmydata.web.checker_results import CheckerResults +from allmydata.web.checker_results import CheckerResults, CheckAndRepairResults class ReplaceMeMixin: @@ -256,8 +256,12 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin): def _POST_check(self, req): verify = boolean_of_arg(get_arg(req, "verify", "false")) repair = boolean_of_arg(get_arg(req, "repair", 
"false")) - d = self.node.check(verify, repair) - d.addCallback(lambda res: CheckerResults(res)) + if repair: + d = self.node.check_and_repair(verify) + d.addCallback(lambda res: CheckAndRepairResults(res)) + else: + d = self.node.check(verify) + d.addCallback(lambda res: CheckerResults(res)) return d def render_DELETE(self, ctx):