From: Daira Hopwood Date: Thu, 20 Mar 2014 16:13:57 +0000 (+0000) Subject: Remove 'needs-rebalancing' and add 'count-happiness' to checker reports; repair tests... X-Git-Tag: allmydata-tahoe-1.10.1a1~194 X-Git-Url: https://git.rkrishnan.org/about.html?a=commitdiff_plain;h=refs%2Fheads%2F1784-2105-checker-results;p=tahoe-lafs%2Ftahoe-lafs.git Remove 'needs-rebalancing' and add 'count-happiness' to checker reports; repair tests. fixes #1784, #2105 Signed-off-by: Daira Hopwood --- diff --git a/docs/frontends/webapi.rst b/docs/frontends/webapi.rst index b54ad91f..0968f39b 100644 --- a/docs/frontends/webapi.rst +++ b/docs/frontends/webapi.rst @@ -1415,6 +1415,8 @@ mainly intended for developers. this dictionary has only the 'healthy' key, which will always be True. For distributed files, this dictionary has the following keys: + count-happiness: the servers-of-happiness level of the file, as + defined in `docs/specifications/servers-of-happiness.rst`_. count-shares-good: the number of good shares that were found count-shares-needed: 'k', the number of shares required for recovery count-shares-expected: 'N', the number of total shares generated @@ -1438,12 +1440,6 @@ mainly intended for developers. list-corrupt-shares: a list of "share locators", one for each share that was found to be corrupt. Each share locator is a list of (serverid, storage_index, sharenum). - needs-rebalancing: (bool) This field is intended to be True iff - reliability could be improved for this file by - rebalancing, i.e. by moving some shares to other - servers. It may be incorrect in some cases for - Tahoe-LAFS up to and including v1.10, and its - precise definition is expected to change. servers-responding: list of base32-encoded storage server identifiers, one for each server which responded to the share query. @@ -1466,6 +1462,12 @@ mainly intended for developers. 'seq%d-%s-sh%d', containing the sequence number, the roothash, and the share number. +Before Tahoe-LAFS v1.11, the `results` dictionary also had a `needs-rebalancing` +field, but that has been removed since it was computed incorrectly. + +.. _`docs/specifications/servers-of-happiness.rst`: ../specifications/servers-of-happiness.rst + + ``POST $URL?t=start-deep-check`` (must add &ophandle=XYZ) This initiates a recursive walk of all files and directories reachable from diff --git a/src/allmydata/check_results.py b/src/allmydata/check_results.py index a5601c15..4071de6a 100644 --- a/src/allmydata/check_results.py +++ b/src/allmydata/check_results.py @@ -8,7 +8,7 @@ class CheckResults: implements(ICheckResults) def __init__(self, uri, storage_index, - healthy, recoverable, needs_rebalancing, + healthy, recoverable, count_happiness, count_shares_needed, count_shares_expected, count_shares_good, count_good_share_hosts, count_recoverable_versions, count_unrecoverable_versions, @@ -31,8 +31,8 @@ class CheckResults: self._recoverable = recoverable if not self._recoverable: assert not self._healthy - self._needs_rebalancing_p = bool(needs_rebalancing) + self._count_happiness = count_happiness self._count_shares_needed = count_shares_needed self._count_shares_expected = count_shares_expected self._count_shares_good = count_shares_good @@ -78,8 +78,8 @@ class CheckResults: def is_recoverable(self): return self._recoverable - def needs_rebalancing(self): - return self._needs_rebalancing_p + def get_happiness(self): + return self._count_happiness def get_encoding_needed(self): return self._count_shares_needed @@ -120,7 +120,8 @@ class CheckResults: for (s, SI, shnum) in self._list_corrupt_shares] incompatible = [(s.get_serverid(), SI, shnum) for (s, SI, shnum) in self._list_incompatible_shares] - d = {"count-shares-needed": self._count_shares_needed, + d = {"count-happiness": self._count_happiness, + "count-shares-needed": self._count_shares_needed, "count-shares-expected": self._count_shares_expected, "count-shares-good": self._count_shares_good, "count-good-share-hosts": self._count_good_share_hosts, diff --git a/src/allmydata/immutable/checker.py b/src/allmydata/immutable/checker.py index 41000f7e..d931a15b 100644 --- a/src/allmydata/immutable/checker.py +++ b/src/allmydata/immutable/checker.py @@ -12,6 +12,7 @@ from allmydata.util.hashutil import file_renewal_secret_hash, \ file_cancel_secret_hash, bucket_renewal_secret_hash, \ bucket_cancel_secret_hash, uri_extension_hash, CRYPTO_VAL_SIZE, \ block_hash +from allmydata.util.happinessutil import servers_of_happiness from allmydata.immutable import layout @@ -775,15 +776,11 @@ class Checker(log.PrefixingLogMixin): recoverable = 0 unrecoverable = 1 - # The file needs rebalancing if the set of servers that have at least - # one share is less than the number of uniquely-numbered shares - # available. - # TODO: this may be wrong, see ticket #1115 comment:27 and ticket #1784. - needs_rebalancing = bool(good_share_hosts < len(verifiedshares)) + count_happiness = servers_of_happiness(verifiedshares) cr = CheckResults(self._verifycap, SI, healthy=healthy, recoverable=bool(recoverable), - needs_rebalancing=needs_rebalancing, + count_happiness=count_happiness, count_shares_needed=self._verifycap.needed_shares, count_shares_expected=self._verifycap.total_shares, count_shares_good=len(verifiedshares), diff --git a/src/allmydata/immutable/filenode.py b/src/allmydata/immutable/filenode.py index 6b54b2d0..1c3780ba 100644 --- a/src/allmydata/immutable/filenode.py +++ b/src/allmydata/immutable/filenode.py @@ -11,6 +11,7 @@ from allmydata.interfaces import IImmutableFileNode, IUploadResults from allmydata.util import consumer from allmydata.check_results import CheckResults, CheckAndRepairResults from allmydata.util.dictutil import DictOfSets +from allmydata.util.happinessutil import servers_of_happiness from pycryptopp.cipher.aes import AES # local imports @@ -144,12 +145,11 @@ class CiphertextFileNode: is_healthy = bool(len(sm) >= verifycap.total_shares) is_recoverable = bool(len(sm) >= verifycap.needed_shares) - # TODO: this may be wrong, see ticket #1115 comment:27 and ticket #1784. - needs_rebalancing = bool(len(sm) >= verifycap.total_shares) + count_happiness = servers_of_happiness(sm) prr = CheckResults(cr.get_uri(), cr.get_storage_index(), healthy=is_healthy, recoverable=is_recoverable, - needs_rebalancing=needs_rebalancing, + count_happiness=count_happiness, count_shares_needed=verifycap.needed_shares, count_shares_expected=verifycap.total_shares, count_shares_good=len(sm), diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index b556d4e3..a8dbdfeb 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -2156,18 +2156,16 @@ class ICheckResults(Interface): not. Unrecoverable files are obviously unhealthy. Non-distributed LIT files always return True.""" - def needs_rebalancing(): - """Return a boolean, True if the file/dir's reliability could be - improved by moving shares to new servers. Non-distributed LIT files - always return False.""" - # the following methods all return None for non-distributed LIT files + def get_happiness(): + """Return the happiness count of the file.""" + def get_encoding_needed(): - """Return 'k', the number of shares required for recovery""" + """Return 'k', the number of shares required for recovery.""" def get_encoding_expected(): - """Return 'N', the number of total shares generated""" + """Return 'N', the number of total shares generated.""" def get_share_counter_good(): """Return the number of distinct good shares that were found. For diff --git a/src/allmydata/mutable/checker.py b/src/allmydata/mutable/checker.py index 08580f18..cf073b80 100644 --- a/src/allmydata/mutable/checker.py +++ b/src/allmydata/mutable/checker.py @@ -1,6 +1,7 @@ from allmydata.uri import from_string from allmydata.util import base32, log, dictutil +from allmydata.util.happinessutil import servers_of_happiness from allmydata.check_results import CheckAndRepairResults, CheckResults from allmydata.mutable.common import MODE_CHECK, MODE_WRITE, CorruptShareError @@ -152,7 +153,6 @@ class MutableChecker: summary.append("multiple versions are recoverable") report.append("Unhealthy: there are multiple recoverable versions") - needs_rebalancing = False if recoverable: best_version = smap.best_recoverable_version() report.append("Best Recoverable Version: " + @@ -166,16 +166,12 @@ class MutableChecker: report.append("Unhealthy: best version has only %d shares " "(encoding is %d-of-%d)" % (s, k, N)) summary.append("%d shares (enc %d-of-%d)" % (s, k, N)) - hosts = smap.all_servers_for_version(best_version) - needs_rebalancing = bool( len(hosts) < N ) elif unrecoverable: healthy = False # find a k and N from somewhere first = list(unrecoverable)[0] # not exactly the best version, but that doesn't matter too much counters = self._count_shares(smap, first) - # leave needs_rebalancing=False: the file being unrecoverable is - # the bigger problem else: # couldn't find anything at all counters = { @@ -223,10 +219,12 @@ class MutableChecker: else: summary = "Unhealthy: " + " ".join(summary) + count_happiness = servers_of_happiness(sharemap) + cr = CheckResults(from_string(self._node.get_uri()), self._storage_index, healthy=healthy, recoverable=bool(recoverable), - needs_rebalancing=needs_rebalancing, + count_happiness=count_happiness, count_shares_needed=counters["count-shares-needed"], count_shares_expected=counters["count-shares-expected"], count_shares_good=counters["count-shares-good"], diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index 82ccbf64..6bf2f974 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -70,7 +70,7 @@ class FakeCHKFileNode: s = StubServer("\x00"*20) r = CheckResults(self.my_uri, self.storage_index, healthy=True, recoverable=True, - needs_rebalancing=False, + count_happiness=10, count_shares_needed=3, count_shares_expected=10, count_shares_good=10, @@ -282,7 +282,7 @@ class FakeMutableFileNode: s = StubServer("\x00"*20) r = CheckResults(self.my_uri, self.storage_index, healthy=True, recoverable=True, - needs_rebalancing=False, + count_happiness=10, count_shares_needed=3, count_shares_expected=10, count_shares_good=10, diff --git a/src/allmydata/test/test_checker.py b/src/allmydata/test/test_checker.py index 65498e76..ad5acb15 100644 --- a/src/allmydata/test/test_checker.py +++ b/src/allmydata/test/test_checker.py @@ -84,7 +84,8 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin): server_1 = sb.get_stub_server(serverid_1) server_f = sb.get_stub_server(serverid_f) u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234) - data = { "count_shares_needed": 3, + data = { "count_happiness": 8, + "count_shares_needed": 3, "count_shares_expected": 9, "count_shares_good": 10, "count_good_share_hosts": 11, @@ -101,7 +102,6 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin): } cr = check_results.CheckResults(u, u.get_storage_index(), healthy=True, recoverable=True, - needs_rebalancing=False, summary="groovy", **data) w = web_check_results.CheckResultsRenderer(c, cr) @@ -110,6 +110,7 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin): self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated self.failUnlessIn("Healthy : groovy", s) self.failUnlessIn("Share Counts: need 3-of-9, have 10", s) + self.failUnlessIn("Happiness Level: 8", s) self.failUnlessIn("Hosts with good shares: 11", s) self.failUnlessIn("Corrupt shares: none", s) self.failUnlessIn("Wrong Shares: 0", s) @@ -119,7 +120,6 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin): cr = check_results.CheckResults(u, u.get_storage_index(), healthy=False, recoverable=True, - needs_rebalancing=False, summary="ungroovy", **data) w = web_check_results.CheckResultsRenderer(c, cr) @@ -132,7 +132,6 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin): data["list_corrupt_shares"] = [(server_1, u.get_storage_index(), 2)] cr = check_results.CheckResults(u, u.get_storage_index(), healthy=False, recoverable=False, - needs_rebalancing=False, summary="rather dead", **data) w = web_check_results.CheckResultsRenderer(c, cr) @@ -157,7 +156,7 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin): self.failUnlessEqual(j["summary"], "rather dead") self.failUnlessEqual(j["storage-index"], "2k6avpjga3dho3zsjo6nnkt7n4") - expected = {'needs-rebalancing': False, + expected = {'count-happiness': 8, 'count-shares-expected': 9, 'healthy': False, 'count-unrecoverable-versions': 0, @@ -192,7 +191,8 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin): serverid_f = "\xff"*20 u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234) - data = { "count_shares_needed": 3, + data = { "count_happiness": 5, + "count_shares_needed": 3, "count_shares_expected": 10, "count_shares_good": 6, "count_good_share_hosts": 7, @@ -210,11 +210,11 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin): } pre_cr = check_results.CheckResults(u, u.get_storage_index(), healthy=False, recoverable=True, - needs_rebalancing=False, summary="illing", **data) - data = { "count_shares_needed": 3, + data = { "count_happiness": 9, + "count_shares_needed": 3, "count_shares_expected": 10, "count_shares_good": 10, "count_good_share_hosts": 11, @@ -232,7 +232,6 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin): } post_cr = check_results.CheckResults(u, u.get_storage_index(), healthy=True, recoverable=True, - needs_rebalancing=False, summary="groovy", **data) @@ -265,7 +264,6 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin): crr.repair_successful = False post_cr = check_results.CheckResults(u, u.get_storage_index(), healthy=False, recoverable=True, - needs_rebalancing=False, summary="better", **data) crr.post_repair_results = post_cr @@ -281,7 +279,6 @@ class WebResultsRendering(unittest.TestCase, WebRenderingMixin): crr.repair_successful = False post_cr = check_results.CheckResults(u, u.get_storage_index(), healthy=False, recoverable=False, - needs_rebalancing=False, summary="worse", **data) crr.post_repair_results = post_cr diff --git a/src/allmydata/test/test_deepcheck.py b/src/allmydata/test/test_deepcheck.py index 98f2a528..fd9db7ec 100644 --- a/src/allmydata/test/test_deepcheck.py +++ b/src/allmydata/test/test_deepcheck.py @@ -277,13 +277,12 @@ class DeepCheckWebGood(DeepCheckBase, unittest.TestCase): self.failUnlessEqual(cr.get_storage_index_string(), base32.b2a(n.get_storage_index()), where) num_servers = len(self.g.all_servers) - needs_rebalancing = bool( num_servers < 10 ) - if not incomplete: - self.failUnlessEqual(cr.needs_rebalancing(), needs_rebalancing, - str((where, cr, cr.as_dict()))) - self.failUnlessEqual(cr.get_share_counter_good(), 10, where) + self.failUnlessEqual(num_servers, 10, where) + + self.failUnlessEqual(cr.get_happiness(), num_servers, where) + self.failUnlessEqual(cr.get_share_counter_good(), num_servers, where) self.failUnlessEqual(cr.get_encoding_needed(), 3, where) - self.failUnlessEqual(cr.get_encoding_expected(), 10, where) + self.failUnlessEqual(cr.get_encoding_expected(), num_servers, where) if not incomplete: self.failUnlessEqual(cr.get_host_counter_good_shares(), num_servers, where) @@ -533,13 +532,13 @@ class DeepCheckWebGood(DeepCheckBase, unittest.TestCase): r = data["results"] self.failUnlessEqual(r["healthy"], True, where) num_servers = len(self.g.all_servers) - needs_rebalancing = bool( num_servers < 10 ) - if not incomplete: - self.failUnlessEqual(r["needs-rebalancing"], needs_rebalancing, - where) - self.failUnlessEqual(r["count-shares-good"], 10, where) + self.failUnlessEqual(num_servers, 10) + + self.failIfIn("needs-rebalancing", r) + self.failUnlessEqual(r["count-happiness"], num_servers, where) + self.failUnlessEqual(r["count-shares-good"], num_servers, where) self.failUnlessEqual(r["count-shares-needed"], 3, where) - self.failUnlessEqual(r["count-shares-expected"], 10, where) + self.failUnlessEqual(r["count-shares-expected"], num_servers, where) if not incomplete: self.failUnlessEqual(r["count-good-share-hosts"], num_servers, where) diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index ba76b8c6..731e710d 100644 --- a/src/allmydata/test/test_web.py +++ b/src/allmydata/test/test_web.py @@ -4580,7 +4580,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi r = simplejson.loads(res) self.failUnlessEqual(r["summary"], "Healthy") self.failUnless(r["results"]["healthy"]) - self.failIf(r["results"]["needs-rebalancing"]) + self.failIfIn("needs-rebalancing", r["results"]) self.failUnless(r["results"]["recoverable"]) d.addCallback(_got_json_good) @@ -4624,8 +4624,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.failUnlessEqual(r["summary"], "Not Healthy: 9 shares (enc 3-of-10)") self.failIf(r["results"]["healthy"]) - self.failIf(r["results"]["needs-rebalancing"]) self.failUnless(r["results"]["recoverable"]) + self.failIfIn("needs-rebalancing", r["results"]) d.addCallback(_got_json_sick) d.addCallback(self.CHECK, "dead", "t=check") @@ -4638,8 +4638,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.failUnlessEqual(r["summary"], "Not Healthy: 1 shares (enc 3-of-10)") self.failIf(r["results"]["healthy"]) - self.failIf(r["results"]["needs-rebalancing"]) self.failIf(r["results"]["recoverable"]) + self.failIfIn("needs-rebalancing", r["results"]) d.addCallback(_got_json_dead) d.addCallback(self.CHECK, "corrupt", "t=check&verify=true") @@ -4652,6 +4652,8 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.failUnlessIn("Unhealthy: 9 shares (enc 3-of-10)", r["summary"]) self.failIf(r["results"]["healthy"]) self.failUnless(r["results"]["recoverable"]) + self.failIfIn("needs-rebalancing", r["results"]) + self.failUnlessReallyEqual(r["results"]["count-happiness"], 9) self.failUnlessReallyEqual(r["results"]["count-shares-good"], 9) self.failUnlessReallyEqual(r["results"]["count-corrupt-shares"], 1) d.addCallback(_got_json_corrupt) @@ -5100,12 +5102,14 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.failUnlessEqual(u0["type"], "directory") self.failUnlessReallyEqual(to_str(u0["cap"]), self.rootnode.get_uri()) u0cr = u0["check-results"] + self.failUnlessReallyEqual(u0cr["results"]["count-happiness"], 10) self.failUnlessReallyEqual(u0cr["results"]["count-shares-good"], 10) ugood = [u for u in units if u["type"] == "file" and u["path"] == [u"good"]][0] self.failUnlessReallyEqual(to_str(ugood["cap"]), self.uris["good"]) ugoodcr = ugood["check-results"] + self.failUnlessReallyEqual(ugoodcr["results"]["count-happiness"], 10) self.failUnlessReallyEqual(ugoodcr["results"]["count-shares-good"], 10) stats = units[-1] @@ -5204,6 +5208,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.failUnlessEqual(last_unit["path"], ["subdir"]) r = last_unit["check-results"]["results"] self.failUnlessReallyEqual(r["count-recoverable-versions"], 0) + self.failUnlessReallyEqual(r["count-happiness"], 1) self.failUnlessReallyEqual(r["count-shares-good"], 1) self.failUnlessReallyEqual(r["recoverable"], False) d.addCallback(_check_broken_deepcheck) @@ -5281,6 +5286,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.failUnlessReallyEqual(to_str(u0["cap"]), self.rootnode.get_uri()) u0crr = u0["check-and-repair-results"] self.failUnlessReallyEqual(u0crr["repair-attempted"], False) + self.failUnlessReallyEqual(u0crr["pre-repair-results"]["results"]["count-happiness"], 10) self.failUnlessReallyEqual(u0crr["pre-repair-results"]["results"]["count-shares-good"], 10) ugood = [u for u in units @@ -5288,6 +5294,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.failUnlessEqual(to_str(ugood["cap"]), self.uris["good"]) ugoodcrr = ugood["check-and-repair-results"] self.failUnlessReallyEqual(ugoodcrr["repair-attempted"], False) + self.failUnlessReallyEqual(ugoodcrr["pre-repair-results"]["results"]["count-happiness"], 10) self.failUnlessReallyEqual(ugoodcrr["pre-repair-results"]["results"]["count-shares-good"], 10) usick = [u for u in units @@ -5296,7 +5303,9 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi usickcrr = usick["check-and-repair-results"] self.failUnlessReallyEqual(usickcrr["repair-attempted"], True) self.failUnlessReallyEqual(usickcrr["repair-successful"], True) + self.failUnlessReallyEqual(usickcrr["pre-repair-results"]["results"]["count-happiness"], 9) self.failUnlessReallyEqual(usickcrr["pre-repair-results"]["results"]["count-shares-good"], 9) + self.failUnlessReallyEqual(usickcrr["post-repair-results"]["results"]["count-happiness"], 10) self.failUnlessReallyEqual(usickcrr["post-repair-results"]["results"]["count-shares-good"], 10) stats = units[-1] diff --git a/src/allmydata/web/check_results.py b/src/allmydata/web/check_results.py index e3ddbc94..5a62b24c 100644 --- a/src/allmydata/web/check_results.py +++ b/src/allmydata/web/check_results.py @@ -8,8 +8,10 @@ from allmydata.web.operations import ReloadMixin from allmydata.interfaces import ICheckAndRepairResults, ICheckResults from allmydata.util import base32, dictutil + def json_check_counts(r): - d = {"count-shares-good": r.get_share_counter_good(), + d = {"count-happiness": r.get_happiness(), + "count-shares-good": r.get_share_counter_good(), "count-shares-needed": r.get_encoding_needed(), "count-shares-expected": r.get_encoding_expected(), "count-good-share-hosts": r.get_host_counter_good_shares(), @@ -40,7 +42,6 @@ def json_check_results(r): data["storage-index"] = r.get_storage_index_string() data["summary"] = r.get_summary() data["results"] = json_check_counts(r) - data["results"]["needs-rebalancing"] = r.needs_rebalancing() data["results"]["healthy"] = r.is_healthy() data["results"]["recoverable"] = r.is_recoverable() return data @@ -86,6 +87,7 @@ class ResultsBase: "need %d-of-%d, have %d" % (cr.get_encoding_needed(), cr.get_encoding_expected(), cr.get_share_counter_good())) + add("Happiness Level", cr.get_happiness()) add("Hosts with good shares", cr.get_host_counter_good_shares()) if cr.get_corrupt_shares():