From: Zooko O'Whielacronx Date: Sun, 21 Dec 2008 22:07:52 +0000 (-0700) Subject: immutable, checker, and tests: improve docstrings, assertions, tests X-Git-Url: https://git.rkrishnan.org/pf/content/en/seg/cyclelanguage?a=commitdiff_plain;h=8b7ce325d73975a50eed5d3d6300949da0f16588;p=tahoe-lafs%2Ftahoe-lafs.git immutable, checker, and tests: improve docstrings, assertions, tests No functional changes, but remove unused code, improve or fix docstrings, etc. --- diff --git a/docs/architecture.txt b/docs/architecture.txt index 1a275a53..a9a225b0 100644 --- a/docs/architecture.txt +++ b/docs/architecture.txt @@ -83,10 +83,9 @@ A tagged hash of the encryption key is used to form the "storage index", which is used for both server selection (described below) and to index shares within the Storage Servers on the selected peers. -A variety of hashes are computed while the shares are being produced, -to validate the plaintext, the ciphertext, and the shares -themselves. Merkle hash trees are also produced to enable validation -of individual segments of plaintext or ciphertext without requiring +Hashes are computed while the shares are being produced, to validate +the ciphertext and the shares themselves. Merkle hash trees are used to +enable validation of individual segments of ciphertext without requiring the download/decoding of the whole file. These hashes go into the "Capability Extension Block", which will be stored with each share. diff --git a/src/allmydata/hashtree.py b/src/allmydata/hashtree.py index 8e66471e..20d2afc7 100644 --- a/src/allmydata/hashtree.py +++ b/src/allmydata/hashtree.py @@ -128,7 +128,7 @@ class CompleteBinaryTreeMixin: Return a list of node indices that are necessary for the hash chain. """ if i < 0 or i >= len(self): - raise IndexError('index out of range: ' + repr(i)) + raise IndexError('index out of range: 0 >= %s < %s' % (i, len(self))) needed = [] here = i while here != 0: diff --git a/src/allmydata/immutable/download.py b/src/allmydata/immutable/download.py index fcf5c422..2f4d8c3a 100644 --- a/src/allmydata/immutable/download.py +++ b/src/allmydata/immutable/download.py @@ -15,10 +15,6 @@ from allmydata.interfaces import IDownloadTarget, IDownloader, IFileURI, IVerifi from allmydata.immutable import layout from pycryptopp.cipher.aes import AES -class HaveAllPeersError(Exception): - # we use this to jump out of the loop - pass - class IntegrityCheckError(Exception): pass @@ -464,6 +460,7 @@ class BlockDownloader(log.PrefixingLogMixin): """ def __init__(self, vbucket, blocknum, parent, results): + precondition(isinstance(vbucket, ValidatedReadBucketProxy), vbucket) prefix = "%s-%d" % (vbucket, blocknum) log.PrefixingLogMixin.__init__(self, facility="tahoe.immutable.download", prefix=prefix) self.vbucket = vbucket @@ -551,6 +548,7 @@ class SegmentDownloader: # through it. downloaders = [] for blocknum, vbucket in active_buckets.iteritems(): + assert isinstance(vbucket, ValidatedReadBucketProxy), vbucket bd = BlockDownloader(vbucket, blocknum, self, self.results) downloaders.append(bd) if self.results: diff --git a/src/allmydata/immutable/encode.py b/src/allmydata/immutable/encode.py index 1bc47e82..0945ded9 100644 --- a/src/allmydata/immutable/encode.py +++ b/src/allmydata/immutable/encode.py @@ -557,10 +557,9 @@ class Encoder(object): return d def send_all_share_hash_trees(self): - # each bucket gets a set of share hash tree nodes that are needed to - # validate their share. This includes the share hash itself, but does - # not include the top-level hash root (which is stored securely in - # the URI instead). + # Each bucket gets a set of share hash tree nodes that are needed to validate their + # share. This includes the share hash itself, but does not include the top-level hash + # root (which is stored securely in the URI instead). self.log("sending all share hash trees", level=log.NOISY) self.set_status("Sending Share Hash Trees") self.set_encode_and_push_progress(extra=0.6) diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index e3484688..2f824935 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -1609,16 +1609,15 @@ class ICheckerResults(Interface): def needs_rebalancing(): """Return a boolean, True if the file/dir's reliability could be improved by moving shares to new servers. Non-distributed LIT files - always returne False.""" + always return False.""" def get_data(): - """Return a dictionary that describes the state of the file/dir. - Non-distributed LIT files always return an empty dictionary. Normal - files and directories return a dictionary with the following keys - (note that these use base32-encoded strings rather than binary ones) - (also note that for mutable files, these counts are for the 'best' - version):: + """Return a dictionary that describes the state of the file/dir. Non-distributed LIT + files always return an empty dictionary. Normal files and directories return a + dictionary with the following keys (note that these use binary strings rather than + base32-encoded ones) (also note that for mutable files, these counts are for the 'best' + version): count-shares-good: the number of distinct good shares that were found count-shares-needed: 'k', the number of shares required for recovery @@ -1637,7 +1636,11 @@ class ICheckerResults(Interface): sharenum). servers-responding: list of (binary) storage server identifiers, one for each server which responded to the share - query. + query (even if they said they didn't have shares, + and even if they said they did have shares but then + refused to send them when asked, and even if they + said they did have shares and sent incorrect ones + when asked) sharemap: dict mapping share identifier to list of serverids (binary strings). This indicates which servers are holding which shares. For immutable files, the shareid is an diff --git a/src/allmydata/storage.py b/src/allmydata/storage.py index faf6b5bb..cc142382 100644 --- a/src/allmydata/storage.py +++ b/src/allmydata/storage.py @@ -99,7 +99,7 @@ class ShareFile: self.home = filename f = open(self.home, 'rb') (version, size, num_leases) = struct.unpack(">LLL", f.read(0xc)) - assert version == 1 + assert version == 1, version self._size = size self._num_leases = num_leases self._data_offset = 0xc diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py index 6f08c911..57822040 100644 --- a/src/allmydata/test/test_encode.py +++ b/src/allmydata/test/test_encode.py @@ -606,8 +606,8 @@ class Roundtrip(unittest.TestCase, testutil.ShouldFailMixin): for i in range(7, 10)]) d = self.send_and_recover((4,8,10), bucket_modes=modemap) def _done(res): - self.failUnless(isinstance(res, Failure)) - self.failUnless(res.check(NotEnoughSharesError)) + self.failUnless(isinstance(res, Failure), res) + self.failUnless(res.check(NotEnoughSharesError), res) d.addBoth(_done) return d @@ -747,8 +747,8 @@ class Roundtrip(unittest.TestCase, testutil.ShouldFailMixin): for i in range(7, 10)]) d = self.send_and_recover((4,8,10), bucket_modes=modemap) def _done(res): - self.failUnless(isinstance(res, Failure)) - self.failUnless(res.check(NotEnoughSharesError)) + self.failUnless(isinstance(res, Failure), res) + self.failUnless(res.check(NotEnoughSharesError), res) d.addBoth(_done) return d diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 6d694c59..df9440af 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -266,7 +266,7 @@ class RunNode(unittest.TestCase, pollmixin.PollMixin): # 'tahoe stop' command takes a while. def _stop(res): open(HOTLINE_FILE, "w").write("") - self.failUnless(os.path.exists(TWISTD_PID_FILE)) + self.failUnless(os.path.exists(TWISTD_PID_FILE), (TWISTD_PID_FILE, os.listdir(os.path.dirname(TWISTD_PID_FILE)))) argv = ["--quiet", "stop", c1] out,err = StringIO(), StringIO() rc = runner.runner(argv, stdout=out, stderr=err) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 7d78f14a..7bc51735 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -2047,7 +2047,7 @@ class DeepCheckWebGood(DeepCheckBase, unittest.TestCase): base32.b2a(n.get_storage_index()), where) needs_rebalancing = bool( len(self.clients) < 10 ) if not incomplete: - self.failUnlessEqual(cr.needs_rebalancing(), needs_rebalancing, where) + self.failUnlessEqual(cr.needs_rebalancing(), needs_rebalancing, str((where, cr, cr.get_data()))) d = cr.get_data() self.failUnlessEqual(d["count-shares-good"], 10, where) self.failUnlessEqual(d["count-shares-needed"], 3, where) @@ -2060,7 +2060,7 @@ class DeepCheckWebGood(DeepCheckBase, unittest.TestCase): self.failUnlessEqual(sorted(d["servers-responding"]), sorted([c.nodeid for c in self.clients]), where) - self.failUnless("sharemap" in d, where) + self.failUnless("sharemap" in d, str((where, d))) all_serverids = set() for (shareid, serverids) in d["sharemap"].items(): all_serverids.update(serverids) @@ -2622,13 +2622,17 @@ class DeepCheckWebBad(DeepCheckBase, unittest.TestCase): def check_is_healthy(self, cr, where): - self.failUnless(ICheckerResults.providedBy(cr), (cr, type(cr), where)) - self.failUnless(cr.is_healthy(), (cr.get_report(), cr.is_healthy(), cr.get_summary(), where)) - self.failUnless(cr.is_recoverable(), where) - d = cr.get_data() - self.failUnlessEqual(d["count-recoverable-versions"], 1, where) - self.failUnlessEqual(d["count-unrecoverable-versions"], 0, where) - return cr + try: + self.failUnless(ICheckerResults.providedBy(cr), (cr, type(cr), where)) + self.failUnless(cr.is_healthy(), (cr.get_report(), cr.is_healthy(), cr.get_summary(), where)) + self.failUnless(cr.is_recoverable(), where) + d = cr.get_data() + self.failUnlessEqual(d["count-recoverable-versions"], 1, where) + self.failUnlessEqual(d["count-unrecoverable-versions"], 0, where) + return cr + except Exception, le: + le.args = tuple(le.args + (where,)) + raise def check_is_missing_shares(self, cr, where): self.failUnless(ICheckerResults.providedBy(cr), where) @@ -2643,7 +2647,7 @@ class DeepCheckWebBad(DeepCheckBase, unittest.TestCase): # by "corrupt-shares" we mean the file is still recoverable self.failUnless(ICheckerResults.providedBy(cr), where) d = cr.get_data() - self.failIf(cr.is_healthy(), where) + self.failIf(cr.is_healthy(), (where, cr)) self.failUnless(cr.is_recoverable(), where) d = cr.get_data() self.failUnless(d["count-shares-good"] < 10, where)