From: Brian Warner Date: Mon, 5 Oct 2009 21:48:44 +0000 (-0700) Subject: Verifier: check the full block-hash-tree on each share X-Git-Tag: trac-4100~28 X-Git-Url: https://git.rkrishnan.org/uri/URI:DIR2-RO:%5B%5E?a=commitdiff_plain;h=504c767d03e044d9c8a1c1d761236be320e5d2ae;p=tahoe-lafs%2Ftahoe-lafs.git Verifier: check the full block-hash-tree on each share Removed the .todo from two test_repairer tests that check this. The only remaining .todos are on the three crypttext-hash-tree tests. --- diff --git a/src/allmydata/immutable/checker.py b/src/allmydata/immutable/checker.py index dc319371..cbdeb702 100644 --- a/src/allmydata/immutable/checker.py +++ b/src/allmydata/immutable/checker.py @@ -157,10 +157,12 @@ class Checker(log.PrefixingLogMixin): d = vrbp.get_all_sharehashes() # we fill share_hash_tree before fetching any blocks, so the # block fetches won't send redundant share-hash-tree requests, to - # speed things up. - d.addCallbacks(lambda ign: vrbp) + # speed things up. Then we fetch+validate all the blockhashes. + d.addCallback(lambda ign: vrbp.get_all_blockhashes()) + d.addCallback(lambda ign: vrbp) return d - d.addCallbacks(_got_ueb) + d.addCallback(_got_ueb) + def _discard_result(r): assert isinstance(r, str), r # to free up the RAM diff --git a/src/allmydata/immutable/download.py b/src/allmydata/immutable/download.py index de7f2daf..e86bf5c4 100644 --- a/src/allmydata/immutable/download.py +++ b/src/allmydata/immutable/download.py @@ -386,6 +386,39 @@ class ValidatedReadBucketProxy(log.PrefixingLogMixin): d.addCallback(_got_share_hashes) return d + def get_all_blockhashes(self): + """Retrieve and validate all the block-hash-tree nodes that are + included in this share. Each share contains a full Merkle tree, but + we usually only fetch the minimal subset necessary for any particular + block. This function fetches everything at once. The Verifier uses + this function to validate the block hash tree. + + Call this (and wait for the Deferred it returns to fire) after + calling get_all_sharehashes() and before calling get_block() for the + first time: this lets us check that the share contains all block + hashes and avoids downloading them multiple times. + + I return a Deferred which errbacks upon failure, probably with + BadOrMissingHash. + """ + + # get_block_hashes(anything) currently always returns everything + needed = list(range(len(self.block_hash_tree))) + d = self.bucket.get_block_hashes(needed) + def _got_block_hashes(blockhashes): + if len(blockhashes) < len(self.block_hash_tree): + raise BadOrMissingHash() + bh = dict(enumerate(blockhashes)) + + try: + self.block_hash_tree.set_hashes(bh) + except IndexError, le: + raise BadOrMissingHash(le) + except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le: + raise BadOrMissingHash(le) + d.addCallback(_got_block_hashes) + return d + def get_block(self, blocknum): # the first time we use this bucket, we need to fetch enough elements # of the share hash tree to validate it from our share hash up to the diff --git a/src/allmydata/test/test_repairer.py b/src/allmydata/test/test_repairer.py index c39a9217..248d93e1 100644 --- a/src/allmydata/test/test_repairer.py +++ b/src/allmydata/test/test_repairer.py @@ -258,7 +258,6 @@ class Verifier(GridTestMixin, unittest.TestCase, RepairTestMixin): self.basedir = "repairer/Verify/corrupt_block_hashtree_offset" return self._help_test_verify(common._corrupt_offset_of_block_hashes, self.judge_invisible_corruption) - test_corrupt_block_hashtree_offset.todo = "Verifier doesn't yet properly detect this kind of corruption." def test_wrong_share_verno(self): self.basedir = "repairer/Verify/wrong_share_verno" @@ -286,13 +285,19 @@ class Verifier(GridTestMixin, unittest.TestCase, RepairTestMixin): self.basedir = "repairer/Verify/corrupt_block_hashtree" return self._help_test_verify(common._corrupt_block_hashes, self.judge_invisible_corruption) - test_corrupt_block_hashtree.todo = "Verifier doesn't yet properly detect this kind of corruption." def test_corrupt_share_hashtree(self): self.basedir = "repairer/Verify/corrupt_share_hashtree" return self._help_test_verify(common._corrupt_share_hashes, self.judge_invisible_corruption) + # TODO: the Verifier should decode to ciphertext and check it against the + # crypttext-hash-tree. Check this by constructing a bogus file, in which + # the crypttext-hash-tree is modified after encoding is done, but before + # the UEB is finalized. The Verifier should see a valid + # crypttext-hash-tree but then the ciphertext should show up as invalid. + # Normally this could only be triggered by a bug in FEC decode. + # We'll allow you to pass this test even if you trigger thirty-five times as # many block sends and disk writes as would be optimal. WRITE_LEEWAY = 35