From: Brian Warner Date: Mon, 5 Oct 2009 21:34:43 +0000 (-0700) Subject: Verifier: check the full share-hash chain on each share X-Git-Tag: trac-4100~29 X-Git-Url: https://git.rkrishnan.org/%5B/rgr-080307.php?a=commitdiff_plain;h=e8f56af5a7e827d602ad13041e0a40b92e08acb3;p=tahoe-lafs%2Ftahoe-lafs.git Verifier: check the full share-hash chain on each share Removed the .todo from two test_repairer tests that check this. --- diff --git a/src/allmydata/immutable/checker.py b/src/allmydata/immutable/checker.py index 83f06065..dc319371 100644 --- a/src/allmydata/immutable/checker.py +++ b/src/allmydata/immutable/checker.py @@ -50,8 +50,6 @@ class Checker(log.PrefixingLogMixin): self._verify = verify # bool: verify what the servers claim, or not? self._add_lease = add_lease - self._share_hash_tree = None - frs = file_renewal_secret_hash(secret_holder.get_renewal_secret(), self._verifycap.storage_index) self.file_renewal_secret = frs @@ -138,36 +136,53 @@ class Checker(log.PrefixingLogMixin): d = veup.start() def _got_ueb(vup): - self._share_hash_tree = IncompleteHashTree(vcap.total_shares) - self._share_hash_tree.set_hashes({0: vup.share_root_hash}) + share_hash_tree = IncompleteHashTree(vcap.total_shares) + share_hash_tree.set_hashes({0: vup.share_root_hash}) vrbp = download.ValidatedReadBucketProxy(sharenum, b, - self._share_hash_tree, + share_hash_tree, vup.num_segments, vup.block_size, vup.share_size) + # note: normal download doesn't use get_all_sharehashes(), + # because it gets more data than necessary. We've discussed the + # security properties of having verification and download look + # identical (so the server couldn't, say, provide good responses + # for one and not the other), but I think that full verification + # is more important than defending against inconsistent server + # behavior. Besides, they can't pass the verifier without storing + # all the data, so there's not so much to be gained by behaving + # inconsistently. + d = vrbp.get_all_sharehashes() + # we fill share_hash_tree before fetching any blocks, so the + # block fetches won't send redundant share-hash-tree requests, to + # speed things up. + d.addCallbacks(lambda ign: vrbp) + return d + d.addCallbacks(_got_ueb) + def _discard_result(r): + assert isinstance(r, str), r + # to free up the RAM + return None + def _get_blocks(vrbp): ds = [] - for blocknum in range(vup.num_segments): - def _discard_result(r): - assert isinstance(r, str), r - # to free up the RAM - return None - d2 = vrbp.get_block(blocknum) - d2.addCallback(_discard_result) - ds.append(d2) - - dl = deferredutil.gatherResults(ds) - # dl will fire once every block of this share has been downloaded - # and verified, or else it will errback. - - def _cb(result): - return (True, sharenum, None) - - dl.addCallback(_cb) - return dl - d.addCallback(_got_ueb) - + for blocknum in range(veup.num_segments): + db = vrbp.get_block(blocknum) + db.addCallback(_discard_result) + ds.append(db) + # this gatherResults will fire once every block of this share has + # been downloaded and verified, or else it will errback. + return deferredutil.gatherResults(ds) + d.addCallback(_get_blocks) + + # if none of those errbacked, the blocks (and the hashes above them) + # are good + def _all_good(ign): + return (True, sharenum, None) + d.addCallback(_all_good) + + # but if anything fails, we'll land here def _errb(f): # We didn't succeed at fetching and verifying all the blocks of # this share. Handle each reason for failure differently. diff --git a/src/allmydata/immutable/download.py b/src/allmydata/immutable/download.py index 7c55666a..de7f2daf 100644 --- a/src/allmydata/immutable/download.py +++ b/src/allmydata/immutable/download.py @@ -357,6 +357,35 @@ class ValidatedReadBucketProxy(log.PrefixingLogMixin): self.share_size = share_size self.block_hash_tree = hashtree.IncompleteHashTree(self.num_blocks) + def get_all_sharehashes(self): + """Retrieve and validate all the share-hash-tree nodes that are + included in this share, regardless of whether we need them to + validate the share or not. Each share contains a minimal Merkle tree + chain, but there is lots of overlap, so usually we'll be using hashes + from other shares and not reading every single hash from this share. + The Verifier uses this function to read and validate every single + hash from this share. + + Call this (and wait for the Deferred it returns to fire) before + calling get_block() for the first time: this lets us check that the + share share contains enough hashes to validate its own data, and + avoids downloading any share hash twice. + + I return a Deferred which errbacks upon failure, probably with + BadOrMissingHash.""" + + d = self.bucket.get_share_hashes() + def _got_share_hashes(sh): + sharehashes = dict(sh) + try: + self.share_hash_tree.set_hashes(sharehashes) + except IndexError, le: + raise BadOrMissingHash(le) + except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le: + raise BadOrMissingHash(le) + d.addCallback(_got_share_hashes) + return d + def get_block(self, blocknum): # the first time we use this bucket, we need to fetch enough elements # of the share hash tree to validate it from our share hash up to the diff --git a/src/allmydata/test/test_repairer.py b/src/allmydata/test/test_repairer.py index df6bbb41..c39a9217 100644 --- a/src/allmydata/test/test_repairer.py +++ b/src/allmydata/test/test_repairer.py @@ -269,7 +269,6 @@ class Verifier(GridTestMixin, unittest.TestCase, RepairTestMixin): self.basedir = "repairer/Verify/corrupt_share_hashtree_offset" return self._help_test_verify(common._corrupt_offset_of_share_hashes, self.judge_invisible_corruption) - test_corrupt_share_hashtree_offset.todo = "Verifier doesn't yet properly detect this kind of corruption." def test_corrupt_crypttext_hashtree_offset(self): self.basedir = "repairer/Verify/corrupt_crypttext_hashtree_offset" @@ -293,7 +292,6 @@ class Verifier(GridTestMixin, unittest.TestCase, RepairTestMixin): self.basedir = "repairer/Verify/corrupt_share_hashtree" return self._help_test_verify(common._corrupt_share_hashes, self.judge_invisible_corruption) - test_corrupt_share_hashtree.todo = "Verifier doesn't yet properly detect this kind of corruption." # We'll allow you to pass this test even if you trigger thirty-five times as # many block sends and disk writes as would be optimal.