Verifier: check the full share-hash chain on each share
authorBrian Warner <warner@lothar.com>
Mon, 5 Oct 2009 21:34:43 +0000 (14:34 -0700)
committerBrian Warner <warner@lothar.com>
Mon, 5 Oct 2009 21:34:43 +0000 (14:34 -0700)
Removed the .todo from two test_repairer tests that check this.

src/allmydata/immutable/checker.py
src/allmydata/immutable/download.py
src/allmydata/test/test_repairer.py

index 83f060653806232f3f251bd468cae1bf61390318..dc31937128f5c08dbb5636f6706b0be4201a32f5 100644 (file)
@@ -50,8 +50,6 @@ class Checker(log.PrefixingLogMixin):
         self._verify = verify # bool: verify what the servers claim, or not?
         self._add_lease = add_lease
 
-        self._share_hash_tree = None
-
         frs = file_renewal_secret_hash(secret_holder.get_renewal_secret(),
                                        self._verifycap.storage_index)
         self.file_renewal_secret = frs
@@ -138,36 +136,53 @@ class Checker(log.PrefixingLogMixin):
         d = veup.start()
 
         def _got_ueb(vup):
-            self._share_hash_tree = IncompleteHashTree(vcap.total_shares)
-            self._share_hash_tree.set_hashes({0: vup.share_root_hash})
+            share_hash_tree = IncompleteHashTree(vcap.total_shares)
+            share_hash_tree.set_hashes({0: vup.share_root_hash})
 
             vrbp = download.ValidatedReadBucketProxy(sharenum, b,
-                                                     self._share_hash_tree,
+                                                     share_hash_tree,
                                                      vup.num_segments,
                                                      vup.block_size,
                                                      vup.share_size)
 
+            # note: normal download doesn't use get_all_sharehashes(),
+            # because it gets more data than necessary. We've discussed the
+            # security properties of having verification and download look
+            # identical (so the server couldn't, say, provide good responses
+            # for one and not the other), but I think that full verification
+            # is more important than defending against inconsistent server
+            # behavior. Besides, they can't pass the verifier without storing
+            # all the data, so there's not so much to be gained by behaving
+            # inconsistently.
+            d = vrbp.get_all_sharehashes()
+            # we fill share_hash_tree before fetching any blocks, so the
+            # block fetches won't send redundant share-hash-tree requests, to
+            # speed things up.
+            d.addCallbacks(lambda ign: vrbp)
+            return d
+        d.addCallbacks(_got_ueb)
+        def _discard_result(r):
+            assert isinstance(r, str), r
+            # to free up the RAM
+            return None
+        def _get_blocks(vrbp):
             ds = []
-            for blocknum in range(vup.num_segments):
-                def _discard_result(r):
-                    assert isinstance(r, str), r
-                    # to free up the RAM
-                    return None
-                d2 = vrbp.get_block(blocknum)
-                d2.addCallback(_discard_result)
-                ds.append(d2)
-
-            dl = deferredutil.gatherResults(ds)
-            # dl will fire once every block of this share has been downloaded
-            # and verified, or else it will errback.
-
-            def _cb(result):
-                return (True, sharenum, None)
-
-            dl.addCallback(_cb)
-            return dl
-        d.addCallback(_got_ueb)
-
+            for blocknum in range(veup.num_segments):
+                db = vrbp.get_block(blocknum)
+                db.addCallback(_discard_result)
+                ds.append(db)
+            # this gatherResults will fire once every block of this share has
+            # been downloaded and verified, or else it will errback.
+            return deferredutil.gatherResults(ds)
+        d.addCallback(_get_blocks)
+
+        # if none of those errbacked, the blocks (and the hashes above them)
+        # are good
+        def _all_good(ign):
+            return (True, sharenum, None)
+        d.addCallback(_all_good)
+
+        # but if anything fails, we'll land here
         def _errb(f):
             # We didn't succeed at fetching and verifying all the blocks of
             # this share. Handle each reason for failure differently.
index 7c55666ac8fee83d1cc3872569b259851c8874c0..de7f2daff8a5eac0f650aa3aab2071a39c9d5212 100644 (file)
@@ -357,6 +357,35 @@ class ValidatedReadBucketProxy(log.PrefixingLogMixin):
         self.share_size = share_size
         self.block_hash_tree = hashtree.IncompleteHashTree(self.num_blocks)
 
+    def get_all_sharehashes(self):
+        """Retrieve and validate all the share-hash-tree nodes that are
+        included in this share, regardless of whether we need them to
+        validate the share or not. Each share contains a minimal Merkle tree
+        chain, but there is lots of overlap, so usually we'll be using hashes
+        from other shares and not reading every single hash from this share.
+        The Verifier uses this function to read and validate every single
+        hash from this share.
+
+        Call this (and wait for the Deferred it returns to fire) before
+        calling get_block() for the first time: this lets us check that the
+        share share contains enough hashes to validate its own data, and
+        avoids downloading any share hash twice.
+
+        I return a Deferred which errbacks upon failure, probably with
+        BadOrMissingHash."""
+
+        d = self.bucket.get_share_hashes()
+        def _got_share_hashes(sh):
+            sharehashes = dict(sh)
+            try:
+                self.share_hash_tree.set_hashes(sharehashes)
+            except IndexError, le:
+                raise BadOrMissingHash(le)
+            except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:
+                raise BadOrMissingHash(le)
+        d.addCallback(_got_share_hashes)
+        return d
+
     def get_block(self, blocknum):
         # the first time we use this bucket, we need to fetch enough elements
         # of the share hash tree to validate it from our share hash up to the
index df6bbb410f849c9f49257856ca3618ca5f31377d..c39a9217af703246358da2ef3d658fb349737461 100644 (file)
@@ -269,7 +269,6 @@ class Verifier(GridTestMixin, unittest.TestCase, RepairTestMixin):
         self.basedir = "repairer/Verify/corrupt_share_hashtree_offset"
         return self._help_test_verify(common._corrupt_offset_of_share_hashes,
                                       self.judge_invisible_corruption)
-    test_corrupt_share_hashtree_offset.todo = "Verifier doesn't yet properly detect this kind of corruption."
 
     def test_corrupt_crypttext_hashtree_offset(self):
         self.basedir = "repairer/Verify/corrupt_crypttext_hashtree_offset"
@@ -293,7 +292,6 @@ class Verifier(GridTestMixin, unittest.TestCase, RepairTestMixin):
         self.basedir = "repairer/Verify/corrupt_share_hashtree"
         return self._help_test_verify(common._corrupt_share_hashes,
                                       self.judge_invisible_corruption)
-    test_corrupt_share_hashtree.todo = "Verifier doesn't yet properly detect this kind of corruption."
 
 # We'll allow you to pass this test even if you trigger thirty-five times as
 # many block sends and disk writes as would be optimal.