From 504c767d03e044d9c8a1c1d761236be320e5d2ae Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@lothar.com>
Date: Mon, 5 Oct 2009 14:48:44 -0700
Subject: [PATCH] Verifier: check the full block-hash-tree on each share

Removed the .todo from two test_repairer tests that check this. The only
remaining .todos are on the three crypttext-hash-tree tests.
---
 src/allmydata/immutable/checker.py  |  8 ++++---
 src/allmydata/immutable/download.py | 33 +++++++++++++++++++++++++++++
 src/allmydata/test/test_repairer.py |  9 ++++++--
 3 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/src/allmydata/immutable/checker.py b/src/allmydata/immutable/checker.py
index dc319371..cbdeb702 100644
--- a/src/allmydata/immutable/checker.py
+++ b/src/allmydata/immutable/checker.py
@@ -157,10 +157,12 @@ class Checker(log.PrefixingLogMixin):
             d = vrbp.get_all_sharehashes()
             # we fill share_hash_tree before fetching any blocks, so the
             # block fetches won't send redundant share-hash-tree requests, to
-            # speed things up.
-            d.addCallbacks(lambda ign: vrbp)
+            # speed things up. Then we fetch+validate all the blockhashes.
+            d.addCallback(lambda ign: vrbp.get_all_blockhashes())
+            d.addCallback(lambda ign: vrbp)
             return d
-        d.addCallbacks(_got_ueb)
+        d.addCallback(_got_ueb)
+
         def _discard_result(r):
             assert isinstance(r, str), r
             # to free up the RAM
diff --git a/src/allmydata/immutable/download.py b/src/allmydata/immutable/download.py
index de7f2daf..e86bf5c4 100644
--- a/src/allmydata/immutable/download.py
+++ b/src/allmydata/immutable/download.py
@@ -386,6 +386,39 @@ class ValidatedReadBucketProxy(log.PrefixingLogMixin):
         d.addCallback(_got_share_hashes)
         return d
 
+    def get_all_blockhashes(self):
+        """Retrieve and validate all the block-hash-tree nodes that are
+        included in this share. Each share contains a full Merkle tree, but
+        we usually only fetch the minimal subset necessary for any particular
+        block. This function fetches everything at once. The Verifier uses
+        this function to validate the block hash tree.
+
+        Call this (and wait for the Deferred it returns to fire) after
+        calling get_all_sharehashes() and before calling get_block() for the
+        first time: this lets us check that the share contains all block
+        hashes and avoids downloading them multiple times.
+
+        I return a Deferred which errbacks upon failure, probably with
+        BadOrMissingHash.
+        """
+
+        # get_block_hashes(anything) currently always returns everything
+        needed = list(range(len(self.block_hash_tree)))
+        d = self.bucket.get_block_hashes(needed)
+        def _got_block_hashes(blockhashes):
+            if len(blockhashes) < len(self.block_hash_tree):
+                raise BadOrMissingHash()
+            bh = dict(enumerate(blockhashes))
+
+            try:
+                self.block_hash_tree.set_hashes(bh)
+            except IndexError, le:
+                raise BadOrMissingHash(le)
+            except (hashtree.BadHashError, hashtree.NotEnoughHashesError), le:
+                raise BadOrMissingHash(le)
+        d.addCallback(_got_block_hashes)
+        return d
+
     def get_block(self, blocknum):
         # the first time we use this bucket, we need to fetch enough elements
         # of the share hash tree to validate it from our share hash up to the
diff --git a/src/allmydata/test/test_repairer.py b/src/allmydata/test/test_repairer.py
index c39a9217..248d93e1 100644
--- a/src/allmydata/test/test_repairer.py
+++ b/src/allmydata/test/test_repairer.py
@@ -258,7 +258,6 @@ class Verifier(GridTestMixin, unittest.TestCase, RepairTestMixin):
         self.basedir = "repairer/Verify/corrupt_block_hashtree_offset"
         return self._help_test_verify(common._corrupt_offset_of_block_hashes,
                                       self.judge_invisible_corruption)
-    test_corrupt_block_hashtree_offset.todo = "Verifier doesn't yet properly detect this kind of corruption."
 
     def test_wrong_share_verno(self):
         self.basedir = "repairer/Verify/wrong_share_verno"
@@ -286,13 +285,19 @@ class Verifier(GridTestMixin, unittest.TestCase, RepairTestMixin):
         self.basedir = "repairer/Verify/corrupt_block_hashtree"
         return self._help_test_verify(common._corrupt_block_hashes,
                                       self.judge_invisible_corruption)
-    test_corrupt_block_hashtree.todo = "Verifier doesn't yet properly detect this kind of corruption."
 
     def test_corrupt_share_hashtree(self):
         self.basedir = "repairer/Verify/corrupt_share_hashtree"
         return self._help_test_verify(common._corrupt_share_hashes,
                                       self.judge_invisible_corruption)
 
+    # TODO: the Verifier should decode to ciphertext and check it against the
+    # crypttext-hash-tree. Check this by constructing a bogus file, in which
+    # the crypttext-hash-tree is modified after encoding is done, but before
+    # the UEB is finalized. The Verifier should see a valid
+    # crypttext-hash-tree but then the ciphertext should show up as invalid.
+    # Normally this could only be triggered by a bug in FEC decode.
+
 # We'll allow you to pass this test even if you trigger thirty-five times as
 # many block sends and disk writes as would be optimal.
 WRITE_LEEWAY = 35
-- 
2.45.2