From: Zooko O'Whielacronx <zooko@zooko.com>
Date: Thu, 8 Jan 2009 03:26:38 +0000 (-0700)
Subject: download: make sure you really get all the crypttext hashes
X-Git-Url: https://git.rkrishnan.org/%5B/(%5B%5E?a=commitdiff_plain;h=e598ca2f3fb771ab541aef15083374c0419ad29d;p=tahoe-lafs%2Ftahoe-lafs.git

download: make sure you really get all the crypttext hashes
We were not making sure that we really got all the crypttext hashes during download.  If a server were to return fewer than the complete set of crypttext hashes, then our subsequent attempt to verify the correctness of the ciphertext would fail.  (And it wouldn't be obvious without very careful debugging why it had failed.)
This patch makes it so that you keep trying to get ciphertext hashes until you have a full set or you run out of servers to ask.
---

diff --git a/src/allmydata/immutable/checker.py b/src/allmydata/immutable/checker.py
index 290d8cfd..6b36a346 100644
--- a/src/allmydata/immutable/checker.py
+++ b/src/allmydata/immutable/checker.py
@@ -44,7 +44,6 @@ class Checker(log.PrefixingLogMixin):
         self._verify = verify # bool: verify what the servers claim, or not?
 
         self._share_hash_tree = None
-        self._crypttext_hash_tree = None
 
     def _get_buckets(self, server, storageindex, serverid):
         """ Return a deferred that eventually fires with ({sharenum: bucket}, serverid,
diff --git a/src/allmydata/immutable/download.py b/src/allmydata/immutable/download.py
index ed9b77d4..c63dccc7 100644
--- a/src/allmydata/immutable/download.py
+++ b/src/allmydata/immutable/download.py
@@ -151,9 +151,10 @@ class ValidatedCrypttextHashTreeProxy:
     """ I am a front-end for a remote crypttext hash tree using a local ReadBucketProxy -- I use
     its get_crypttext_hashes() method and offer the Validated Thing protocol (i.e., I have a
     start() method that fires with self once I get a valid one). """
-    def __init__(self, readbucketproxy, crypttext_hash_tree, fetch_failures=None):
+    def __init__(self, readbucketproxy, crypttext_hash_tree, num_segments, fetch_failures=None):
         # fetch_failures is for debugging -- see test_encode.py
         self._readbucketproxy = readbucketproxy
+        self._num_segments = num_segments
         self._fetch_failures = fetch_failures
         self._crypttext_hash_tree = crypttext_hash_tree
 
@@ -165,6 +166,11 @@ class ValidatedCrypttextHashTreeProxy:
             if self._fetch_failures is not None:
                 self._fetch_failures["crypttext_hash_tree"] += 1
             raise BadOrMissingHash(le)
+        # If we now have enough of the crypttext hash tree to integrity-check *any* segment of ciphertext, then we are done.
+        # TODO: It would have better alacrity if we downloaded only part of the crypttext hash tree at a time.
+        for segnum in range(self._num_segments):
+            if self._crypttext_hash_tree.needed_hashes(segnum):
+                raise NotEnoughHashesError("not enough hashes to validate segment number %d" % (segnum,))
         return self
 
     def start(self):
@@ -863,7 +869,7 @@ class FileDownloader(log.PrefixingLogMixin):
     def _get_crypttext_hash_tree(self, res):
         vchtps = []
         for sharenum, bucket in self._share_buckets:
-            vchtp = ValidatedCrypttextHashTreeProxy(bucket, self._crypttext_hash_tree, self._fetch_failures)
+            vchtp = ValidatedCrypttextHashTreeProxy(bucket, self._crypttext_hash_tree, self._vup.num_segments, self._fetch_failures)
             vchtps.append(vchtp)
 
         _get_crypttext_hash_tree_started = time.time()
@@ -874,7 +880,8 @@ class FileDownloader(log.PrefixingLogMixin):
         d = vto.start()
 
         def _got_crypttext_hash_tree(res):
-            self._crypttext_hash_tree = res._crypttext_hash_tree
+            # Good -- the self._crypttext_hash_tree that we passed to vchtp is now populated
+            # with hashes.
             self._output.got_crypttext_hash_tree(self._crypttext_hash_tree)
             if self._results:
                 elapsed = time.time() - _get_crypttext_hash_tree_started