download: make sure you really get all the crypttext hashes
author: Zooko O'Whielacronx <zooko@zooko.com>
Thu, 8 Jan 2009 03:26:38 +0000 (20:26 -0700)
committer: Zooko O'Whielacronx <zooko@zooko.com>
Thu, 8 Jan 2009 03:26:38 +0000 (20:26 -0700)
We were not making sure that we really got all the crypttext hashes during download. If a server were to return fewer than the complete set of crypttext hashes, then our subsequent attempt to verify the correctness of the ciphertext would fail, and without very careful debugging it would not be obvious why it had failed.
This patch makes it so that you keep trying to get ciphertext hashes until you have a full set or you run out of servers to ask.

src/allmydata/immutable/checker.py
src/allmydata/immutable/download.py

index 290d8cfdc582ff19a168c97af125bba3d933b6e0..6b36a346925695b339ee97bdc670dc363de22aad 100644 (file)
@@ -44,7 +44,6 @@ class Checker(log.PrefixingLogMixin):
         self._verify = verify # bool: verify what the servers claim, or not?
 
         self._share_hash_tree = None
-        self._crypttext_hash_tree = None
 
     def _get_buckets(self, server, storageindex, serverid):
         """ Return a deferred that eventually fires with ({sharenum: bucket}, serverid,
index ed9b77d44f68879ae7941b18ef116f2df9cda1ba..c63dccc7c6e9961a18fb7480d26778dc094c5b91 100644 (file)
@@ -151,9 +151,10 @@ class ValidatedCrypttextHashTreeProxy:
     """ I am a front-end for a remote crypttext hash tree using a local ReadBucketProxy -- I use
     its get_crypttext_hashes() method and offer the Validated Thing protocol (i.e., I have a
     start() method that fires with self once I get a valid one). """
-    def __init__(self, readbucketproxy, crypttext_hash_tree, fetch_failures=None):
+    def __init__(self, readbucketproxy, crypttext_hash_tree, num_segments, fetch_failures=None):
         # fetch_failures is for debugging -- see test_encode.py
         self._readbucketproxy = readbucketproxy
+        self._num_segments = num_segments
         self._fetch_failures = fetch_failures
         self._crypttext_hash_tree = crypttext_hash_tree
 
@@ -165,6 +166,11 @@ class ValidatedCrypttextHashTreeProxy:
             if self._fetch_failures is not None:
                 self._fetch_failures["crypttext_hash_tree"] += 1
             raise BadOrMissingHash(le)
+        # If we now have enough of the crypttext hash tree to integrity-check *any* segment of ciphertext, then we are done.
+        # TODO: It would have better alacrity if we downloaded only part of the crypttext hash tree at a time.
+        for segnum in range(self._num_segments):
+            if self._crypttext_hash_tree.needed_hashes(segnum):
+                raise NotEnoughHashesError("not enough hashes to validate segment number %d" % (segnum,))
         return self
 
     def start(self):
@@ -863,7 +869,7 @@ class FileDownloader(log.PrefixingLogMixin):
     def _get_crypttext_hash_tree(self, res):
         vchtps = []
         for sharenum, bucket in self._share_buckets:
-            vchtp = ValidatedCrypttextHashTreeProxy(bucket, self._crypttext_hash_tree, self._fetch_failures)
+            vchtp = ValidatedCrypttextHashTreeProxy(bucket, self._crypttext_hash_tree, self._vup.num_segments, self._fetch_failures)
             vchtps.append(vchtp)
 
         _get_crypttext_hash_tree_started = time.time()
@@ -874,7 +880,8 @@ class FileDownloader(log.PrefixingLogMixin):
         d = vto.start()
 
         def _got_crypttext_hash_tree(res):
-            self._crypttext_hash_tree = res._crypttext_hash_tree
+            # Good -- the self._crypttext_hash_tree that we passed to vchtp is now populated
+            # with hashes.
             self._output.got_crypttext_hash_tree(self._crypttext_hash_tree)
             if self._results:
                 elapsed = time.time() - _get_crypttext_hash_tree_started