immutable file download: make the ciphertext hash tree mandatory
authorZooko O'Whielacronx <zooko@zooko.com>
Mon, 21 Jul 2008 16:31:02 +0000 (09:31 -0700)
committerZooko O'Whielacronx <zooko@zooko.com>
Mon, 21 Jul 2008 16:31:02 +0000 (09:31 -0700)
This fixes #491 (URIs do not refer to unique files in Allmydata Tahoe).
Fortunately all of the versions of Tahoe currently in use are already producing
this ciphertext hash tree when uploading, so there is no
backwards-compatibility problem with having the downloader require it to be
present.

src/allmydata/immutable/download.py
src/allmydata/test/test_encode.py

index 975a71692c87957eaec79769167fa083113eb4a4..cdc210c54ae1ade599c4a62a9d1e14495aa1d0db 100644 (file)
@@ -18,11 +18,16 @@ class HaveAllPeersError(Exception):
     # we use this to jump out of the loop
     pass
 
-class BadURIExtensionHashValue(Exception):
+class IntegrityCheckError(Exception):
     pass
-class BadPlaintextHashValue(Exception):
+
+class BadURIExtensionHashValue(IntegrityCheckError):
+    pass
+class BadURIExtension(IntegrityCheckError):
     pass
-class BadCrypttextHashValue(Exception):
+class BadPlaintextHashValue(IntegrityCheckError):
+    pass
+class BadCrypttextHashValue(IntegrityCheckError):
     pass
 
 class DownloadStopped(Exception):
@@ -731,10 +736,13 @@ class FileDownloader:
         return d
 
     def _get_crypttext_hashtrees(self, res):
-        # crypttext hashes are optional too
+        # Ciphertext hash tree root is mandatory, so that there is at
+        # most one ciphertext that matches this read-cap or
+        # verify-cap.  The integrity check on the shares is not
+        # sufficient to prevent the original encoder from creating
+        # some shares of file A and other shares of file B.
         if "crypttext_root_hash" not in self._uri_extension_data:
-            self._crypttext_hashtree = None
-            return
+            raise BadURIExtension("URI Extension block did not have the ciphertext hash tree root")
         def _validate_crypttext_hashtree(proposal, bucket):
             if proposal[0] != self._uri_extension_data['crypttext_root_hash']:
                 self._fetch_failures["crypttext_hashroot"] += 1
index bb9ba969a590a32b592555dfed5334120a2c87b2..9266fdb84101a1b63b9406ab2ca0a8d5fbd2f221 100644 (file)
@@ -428,6 +428,15 @@ class Roundtrip(unittest.TestCase, testutil.ShouldFailMixin):
                 uri_extension['crypttext_root_hash'] = badtree[0]
                 return uri_extension
             d.addCallback(_corrupt_crypttext_hashes)
+        elif "omit_crypttext_root_hash" in recover_mode:
+            # make it as though the crypttext hash tree root had not
+            # been in the UEB
+            def _remove_crypttext_hashes(uri_extension):
+                assert isinstance(uri_extension, dict)
+                assert 'crypttext_root_hash' in uri_extension
+                del uri_extension['crypttext_root_hash']
+                return uri_extension
+            d.addCallback(_remove_crypttext_hashes)
 
         d.addCallback(fd._got_uri_extension)
 
@@ -636,6 +645,19 @@ class Roundtrip(unittest.TestCase, testutil.ShouldFailMixin):
         d.addBoth(_done)
         return d
 
+    def test_omitted_crypttext_hashes_failure(self):
+        # Test that the downloader requires a Merkle Tree over the
+        # ciphertext (per http://crisp.cs.du.edu/?q=node/88 -- the
+        # problem that checking the integrity of the shares could let
+        # more than one immutable file match the same read-cap).
+        modemap = dict([(i, "good") for i in range(0, 10)])
+        d = self.send_and_recover((4,8,10), bucket_modes=modemap,
+                                  recover_mode=("omit_crypttext_root_hash"))
+        def _done(res):
+            self.failUnless(isinstance(res, Failure))
+            self.failUnless(res.check(download.BadURIExtension), res)
+        d.addBoth(_done)
+        return d
 
     def OFF_test_bad_plaintext(self):
         # faking a decryption failure is easier: just corrupt the key