git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/commitdiff
UNDO: upload: stop putting plaintext and ciphertext hashes in shares.
author: Brian Warner <warner@lothar.com>
Sun, 23 Mar 2008 22:35:54 +0000 (15:35 -0700)
committer: Brian Warner <warner@lothar.com>
Sun, 23 Mar 2008 22:35:54 +0000 (15:35 -0700)
This removes the guess-partial-information attack vector, and reduces
the amount of overhead that we consume with each file. It also introduces
a forwards-compatibility break: older versions of the code (before the
previous download-time "make hashes optional" patch) will be unable
to read files uploaded by this version, as they will complain about the
missing hashes. This patch is experimental, and is being pushed into
trunk to obtain test coverage. We may undo it before releasing 1.0.

src/allmydata/encode.py
src/allmydata/test/test_encode.py
src/allmydata/test/test_system.py

index a7e71070d02f9e04215c355319a0cfe4ed9acb6b..a815a5d7795b406ecd57e26a8f116f12eb0c9fe1 100644 (file)
@@ -239,6 +239,10 @@ class Encoder(object):
 
         d.addCallback(lambda res: self.finish_hashing())
 
+        d.addCallback(lambda res:
+                      self.send_plaintext_hash_tree_to_all_shareholders())
+        d.addCallback(lambda res:
+                      self.send_crypttext_hash_tree_to_all_shareholders())
         d.addCallback(lambda res: self.send_all_subshare_hash_trees())
         d.addCallback(lambda res: self.send_all_share_hash_trees())
         d.addCallback(lambda res: self.send_uri_extension_to_all_shareholders())
@@ -506,7 +510,63 @@ class Encoder(object):
         self._start_hashing_and_close_timestamp = time.time()
         self.set_status("Finishing hashes")
         self.set_encode_and_push_progress(extra=0.0)
-        return self._uploadable.close()
+        crypttext_hash = self._crypttext_hasher.digest()
+        self.uri_extension_data["crypttext_hash"] = crypttext_hash
+        d = self._uploadable.get_plaintext_hash()
+        def _got(plaintext_hash):
+            self.uri_extension_data["plaintext_hash"] = plaintext_hash
+            return self._uploadable.get_plaintext_hashtree_leaves(0, self.num_segments, self.num_segments)
+        d.addCallback(_got)
+        def _got_hashtree_leaves(leaves):
+            self.log("Encoder: got plaintext_hashtree_leaves: %s" %
+                     (",".join([base32.b2a(h) for h in leaves]),),
+                     level=log.NOISY)
+            ht = list(HashTree(list(leaves)))
+            self.uri_extension_data["plaintext_root_hash"] = ht[0]
+            self._plaintext_hashtree_nodes = ht
+        d.addCallback(_got_hashtree_leaves)
+
+        d.addCallback(lambda res: self._uploadable.close())
+        return d
+
+    def send_plaintext_hash_tree_to_all_shareholders(self):
+        self.log("sending plaintext hash tree", level=log.NOISY)
+        self.set_status("Sending Plaintext Hash Tree")
+        self.set_encode_and_push_progress(extra=0.2)
+        dl = []
+        for shareid in self.landlords.keys():
+            d = self.send_plaintext_hash_tree(shareid,
+                                              self._plaintext_hashtree_nodes)
+            dl.append(d)
+        return self._gather_responses(dl)
+
+    def send_plaintext_hash_tree(self, shareid, all_hashes):
+        if shareid not in self.landlords:
+            return defer.succeed(None)
+        sh = self.landlords[shareid]
+        d = sh.put_plaintext_hashes(all_hashes)
+        d.addErrback(self._remove_shareholder, shareid, "put_plaintext_hashes")
+        return d
+
+    def send_crypttext_hash_tree_to_all_shareholders(self):
+        self.log("sending crypttext hash tree", level=log.NOISY)
+        self.set_status("Sending Crypttext Hash Tree")
+        self.set_encode_and_push_progress(extra=0.3)
+        t = HashTree(self._crypttext_hashes)
+        all_hashes = list(t)
+        self.uri_extension_data["crypttext_root_hash"] = t[0]
+        dl = []
+        for shareid in self.landlords.keys():
+            dl.append(self.send_crypttext_hash_tree(shareid, all_hashes))
+        return self._gather_responses(dl)
+
+    def send_crypttext_hash_tree(self, shareid, all_hashes):
+        if shareid not in self.landlords:
+            return defer.succeed(None)
+        sh = self.landlords[shareid]
+        d = sh.put_crypttext_hashes(all_hashes)
+        d.addErrback(self._remove_shareholder, shareid, "put_crypttext_hashes")
+        return d
 
     def send_all_subshare_hash_trees(self):
         self.log("sending subshare hash trees", level=log.NOISY)
@@ -569,6 +629,10 @@ class Encoder(object):
         lp = self.log("sending uri_extension", level=log.NOISY)
         self.set_status("Sending URI Extensions")
         self.set_encode_and_push_progress(extra=0.8)
+        for k in ('crypttext_root_hash', 'crypttext_hash',
+                  'plaintext_root_hash', 'plaintext_hash',
+                  ):
+            assert k in self.uri_extension_data
         uri_extension = uri.pack_extension(self.uri_extension_data)
         ed = {}
         for k,v in self.uri_extension_data.items():
index fea99e5a920a64f4b9c8c9b840246e8aa5725564..67b7e15b43e6645316c2ed95217f1c72953f6eff 100644 (file)
@@ -521,6 +521,72 @@ class Roundtrip(unittest.TestCase):
         d.addCallback(self.assertFetchFailureIn, "uri_extension")
         return d
 
+    def test_bad_plaintext_hashroot(self):
+        # the first server has a bad plaintext hashroot, so we will fail over
+        # to a different server.
+        modemap = dict([(i, "bad plaintext hashroot") for i in range(1)] +
+                       [(i, "good") for i in range(1, 10)])
+        d = self.send_and_recover((4,8,10), bucket_modes=modemap)
+        d.addCallback(self.assertFetchFailureIn, "plaintext_hashroot")
+        return d
+
+    def test_bad_crypttext_hashroot(self):
+        # the first server has a bad crypttext hashroot, so we will fail
+        # over to a different server.
+        modemap = dict([(i, "bad crypttext hashroot") for i in range(1)] +
+                       [(i, "good") for i in range(1, 10)])
+        d = self.send_and_recover((4,8,10), bucket_modes=modemap)
+        d.addCallback(self.assertFetchFailureIn, "crypttext_hashroot")
+        return d
+
+    def test_bad_plaintext_hashes(self):
+        # the first server has a bad plaintext hash block, so we will fail
+        # over to a different server.
+        modemap = dict([(i, "bad plaintext hash") for i in range(1)] +
+                       [(i, "good") for i in range(1, 10)])
+        d = self.send_and_recover((4,8,10), bucket_modes=modemap)
+        d.addCallback(self.assertFetchFailureIn, "plaintext_hashtree")
+        return d
+
+    def test_bad_crypttext_hashes(self):
+        # the first server has a bad crypttext hash block, so we will fail
+        # over to a different server.
+        modemap = dict([(i, "bad crypttext hash") for i in range(1)] +
+                       [(i, "good") for i in range(1, 10)])
+        d = self.send_and_recover((4,8,10), bucket_modes=modemap)
+        d.addCallback(self.assertFetchFailureIn, "crypttext_hashtree")
+        return d
+
+    def test_bad_crypttext_hashes_failure(self):
+        # to test that the crypttext merkle tree is really being applied, we
+        # sneak into the download process and corrupt two things: we replace
+        # everybody's crypttext hashtree with a bad version (computed over
+        # bogus data), and we modify the supposedly-validated uri_extension
+        # block to match the new crypttext hashtree root. The download
+        # process should notice that the crypttext coming out of FEC doesn't
+        # match the tree, and fail.
+
+        modemap = dict([(i, "good") for i in range(0, 10)])
+        d = self.send_and_recover((4,8,10), bucket_modes=modemap,
+                                  recover_mode=("corrupt_crypttext_hashes"))
+        def _done(res):
+            self.failUnless(isinstance(res, Failure))
+            self.failUnless(res.check(hashtree.BadHashError), res)
+        d.addBoth(_done)
+        return d
+
+
+    def test_bad_plaintext(self):
+        # faking a decryption failure is easier: just corrupt the key
+        modemap = dict([(i, "good") for i in range(0, 10)])
+        d = self.send_and_recover((4,8,10), bucket_modes=modemap,
+                                  recover_mode=("corrupt_key"))
+        def _done(res):
+            self.failUnless(isinstance(res, Failure))
+            self.failUnless(res.check(hashtree.BadHashError), res)
+        d.addBoth(_done)
+        return d
+
     def test_bad_sharehashes_failure(self):
         # the first 7 servers have bad block hashes, so the sharehash tree
         # will not validate, and the download will fail
index 77092d802d51d2ae21a0a7c43f6a543746dc5d23..3fa9441e1ec64ce200cc5b5a68886bb78a8cb577 100644 (file)
@@ -1303,6 +1303,8 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase):
         for key in ("size", "num_segments", "segment_size",
                     "needed_shares", "total_shares",
                     "codec_name", "codec_params", "tail_codec_params",
+                    "plaintext_hash", "plaintext_root_hash",
+                    "crypttext_hash", "crypttext_root_hash",
                     "share_root_hash", "UEB_hash"):
             self.failUnless("%s: " % key in output, key)