From 7b21054c33d4651dd7ba18ece5ec84d5e62d321c Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@lothar.com>
Date: Sun, 23 Mar 2008 15:35:54 -0700
Subject: [PATCH] UNDO: upload: stop putting plaintext and ciphertext hashes in
 shares. This removes the guess-partial-information attack vector, and reduces
 the amount of overhead that we consume with each file. It also introduces a
 forwards-compatibility break: older versions of the code (before the previous
 download-time "make hashes optional" patch) will be unable to read files
 uploaded by this version, as they will complain about the missing hashes.
 This patch is experimental, and is being pushed into trunk to obtain test
 coverage. We may undo it before releasing 1.0.

---
 src/allmydata/encode.py           | 66 ++++++++++++++++++++++++++++++-
 src/allmydata/test/test_encode.py | 66 +++++++++++++++++++++++++++++++
 src/allmydata/test/test_system.py |  2 +
 3 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/src/allmydata/encode.py b/src/allmydata/encode.py
index a7e71070..a815a5d7 100644
--- a/src/allmydata/encode.py
+++ b/src/allmydata/encode.py
@@ -239,6 +239,10 @@ class Encoder(object):
 
         d.addCallback(lambda res: self.finish_hashing())
 
+        d.addCallback(lambda res:
+                      self.send_plaintext_hash_tree_to_all_shareholders())
+        d.addCallback(lambda res:
+                      self.send_crypttext_hash_tree_to_all_shareholders())
         d.addCallback(lambda res: self.send_all_subshare_hash_trees())
         d.addCallback(lambda res: self.send_all_share_hash_trees())
         d.addCallback(lambda res: self.send_uri_extension_to_all_shareholders())
@@ -506,7 +510,63 @@ class Encoder(object):
         self._start_hashing_and_close_timestamp = time.time()
         self.set_status("Finishing hashes")
         self.set_encode_and_push_progress(extra=0.0)
-        return self._uploadable.close()
+        crypttext_hash = self._crypttext_hasher.digest()
+        self.uri_extension_data["crypttext_hash"] = crypttext_hash
+        d = self._uploadable.get_plaintext_hash()
+        def _got(plaintext_hash):
+            self.uri_extension_data["plaintext_hash"] = plaintext_hash
+            return self._uploadable.get_plaintext_hashtree_leaves(0, self.num_segments, self.num_segments)
+        d.addCallback(_got)
+        def _got_hashtree_leaves(leaves):
+            self.log("Encoder: got plaintext_hashtree_leaves: %s" %
+                     (",".join([base32.b2a(h) for h in leaves]),),
+                     level=log.NOISY)
+            ht = list(HashTree(list(leaves)))
+            self.uri_extension_data["plaintext_root_hash"] = ht[0]
+            self._plaintext_hashtree_nodes = ht
+        d.addCallback(_got_hashtree_leaves)
+
+        d.addCallback(lambda res: self._uploadable.close())
+        return d
+
+    def send_plaintext_hash_tree_to_all_shareholders(self):
+        self.log("sending plaintext hash tree", level=log.NOISY)
+        self.set_status("Sending Plaintext Hash Tree")
+        self.set_encode_and_push_progress(extra=0.2)
+        dl = []
+        for shareid in self.landlords.keys():
+            d = self.send_plaintext_hash_tree(shareid,
+                                              self._plaintext_hashtree_nodes)
+            dl.append(d)
+        return self._gather_responses(dl)
+
+    def send_plaintext_hash_tree(self, shareid, all_hashes):
+        if shareid not in self.landlords:
+            return defer.succeed(None)
+        sh = self.landlords[shareid]
+        d = sh.put_plaintext_hashes(all_hashes)
+        d.addErrback(self._remove_shareholder, shareid, "put_plaintext_hashes")
+        return d
+
+    def send_crypttext_hash_tree_to_all_shareholders(self):
+        self.log("sending crypttext hash tree", level=log.NOISY)
+        self.set_status("Sending Crypttext Hash Tree")
+        self.set_encode_and_push_progress(extra=0.3)
+        t = HashTree(self._crypttext_hashes)
+        all_hashes = list(t)
+        self.uri_extension_data["crypttext_root_hash"] = t[0]
+        dl = []
+        for shareid in self.landlords.keys():
+            dl.append(self.send_crypttext_hash_tree(shareid, all_hashes))
+        return self._gather_responses(dl)
+
+    def send_crypttext_hash_tree(self, shareid, all_hashes):
+        if shareid not in self.landlords:
+            return defer.succeed(None)
+        sh = self.landlords[shareid]
+        d = sh.put_crypttext_hashes(all_hashes)
+        d.addErrback(self._remove_shareholder, shareid, "put_crypttext_hashes")
+        return d
 
     def send_all_subshare_hash_trees(self):
         self.log("sending subshare hash trees", level=log.NOISY)
@@ -569,6 +629,10 @@ class Encoder(object):
         lp = self.log("sending uri_extension", level=log.NOISY)
         self.set_status("Sending URI Extensions")
         self.set_encode_and_push_progress(extra=0.8)
+        for k in ('crypttext_root_hash', 'crypttext_hash',
+                  'plaintext_root_hash', 'plaintext_hash',
+                  ):
+            assert k in self.uri_extension_data
         uri_extension = uri.pack_extension(self.uri_extension_data)
         ed = {}
         for k,v in self.uri_extension_data.items():
diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py
index fea99e5a..67b7e15b 100644
--- a/src/allmydata/test/test_encode.py
+++ b/src/allmydata/test/test_encode.py
@@ -521,6 +521,72 @@ class Roundtrip(unittest.TestCase):
         d.addCallback(self.assertFetchFailureIn, "uri_extension")
         return d
 
+    def test_bad_plaintext_hashroot(self):
+        # the first server has a bad plaintext hashroot, so we will fail over
+        # to a different server.
+        modemap = dict([(i, "bad plaintext hashroot") for i in range(1)] +
+                       [(i, "good") for i in range(1, 10)])
+        d = self.send_and_recover((4,8,10), bucket_modes=modemap)
+        d.addCallback(self.assertFetchFailureIn, "plaintext_hashroot")
+        return d
+
+    def test_bad_crypttext_hashroot(self):
+        # the first server has a bad crypttext hashroot, so we will fail
+        # over to a different server.
+        modemap = dict([(i, "bad crypttext hashroot") for i in range(1)] +
+                       [(i, "good") for i in range(1, 10)])
+        d = self.send_and_recover((4,8,10), bucket_modes=modemap)
+        d.addCallback(self.assertFetchFailureIn, "crypttext_hashroot")
+        return d
+
+    def test_bad_plaintext_hashes(self):
+        # the first server has a bad plaintext hash block, so we will fail
+        # over to a different server.
+        modemap = dict([(i, "bad plaintext hash") for i in range(1)] +
+                       [(i, "good") for i in range(1, 10)])
+        d = self.send_and_recover((4,8,10), bucket_modes=modemap)
+        d.addCallback(self.assertFetchFailureIn, "plaintext_hashtree")
+        return d
+
+    def test_bad_crypttext_hashes(self):
+        # the first server has a bad crypttext hash block, so we will fail
+        # over to a different server.
+        modemap = dict([(i, "bad crypttext hash") for i in range(1)] +
+                       [(i, "good") for i in range(1, 10)])
+        d = self.send_and_recover((4,8,10), bucket_modes=modemap)
+        d.addCallback(self.assertFetchFailureIn, "crypttext_hashtree")
+        return d
+
+    def test_bad_crypttext_hashes_failure(self):
+        # to test that the crypttext merkle tree is really being applied, we
+        # sneak into the download process and corrupt two things: we replace
+        # everybody's crypttext hashtree with a bad version (computed over
+        # bogus data), and we modify the supposedly-validated uri_extension
+        # block to match the new crypttext hashtree root. The download
+        # process should notice that the crypttext coming out of FEC doesn't
+        # match the tree, and fail.
+
+        modemap = dict([(i, "good") for i in range(0, 10)])
+        d = self.send_and_recover((4,8,10), bucket_modes=modemap,
+                                  recover_mode=("corrupt_crypttext_hashes"))
+        def _done(res):
+            self.failUnless(isinstance(res, Failure))
+            self.failUnless(res.check(hashtree.BadHashError), res)
+        d.addBoth(_done)
+        return d
+
+
+    def test_bad_plaintext(self):
+        # faking a decryption failure is easier: just corrupt the key
+        modemap = dict([(i, "good") for i in range(0, 10)])
+        d = self.send_and_recover((4,8,10), bucket_modes=modemap,
+                                  recover_mode=("corrupt_key"))
+        def _done(res):
+            self.failUnless(isinstance(res, Failure))
+            self.failUnless(res.check(hashtree.BadHashError), res)
+        d.addBoth(_done)
+        return d
+
     def test_bad_sharehashes_failure(self):
         # the first 7 servers have bad block hashes, so the sharehash tree
         # will not validate, and the download will fail
diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py
index 77092d80..3fa9441e 100644
--- a/src/allmydata/test/test_system.py
+++ b/src/allmydata/test/test_system.py
@@ -1303,6 +1303,8 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase):
         for key in ("size", "num_segments", "segment_size",
                     "needed_shares", "total_shares",
                     "codec_name", "codec_params", "tail_codec_params",
+                    "plaintext_hash", "plaintext_root_hash",
+                    "crypttext_hash", "crypttext_root_hash",
                     "share_root_hash", "UEB_hash"):
             self.failUnless("%s: " % key in output, key)
 
-- 
2.45.2