From 5cbdc240e2689c445a56ff7da1e6bed932f10897 Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@lothar.com>
Date: Wed, 6 Jun 2007 19:40:20 -0700
Subject: [PATCH] encode: add plaintext/crypttext merkle trees to the shares,
 and the thingA block. Still needs tests and download-side verification.

---
 src/allmydata/encode.py           | 51 +++++++++++++++++++++++++++++--
 src/allmydata/hashtree.py         |  6 ++++
 src/allmydata/interfaces.py       |  7 ++++-
 src/allmydata/storageserver.py    | 12 ++++++++
 src/allmydata/test/test_encode.py | 12 ++++++++
 5 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/src/allmydata/encode.py b/src/allmydata/encode.py
index 84faa770..14d1804b 100644
--- a/src/allmydata/encode.py
+++ b/src/allmydata/encode.py
@@ -3,7 +3,8 @@
 from zope.interface import implements
 from twisted.internet import defer
 from twisted.python import log
-from allmydata.hashtree import HashTree, block_hash, thingA_hash
+from allmydata.hashtree import HashTree, IncompleteHashTree, \
+     block_hash, thingA_hash, plaintext_hash, crypttext_hash
 from allmydata.Crypto.Cipher import AES
 from allmydata.util import mathutil, bencode
 from allmydata.util.assertutil import _assert
@@ -164,8 +165,10 @@ class Encoder(object):
                                               self.segment_size)
         self.share_size = mathutil.div_ceil(self.file_size,
                                             self.required_shares)
+        self._plaintext_hashes = []
+        self._crypttext_hashes = []
         self.setup_encryption()
-        self.setup_codec()
+        self.setup_codec() # TODO: duplicate call?
         d = defer.succeed(None)
 
         for i in range(self.num_segments-1):
@@ -176,6 +179,10 @@ class Encoder(object):
             d.addCallback(lambda res, i=i: self.do_segment(i))
         d.addCallback(lambda res: self.do_tail_segment(self.num_segments-1))
 
+        d.addCallback(lambda res:
+                      self.send_plaintext_hash_tree_to_all_shareholders())
+        d.addCallback(lambda res:
+                      self.send_crypttext_hash_tree_to_all_shareholders())
         d.addCallback(lambda res: self.send_all_subshare_hash_trees())
         d.addCallback(lambda res: self.send_all_share_hash_trees())
         d.addCallback(lambda res: self.send_thingA_to_all_shareholders())
@@ -216,8 +223,10 @@ class Encoder(object):
             input_piece = self.infile.read(input_piece_size)
             # non-tail segments should be the full segment size
             assert len(input_piece) == input_piece_size
+            self._plaintext_hashes.append(plaintext_hash(input_piece))
             encrypted_piece = self.cryptor.encrypt(input_piece)
             chunks.append(encrypted_piece)
+            self._crypttext_hashes.append(crypttext_hash(encrypted_piece))
         d = codec.encode(chunks)
         d.addCallback(self._encoded_segment, segnum)
         return d
@@ -229,10 +238,12 @@ class Encoder(object):
 
         for i in range(self.required_shares):
             input_piece = self.infile.read(input_piece_size)
+            self._plaintext_hashes.append(plaintext_hash(input_piece))
             if len(input_piece) < input_piece_size:
                 # padding
                 input_piece += ('\x00' * (input_piece_size - len(input_piece)))
             encrypted_piece = self.cryptor.encrypt(input_piece)
+            self._crypttext_hashes.append(crypttext_hash(encrypted_piece))
             chunks.append(encrypted_piece)
         d = codec.encode(chunks)
         d.addCallback(self._encoded_segment, segnum)
@@ -300,6 +311,42 @@ class Encoder(object):
             d0.addErrback(_eatNotEnoughPeersError)
         return d
 
+    def send_plaintext_hash_tree_to_all_shareholders(self):
+        log.msg("%s sending plaintext hash tree" % self)
+        t = HashTree(self._plaintext_hashes)
+        all_hashes = list(t)
+        self.thingA_data["plaintext_root_hash"] = t[0]
+        dl = []
+        for shareid in self.landlords.keys():
+            dl.append(self.send_plaintext_hash_tree(shareid, all_hashes))
+        return self._gather_responses(dl)
+
+    def send_plaintext_hash_tree(self, shareid, all_hashes):
+        if shareid not in self.landlords:
+            return defer.succeed(None)
+        sh = self.landlords[shareid]
+        d = sh.callRemote("put_plaintext_hashes", all_hashes)
+        d.addErrback(self._remove_shareholder, shareid, "put_plaintext_hashes")
+        return d
+
+    def send_crypttext_hash_tree_to_all_shareholders(self):
+        log.msg("%s sending crypttext hash tree" % self)
+        t = HashTree(self._crypttext_hashes)
+        all_hashes = list(t)
+        self.thingA_data["crypttext_root_hash"] = t[0]
+        dl = []
+        for shareid in self.landlords.keys():
+            dl.append(self.send_crypttext_hash_tree(shareid, all_hashes))
+        return self._gather_responses(dl)
+
+    def send_crypttext_hash_tree(self, shareid, all_hashes):
+        if shareid not in self.landlords:
+            return defer.succeed(None)
+        sh = self.landlords[shareid]
+        d = sh.callRemote("put_crypttext_hashes", all_hashes)
+        d.addErrback(self._remove_shareholder, shareid, "put_crypttext_hashes")
+        return d
+
     def send_all_subshare_hash_trees(self):
         log.msg("%s sending subshare hash trees" % self)
         dl = []
diff --git a/src/allmydata/hashtree.py b/src/allmydata/hashtree.py
index 2640cfc1..3694b91b 100644
--- a/src/allmydata/hashtree.py
+++ b/src/allmydata/hashtree.py
@@ -442,3 +442,9 @@ def block_hash(data):
 
 def thingA_hash(data):
     return tagged_hash("thingA", data)
+
+def plaintext_hash(data):
+    return tagged_hash("plaintext segment", data)
+
+def crypttext_hash(data):
+    return tagged_hash("crypttext segment", data)
diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py
index f419145b..2917ee46 100644
--- a/src/allmydata/interfaces.py
+++ b/src/allmydata/interfaces.py
@@ -51,7 +51,12 @@ class RIBucketWriter(RemoteInterface):
         bytes in length. The last segment might be shorter.
         """
         return None
-    
+
+    def put_plaintext_hashes(hashes=ListOf(Hash, maxLength=2**20)):
+        return None
+    def put_crypttext_hashes(hashes=ListOf(Hash, maxLength=2**20)):
+        return None
+
     def put_block_hashes(blockhashes=ListOf(Hash, maxLength=2**20)):
         return None
         
diff --git a/src/allmydata/storageserver.py b/src/allmydata/storageserver.py
index 310a965e..788c6389 100644
--- a/src/allmydata/storageserver.py
+++ b/src/allmydata/storageserver.py
@@ -49,6 +49,18 @@ class BucketWriter(Referenceable):
         f.seek(self.blocksize*segmentnum)
         f.write(data)
 
+    def remote_put_plaintext_hashes(self, hashes):
+        precondition(not self.closed)
+        # TODO: verify the length of hashes.
+        # TODO: tighten foolscap schema to require exactly 32 bytes.
+        self._write_file('plaintext_hashes', ''.join(hashes))
+
+    def remote_put_crypttext_hashes(self, hashes):
+        precondition(not self.closed)
+        # TODO: verify the length of hashes.
+        # TODO: tighten foolscap schema to require exactly 32 bytes.
+        self._write_file('crypttext_hashes', ''.join(hashes))
+
     def remote_put_block_hashes(self, blockhashes):
         precondition(not self.closed)
         # TODO: verify the length of blockhashes.
diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py
index d8f58087..cb6f2e37 100644
--- a/src/allmydata/test/test_encode.py
+++ b/src/allmydata/test/test_encode.py
@@ -49,6 +49,8 @@ class FakeBucketWriter:
     def __init__(self, mode="good"):
         self.mode = mode
         self.blocks = {}
+        self.plaintext_hashes = None
+        self.crypttext_hashes = None
         self.block_hashes = None
         self.share_hashes = None
         self.closed = False
@@ -66,6 +68,16 @@ class FakeBucketWriter:
             raise LostPeerError("I'm going away now")
         self.blocks[segmentnum] = data
 
+    def put_plaintext_hashes(self, hashes):
+        assert not self.closed
+        assert self.plaintext_hashes is None
+        self.plaintext_hashes = hashes
+
+    def put_crypttext_hashes(self, hashes):
+        assert not self.closed
+        assert self.crypttext_hashes is None
+        self.crypttext_hashes = hashes
+
     def put_block_hashes(self, blockhashes):
         assert not self.closed
         assert self.block_hashes is None
-- 
2.45.2