From: Brian Warner Date: Thu, 7 Jun 2007 02:40:20 +0000 (-0700) Subject: encode: add plaintext/crypttext merkle trees to the shares, and the thingA block... X-Git-Tag: allmydata-tahoe-0.3.0~27 X-Git-Url: https://git.rkrishnan.org/Site/Content/Exhibitors/index.php?a=commitdiff_plain;h=5cbdc240e2689c445a56ff7da1e6bed932f10897;p=tahoe-lafs%2Ftahoe-lafs.git encode: add plaintext/crypttext merkle trees to the shares, and the thingA block. Still needs tests and download-side verification. --- diff --git a/src/allmydata/encode.py b/src/allmydata/encode.py index 84faa770..14d1804b 100644 --- a/src/allmydata/encode.py +++ b/src/allmydata/encode.py @@ -3,7 +3,8 @@ from zope.interface import implements from twisted.internet import defer from twisted.python import log -from allmydata.hashtree import HashTree, block_hash, thingA_hash +from allmydata.hashtree import HashTree, IncompleteHashTree, \ + block_hash, thingA_hash, plaintext_hash, crypttext_hash from allmydata.Crypto.Cipher import AES from allmydata.util import mathutil, bencode from allmydata.util.assertutil import _assert @@ -164,8 +165,10 @@ class Encoder(object): self.segment_size) self.share_size = mathutil.div_ceil(self.file_size, self.required_shares) + self._plaintext_hashes = [] + self._crypttext_hashes = [] self.setup_encryption() - self.setup_codec() + self.setup_codec() # TODO: duplicate call? d = defer.succeed(None) for i in range(self.num_segments-1): @@ -176,6 +179,10 @@ class Encoder(object): d.addCallback(lambda res, i=i: self.do_segment(i)) d.addCallback(lambda res: self.do_tail_segment(self.num_segments-1)) + d.addCallback(lambda res: + self.send_plaintext_hash_tree_to_all_shareholders()) + d.addCallback(lambda res: + self.send_crypttext_hash_tree_to_all_shareholders()) d.addCallback(lambda res: self.send_all_subshare_hash_trees()) d.addCallback(lambda res: self.send_all_share_hash_trees()) d.addCallback(lambda res: self.send_thingA_to_all_shareholders()) @@ -216,8 +223,10 @@ class Encoder(object): input_piece = self.infile.read(input_piece_size) # non-tail segments should be the full segment size assert len(input_piece) == input_piece_size + self._plaintext_hashes.append(plaintext_hash(input_piece)) encrypted_piece = self.cryptor.encrypt(input_piece) chunks.append(encrypted_piece) + self._crypttext_hashes.append(crypttext_hash(encrypted_piece)) d = codec.encode(chunks) d.addCallback(self._encoded_segment, segnum) return d @@ -229,10 +238,12 @@ class Encoder(object): for i in range(self.required_shares): input_piece = self.infile.read(input_piece_size) + self._plaintext_hashes.append(plaintext_hash(input_piece)) if len(input_piece) < input_piece_size: # padding input_piece += ('\x00' * (input_piece_size - len(input_piece))) encrypted_piece = self.cryptor.encrypt(input_piece) + self._crypttext_hashes.append(crypttext_hash(encrypted_piece)) chunks.append(encrypted_piece) d = codec.encode(chunks) d.addCallback(self._encoded_segment, segnum) @@ -300,6 +311,42 @@ class Encoder(object): d0.addErrback(_eatNotEnoughPeersError) return d + def send_plaintext_hash_tree_to_all_shareholders(self): + log.msg("%s sending plaintext hash tree" % self) + t = HashTree(self._plaintext_hashes) + all_hashes = list(t) + self.thingA_data["plaintext_root_hash"] = t[0] + dl = [] + for shareid in self.landlords.keys(): + dl.append(self.send_plaintext_hash_tree(shareid, all_hashes)) + return self._gather_responses(dl) + + def send_plaintext_hash_tree(self, shareid, all_hashes): + if shareid not in self.landlords: + return defer.succeed(None) + sh = self.landlords[shareid] + d = sh.callRemote("put_plaintext_hashes", all_hashes) + d.addErrback(self._remove_shareholder, shareid, "put_plaintext_hashes") + return d + + def send_crypttext_hash_tree_to_all_shareholders(self): + log.msg("%s sending crypttext hash tree" % self) + t = HashTree(self._crypttext_hashes) + all_hashes = list(t) + self.thingA_data["crypttext_root_hash"] = t[0] + dl = [] + for shareid in self.landlords.keys(): + dl.append(self.send_crypttext_hash_tree(shareid, all_hashes)) + return self._gather_responses(dl) + + def send_crypttext_hash_tree(self, shareid, all_hashes): + if shareid not in self.landlords: + return defer.succeed(None) + sh = self.landlords[shareid] + d = sh.callRemote("put_crypttext_hashes", all_hashes) + d.addErrback(self._remove_shareholder, shareid, "put_crypttext_hashes") + return d + def send_all_subshare_hash_trees(self): log.msg("%s sending subshare hash trees" % self) dl = [] diff --git a/src/allmydata/hashtree.py b/src/allmydata/hashtree.py index 2640cfc1..3694b91b 100644 --- a/src/allmydata/hashtree.py +++ b/src/allmydata/hashtree.py @@ -442,3 +442,9 @@ def block_hash(data): def thingA_hash(data): return tagged_hash("thingA", data) + +def plaintext_hash(data): + return tagged_hash("plaintext segment", data) + +def crypttext_hash(data): + return tagged_hash("crypttext segment", data) diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index f419145b..2917ee46 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -51,7 +51,12 @@ class RIBucketWriter(RemoteInterface): bytes in length. The last segment might be shorter. """ return None - + + def put_plaintext_hashes(hashes=ListOf(Hash, maxLength=2**20)): + return None + def put_crypttext_hashes(hashes=ListOf(Hash, maxLength=2**20)): + return None + def put_block_hashes(blockhashes=ListOf(Hash, maxLength=2**20)): return None diff --git a/src/allmydata/storageserver.py b/src/allmydata/storageserver.py index 310a965e..788c6389 100644 --- a/src/allmydata/storageserver.py +++ b/src/allmydata/storageserver.py @@ -49,6 +49,18 @@ class BucketWriter(Referenceable): f.seek(self.blocksize*segmentnum) f.write(data) + def remote_put_plaintext_hashes(self, hashes): + precondition(not self.closed) + # TODO: verify the length of blockhashes. + # TODO: tighten foolscap schema to require exactly 32 bytes. + self._write_file('plaintext_hashes', ''.join(hashes)) + + def remote_put_crypttext_hashes(self, hashes): + precondition(not self.closed) + # TODO: verify the length of blockhashes. + # TODO: tighten foolscap schema to require exactly 32 bytes. + self._write_file('crypttext_hashes', ''.join(hashes)) + def remote_put_block_hashes(self, blockhashes): precondition(not self.closed) # TODO: verify the length of blockhashes. diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py index d8f58087..cb6f2e37 100644 --- a/src/allmydata/test/test_encode.py +++ b/src/allmydata/test/test_encode.py @@ -49,6 +49,8 @@ class FakeBucketWriter: def __init__(self, mode="good"): self.mode = mode self.blocks = {} + self.plaintext_hashes = None + self.crypttext_hashes = None self.block_hashes = None self.share_hashes = None self.closed = False @@ -66,6 +68,16 @@ class FakeBucketWriter: raise LostPeerError("I'm going away now") self.blocks[segmentnum] = data + def put_plaintext_hashes(self, hashes): + assert not self.closed + assert self.plaintext_hashes is None + self.plaintext_hashes = hashes + + def put_crypttext_hashes(self, hashes): + assert not self.closed + assert self.crypttext_hashes is None + self.crypttext_hashes = hashes + def put_block_hashes(self, blockhashes): assert not self.closed assert self.block_hashes is None