From 956d5ae256c771d67996b31e2e5c649603c9e50c Mon Sep 17 00:00:00 2001 From: Brian Warner Date: Sat, 9 Jun 2007 20:46:04 -0700 Subject: [PATCH] rename fileid/verifierid to plaintext_hash/crypttext_hash --- docs/URI-extension.txt | 7 +--- src/allmydata/download.py | 52 +++++++++++++------------- src/allmydata/interfaces.py | 2 +- src/allmydata/test/test_encode.py | 14 +++---- src/allmydata/test/test_system.py | 2 +- src/allmydata/test/test_upload.py | 5 ++- src/allmydata/upload.py | 61 +++++++++++++++++-------------- src/allmydata/util/hashutil.py | 18 ++++----- 8 files changed, 83 insertions(+), 78 deletions(-) diff --git a/docs/URI-extension.txt b/docs/URI-extension.txt index 0b434809..e8d47aa8 100644 --- a/docs/URI-extension.txt +++ b/docs/URI-extension.txt @@ -24,9 +24,9 @@ on their sizes): tail_codec_params 12 share_root_hash 32 (binary) or 52 (base32-encoded) each - fileid + plaintext_hash plaintext_root_hash - verifierid + crypttext_hash crypttext_root_hash Some pieces are needed elsewhere (size should be visible without pulling the @@ -35,9 +35,6 @@ peer selection algorithms need needed_shares to ask a minimal set of peers). Some pieces are arguably redundant but are convenient to have present (test_encode.py makes use of num_segments). -fileid/verifierid need to be renamed 'plaintext_hash' and 'crypttext_hash' -respectively. - The rule for this data block is that it should be a constant size for all files, regardless of file size. Therefore hash trees (which have a size that depends linearly upon the number of segments) are stored elsewhere in the diff --git a/src/allmydata/download.py b/src/allmydata/download.py index 498c171f..4a7cab68 100644 --- a/src/allmydata/download.py +++ b/src/allmydata/download.py @@ -29,8 +29,8 @@ class Output: self.downloadable = downloadable self._decryptor = AES.new(key=key, mode=AES.MODE_CTR, counterstart="\x00"*16) - self._verifierid_hasher = hashutil.verifierid_hasher() - self._fileid_hasher = hashutil.fileid_hasher() + self._crypttext_hasher = hashutil.crypttext_hasher() + self._plaintext_hasher = hashutil.plaintext_hasher() self.length = 0 self._segment_number = 0 self._plaintext_hash_tree = None @@ -49,7 +49,7 @@ class Output: # memory footprint: 'crypttext' is the only segment_size usage # outstanding. While we decrypt it into 'plaintext', we hit # 2*segment_size. - self._verifierid_hasher.update(crypttext) + self._crypttext_hasher.update(crypttext) if self._crypttext_hash_tree: ch = hashutil.crypttext_segment_hasher() ch.update(crypttext) @@ -61,7 +61,7 @@ class Output: # now we're back down to 1*segment_size. - self._fileid_hasher.update(plaintext) + self._plaintext_hasher.update(plaintext) if self._plaintext_hash_tree: ph = hashutil.plaintext_segment_hasher() ph.update(plaintext) @@ -74,8 +74,8 @@ class Output: self.downloadable.write(plaintext) def close(self): - self.verifierid = self._verifierid_hasher.digest() - self.fileid = self._fileid_hasher.digest() + self.crypttext_hash = self._crypttext_hasher.digest() + self.plaintext_hash = self._plaintext_hasher.digest() self.downloadable.close() def finish(self): @@ -252,8 +252,8 @@ class SegmentDownloader: self.parent.bucket_failed(vbucket) class FileDownloader: - check_verifierid = True - check_fileid = True + check_crypttext_hash = True + check_plaintext_hash = True def __init__(self, client, uri, downloadable): self._client = client @@ -412,11 +412,11 @@ class FileDownloader: self._tail_codec = codec.get_decoder_by_name(d['codec_name']) self._tail_codec.set_serialized_params(d['tail_codec_params']) - verifierid = d['verifierid'] - assert isinstance(verifierid, str) - assert len(verifierid) == 32 - self._verifierid = verifierid - self._fileid = d['fileid'] + crypttext_hash = d['crypttext_hash'] + assert isinstance(crypttext_hash, str) + assert len(crypttext_hash) == 32 + self._crypttext_hash = crypttext_hash + self._plaintext_hash = d['plaintext_hash'] self._roothash = d['share_root_hash'] self._segment_size = segment_size = d['segment_size'] @@ -576,18 +576,20 @@ class FileDownloader: def _done(self, res): self._output.close() - log.msg("computed VERIFIERID: %s" % idlib.b2a(self._output.verifierid)) - log.msg("computed FILEID: %s" % idlib.b2a(self._output.fileid)) - if self.check_verifierid: - _assert(self._verifierid == self._output.verifierid, - "bad verifierid: computed=%s, expected=%s" % - (idlib.b2a(self._output.verifierid), - idlib.b2a(self._verifierid))) - if self.check_fileid: - _assert(self._fileid == self._output.fileid, - "bad fileid: computed=%s, expected=%s" % - (idlib.b2a(self._output.fileid), - idlib.b2a(self._fileid))) + log.msg("computed CRYPTTEXT_HASH: %s" % + idlib.b2a(self._output.crypttext_hash)) + log.msg("computed PLAINTEXT_HASH: %s" % + idlib.b2a(self._output.plaintext_hash)) + if self.check_crypttext_hash: + _assert(self._crypttext_hash == self._output.crypttext_hash, + "bad crypttext_hash: computed=%s, expected=%s" % + (idlib.b2a(self._output.crypttext_hash), + idlib.b2a(self._crypttext_hash))) + if self.check_plaintext_hash: + _assert(self._plaintext_hash == self._output.plaintext_hash, + "bad plaintext_hash: computed=%s, expected=%s" % + (idlib.b2a(self._output.plaintext_hash), + idlib.b2a(self._plaintext_hash))) _assert(self._output.length == self._size, got=self._output.length, expected=self._size) return self._output.finish() diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index 4f40fd21..e0686a0d 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -205,7 +205,7 @@ class ICodecEncoder(Interface): this means it may contain hex digits and hyphens, and nothing else. The idea is that the URI contains something like '%s:%s:%s' % (encoder.get_encoder_name(), encoder.get_serialized_params(), - b2a(verifierid)), and this is enough information to construct a + b2a(crypttext_hash)), and this is enough information to construct a compatible decoder. """ diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py index 1b194f01..dc7a64d5 100644 --- a/src/allmydata/test/test_encode.py +++ b/src/allmydata/test/test_encode.py @@ -33,7 +33,7 @@ class FakeStorageServer: d = eventual.fireEventually() d.addCallback(lambda res: _call()) return d - def allocate_buckets(self, verifierid, sharenums, shareize, blocksize, canary): + def allocate_buckets(self, crypttext_hash, sharenums, shareize, blocksize, canary): if self.mode == "full": return (set(), {},) elif self.mode == "already got them": @@ -296,15 +296,15 @@ class Roundtrip(unittest.TestCase): peer = FakeBucketWriter(mode) shareholders[shnum] = peer e.set_shareholders(shareholders) - fileid_hasher = hashutil.fileid_hasher() - fileid_hasher.update(data) + plaintext_hasher = hashutil.plaintext_hasher() + plaintext_hasher.update(data) cryptor = AES.new(key=nonkey, mode=AES.MODE_CTR, counterstart="\x00"*16) - verifierid_hasher = hashutil.verifierid_hasher() - verifierid_hasher.update(cryptor.encrypt(data)) + crypttext_hasher = hashutil.crypttext_hasher() + crypttext_hasher.update(cryptor.encrypt(data)) - e.set_uri_extension_data({'verifierid': verifierid_hasher.digest(), - 'fileid': fileid_hasher.digest(), + e.set_uri_extension_data({'crypttext_hash': crypttext_hasher.digest(), + 'plaintext_hash': plaintext_hasher.digest(), }) d = e.start() def _sent(uri_extension_hash): diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 90b7c792..95355410 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -226,7 +226,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase): # some of the validation data, so it will fail in the post-download phase # when the file's crypttext integrity check fails. Do the same thing for # the key, which should cause the download to fail the post-download - # plaintext verifierid check. + # plaintext_hash check. def test_vdrive(self): self.basedir = "test_system/SystemTest/test_vdrive" diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py index cf6017a0..f1372d5e 100644 --- a/src/allmydata/test/test_upload.py +++ b/src/allmydata/test/test_upload.py @@ -11,8 +11,9 @@ from test_encode import FakePeer class FakeClient: def __init__(self, mode="good"): self.mode = mode - def get_permuted_peers(self, verifierid): - return [ ("%20d"%fakeid, "%20d"%fakeid, FakePeer(self.mode),) for fakeid in range(50) ] + def get_permuted_peers(self, storage_index): + return [ ("%20d"%fakeid, "%20d"%fakeid, FakePeer(self.mode),) + for fakeid in range(50) ] class GoodServer(unittest.TestCase): def setUp(self): diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py index 49f93d82..93bc432b 100644 --- a/src/allmydata/upload.py +++ b/src/allmydata/upload.py @@ -22,14 +22,15 @@ class TooFullError(Exception): pass class PeerTracker: - def __init__(self, peerid, permutedid, connection, sharesize, blocksize, verifierid): + def __init__(self, peerid, permutedid, connection, + sharesize, blocksize, crypttext_hash): self.peerid = peerid self.permutedid = permutedid self.connection = connection # to an RIClient self.buckets = {} # k: shareid, v: IRemoteBucketWriter self.sharesize = sharesize self.blocksize = blocksize - self.verifierid = verifierid + self.crypttext_hash = crypttext_hash self._storageserver = None def query(self, sharenums): @@ -42,9 +43,11 @@ class PeerTracker: def _got_storageserver(self, storageserver): self._storageserver = storageserver def _query(self, sharenums): - d = self._storageserver.callRemote("allocate_buckets", self.verifierid, + d = self._storageserver.callRemote("allocate_buckets", + self.crypttext_hash, sharenums, self.sharesize, - self.blocksize, canary=Referenceable()) + self.blocksize, + canary=Referenceable()) d.addCallback(self._got_reply) return d @@ -70,13 +73,13 @@ class FileUploader: self._size = filehandle.tell() filehandle.seek(0) - def set_id_strings(self, verifierid, fileid): - assert isinstance(verifierid, str) - assert len(verifierid) == 32 - self._verifierid = verifierid - assert isinstance(fileid, str) - assert len(fileid) == 32 - self._fileid = fileid + def set_id_strings(self, crypttext_hash, plaintext_hash): + assert isinstance(crypttext_hash, str) + assert len(crypttext_hash) == 32 + self._crypttext_hash = crypttext_hash + assert isinstance(plaintext_hash, str) + assert len(plaintext_hash) == 32 + self._plaintext_hash = plaintext_hash def set_encryption_key(self, key): assert isinstance(key, str) @@ -92,7 +95,7 @@ class FileUploader: This method returns a Deferred that will fire with the URI (a string).""" - log.msg("starting upload [%s]" % (idlib.b2a(self._verifierid),)) + log.msg("starting upload [%s]" % (idlib.b2a(self._crypttext_hash),)) assert self.needed_shares # create the encoder, so we can know how large the shares will be @@ -103,9 +106,11 @@ class FileUploader: # we are responsible for locating the shareholders. self._encoder is # responsible for handling the data and sending out the shares. - peers = self._client.get_permuted_peers(self._verifierid) + peers = self._client.get_permuted_peers(self._crypttext_hash) assert peers - trackers = [ PeerTracker(peerid, permutedid, conn, share_size, block_size, self._verifierid) + trackers = [ PeerTracker(peerid, permutedid, conn, + share_size, block_size, + self._crypttext_hash) for permutedid, peerid, conn in peers ] self.usable_peers = set(trackers) # this set shrinks over time self.used_peers = set() # while this set grows @@ -236,13 +241,13 @@ class FileUploader: self._encoder.set_shareholders(buckets) uri_extension_data = {} - uri_extension_data['verifierid'] = self._verifierid - uri_extension_data['fileid'] = self._fileid + uri_extension_data['crypttext_hash'] = self._crypttext_hash + uri_extension_data['plaintext_hash'] = self._plaintext_hash self._encoder.set_uri_extension_data(uri_extension_data) return self._encoder.start() def _compute_uri(self, uri_extension_hash): - return pack_uri(storage_index=self._verifierid, + return pack_uri(storage_index=self._crypttext_hash, key=self._encryption_key, uri_extension_hash=uri_extension_hash, needed_shares=self.needed_shares, @@ -291,8 +296,8 @@ class Uploader(service.MultiService): total_shares = 100 # Total number of shares created by encoding. If everybody has room then this is is how many we will upload. def compute_id_strings(self, f): - # return a list of (fileid, encryptionkey, verifierid) - fileid_hasher = hashutil.fileid_hasher() + # return a list of (plaintext_hash, encryptionkey, crypttext_hash) + plaintext_hasher = hashutil.plaintext_hasher() enckey_hasher = hashutil.key_hasher() f.seek(0) BLOCKSIZE = 64*1024 @@ -300,14 +305,14 @@ class Uploader(service.MultiService): data = f.read(BLOCKSIZE) if not data: break - fileid_hasher.update(data) + plaintext_hasher.update(data) enckey_hasher.update(data) - fileid = fileid_hasher.digest() + plaintext_hash = plaintext_hasher.digest() enckey = enckey_hasher.digest() - # now make a second pass to determine the verifierid. It would be + # now make a second pass to determine the crypttext_hash. It would be # nice to make this involve fewer passes. - verifierid_hasher = hashutil.verifierid_hasher() + crypttext_hasher = hashutil.crypttext_hasher() key = enckey[:16] cryptor = AES.new(key=key, mode=AES.MODE_CTR, counterstart="\x00"*16) @@ -316,13 +321,13 @@ class Uploader(service.MultiService): data = f.read(BLOCKSIZE) if not data: break - verifierid_hasher.update(cryptor.encrypt(data)) - verifierid = verifierid_hasher.digest() + crypttext_hasher.update(cryptor.encrypt(data)) + crypttext_hash = crypttext_hasher.digest() # and leave the file pointer at the beginning f.seek(0) - return fileid, key, verifierid + return plaintext_hash, key, crypttext_hash def upload(self, f, options={}): # this returns the URI @@ -333,9 +338,9 @@ class Uploader(service.MultiService): u = self.uploader_class(self.parent, options) u.set_filehandle(fh) u.set_params(self.needed_shares, self.desired_shares, self.total_shares) - fileid, key, verifierid = self.compute_id_strings(fh) + plaintext_hash, key, crypttext_hash = self.compute_id_strings(fh) u.set_encryption_key(key) - u.set_id_strings(verifierid, fileid) + u.set_id_strings(crypttext_hash, plaintext_hash) d = u.start() def _done(res): f.close_filehandle(fh) diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py index 9cd42a21..c3d967b9 100644 --- a/src/allmydata/util/hashutil.py +++ b/src/allmydata/util/hashutil.py @@ -31,15 +31,15 @@ def uri_extension_hash(data): def uri_extension_hasher(): return tagged_hasher("allmydata_uri_extension_v1") -def fileid_hash(data): - return tagged_hash("allmydata_fileid_v1", data) -def fileid_hasher(): - return tagged_hasher("allmydata_fileid_v1") - -def verifierid_hash(data): - return tagged_hash("allmydata_verifierid_v1", data) -def verifierid_hasher(): - return tagged_hasher("allmydata_verifierid_v1") +def plaintext_hash(data): + return tagged_hash("allmydata_plaintext_hash_v1", data) +def plaintext_hasher(): + return tagged_hasher("allmydata_plaintext_hash_v1") + +def crypttext_hash(data): + return tagged_hash("allmydata_crypttext_hash_v1", data) +def crypttext_hasher(): + return tagged_hasher("allmydata_crypttext_hash_v1") def crypttext_segment_hash(data): return tagged_hash("allmydata_crypttext_segment_v1", data) -- 2.45.2