From: Brian Warner Date: Thu, 7 Feb 2008 02:50:47 +0000 (-0700) Subject: change encryption-key hash to include encoding parameters. This is a minor compatibil... X-Git-Tag: allmydata-tahoe-0.8.0~114 X-Git-Url: https://git.rkrishnan.org/module-simplejson.encoder.html?a=commitdiff_plain;h=da9610e90ac65c381c3f6c6fb074e29f07f0e110;p=tahoe-lafs%2Ftahoe-lafs.git change encryption-key hash to include encoding parameters. This is a minor compatibility break: CHK files encoded (with convergence) before and after this will have different keys and ciphertexts. Also switched to SHA-256d for both the data-to-key hash and the key-to-storageindex hash --- diff --git a/src/allmydata/test/test_helper.py b/src/allmydata/test/test_helper.py index b45811a8..6e0c8322 100644 --- a/src/allmydata/test/test_helper.py +++ b/src/allmydata/test/test_helper.py @@ -7,7 +7,7 @@ from foolscap import Tub, eventual from foolscap.logging import log from allmydata import upload, offloaded -from allmydata.util import hashutil, fileutil, idlib +from allmydata.util import hashutil, fileutil, idlib, mathutil from pycryptopp.cipher.aes import AES MiB = 1024*1024 @@ -139,8 +139,18 @@ class AssistedUpload(unittest.TestCase): # we want to make sure that an upload which fails (leaving the # ciphertext in the CHK_encoding/ directory) does not prevent a later # attempt to upload that file from working. We simulate this by - # populating the directory manually. - key = hashutil.key_hash(DATA)[:16] + # populating the directory manually. The hardest part is guessing the + # storage index. 
+ + k = FakeClient.DEFAULT_ENCODING_PARAMETERS["k"] + n = FakeClient.DEFAULT_ENCODING_PARAMETERS["n"] + max_segsize = FakeClient.DEFAULT_ENCODING_PARAMETERS["max_segment_size"] + segsize = min(max_segsize, len(DATA)) + # this must be a multiple of 'required_shares'==k + segsize = mathutil.next_multiple(segsize, k) + + key = hashutil.content_hash_key_hash(k, n, segsize, DATA) + assert len(key) == 16 encryptor = AES(key) SI = hashutil.storage_index_hash(key) SI_s = idlib.b2a(SI) diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py index 21ed7253..46fa94c3 100644 --- a/src/allmydata/test/test_util.py +++ b/src/allmydata/test/test_util.py @@ -443,3 +443,10 @@ class HashUtilTests(unittest.TestCase): self.failUnlessEqual(len(h1), 16) self.failUnlessEqual(len(h2), 16) self.failUnlessEqual(h1, h2) + + def test_chk(self): + h1 = hashutil.content_hash_key_hash(3, 10, 1000, "data") + h2 = hashutil.content_hash_key_hasher(3, 10, 1000) + h2.update("data") + h2 = h2.digest() + self.failUnlessEqual(h1, h2) diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py index 23fe41e6..ac1613e9 100644 --- a/src/allmydata/upload.py +++ b/src/allmydata/upload.py @@ -11,7 +11,7 @@ from foolscap.logging import log from allmydata.util.hashutil import file_renewal_secret_hash, \ file_cancel_secret_hash, bucket_renewal_secret_hash, \ bucket_cancel_secret_hash, plaintext_hasher, \ - storage_index_hash, plaintext_segment_hasher, key_hasher + storage_index_hash, plaintext_segment_hasher, content_hash_key_hasher from allmydata import encode, storage, hashtree, uri from allmydata.util import idlib, mathutil from allmydata.util.assertutil import precondition @@ -945,10 +945,14 @@ class FileHandle(BaseUploadable): self._contenthashkey = contenthashkey def _get_encryption_key_content_hash(self): - if self._key is None: + if self._key is not None: + return defer.succeed(self._key) + + d = self.get_all_encoding_parameters() + def _got(params): + k, happy, n, segsize = 
params f = self._filehandle - enckey_hasher = key_hasher() - #enckey_hasher.update(encoding_parameters) # TODO + enckey_hasher = content_hash_key_hasher(k, n, segsize) f.seek(0) BLOCKSIZE = 64*1024 while True: @@ -957,9 +961,11 @@ class FileHandle(BaseUploadable): break enckey_hasher.update(data) f.seek(0) - self._key = enckey_hasher.digest()[:16] - - return defer.succeed(self._key) + self._key = enckey_hasher.digest() + assert len(self._key) == 16 + return self._key + d.addCallback(_got) + return d def _get_encryption_key_random(self): if self._key is None: diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py index 8a8dc4c6..412fd70a 100644 --- a/src/allmydata/util/hashutil.py +++ b/src/allmydata/util/hashutil.py @@ -66,12 +66,10 @@ def tagged_hasher(tag): def storage_index_hash(key): # storage index is truncated to 128 bits (16 bytes). We're only hashing a # 16-byte value to get it, so there's no point in using a larger value. - # TODO: remove the word "CHK" from this tag since we use this same tagged - # hash for random-keyed immutable files, mutable files, content-hash-keyed - # immutabie files. Or, define two other tagged hashes, one for each kind. - # (Either way is fine -- we can never have collisions of storage indexes - # anyway, since we can't have collisions of keys.) - return tagged_hash("allmydata_CHK_storage_index_v1", key)[:16] + # We use this same tagged hash to go from encryption key to storage index + # for random-keyed immutable files and content-hash-keyed immutable + # files. Mutable files use ssk_storage_index_hash(). 
+ return tagged_hash_256d("allmydata_immutable_storage_index_v2", key, 16) def block_hash(data): return tagged_hash("allmydata_encoded_subshare_v1", data) @@ -103,10 +101,16 @@ def plaintext_segment_hash(data): def plaintext_segment_hasher(): return tagged_hasher("allmydata_plaintext_segment_v1") -def key_hash(data): - return tagged_hash("allmydata_encryption_key_v1", data) -def key_hasher(): - return tagged_hasher("allmydata_encryption_key_v1") +def content_hash_key_hash(k, n, segsize, data): + # this is defined to return a 16-byte AES key. We use SHA-256d here.. + # we'd like to use it everywhere, but we're only switching algorithms + # when we can hide the compatibility breaks in other necessary changes. + param_tag = netstring("%d,%d,%d" % (k, n, segsize)) + h = tagged_hash_256d("allmydata_encryption_key_v2+" + param_tag, data, 16) + return h +def content_hash_key_hasher(k, n, segsize): + param_tag = netstring("%d,%d,%d" % (k, n, segsize)) + return tagged_hasher_256d("allmydata_encryption_key_v2+" + param_tag, 16) KEYLEN = 16 def random_key():