From: Brian Warner <warner@allmydata.com>
Date: Thu, 7 Feb 2008 02:50:47 +0000 (-0700)
Subject: change encryption-key hash to include encoding parameters. This is a minor compatibil... 
X-Git-Tag: allmydata-tahoe-0.8.0~114
X-Git-Url: https://git.rkrishnan.org/%5B/%5D%20/file/URI:LIT:krugkidfnzsc4/(%5B%5E?a=commitdiff_plain;h=da9610e90ac65c381c3f6c6fb074e29f07f0e110;p=tahoe-lafs%2Ftahoe-lafs.git

Change encryption-key hash to include encoding parameters. This is a minor compatibility break: CHK files encoded (with convergence) before and after this change will have different keys and ciphertexts. Also switched to SHA-256d for both the data-to-key hash and the key-to-storage-index hash.
---

diff --git a/src/allmydata/test/test_helper.py b/src/allmydata/test/test_helper.py
index b45811a8..6e0c8322 100644
--- a/src/allmydata/test/test_helper.py
+++ b/src/allmydata/test/test_helper.py
@@ -7,7 +7,7 @@ from foolscap import Tub, eventual
 from foolscap.logging import log
 
 from allmydata import upload, offloaded
-from allmydata.util import hashutil, fileutil, idlib
+from allmydata.util import hashutil, fileutil, idlib, mathutil
 from pycryptopp.cipher.aes import AES
 
 MiB = 1024*1024
@@ -139,8 +139,18 @@ class AssistedUpload(unittest.TestCase):
         # we want to make sure that an upload which fails (leaving the
         # ciphertext in the CHK_encoding/ directory) does not prevent a later
         # attempt to upload that file from working. We simulate this by
-        # populating the directory manually.
-        key = hashutil.key_hash(DATA)[:16]
+        # populating the directory manually. The hardest part is guessing the
+        # storage index.
+
+        k = FakeClient.DEFAULT_ENCODING_PARAMETERS["k"]
+        n = FakeClient.DEFAULT_ENCODING_PARAMETERS["n"]
+        max_segsize = FakeClient.DEFAULT_ENCODING_PARAMETERS["max_segment_size"]
+        segsize = min(max_segsize, len(DATA))
+        # this must be a multiple of 'required_shares'==k
+        segsize = mathutil.next_multiple(segsize, k)
+
+        key = hashutil.content_hash_key_hash(k, n, segsize, DATA)
+        assert len(key) == 16
         encryptor = AES(key)
         SI = hashutil.storage_index_hash(key)
         SI_s = idlib.b2a(SI)
diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py
index 21ed7253..46fa94c3 100644
--- a/src/allmydata/test/test_util.py
+++ b/src/allmydata/test/test_util.py
@@ -443,3 +443,10 @@ class HashUtilTests(unittest.TestCase):
         self.failUnlessEqual(len(h1), 16)
         self.failUnlessEqual(len(h2), 16)
         self.failUnlessEqual(h1, h2)
+
+    def test_chk(self):
+        h1 = hashutil.content_hash_key_hash(3, 10, 1000, "data")
+        h2 = hashutil.content_hash_key_hasher(3, 10, 1000)
+        h2.update("data")
+        h2 = h2.digest()
+        self.failUnlessEqual(h1, h2)
diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py
index 23fe41e6..ac1613e9 100644
--- a/src/allmydata/upload.py
+++ b/src/allmydata/upload.py
@@ -11,7 +11,7 @@ from foolscap.logging import log
 from allmydata.util.hashutil import file_renewal_secret_hash, \
      file_cancel_secret_hash, bucket_renewal_secret_hash, \
      bucket_cancel_secret_hash, plaintext_hasher, \
-     storage_index_hash, plaintext_segment_hasher, key_hasher
+     storage_index_hash, plaintext_segment_hasher, content_hash_key_hasher
 from allmydata import encode, storage, hashtree, uri
 from allmydata.util import idlib, mathutil
 from allmydata.util.assertutil import precondition
@@ -945,10 +945,14 @@ class FileHandle(BaseUploadable):
         self._contenthashkey = contenthashkey
 
     def _get_encryption_key_content_hash(self):
-        if self._key is None:
+        if self._key is not None:
+            return defer.succeed(self._key)
+
+        d = self.get_all_encoding_parameters()
+        def _got(params):
+            k, happy, n, segsize = params
             f = self._filehandle
-            enckey_hasher = key_hasher()
-            #enckey_hasher.update(encoding_parameters) # TODO
+            enckey_hasher = content_hash_key_hasher(k, n, segsize)
             f.seek(0)
             BLOCKSIZE = 64*1024
             while True:
@@ -957,9 +961,11 @@ class FileHandle(BaseUploadable):
                     break
                 enckey_hasher.update(data)
             f.seek(0)
-            self._key = enckey_hasher.digest()[:16]
-
-        return defer.succeed(self._key)
+            self._key = enckey_hasher.digest()
+            assert len(self._key) == 16
+            return self._key
+        d.addCallback(_got)
+        return d
 
     def _get_encryption_key_random(self):
         if self._key is None:
diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py
index 8a8dc4c6..412fd70a 100644
--- a/src/allmydata/util/hashutil.py
+++ b/src/allmydata/util/hashutil.py
@@ -66,12 +66,10 @@ def tagged_hasher(tag):
 def storage_index_hash(key):
     # storage index is truncated to 128 bits (16 bytes). We're only hashing a
     # 16-byte value to get it, so there's no point in using a larger value.
-    # TODO: remove the word "CHK" from this tag since we use this same tagged
-    # hash for random-keyed immutable files, mutable files, content-hash-keyed
-    # immutabie files.  Or, define two other tagged hashes, one for each kind.
-    # (Either way is fine -- we can never have collisions of storage indexes
-    # anyway, since we can't have collisions of keys.)
-    return tagged_hash("allmydata_CHK_storage_index_v1", key)[:16]
+    # We use this same tagged hash to go from encryption key to storage index
+    # for random-keyed immutable files and content-hash-keyed immutable
+    # files. Mutable files use ssk_storage_index_hash().
+    return tagged_hash_256d("allmydata_immutable_storage_index_v2", key, 16)
 
 def block_hash(data):
     return tagged_hash("allmydata_encoded_subshare_v1", data)
@@ -103,10 +101,16 @@ def plaintext_segment_hash(data):
 def plaintext_segment_hasher():
     return tagged_hasher("allmydata_plaintext_segment_v1")
 
-def key_hash(data):
-    return tagged_hash("allmydata_encryption_key_v1", data)
-def key_hasher():
-    return tagged_hasher("allmydata_encryption_key_v1")
+def content_hash_key_hash(k, n, segsize, data):
+    # This is defined to return a 16-byte AES key. We use SHA-256d here;
+    # we'd like to use it everywhere, but we're only switching algorithms
+    # when we can hide the compatibility breaks in other necessary changes.
+    param_tag = netstring("%d,%d,%d" % (k, n, segsize))
+    h = tagged_hash_256d("allmydata_encryption_key_v2+" + param_tag, data, 16)
+    return h
+def content_hash_key_hasher(k, n, segsize):
+    param_tag = netstring("%d,%d,%d" % (k, n, segsize))
+    return tagged_hasher_256d("allmydata_encryption_key_v2+" + param_tag, 16)
 
 KEYLEN = 16
 def random_key():