truncate storage index to 128 bits, since it's derived from a 128 bit AES key

author Brian Warner <warner@lothar.com>

Mon, 23 Jul 2007 02:48:44 +0000 (19:48 -0700)

committer Brian Warner <warner@lothar.com>

Mon, 23 Jul 2007 02:48:44 +0000 (19:48 -0700)
author Brian Warner <warner@lothar.com>
Mon, 23 Jul 2007 02:48:44 +0000 (19:48 -0700)
committer Brian Warner <warner@lothar.com>
Mon, 23 Jul 2007 02:48:44 +0000 (19:48 -0700)
diff --git a/docs/uri.txt b/docs/uri.txt

index e5f54723f7b91e1933b0525d6a3be1ceab2153d5..e83c873fa242ee5fc0c0da310ff0c139a42f85b4 100644 (file)
--- a/docs/uri.txt
+++ b/docs/uri.txt
@@ -48,8 +48,9 @@ property), and encrypted using a "read key". A secure hash of the data is
  computed to help validate the data afterwards (providing the "identification"
  property). All of these pieces, plus information about the file's size and
  the number of shares into which it has been distributed, are put into the
-"CHK" uri. The storage index is derived by hashing the read key, so it does
-not need to be physically present in the URI.
+"CHK" uri. The storage index is derived by hashing the read key (using a
+tagged SHA-256 hash, then truncated to 128 bits), so it does not need to be
+physically present in the URI.
  
  The current format for CHK URIs is the concatenation of the following
  strings:
diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py

index 84e8de2e683bf0692eda18e00b42442497f6fd1b..cffc764954b09de7555670119b71454e6e8ef3d1 100644 (file)
--- a/src/allmydata/interfaces.py
+++ b/src/allmydata/interfaces.py
@@ -11,7 +11,7 @@ Hash = StringConstraint(maxLength=HASH_SIZE,
  Nodeid = StringConstraint(maxLength=20,
                            minLength=20) # binary format 20-byte SHA1 hash
  FURL = StringConstraint(1000)
-StorageIndex = StringConstraint(32)
+StorageIndex = StringConstraint(16)
  URI = StringConstraint(300) # kind of arbitrary
  MAX_BUCKETS = 200  # per peer
  ShareData = StringConstraint(400000) # 1MB segment / k=3 = 334kB
diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py

index 3b335d1f5dd9ecbaf3dd94952d8df7317e93dc9e..15148085dc21ea405dbeae8d7b8f3290b9fa9bd9 100644 (file)
--- a/src/allmydata/test/test_upload.py
+++ b/src/allmydata/test/test_upload.py
@@ -163,7 +163,7 @@ class GoodServer(unittest.TestCase):
          u = IFileURI(newuri)
          self.failUnless(isinstance(u, uri.CHKFileURI))
          self.failUnless(isinstance(u.storage_index, str))
-        self.failUnlessEqual(len(u.storage_index), 32)
+        self.failUnlessEqual(len(u.storage_index), 16)
          self.failUnless(isinstance(u.key, str))
          self.failUnlessEqual(len(u.key), 16)
          self.failUnlessEqual(u.size, size)
diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py

index 3a18f3f63927bd862ee784d288ac9015e4181269..7f9ee0ab6317592e59f5b50d3ad223fad3af7680 100644 (file)
--- a/src/allmydata/upload.py
+++ b/src/allmydata/upload.py
@@ -288,9 +288,9 @@ class CHKUploader:
          self._encoder.set_encryption_key(key)
          storage_index = hashutil.storage_index_chk_hash(key)
          assert isinstance(storage_index, str)
-        # TODO: is there any point to having the SI be longer than the key?
-        # There's certainly no extra entropy to be had..
-        assert len(storage_index) == 32  # SHA-256
+        # There's no point to having the SI be longer than the key, so we
+        # specify that it is truncated to the same 128 bits as the AES key.
+        assert len(storage_index) == 16  # SHA-256 truncated to 128b
          self._storage_index = storage_index
          log.msg(" upload storage_index is [%s]" % (idlib.b2a(storage_index,)))
  
diff --git a/src/allmydata/uri.py b/src/allmydata/uri.py

index 47e617d1f9358ab48e4dba98da7002f93b6b34ea..72aece890eb6d0772f765379e65a2d1116efa0ce 100644 (file)
--- a/src/allmydata/uri.py
+++ b/src/allmydata/uri.py
@@ -53,7 +53,7 @@ class CHKFileURI(_BaseURI):
  
          self.storage_index = hashutil.storage_index_chk_hash(self.key)
          assert isinstance(self.storage_index, str)
-        assert len(self.storage_index) == 32 # sha256 hash
+        assert len(self.storage_index) == 16 # sha256 hash truncated to 128
  
          self.uri_extension_hash = idlib.a2b(uri_extension_hash_s)
          assert isinstance(self.uri_extension_hash, str)
diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py

index 62042c4b8e59bfccb9b209604bf3778376c6c75b..25ac24244837d4136dacae5bec62f7ca98666422 100644 (file)
--- a/src/allmydata/util/hashutil.py
+++ b/src/allmydata/util/hashutil.py
@@ -23,7 +23,9 @@ def tagged_hasher(tag):
      return SHA256.new(netstring(tag))
  
  def storage_index_chk_hash(data):
-    return tagged_hash("allmydata_CHK_storage_index_v1", data)
+    # storage index is truncated to 128 bits (16 bytes). We're only hashing a
+    # 16-byte value to get it, so there's no point in using a larger value.
+    return tagged_hash("allmydata_CHK_storage_index_v1", data)[:16]
  
  def block_hash(data):
      return tagged_hash("allmydata_encoded_subshare_v1", data)
author	Brian Warner <warner@lothar.com>
	Mon, 23 Jul 2007 02:48:44 +0000 (19:48 -0700)
committer	Brian Warner <warner@lothar.com>
	Mon, 23 Jul 2007 02:48:44 +0000 (19:48 -0700)
docs/uri.txt		patch | blob | history
src/allmydata/interfaces.py		patch | blob | history
src/allmydata/test/test_upload.py		patch | blob | history
src/allmydata/upload.py		patch | blob | history
src/allmydata/uri.py		patch | blob | history
src/allmydata/util/hashutil.py		patch | blob | history