From 9c5ab89afe75963f6a50e995131930932491bc04 Mon Sep 17 00:00:00 2001 From: Brian Warner Date: Sun, 22 Jul 2007 19:48:44 -0700 Subject: [PATCH] truncate storage index to 128 bits, since it's derived from a 128 bit AES key --- docs/uri.txt | 5 +++-- src/allmydata/interfaces.py | 2 +- src/allmydata/test/test_upload.py | 2 +- src/allmydata/upload.py | 6 +++--- src/allmydata/uri.py | 2 +- src/allmydata/util/hashutil.py | 4 +++- 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/docs/uri.txt b/docs/uri.txt index e5f54723..e83c873f 100644 --- a/docs/uri.txt +++ b/docs/uri.txt @@ -48,8 +48,9 @@ property), and encrypted using a "read key". A secure hash of the data is computed to help validate the data afterwards (providing the "identification" property). All of these pieces, plus information about the file's size and the number of shares into which it has been distributed, are put into the -"CHK" uri. The storage index is derived by hashing the read key, so it does -not need to be physically present in the URI. +"CHK" uri. The storage index is derived by hashing the read key (using a +tagged SHA-256 hash, then truncated to 128 bits), so it does not need to be +physically present in the URI. The current format for CHK URIs is the concatenation of the following strings: diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index 84e8de2e..cffc7649 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -11,7 +11,7 @@ Hash = StringConstraint(maxLength=HASH_SIZE, Nodeid = StringConstraint(maxLength=20, minLength=20) # binary format 20-byte SHA1 hash FURL = StringConstraint(1000) -StorageIndex = StringConstraint(32) +StorageIndex = StringConstraint(16) URI = StringConstraint(300) # kind of arbitrary MAX_BUCKETS = 200 # per peer ShareData = StringConstraint(400000) # 1MB segment / k=3 = 334kB diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py index 3b335d1f..15148085 100644 --- a/src/allmydata/test/test_upload.py +++ b/src/allmydata/test/test_upload.py @@ -163,7 +163,7 @@ class GoodServer(unittest.TestCase): u = IFileURI(newuri) self.failUnless(isinstance(u, uri.CHKFileURI)) self.failUnless(isinstance(u.storage_index, str)) - self.failUnlessEqual(len(u.storage_index), 32) + self.failUnlessEqual(len(u.storage_index), 16) self.failUnless(isinstance(u.key, str)) self.failUnlessEqual(len(u.key), 16) self.failUnlessEqual(u.size, size) diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py index 3a18f3f6..7f9ee0ab 100644 --- a/src/allmydata/upload.py +++ b/src/allmydata/upload.py @@ -288,9 +288,9 @@ class CHKUploader: self._encoder.set_encryption_key(key) storage_index = hashutil.storage_index_chk_hash(key) assert isinstance(storage_index, str) - # TODO: is there any point to having the SI be longer than the key? - # There's certainly no extra entropy to be had.. - assert len(storage_index) == 32 # SHA-256 + # There's no point to having the SI be longer than the key, so we + # specify that it is truncated to the same 128 bits as the AES key. + assert len(storage_index) == 16 # SHA-256 truncated to 128b self._storage_index = storage_index log.msg(" upload storage_index is [%s]" % (idlib.b2a(storage_index,))) diff --git a/src/allmydata/uri.py b/src/allmydata/uri.py index 47e617d1..72aece89 100644 --- a/src/allmydata/uri.py +++ b/src/allmydata/uri.py @@ -53,7 +53,7 @@ class CHKFileURI(_BaseURI): self.storage_index = hashutil.storage_index_chk_hash(self.key) assert isinstance(self.storage_index, str) - assert len(self.storage_index) == 32 # sha256 hash + assert len(self.storage_index) == 16 # sha256 hash truncated to 128 self.uri_extension_hash = idlib.a2b(uri_extension_hash_s) assert isinstance(self.uri_extension_hash, str) diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py index 62042c4b..25ac2424 100644 --- a/src/allmydata/util/hashutil.py +++ b/src/allmydata/util/hashutil.py @@ -23,7 +23,9 @@ def tagged_hasher(tag): return SHA256.new(netstring(tag)) def storage_index_chk_hash(data): - return tagged_hash("allmydata_CHK_storage_index_v1", data) + # storage index is truncated to 128 bits (16 bytes). We're only hashing a + # 16-byte value to get it, so there's no point in using a larger value. + return tagged_hash("allmydata_CHK_storage_index_v1", data)[:16] def block_hash(data): return tagged_hash("allmydata_encoded_subshare_v1", data) -- 2.45.2