From: Brian Warner Date: Sun, 22 Jul 2007 01:23:15 +0000 (-0700) Subject: CHK: remove the storage index from the URI, deriving it from the key instead X-Git-Url: https://git.rkrishnan.org/pf/content//%22%3C?a=commitdiff_plain;h=81a99044554f72efdf908d17ffe1d8a3452a11a9;p=tahoe-lafs%2Ftahoe-lafs.git CHK: remove the storage index from the URI, deriving it from the key instead --- diff --git a/docs/uri.txt b/docs/uri.txt index 14ea8db3..e5f54723 100644 --- a/docs/uri.txt +++ b/docs/uri.txt @@ -48,29 +48,25 @@ property), and encrypted using a "read key". A secure hash of the data is computed to help validate the data afterwards (providing the "identification" property). All of these pieces, plus information about the file's size and the number of shares into which it has been distributed, are put into the -"CHK" uri. - -In the current release, these URIs contain both the storage index and the -read key. The plan is to derive the storage index from the read key, so only -the latter will appear in the URI. +"CHK" uri. The storage index is derived by hashing the read key, so it does +not need to be physically present in the URI. The current format for CHK URIs is the concatenation of the following strings: - URI:CHK:(storage-index):(key):(hash):(needed-shares):(total-shares):(size) + URI:CHK:(key):(hash):(needed-shares):(total-shares):(size) -Where (storage-index) is the base32 encoding of the (binary) storage index -(which itself is a SHA-256 hash), (key) is the base32 encoding of the 16-byte -AES read key, (hash) is the base32 encoding of the SHA-256 hash of the URI -Extension Block, (needed-shares) is an ascii decimal representation of the -number of shares required to reconstruct this file, (total-shares) is the -same representation of the total number of shares created, and (size) is an -ascii decimal representation of the size of the data represented by this URI. +Where (key) is the base32 encoding of the 16-byte AES read key, (hash) is the +base32 encoding of the SHA-256 hash of the URI Extension Block, +(needed-shares) is an ascii decimal representation of the number of shares +required to reconstruct this file, (total-shares) is the same representation +of the total number of shares created, and (size) is an ascii decimal +representation of the size of the data represented by this URI. For example, the following is a CHK URI, generated from the contents of the architecture.txt document that lives next to this one in the source tree: -URI:CHK:khzth3n672elnovimdpaczwuyukwm42vkdzenbqoaj6sqebk3zjq====:ihrbeov7lbvoduupd4qblysj7a======:bg5agsdt62jb34hxvxmdsbza6do64f4fg5anxxod2buttbo6udzq====:3:10:28733 +URI:CHK:ihrbeov7lbvoduupd4qblysj7a======:bg5agsdt62jb34hxvxmdsbza6do64f4fg5anxxod2buttbo6udzq====:3:10:28733 === LIT URIs === diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py index d6b69cd2..cc64c4ef 100644 --- a/src/allmydata/test/test_dirnode.py +++ b/src/allmydata/test/test_dirnode.py @@ -173,14 +173,12 @@ class Test(unittest.TestCase): self.failUnlessEqual(res, {}) d.addCallback(_listed) - file1 = uri.CHKFileURI(storage_index="11" + " "*30, - key="k"*16, + file1 = uri.CHKFileURI(key="k"*15+"1", uri_extension_hash="e"*32, needed_shares=25, total_shares=100, size=12345).to_string() - file2 = uri.CHKFileURI(storage_index="2i" + " "*30, - key="k"*16, + file2 = uri.CHKFileURI(key="k"*15+"2", uri_extension_hash="e"*32, needed_shares=25, total_shares=100, diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py index 2cdec5d6..16b1a3a1 100644 --- a/src/allmydata/test/test_encode.py +++ b/src/allmydata/test/test_encode.py @@ -302,15 +302,21 @@ class Roundtrip(unittest.TestCase): def recover(self, (uri_extension_hash, e, shareholders), AVAILABLE_SHARES, recover_mode): key = e.key + if "corrupt_key" in recover_mode: - key = flip_bit(key) - - URI = uri.CHKFileURI(storage_index="S" * 32, - key=key, - uri_extension_hash=uri_extension_hash, - needed_shares=e.required_shares, - total_shares=e.num_shares, - size=e.file_size).to_string() + # we corrupt the key, so that the decrypted data is corrupted and + # will fail the plaintext hash check. Since we're manually + # attaching shareholders, the fact that the storage index is also + # corrupted doesn't matter. + key = flip_bit(e.key) + + u = uri.CHKFileURI(key=key, + uri_extension_hash=uri_extension_hash, + needed_shares=e.required_shares, + total_shares=e.num_shares, + size=e.file_size) + URI = u.to_string() + client = None target = download.Data() fd = download.FileDownloader(client, URI, target) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 432d503d..a1948eb1 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -222,11 +222,10 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase): return good[:-1] + chr(ord(good[-1]) ^ 0x01) def mangle_uri(self, gooduri): - # change the storage index, which means we'll be asking about the - # wrong file, so nobody will have any shares + # change the key, which changes the storage index, which means we'll + # be asking about the wrong file, so nobody will have any shares u = IFileURI(gooduri) - u2 = uri.CHKFileURI(storage_index=self.flip_bit(u.storage_index), - key=u.key, + u2 = uri.CHKFileURI(key=self.flip_bit(u.key), uri_extension_hash=u.uri_extension_hash, needed_shares=u.needed_shares, total_shares=u.total_shares, diff --git a/src/allmydata/test/test_uri.py b/src/allmydata/test/test_uri.py index 8aa01938..a05e8e5e 100644 --- a/src/allmydata/test/test_uri.py +++ b/src/allmydata/test/test_uri.py @@ -47,14 +47,13 @@ class Literal(unittest.TestCase): class CHKFile(unittest.TestCase): def test_pack(self): - storage_index = hashutil.tagged_hash("foo", "bar") - key = "\x00" * 16 + key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + storage_index = hashutil.storage_index_chk_hash(key) uri_extension_hash = hashutil.uri_extension_hash("stuff") needed_shares = 25 total_shares = 100 size = 1234 - u = uri.CHKFileURI(storage_index=storage_index, - key=key, + u = uri.CHKFileURI(key=key, uri_extension_hash=uri_extension_hash, needed_shares=needed_shares, total_shares=total_shares, diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index dbc9a845..2c46c4f2 100644 --- a/src/allmydata/test/test_web.py +++ b/src/allmydata/test/test_web.py @@ -51,8 +51,7 @@ uri_counter = itertools.count() def make_newuri(data): n = uri_counter.next() assert len(str(n)) < 5 - newuri = uri.CHKFileURI(storage_index="SI%05d" % n + "i"*25, - key="K"*16, + newuri = uri.CHKFileURI(key="K%05d" % n + "k"*10, uri_extension_hash="EH" + "h"*30, needed_shares=25, total_shares=100, @@ -220,8 +219,7 @@ class Web(unittest.TestCase): def makefile(self, number): n = str(number) assert len(n) == 1 - newuri = uri.CHKFileURI(storage_index="SI" + n*30, - key="K" + n*15, + newuri = uri.CHKFileURI(key="K" + n*15, uri_extension_hash="EH" + n*30, needed_shares=25, total_shares=100, diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py index 59b8db32..3a18f3f6 100644 --- a/src/allmydata/upload.py +++ b/src/allmydata/upload.py @@ -292,7 +292,7 @@ class CHKUploader: # There's certainly no extra entropy to be had.. assert len(storage_index) == 32 # SHA-256 self._storage_index = storage_index - log.msg(" upload SI is [%s]" % (idlib.b2a(storage_index,))) + log.msg(" upload storage_index is [%s]" % (idlib.b2a(storage_index,))) def locate_all_shareholders(self, ignored=None): @@ -320,13 +320,13 @@ class CHKUploader: self._encoder.set_shareholders(buckets) def _compute_uri(self, uri_extension_hash): - u = uri.CHKFileURI(storage_index=self._storage_index, - key=self._encryption_key, + u = uri.CHKFileURI(key=self._encryption_key, uri_extension_hash=uri_extension_hash, needed_shares=self.needed_shares, total_shares=self.total_shares, size=self._size, ) + assert u.storage_index == self._storage_index return u.to_string() def read_this_many_bytes(uploadable, size, prepend_data=[]): diff --git a/src/allmydata/uri.py b/src/allmydata/uri.py index 90d095dc..47e617d1 100644 --- a/src/allmydata/uri.py +++ b/src/allmydata/uri.py @@ -27,43 +27,50 @@ class CHKFileURI(_BaseURI): # construct me with kwargs, since there are so many of them if not kwargs: return - for name in ("storage_index", "key", "uri_extension_hash", - "needed_shares", "total_shares", "size"): - value = kwargs[name] - setattr(self, name, value) + keys = ("key", "uri_extension_hash", + "needed_shares", "total_shares", "size") + for name in kwargs: + if name in keys: + value = kwargs[name] + setattr(self, name, value) + else: + raise TypeError("CHKFileURI does not accept '%s=' argument" + % name) + self.storage_index = hashutil.storage_index_chk_hash(self.key) def init_from_string(self, uri): assert uri.startswith("URI:CHK:"), uri d = {} (header_uri, header_chk, - storage_index_s, key_s, uri_extension_hash_s, + key_s, uri_extension_hash_s, needed_shares_s, total_shares_s, size_s) = uri.split(":") assert header_uri == "URI" assert header_chk == "CHK" - self.storage_index = idlib.a2b(storage_index_s) + self.key = idlib.a2b(key_s) - self.uri_extension_hash = idlib.a2b(uri_extension_hash_s) - self.needed_shares = int(needed_shares_s) - self.total_shares = int(total_shares_s) - self.size = int(size_s) - return self + assert isinstance(self.key, str) + assert len(self.key) == 16 # AES-128 - def to_string(self): + self.storage_index = hashutil.storage_index_chk_hash(self.key) assert isinstance(self.storage_index, str) assert len(self.storage_index) == 32 # sha256 hash + self.uri_extension_hash = idlib.a2b(uri_extension_hash_s) assert isinstance(self.uri_extension_hash, str) assert len(self.uri_extension_hash) == 32 # sha56 hash - assert isinstance(self.key, str) - assert len(self.key) == 16 # AES-128 + self.needed_shares = int(needed_shares_s) + self.total_shares = int(total_shares_s) + self.size = int(size_s) + return self + + def to_string(self): assert isinstance(self.needed_shares, int) assert isinstance(self.total_shares, int) assert isinstance(self.size, (int,long)) - return ("URI:CHK:%s:%s:%s:%d:%d:%d" % - (idlib.b2a(self.storage_index), - idlib.b2a(self.key), + return ("URI:CHK:%s:%s:%d:%d:%d" % + (idlib.b2a(self.key), idlib.b2a(self.uri_extension_hash), self.needed_shares, self.total_shares,