CHK: remove the storage index from the URI, deriving it from the key instead
author Brian Warner <warner@lothar.com>
Sun, 22 Jul 2007 01:23:15 +0000 (18:23 -0700)
committer Brian Warner <warner@lothar.com>
Sun, 22 Jul 2007 01:23:15 +0000 (18:23 -0700)
docs/uri.txt
src/allmydata/test/test_dirnode.py
src/allmydata/test/test_encode.py
src/allmydata/test/test_system.py
src/allmydata/test/test_uri.py
src/allmydata/test/test_web.py
src/allmydata/upload.py
src/allmydata/uri.py

index 14ea8db37d3045c820d000f8b9cde3d75e984ab8..e5f54723f7b91e1933b0525d6a3be1ceab2153d5 100644 (file)
@@ -48,29 +48,25 @@ property), and encrypted using a "read key". A secure hash of the data is
 computed to help validate the data afterwards (providing the "identification"
 property). All of these pieces, plus information about the file's size and
 the number of shares into which it has been distributed, are put into the
-"CHK" uri.
-
-In the current release, these URIs contain both the storage index and the
-read key. The plan is to derive the storage index from the read key, so only
-the latter will appear in the URI.
+"CHK" uri. The storage index is derived by hashing the read key, so it does
+not need to be physically present in the URI.
 
 The current format for CHK URIs is the concatenation of the following
 strings:
 
- URI:CHK:(storage-index):(key):(hash):(needed-shares):(total-shares):(size)
+ URI:CHK:(key):(hash):(needed-shares):(total-shares):(size)
 
-Where (storage-index) is the base32 encoding of the (binary) storage index
-(which itself is a SHA-256 hash), (key) is the base32 encoding of the 16-byte
-AES read key, (hash) is the base32 encoding of the SHA-256 hash of the URI
-Extension Block, (needed-shares) is an ascii decimal representation of the
-number of shares required to reconstruct this file, (total-shares) is the
-same representation of the total number of shares created, and (size) is an
-ascii decimal representation of the size of the data represented by this URI.
+Where (key) is the base32 encoding of the 16-byte AES read key, (hash) is the
+base32 encoding of the SHA-256 hash of the URI Extension Block,
+(needed-shares) is an ascii decimal representation of the number of shares
+required to reconstruct this file, (total-shares) is the same representation
+of the total number of shares created, and (size) is an ascii decimal
+representation of the size of the data represented by this URI.
 
 For example, the following is a CHK URI, generated from the contents of the
 architecture.txt document that lives next to this one in the source tree:
 
-URI:CHK:khzth3n672elnovimdpaczwuyukwm42vkdzenbqoaj6sqebk3zjq====:ihrbeov7lbvoduupd4qblysj7a======:bg5agsdt62jb34hxvxmdsbza6do64f4fg5anxxod2buttbo6udzq====:3:10:28733
+URI:CHK:ihrbeov7lbvoduupd4qblysj7a======:bg5agsdt62jb34hxvxmdsbza6do64f4fg5anxxod2buttbo6udzq====:3:10:28733
 
 
 === LIT URIs ===
index d6b69cd2f9d7aa7a6290e8964acf5483020d07e5..cc64c4ef31d2af11171b6010fc86f0f448f1e250 100644 (file)
@@ -173,14 +173,12 @@ class Test(unittest.TestCase):
             self.failUnlessEqual(res, {})
         d.addCallback(_listed)
 
-        file1 = uri.CHKFileURI(storage_index="11" + " "*30,
-                               key="k"*16,
+        file1 = uri.CHKFileURI(key="k"*15+"1",
                                uri_extension_hash="e"*32,
                                needed_shares=25,
                                total_shares=100,
                                size=12345).to_string()
-        file2 = uri.CHKFileURI(storage_index="2i" + " "*30,
-                               key="k"*16,
+        file2 = uri.CHKFileURI(key="k"*15+"2",
                                uri_extension_hash="e"*32,
                                needed_shares=25,
                                total_shares=100,
index 2cdec5d6d4dbca82395cb5b4431ca02bf584e04f..16b1a3a1fb16860ef9af59004b904abe2a1901ff 100644 (file)
@@ -302,15 +302,21 @@ class Roundtrip(unittest.TestCase):
     def recover(self, (uri_extension_hash, e, shareholders), AVAILABLE_SHARES,
                 recover_mode):
         key = e.key
+
         if "corrupt_key" in recover_mode:
-            key = flip_bit(key)
-
-        URI = uri.CHKFileURI(storage_index="S" * 32,
-                             key=key,
-                             uri_extension_hash=uri_extension_hash,
-                             needed_shares=e.required_shares,
-                             total_shares=e.num_shares,
-                             size=e.file_size).to_string()
+            # we corrupt the key, so that the decrypted data is corrupted and
+            # will fail the plaintext hash check. Since we're manually
+            # attaching shareholders, the fact that the storage index is also
+            # corrupted doesn't matter.
+            key = flip_bit(e.key)
+
+        u = uri.CHKFileURI(key=key,
+                           uri_extension_hash=uri_extension_hash,
+                           needed_shares=e.required_shares,
+                           total_shares=e.num_shares,
+                           size=e.file_size)
+        URI = u.to_string()
+
         client = None
         target = download.Data()
         fd = download.FileDownloader(client, URI, target)
index 432d503d7fd391e7d4bb39864dedd230c51140ee..a1948eb19acce25b453595b6097c05aec3de45fb 100644 (file)
@@ -222,11 +222,10 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
         return good[:-1] + chr(ord(good[-1]) ^ 0x01)
 
     def mangle_uri(self, gooduri):
-        # change the storage index, which means we'll be asking about the
-        # wrong file, so nobody will have any shares
+        # change the key, which changes the storage index, which means we'll
+        # be asking about the wrong file, so nobody will have any shares
         u = IFileURI(gooduri)
-        u2 = uri.CHKFileURI(storage_index=self.flip_bit(u.storage_index),
-                            key=u.key,
+        u2 = uri.CHKFileURI(key=self.flip_bit(u.key),
                             uri_extension_hash=u.uri_extension_hash,
                             needed_shares=u.needed_shares,
                             total_shares=u.total_shares,
index 8aa0193882afdb677770be4cf053d3a42d1ad42e..a05e8e5eff6183a377a9c8977df0582f3f50cab9 100644 (file)
@@ -47,14 +47,13 @@ class Literal(unittest.TestCase):
 
 class CHKFile(unittest.TestCase):
     def test_pack(self):
-        storage_index = hashutil.tagged_hash("foo", "bar")
-        key = "\x00" * 16
+        key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+        storage_index = hashutil.storage_index_chk_hash(key)
         uri_extension_hash = hashutil.uri_extension_hash("stuff")
         needed_shares = 25
         total_shares = 100
         size = 1234
-        u = uri.CHKFileURI(storage_index=storage_index,
-                           key=key,
+        u = uri.CHKFileURI(key=key,
                            uri_extension_hash=uri_extension_hash,
                            needed_shares=needed_shares,
                            total_shares=total_shares,
index dbc9a845cd38c5007ff78d5ca03f5d23582723be..2c46c4f25c58e1c6a35559698c00f3d438fafb0b 100644 (file)
@@ -51,8 +51,7 @@ uri_counter = itertools.count()
 def make_newuri(data):
     n = uri_counter.next()
     assert len(str(n)) < 5
-    newuri = uri.CHKFileURI(storage_index="SI%05d" % n + "i"*25,
-                            key="K"*16,
+    newuri = uri.CHKFileURI(key="K%05d" % n + "k"*10,
                             uri_extension_hash="EH" + "h"*30,
                             needed_shares=25,
                             total_shares=100,
@@ -220,8 +219,7 @@ class Web(unittest.TestCase):
     def makefile(self, number):
         n = str(number)
         assert len(n) == 1
-        newuri = uri.CHKFileURI(storage_index="SI" + n*30,
-                                key="K" + n*15,
+        newuri = uri.CHKFileURI(key="K" + n*15,
                                 uri_extension_hash="EH" + n*30,
                                 needed_shares=25,
                                 total_shares=100,
index 59b8db32049bfa6dc4149268d70b64b28f6b1c62..3a18f3f63927bd862ee784d288ac9015e4181269 100644 (file)
@@ -292,7 +292,7 @@ class CHKUploader:
         # There's certainly no extra entropy to be had..
         assert len(storage_index) == 32  # SHA-256
         self._storage_index = storage_index
-        log.msg(" upload SI is [%s]" % (idlib.b2a(storage_index,)))
+        log.msg(" upload storage_index is [%s]" % (idlib.b2a(storage_index,)))
 
 
     def locate_all_shareholders(self, ignored=None):
@@ -320,13 +320,13 @@ class CHKUploader:
         self._encoder.set_shareholders(buckets)
 
     def _compute_uri(self, uri_extension_hash):
-        u = uri.CHKFileURI(storage_index=self._storage_index,
-                           key=self._encryption_key,
+        u = uri.CHKFileURI(key=self._encryption_key,
                            uri_extension_hash=uri_extension_hash,
                            needed_shares=self.needed_shares,
                            total_shares=self.total_shares,
                            size=self._size,
                            )
+        assert u.storage_index == self._storage_index
         return u.to_string()
 
 def read_this_many_bytes(uploadable, size, prepend_data=[]):
index 90d095dcc1ff0b924b808b0ba9e3621da5da9202..47e617d1f9358ab48e4dba98da7002f93b6b34ea 100644 (file)
@@ -27,43 +27,50 @@ class CHKFileURI(_BaseURI):
         # construct me with kwargs, since there are so many of them
         if not kwargs:
             return
-        for name in ("storage_index", "key", "uri_extension_hash",
-                     "needed_shares", "total_shares", "size"):
-            value = kwargs[name]
-            setattr(self, name, value)
+        keys = ("key", "uri_extension_hash",
+                "needed_shares", "total_shares", "size")
+        for name in kwargs:
+            if name in keys:
+                value = kwargs[name]
+                setattr(self, name, value)
+            else:
+                raise TypeError("CHKFileURI does not accept '%s=' argument"
+                                % name)
+        self.storage_index = hashutil.storage_index_chk_hash(self.key)
 
     def init_from_string(self, uri):
         assert uri.startswith("URI:CHK:"), uri
         d = {}
         (header_uri, header_chk,
-         storage_index_s, key_s, uri_extension_hash_s,
+         key_s, uri_extension_hash_s,
          needed_shares_s, total_shares_s, size_s) = uri.split(":")
         assert header_uri == "URI"
         assert header_chk == "CHK"
-        self.storage_index = idlib.a2b(storage_index_s)
+
         self.key = idlib.a2b(key_s)
-        self.uri_extension_hash = idlib.a2b(uri_extension_hash_s)
-        self.needed_shares = int(needed_shares_s)
-        self.total_shares = int(total_shares_s)
-        self.size = int(size_s)
-        return self
+        assert isinstance(self.key, str)
+        assert len(self.key) == 16 # AES-128
 
-    def to_string(self):
+        self.storage_index = hashutil.storage_index_chk_hash(self.key)
         assert isinstance(self.storage_index, str)
         assert len(self.storage_index) == 32 # sha256 hash
 
+        self.uri_extension_hash = idlib.a2b(uri_extension_hash_s)
         assert isinstance(self.uri_extension_hash, str)
         assert len(self.uri_extension_hash) == 32 # sha256 hash
 
-        assert isinstance(self.key, str)
-        assert len(self.key) == 16 # AES-128
+        self.needed_shares = int(needed_shares_s)
+        self.total_shares = int(total_shares_s)
+        self.size = int(size_s)
+        return self
+
+    def to_string(self):
         assert isinstance(self.needed_shares, int)
         assert isinstance(self.total_shares, int)
         assert isinstance(self.size, (int,long))
 
-        return ("URI:CHK:%s:%s:%s:%d:%d:%d" %
-                (idlib.b2a(self.storage_index),
-                 idlib.b2a(self.key),
+        return ("URI:CHK:%s:%s:%d:%d:%d" %
+                (idlib.b2a(self.key),
                  idlib.b2a(self.uri_extension_hash),
                  self.needed_shares,
                  self.total_shares,