computed to help validate the data afterwards (providing the "identification"
property). All of these pieces, plus information about the file's size and
the number of shares into which it has been distributed, are put into the
-"CHK" uri.
-
-In the current release, these URIs contain both the storage index and the
-read key. The plan is to derive the storage index from the read key, so only
-the latter will appear in the URI.
+"CHK" uri. The storage index is derived by hashing the read key, so it does
+not need to be physically present in the URI.
The current format for CHK URIs is the concatenation of the following
strings:
- URI:CHK:(storage-index):(key):(hash):(needed-shares):(total-shares):(size)
+ URI:CHK:(key):(hash):(needed-shares):(total-shares):(size)
-Where (storage-index) is the base32 encoding of the (binary) storage index
-(which itself is a SHA-256 hash), (key) is the base32 encoding of the 16-byte
-AES read key, (hash) is the base32 encoding of the SHA-256 hash of the URI
-Extension Block, (needed-shares) is an ascii decimal representation of the
-number of shares required to reconstruct this file, (total-shares) is the
-same representation of the total number of shares created, and (size) is an
-ascii decimal representation of the size of the data represented by this URI.
+Where (key) is the base32 encoding of the 16-byte AES read key, (hash) is the
+base32 encoding of the SHA-256 hash of the URI Extension Block,
+(needed-shares) is an ASCII decimal representation of the number of shares
+required to reconstruct this file, (total-shares) is the same representation
+of the total number of shares created, and (size) is an ASCII decimal
+representation of the size of the data represented by this URI.
For example, the following is a CHK URI, generated from the contents of the
architecture.txt document that lives next to this one in the source tree:
-URI:CHK:khzth3n672elnovimdpaczwuyukwm42vkdzenbqoaj6sqebk3zjq====:ihrbeov7lbvoduupd4qblysj7a======:bg5agsdt62jb34hxvxmdsbza6do64f4fg5anxxod2buttbo6udzq====:3:10:28733
+URI:CHK:ihrbeov7lbvoduupd4qblysj7a======:bg5agsdt62jb34hxvxmdsbza6do64f4fg5anxxod2buttbo6udzq====:3:10:28733
=== LIT URIs ===
self.failUnlessEqual(res, {})
d.addCallback(_listed)
- file1 = uri.CHKFileURI(storage_index="11" + " "*30,
- key="k"*16,
+ file1 = uri.CHKFileURI(key="k"*15+"1",
uri_extension_hash="e"*32,
needed_shares=25,
total_shares=100,
size=12345).to_string()
- file2 = uri.CHKFileURI(storage_index="2i" + " "*30,
- key="k"*16,
+ file2 = uri.CHKFileURI(key="k"*15+"2",
uri_extension_hash="e"*32,
needed_shares=25,
total_shares=100,
def recover(self, (uri_extension_hash, e, shareholders), AVAILABLE_SHARES,
recover_mode):
key = e.key
+
if "corrupt_key" in recover_mode:
- key = flip_bit(key)
-
- URI = uri.CHKFileURI(storage_index="S" * 32,
- key=key,
- uri_extension_hash=uri_extension_hash,
- needed_shares=e.required_shares,
- total_shares=e.num_shares,
- size=e.file_size).to_string()
+ # We corrupt the key so that the decrypted data is corrupted and
+ # fails the plaintext hash check. The storage index is derived from
+ # the key, so it is corrupted too, but that doesn't matter here
+ # because we attach the shareholders manually.
+ key = flip_bit(e.key)
+
+ u = uri.CHKFileURI(key=key,
+ uri_extension_hash=uri_extension_hash,
+ needed_shares=e.required_shares,
+ total_shares=e.num_shares,
+ size=e.file_size)
+ URI = u.to_string()
+
client = None
target = download.Data()
fd = download.FileDownloader(client, URI, target)
return good[:-1] + chr(ord(good[-1]) ^ 0x01)
def mangle_uri(self, gooduri):
- # change the storage index, which means we'll be asking about the
- # wrong file, so nobody will have any shares
+ # change the key, which changes the storage index, which means we'll
+ # be asking about the wrong file, so nobody will have any shares
u = IFileURI(gooduri)
- u2 = uri.CHKFileURI(storage_index=self.flip_bit(u.storage_index),
- key=u.key,
+ u2 = uri.CHKFileURI(key=self.flip_bit(u.key),
uri_extension_hash=u.uri_extension_hash,
needed_shares=u.needed_shares,
total_shares=u.total_shares,
class CHKFile(unittest.TestCase):
def test_pack(self):
- storage_index = hashutil.tagged_hash("foo", "bar")
- key = "\x00" * 16
+ key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ storage_index = hashutil.storage_index_chk_hash(key)
uri_extension_hash = hashutil.uri_extension_hash("stuff")
needed_shares = 25
total_shares = 100
size = 1234
- u = uri.CHKFileURI(storage_index=storage_index,
- key=key,
+ u = uri.CHKFileURI(key=key,
uri_extension_hash=uri_extension_hash,
needed_shares=needed_shares,
total_shares=total_shares,
def make_newuri(data):
n = uri_counter.next()
assert len(str(n)) < 5
- newuri = uri.CHKFileURI(storage_index="SI%05d" % n + "i"*25,
- key="K"*16,
+ newuri = uri.CHKFileURI(key="K%05d" % n + "k"*10,
uri_extension_hash="EH" + "h"*30,
needed_shares=25,
total_shares=100,
def makefile(self, number):
n = str(number)
assert len(n) == 1
- newuri = uri.CHKFileURI(storage_index="SI" + n*30,
- key="K" + n*15,
+ newuri = uri.CHKFileURI(key="K" + n*15,
uri_extension_hash="EH" + n*30,
needed_shares=25,
total_shares=100,
# There's certainly no extra entropy to be had..
assert len(storage_index) == 32 # SHA-256
self._storage_index = storage_index
- log.msg(" upload SI is [%s]" % (idlib.b2a(storage_index,)))
+ log.msg(" upload storage_index is [%s]" % (idlib.b2a(storage_index,)))
def locate_all_shareholders(self, ignored=None):
self._encoder.set_shareholders(buckets)
def _compute_uri(self, uri_extension_hash):
- u = uri.CHKFileURI(storage_index=self._storage_index,
- key=self._encryption_key,
+ u = uri.CHKFileURI(key=self._encryption_key,
uri_extension_hash=uri_extension_hash,
needed_shares=self.needed_shares,
total_shares=self.total_shares,
size=self._size,
)
+ assert u.storage_index == self._storage_index
return u.to_string()
def read_this_many_bytes(uploadable, size, prepend_data=[]):
# construct me with kwargs, since there are so many of them
if not kwargs:
return
- for name in ("storage_index", "key", "uri_extension_hash",
- "needed_shares", "total_shares", "size"):
- value = kwargs[name]
- setattr(self, name, value)
+ keys = ("key", "uri_extension_hash",
+ "needed_shares", "total_shares", "size")
+ for name in kwargs:
+ if name in keys:
+ value = kwargs[name]
+ setattr(self, name, value)
+ else:
+ raise TypeError("CHKFileURI does not accept '%s=' argument"
+ % name)
+ self.storage_index = hashutil.storage_index_chk_hash(self.key)
def init_from_string(self, uri):
assert uri.startswith("URI:CHK:"), uri
d = {}
(header_uri, header_chk,
- storage_index_s, key_s, uri_extension_hash_s,
+ key_s, uri_extension_hash_s,
needed_shares_s, total_shares_s, size_s) = uri.split(":")
assert header_uri == "URI"
assert header_chk == "CHK"
- self.storage_index = idlib.a2b(storage_index_s)
+
self.key = idlib.a2b(key_s)
- self.uri_extension_hash = idlib.a2b(uri_extension_hash_s)
- self.needed_shares = int(needed_shares_s)
- self.total_shares = int(total_shares_s)
- self.size = int(size_s)
- return self
+ assert isinstance(self.key, str)
+ assert len(self.key) == 16 # AES-128
- def to_string(self):
+ self.storage_index = hashutil.storage_index_chk_hash(self.key)
assert isinstance(self.storage_index, str)
assert len(self.storage_index) == 32 # sha256 hash
+ self.uri_extension_hash = idlib.a2b(uri_extension_hash_s)
assert isinstance(self.uri_extension_hash, str)
assert len(self.uri_extension_hash) == 32 # sha256 hash
- assert isinstance(self.key, str)
- assert len(self.key) == 16 # AES-128
+ self.needed_shares = int(needed_shares_s)
+ self.total_shares = int(total_shares_s)
+ self.size = int(size_s)
+ return self
+
+ def to_string(self):
assert isinstance(self.needed_shares, int)
assert isinstance(self.total_shares, int)
assert isinstance(self.size, (int,long))
- return ("URI:CHK:%s:%s:%s:%d:%d:%d" %
- (idlib.b2a(self.storage_index),
- idlib.b2a(self.key),
+ return ("URI:CHK:%s:%s:%d:%d:%d" %
+ (idlib.b2a(self.key),
idlib.b2a(self.uri_extension_hash),
self.needed_shares,
self.total_shares,