From: Brian Warner Date: Thu, 26 Apr 2007 00:53:10 +0000 (-0700) Subject: use real encryption, generate/store/verify verifierid and fileid X-Git-Tag: tahoe_v0.1.1-0-UNSTABLE~7 X-Git-Url: https://git.rkrishnan.org/%5B/README.win32?a=commitdiff_plain;h=4b2298937b0f4dff375593ae96bdd0c0ade308c8;p=tahoe-lafs%2Ftahoe-lafs.git use real encryption, generate/store/verify verifierid and fileid --- diff --git a/src/allmydata/download.py b/src/allmydata/download.py index 856ed726..656c8ecb 100644 --- a/src/allmydata/download.py +++ b/src/allmydata/download.py @@ -25,8 +25,8 @@ class Output: self.downloadable = downloadable self._decryptor = AES.new(key=key, mode=AES.MODE_CTR, counterstart="\x00"*16) - self._verifierid_hasher = sha.new(netstring("allmydata_v1_verifierid")) - self._fileid_hasher = sha.new(netstring("allmydata_v1_fileid")) + self._verifierid_hasher = sha.new(netstring("allmydata_verifierid_v1")) + self._fileid_hasher = sha.new(netstring("allmydata_fileid_v1")) self.length = 0 def open(self): @@ -208,14 +208,17 @@ class SegmentDownloader: del self.parent._share_buckets[shnum] class FileDownloader: + check_verifierid = True + check_fileid = True def __init__(self, client, uri, downloadable): self._client = client self._downloadable = downloadable - (codec_name, codec_params, tail_codec_params, verifierid, roothash, needed_shares, total_shares, size, segment_size) = unpack_uri(uri) + (codec_name, codec_params, tail_codec_params, verifierid, fileid, key, roothash, needed_shares, total_shares, size, segment_size) = unpack_uri(uri) assert isinstance(verifierid, str) assert len(verifierid) == 20 self._verifierid = verifierid + self._fileid = fileid self._roothash = roothash self._codec = codec.get_decoder_by_name(codec_name) @@ -230,7 +233,6 @@ class FileDownloader: self._size = size self._num_needed_shares = self._codec.get_needed_shares() - key = "\x00" * 16 self._output = Output(downloadable, key) self._share_hashtree = hashtree.IncompleteHashTree(total_shares) @@ -349,10 +351,18 @@ class FileDownloader: def _done(self, res): self._output.close() - #print "VERIFIERID: %s" % idlib.b2a(self._output.verifierid) - #print "FILEID: %s" % idlib.b2a(self._output.fileid) - #assert self._verifierid == self._output.verifierid - #assert self._fileid = self._output.fileid + log.msg("computed VERIFIERID: %s" % idlib.b2a(self._output.verifierid)) + log.msg("computed FILEID: %s" % idlib.b2a(self._output.fileid)) + if self.check_verifierid: + _assert(self._verifierid == self._output.verifierid, + "bad verifierid: computed=%s, expected=%s" % + (idlib.b2a(self._output.verifierid), + idlib.b2a(self._verifierid))) + if self.check_fileid: + _assert(self._fileid == self._output.fileid, + "bad fileid: computed=%s, expected=%s" % + (idlib.b2a(self._output.fileid), + idlib.b2a(self._fileid))) _assert(self._output.length == self._size, got=self._output.length, expected=self._size) return self._output.finish() diff --git a/src/allmydata/encode.py b/src/allmydata/encode.py index 98d1ff1a..5b4cb75f 100644 --- a/src/allmydata/encode.py +++ b/src/allmydata/encode.py @@ -79,8 +79,11 @@ class Encoder(object): self.NEEDED_SHARES = k self.TOTAL_SHARES = n - def setup(self, infile): + def setup(self, infile, encryption_key): self.infile = infile + assert isinstance(encryption_key, str) + assert len(encryption_key) == 16 # AES-128 + self.key = encryption_key infile.seek(0, 2) self.file_size = infile.tell() infile.seek(0, 0) @@ -158,7 +161,6 @@ class Encoder(object): return d def setup_encryption(self): - self.key = "\x00"*16 self.cryptor = AES.new(key=self.key, mode=AES.MODE_CTR, counterstart="\x00"*16) self.segment_num = 0 diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py index 846ad577..58022de6 100644 --- a/src/allmydata/test/test_encode.py +++ b/src/allmydata/test/test_encode.py @@ -115,7 +115,8 @@ class Encode(unittest.TestCase): # force use of multiple segments options = {"max_segment_size": max_segment_size} e = encode.Encoder(options) - e.setup(StringIO(data)) + nonkey = "\x00" * 16 + e.setup(StringIO(data), nonkey) assert e.num_shares == NUM_SHARES # else we'll be completely confused e.setup_codec() # need to rebuild the codec for that change assert (NUM_SEGMENTS-1)*e.segment_size < len(data) <= NUM_SEGMENTS*e.segment_size @@ -222,7 +223,8 @@ class Roundtrip(unittest.TestCase): options = {"max_segment_size": max_segment_size, "needed_and_total_shares": k_and_n} e = encode.Encoder(options) - e.setup(StringIO(data)) + nonkey = "\x00" * 16 + e.setup(StringIO(data), nonkey) assert e.num_shares == NUM_SHARES # else we'll be completely confused e.setup_codec() # need to rebuild the codec for that change @@ -238,18 +240,22 @@ class Roundtrip(unittest.TestCase): e.set_shareholders(shareholders) d = e.start() def _uploaded(roothash): - URI = pack_uri(e._codec.get_encoder_type(), - e._codec.get_serialized_params(), - e._tail_codec.get_serialized_params(), - "V" * 20, - roothash, - e.required_shares, - e.num_shares, - e.file_size, - e.segment_size) + URI = pack_uri(codec_name=e._codec.get_encoder_type(), + codec_params=e._codec.get_serialized_params(), + tail_codec_params=e._tail_codec.get_serialized_params(), + verifierid="V" * 20, + fileid="F" * 20, + key=nonkey, + roothash=roothash, + needed_shares=e.required_shares, + total_shares=e.num_shares, + size=e.file_size, + segment_size=e.segment_size) client = None target = download.Data() fd = download.FileDownloader(client, URI, target) + fd.check_verifierid = False + fd.check_fileid = False for shnum in range(AVAILABLE_SHARES): bucket = all_shareholders[shnum] fd.add_share_bucket(shnum, bucket) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 4c3567a8..00e0dc60 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -194,7 +194,8 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase): d1 = self.downloader.download_to_data(baduri) def _baduri_should_fail(res): self.failUnless(isinstance(res, Failure)) - self.failUnless(res.check(download.NotEnoughPeersError)) + self.failUnless(res.check(download.NotEnoughPeersError), + "expected NotEnoughPeersError, got %s" % res) # TODO: files that have zero peers should get a special kind # of NotEnoughPeersError, which can be used to suggest that # the URI might be wrong or that they've nver uploaded the @@ -209,11 +210,19 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase): return good[:-1] + chr(ord(good[-1]) ^ 0x01) def mangle_uri(self, gooduri): + # change the verifierid, which means we'll be asking about the wrong + # file, so nobody will have any shares pieces = list(uri.unpack_uri(gooduri)) - # [4] is the verifierid - pieces[4] = self.flip_bit(pieces[4]) + # [3] is the verifierid + assert len(pieces[3]) == 20 + pieces[3] = self.flip_bit(pieces[3]) return uri.pack_uri(*pieces) + # TODO: add a test which mangles the fileid instead, and should fail in + # the post-download phase when the file's integrity check fails. Do the + # same thing for the key, which should cause the download to fail the + # post-download verifierid check. + def test_vdrive(self): self.basedir = "test_system/SystemTest/test_vdrive" self.data = DATA = "Some data to publish to the virtual drive\n" diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py index 175a0e35..91dbef17 100644 --- a/src/allmydata/test/test_upload.py +++ b/src/allmydata/test/test_upload.py @@ -24,9 +24,13 @@ class GoodServer(unittest.TestCase): def _check(self, uri): self.failUnless(isinstance(uri, str)) self.failUnless(uri.startswith("URI:")) - codec_name, codec_params, tail_codec_params, verifierid, roothash, needed_shares, total_shares, size, segment_size = unpack_uri(uri) + codec_name, codec_params, tail_codec_params, verifierid, fileid, key, roothash, needed_shares, total_shares, size, segment_size = unpack_uri(uri) self.failUnless(isinstance(verifierid, str)) self.failUnlessEqual(len(verifierid), 20) + self.failUnless(isinstance(fileid, str)) + self.failUnlessEqual(len(fileid), 20) + self.failUnless(isinstance(key, str)) + self.failUnlessEqual(len(key), 16) self.failUnless(isinstance(codec_params, str)) def testData(self): diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py index 62fd5d79..dc11d77f 100644 --- a/src/allmydata/upload.py +++ b/src/allmydata/upload.py @@ -8,6 +8,7 @@ from allmydata.util import idlib from allmydata import encode from allmydata.uri import pack_uri from allmydata.interfaces import IUploadable, IUploader +from allmydata.Crypto.Cipher import AES from cStringIO import StringIO import collections, random, sha @@ -72,10 +73,18 @@ class FileUploader: self._size = filehandle.tell() filehandle.seek(0) - def set_verifierid(self, vid): - assert isinstance(vid, str) - assert len(vid) == 20 - self._verifierid = vid + def set_id_strings(self, verifierid, fileid): + assert isinstance(verifierid, str) + assert len(verifierid) == 20 + self._verifierid = verifierid + assert isinstance(fileid, str) + assert len(fileid) == 20 + self._fileid = fileid + + def set_encryption_key(self, key): + assert isinstance(key, str) + assert len(key) == 16 # AES-128 + self._encryption_key = key def start(self): """Start uploading the file. @@ -91,7 +100,7 @@ class FileUploader: # create the encoder, so we can know how large the shares will be self._encoder = encode.Encoder(self._options) - self._encoder.setup(self._filehandle) + self._encoder.setup(self._filehandle, self._encryption_key) share_size = self._encoder.get_share_size() block_size = self._encoder.get_block_size() @@ -234,10 +243,17 @@ class FileUploader: codec_type = self._encoder._codec.get_encoder_type() codec_params = self._encoder._codec.get_serialized_params() tail_codec_params = self._encoder._tail_codec.get_serialized_params() - return pack_uri(codec_type, codec_params, tail_codec_params, - self._verifierid, - roothash, self.needed_shares, self.total_shares, - self._size, self._encoder.segment_size) + return pack_uri(codec_name=codec_type, + codec_params=codec_params, + tail_codec_params=tail_codec_params, + verifierid=self._verifierid, + fileid=self._fileid, + key=self._encryption_key, + roothash=roothash, + needed_shares=self.needed_shares, + total_shares=self.total_shares, + size=self._size, + segment_size=self._encoder.segment_size) def netstring(s): @@ -282,14 +298,39 @@ class Uploader(service.MultiService): desired_shares = 75 # We will abort an upload unless we can allocate space for at least this many. total_shares = 100 # Total number of shares created by encoding. If everybody has room then this is is how many we will upload. - def _compute_verifierid(self, f): - hasher = sha.new(netstring("allmydata_v1_verifierid")) + def compute_id_strings(self, f): + # return a list of (fileid, encryptionkey, verifierid) + fileid_hasher = sha.new(netstring("allmydata_fileid_v1")) + enckey_hasher = sha.new(netstring("allmydata_encryption_key_v1")) + f.seek(0) + BLOCKSIZE = 64*1024 + while True: + data = f.read(BLOCKSIZE) + if not data: + break + fileid_hasher.update(data) + enckey_hasher.update(data) + fileid = fileid_hasher.digest() + enckey = enckey_hasher.digest() + + # now make a second pass to determine the verifierid. It would be + # nice to make this involve fewer passes. + verifierid_hasher = sha.new(netstring("allmydata_verifierid_v1")) + key = enckey[:16] + cryptor = AES.new(key=key, mode=AES.MODE_CTR, + counterstart="\x00"*16) f.seek(0) - data = f.read() - hasher.update(data)#f.read()) + while True: + data = f.read(BLOCKSIZE) + if not data: + break + verifierid_hasher.update(cryptor.encrypt(data)) + verifierid = verifierid_hasher.digest() + + # and leave the file pointer at the beginning f.seek(0) - # note: this is only of the plaintext data, no encryption yet - return hasher.digest() + + return fileid, key, verifierid def upload(self, f, options={}): # this returns the URI @@ -300,7 +341,9 @@ class Uploader(service.MultiService): u = self.uploader_class(self.parent, options) u.set_filehandle(fh) u.set_params(self.needed_shares, self.desired_shares, self.total_shares) - u.set_verifierid(self._compute_verifierid(fh)) + fileid, key, verifierid = self.compute_id_strings(fh) + u.set_encryption_key(key) + u.set_id_strings(verifierid, fileid) d = u.start() def _done(res): f.close_filehandle(fh) diff --git a/src/allmydata/uri.py b/src/allmydata/uri.py index a0f77fdd..356e409b 100644 --- a/src/allmydata/uri.py +++ b/src/allmydata/uri.py @@ -5,7 +5,9 @@ from allmydata.util import idlib # enough information to retrieve and validate the contents. It shall be # expressed in a limited character set (namely [TODO]). -def pack_uri(codec_name, codec_params, tail_codec_params, verifierid, roothash, needed_shares, total_shares, size, segment_size): +def pack_uri(codec_name, codec_params, tail_codec_params, + verifierid, fileid, key, + roothash, needed_shares, total_shares, size, segment_size): assert isinstance(codec_name, str) assert len(codec_name) < 10 assert ":" not in codec_name @@ -15,18 +17,24 @@ def pack_uri(codec_name, codec_params, tail_codec_params, verifierid, roothash, assert ":" not in tail_codec_params assert isinstance(verifierid, str) assert len(verifierid) == 20 # sha1 hash - return "URI:%s:%s:%s:%s:%s:%s:%s:%s:%s" % (codec_name, codec_params, tail_codec_params, idlib.b2a(verifierid), idlib.b2a(roothash), needed_shares, total_shares, size, segment_size) + assert isinstance(fileid, str) + assert len(fileid) == 20 # sha1 hash + assert isinstance(key, str) + assert len(key) == 16 # AES-128 + return "URI:%s:%s:%s:%s:%s:%s:%s:%s:%s:%s:%s" % (codec_name, codec_params, tail_codec_params, idlib.b2a(verifierid), idlib.b2a(fileid), idlib.b2a(key), idlib.b2a(roothash), needed_shares, total_shares, size, segment_size) def unpack_uri(uri): assert uri.startswith("URI:") - header, codec_name, codec_params, tail_codec_params, verifierid_s, roothash_s, needed_shares_s, total_shares_s, size_s, segment_size_s = uri.split(":") + header, codec_name, codec_params, tail_codec_params, verifierid_s, fileid_s, key_s, roothash_s, needed_shares_s, total_shares_s, size_s, segment_size_s = uri.split(":") verifierid = idlib.a2b(verifierid_s) + fileid = idlib.a2b(fileid_s) + key = idlib.a2b(key_s) roothash = idlib.a2b(roothash_s) needed_shares = int(needed_shares_s) total_shares = int(total_shares_s) size = int(size_s) segment_size = int(segment_size_s) - return codec_name, codec_params, tail_codec_params, verifierid, roothash, needed_shares, total_shares, size, segment_size + return codec_name, codec_params, tail_codec_params, verifierid, fileid, key, roothash, needed_shares, total_shares, size, segment_size