From: Brian Warner <warner@allmydata.com>
Date: Thu, 26 Apr 2007 00:53:10 +0000 (-0700)
Subject: use real encryption, generate/store/verify verifierid and fileid
X-Git-Tag: tahoe_v0.1.1-0-UNSTABLE~7
X-Git-Url: https://git.rkrishnan.org/components/com_hotproperty/simplejson/about.html?a=commitdiff_plain;h=4b2298937b0f4dff375593ae96bdd0c0ade308c8;p=tahoe-lafs%2Ftahoe-lafs.git

use real encryption, generate/store/verify verifierid and fileid
---

diff --git a/src/allmydata/download.py b/src/allmydata/download.py
index 856ed726..656c8ecb 100644
--- a/src/allmydata/download.py
+++ b/src/allmydata/download.py
@@ -25,8 +25,8 @@ class Output:
         self.downloadable = downloadable
         self._decryptor = AES.new(key=key, mode=AES.MODE_CTR,
                                   counterstart="\x00"*16)
-        self._verifierid_hasher = sha.new(netstring("allmydata_v1_verifierid"))
-        self._fileid_hasher = sha.new(netstring("allmydata_v1_fileid"))
+        self._verifierid_hasher = sha.new(netstring("allmydata_verifierid_v1"))
+        self._fileid_hasher = sha.new(netstring("allmydata_fileid_v1"))
         self.length = 0
 
     def open(self):
@@ -208,14 +208,17 @@ class SegmentDownloader:
             del self.parent._share_buckets[shnum]
 
 class FileDownloader:
+    check_verifierid = True
+    check_fileid = True
 
     def __init__(self, client, uri, downloadable):
         self._client = client
         self._downloadable = downloadable
-        (codec_name, codec_params, tail_codec_params, verifierid, roothash, needed_shares, total_shares, size, segment_size) = unpack_uri(uri)
+        (codec_name, codec_params, tail_codec_params, verifierid, fileid, key, roothash, needed_shares, total_shares, size, segment_size) = unpack_uri(uri)
         assert isinstance(verifierid, str)
         assert len(verifierid) == 20
         self._verifierid = verifierid
+        self._fileid = fileid
         self._roothash = roothash
 
         self._codec = codec.get_decoder_by_name(codec_name)
@@ -230,7 +233,6 @@ class FileDownloader:
         self._size = size
         self._num_needed_shares = self._codec.get_needed_shares()
 
-        key = "\x00" * 16
         self._output = Output(downloadable, key)
 
         self._share_hashtree = hashtree.IncompleteHashTree(total_shares)
@@ -349,10 +351,18 @@ class FileDownloader:
 
     def _done(self, res):
         self._output.close()
-        #print "VERIFIERID: %s" % idlib.b2a(self._output.verifierid)
-        #print "FILEID: %s" % idlib.b2a(self._output.fileid)
-        #assert self._verifierid == self._output.verifierid
-        #assert self._fileid = self._output.fileid
+        log.msg("computed VERIFIERID: %s" % idlib.b2a(self._output.verifierid))
+        log.msg("computed FILEID: %s" % idlib.b2a(self._output.fileid))
+        if self.check_verifierid:
+            _assert(self._verifierid == self._output.verifierid,
+                    "bad verifierid: computed=%s, expected=%s" %
+                    (idlib.b2a(self._output.verifierid),
+                     idlib.b2a(self._verifierid)))
+        if self.check_fileid:
+            _assert(self._fileid == self._output.fileid,
+                    "bad fileid: computed=%s, expected=%s" %
+                    (idlib.b2a(self._output.fileid),
+                     idlib.b2a(self._fileid)))
         _assert(self._output.length == self._size,
                 got=self._output.length, expected=self._size)
         return self._output.finish()
diff --git a/src/allmydata/encode.py b/src/allmydata/encode.py
index 98d1ff1a..5b4cb75f 100644
--- a/src/allmydata/encode.py
+++ b/src/allmydata/encode.py
@@ -79,8 +79,11 @@ class Encoder(object):
         self.NEEDED_SHARES = k
         self.TOTAL_SHARES = n
 
-    def setup(self, infile):
+    def setup(self, infile, encryption_key):
         self.infile = infile
+        assert isinstance(encryption_key, str)
+        assert len(encryption_key) == 16 # AES-128
+        self.key = encryption_key
         infile.seek(0, 2)
         self.file_size = infile.tell()
         infile.seek(0, 0)
@@ -158,7 +161,6 @@ class Encoder(object):
         return d
 
     def setup_encryption(self):
-        self.key = "\x00"*16
         self.cryptor = AES.new(key=self.key, mode=AES.MODE_CTR,
                                counterstart="\x00"*16)
         self.segment_num = 0
diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py
index 846ad577..58022de6 100644
--- a/src/allmydata/test/test_encode.py
+++ b/src/allmydata/test/test_encode.py
@@ -115,7 +115,8 @@ class Encode(unittest.TestCase):
         # force use of multiple segments
         options = {"max_segment_size": max_segment_size}
         e = encode.Encoder(options)
-        e.setup(StringIO(data))
+        nonkey = "\x00" * 16
+        e.setup(StringIO(data), nonkey)
         assert e.num_shares == NUM_SHARES # else we'll be completely confused
         e.setup_codec() # need to rebuild the codec for that change
         assert (NUM_SEGMENTS-1)*e.segment_size < len(data) <= NUM_SEGMENTS*e.segment_size
@@ -222,7 +223,8 @@ class Roundtrip(unittest.TestCase):
         options = {"max_segment_size": max_segment_size,
                    "needed_and_total_shares": k_and_n}
         e = encode.Encoder(options)
-        e.setup(StringIO(data))
+        nonkey = "\x00" * 16
+        e.setup(StringIO(data), nonkey)
 
         assert e.num_shares == NUM_SHARES # else we'll be completely confused
         e.setup_codec() # need to rebuild the codec for that change
@@ -238,18 +240,22 @@ class Roundtrip(unittest.TestCase):
         e.set_shareholders(shareholders)
         d = e.start()
         def _uploaded(roothash):
-            URI = pack_uri(e._codec.get_encoder_type(),
-                           e._codec.get_serialized_params(),
-                           e._tail_codec.get_serialized_params(),
-                           "V" * 20,
-                           roothash,
-                           e.required_shares,
-                           e.num_shares,
-                           e.file_size,
-                           e.segment_size)
+            URI = pack_uri(codec_name=e._codec.get_encoder_type(),
+                           codec_params=e._codec.get_serialized_params(),
+                           tail_codec_params=e._tail_codec.get_serialized_params(),
+                           verifierid="V" * 20,
+                           fileid="F" * 20,
+                           key=nonkey,
+                           roothash=roothash,
+                           needed_shares=e.required_shares,
+                           total_shares=e.num_shares,
+                           size=e.file_size,
+                           segment_size=e.segment_size)
             client = None
             target = download.Data()
             fd = download.FileDownloader(client, URI, target)
+            fd.check_verifierid = False
+            fd.check_fileid = False
             for shnum in range(AVAILABLE_SHARES):
                 bucket = all_shareholders[shnum]
                 fd.add_share_bucket(shnum, bucket)
diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py
index 4c3567a8..00e0dc60 100644
--- a/src/allmydata/test/test_system.py
+++ b/src/allmydata/test/test_system.py
@@ -194,7 +194,8 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
             d1 = self.downloader.download_to_data(baduri)
             def _baduri_should_fail(res):
                 self.failUnless(isinstance(res, Failure))
-                self.failUnless(res.check(download.NotEnoughPeersError))
+                self.failUnless(res.check(download.NotEnoughPeersError),
+                                "expected NotEnoughPeersError, got %s" % res)
                 # TODO: files that have zero peers should get a special kind
                 # of NotEnoughPeersError, which can be used to suggest that
                 # the URI might be wrong or that they've nver uploaded the
@@ -209,11 +210,19 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
         return good[:-1] + chr(ord(good[-1]) ^ 0x01)
 
     def mangle_uri(self, gooduri):
+        # change the verifierid, which means we'll be asking about the wrong
+        # file, so nobody will have any shares
         pieces = list(uri.unpack_uri(gooduri))
-        # [4] is the verifierid
-        pieces[4] = self.flip_bit(pieces[4])
+        # [3] is the verifierid
+        assert len(pieces[3]) == 20
+        pieces[3] = self.flip_bit(pieces[3])
         return uri.pack_uri(*pieces)
 
+    # TODO: add a test which mangles the fileid instead, and should fail in
+    # the post-download phase when the file's integrity check fails. Do the
+    # same thing for the key, which should cause the download to fail the
+    # post-download verifierid check.
+
     def test_vdrive(self):
         self.basedir = "test_system/SystemTest/test_vdrive"
         self.data = DATA = "Some data to publish to the virtual drive\n"
diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py
index 175a0e35..91dbef17 100644
--- a/src/allmydata/test/test_upload.py
+++ b/src/allmydata/test/test_upload.py
@@ -24,9 +24,13 @@ class GoodServer(unittest.TestCase):
     def _check(self, uri):
         self.failUnless(isinstance(uri, str))
         self.failUnless(uri.startswith("URI:"))
-        codec_name, codec_params, tail_codec_params, verifierid, roothash, needed_shares, total_shares, size, segment_size = unpack_uri(uri)
+        codec_name, codec_params, tail_codec_params, verifierid, fileid, key, roothash, needed_shares, total_shares, size, segment_size = unpack_uri(uri)
         self.failUnless(isinstance(verifierid, str))
         self.failUnlessEqual(len(verifierid), 20)
+        self.failUnless(isinstance(fileid, str))
+        self.failUnlessEqual(len(fileid), 20)
+        self.failUnless(isinstance(key, str))
+        self.failUnlessEqual(len(key), 16)
         self.failUnless(isinstance(codec_params, str))
 
     def testData(self):
diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py
index 62fd5d79..dc11d77f 100644
--- a/src/allmydata/upload.py
+++ b/src/allmydata/upload.py
@@ -8,6 +8,7 @@ from allmydata.util import idlib
 from allmydata import encode
 from allmydata.uri import pack_uri
 from allmydata.interfaces import IUploadable, IUploader
+from allmydata.Crypto.Cipher import AES
 
 from cStringIO import StringIO
 import collections, random, sha
@@ -72,10 +73,18 @@ class FileUploader:
         self._size = filehandle.tell()
         filehandle.seek(0)
 
-    def set_verifierid(self, vid):
-        assert isinstance(vid, str)
-        assert len(vid) == 20
-        self._verifierid = vid
+    def set_id_strings(self, verifierid, fileid):
+        assert isinstance(verifierid, str)
+        assert len(verifierid) == 20
+        self._verifierid = verifierid
+        assert isinstance(fileid, str)
+        assert len(fileid) == 20
+        self._fileid = fileid
+
+    def set_encryption_key(self, key):
+        assert isinstance(key, str)
+        assert len(key) == 16  # AES-128
+        self._encryption_key = key
 
     def start(self):
         """Start uploading the file.
@@ -91,7 +100,7 @@ class FileUploader:
 
         # create the encoder, so we can know how large the shares will be
         self._encoder = encode.Encoder(self._options)
-        self._encoder.setup(self._filehandle)
+        self._encoder.setup(self._filehandle, self._encryption_key)
         share_size = self._encoder.get_share_size()
         block_size = self._encoder.get_block_size()
 
@@ -234,10 +243,17 @@ class FileUploader:
         codec_type = self._encoder._codec.get_encoder_type()
         codec_params = self._encoder._codec.get_serialized_params()
         tail_codec_params = self._encoder._tail_codec.get_serialized_params()
-        return pack_uri(codec_type, codec_params, tail_codec_params,
-                        self._verifierid,
-                        roothash, self.needed_shares, self.total_shares,
-                        self._size, self._encoder.segment_size)
+        return pack_uri(codec_name=codec_type,
+                        codec_params=codec_params,
+                        tail_codec_params=tail_codec_params,
+                        verifierid=self._verifierid,
+                        fileid=self._fileid,
+                        key=self._encryption_key,
+                        roothash=roothash,
+                        needed_shares=self.needed_shares,
+                        total_shares=self.total_shares,
+                        size=self._size,
+                        segment_size=self._encoder.segment_size)
 
 
 def netstring(s):
@@ -282,14 +298,39 @@ class Uploader(service.MultiService):
     desired_shares = 75 # We will abort an upload unless we can allocate space for at least this many.
     total_shares = 100 # Total number of shares created by encoding.  If everybody has room then this is is how many we will upload.
 
-    def _compute_verifierid(self, f):
-        hasher = sha.new(netstring("allmydata_v1_verifierid"))
+    def compute_id_strings(self, f):
+        # return a tuple of (fileid, encryption key, verifierid)
+        fileid_hasher = sha.new(netstring("allmydata_fileid_v1"))
+        enckey_hasher = sha.new(netstring("allmydata_encryption_key_v1"))
+        f.seek(0)
+        BLOCKSIZE = 64*1024
+        while True:
+            data = f.read(BLOCKSIZE)
+            if not data:
+                break
+            fileid_hasher.update(data)
+            enckey_hasher.update(data)
+        fileid = fileid_hasher.digest()
+        enckey = enckey_hasher.digest()
+
+        # now make a second pass to determine the verifierid. It would be
+        # nice to make this involve fewer passes.
+        verifierid_hasher = sha.new(netstring("allmydata_verifierid_v1"))
+        key = enckey[:16]
+        cryptor = AES.new(key=key, mode=AES.MODE_CTR,
+                          counterstart="\x00"*16)
         f.seek(0)
-        data = f.read()
-        hasher.update(data)#f.read())
+        while True:
+            data = f.read(BLOCKSIZE)
+            if not data:
+                break
+            verifierid_hasher.update(cryptor.encrypt(data))
+        verifierid = verifierid_hasher.digest()
+
+        # and leave the file pointer at the beginning
         f.seek(0)
-        # note: this is only of the plaintext data, no encryption yet
-        return hasher.digest()
+
+        return fileid, key, verifierid
 
     def upload(self, f, options={}):
         # this returns the URI
@@ -300,7 +341,9 @@ class Uploader(service.MultiService):
         u = self.uploader_class(self.parent, options)
         u.set_filehandle(fh)
         u.set_params(self.needed_shares, self.desired_shares, self.total_shares)
-        u.set_verifierid(self._compute_verifierid(fh))
+        fileid, key, verifierid = self.compute_id_strings(fh)
+        u.set_encryption_key(key)
+        u.set_id_strings(verifierid, fileid)
         d = u.start()
         def _done(res):
             f.close_filehandle(fh)
diff --git a/src/allmydata/uri.py b/src/allmydata/uri.py
index a0f77fdd..356e409b 100644
--- a/src/allmydata/uri.py
+++ b/src/allmydata/uri.py
@@ -5,7 +5,9 @@ from allmydata.util import idlib
 # enough information to retrieve and validate the contents. It shall be
 # expressed in a limited character set (namely [TODO]).
 
-def pack_uri(codec_name, codec_params, tail_codec_params, verifierid, roothash, needed_shares, total_shares, size, segment_size):
+def pack_uri(codec_name, codec_params, tail_codec_params,
+             verifierid, fileid, key,
+             roothash, needed_shares, total_shares, size, segment_size):
     assert isinstance(codec_name, str)
     assert len(codec_name) < 10
     assert ":" not in codec_name
@@ -15,18 +17,24 @@ def pack_uri(codec_name, codec_params, tail_codec_params, verifierid, roothash,
     assert ":" not in tail_codec_params
     assert isinstance(verifierid, str)
     assert len(verifierid) == 20 # sha1 hash
-    return "URI:%s:%s:%s:%s:%s:%s:%s:%s:%s" % (codec_name, codec_params, tail_codec_params, idlib.b2a(verifierid), idlib.b2a(roothash), needed_shares, total_shares, size, segment_size)
+    assert isinstance(fileid, str)
+    assert len(fileid) == 20 # sha1 hash
+    assert isinstance(key, str)
+    assert len(key) == 16 # AES-128
+    return "URI:%s:%s:%s:%s:%s:%s:%s:%s:%s:%s:%s" % (codec_name, codec_params, tail_codec_params, idlib.b2a(verifierid), idlib.b2a(fileid), idlib.b2a(key), idlib.b2a(roothash), needed_shares, total_shares, size, segment_size)
 
 
 def unpack_uri(uri):
     assert uri.startswith("URI:")
-    header, codec_name, codec_params, tail_codec_params, verifierid_s, roothash_s, needed_shares_s, total_shares_s, size_s, segment_size_s = uri.split(":")
+    header, codec_name, codec_params, tail_codec_params, verifierid_s, fileid_s, key_s, roothash_s, needed_shares_s, total_shares_s, size_s, segment_size_s = uri.split(":")
     verifierid = idlib.a2b(verifierid_s)
+    fileid = idlib.a2b(fileid_s)
+    key = idlib.a2b(key_s)
     roothash = idlib.a2b(roothash_s)
     needed_shares = int(needed_shares_s)
     total_shares = int(total_shares_s)
     size = int(size_s)
     segment_size = int(segment_size_s)
-    return codec_name, codec_params, tail_codec_params, verifierid, roothash, needed_shares, total_shares, size, segment_size
+    return codec_name, codec_params, tail_codec_params, verifierid, fileid, key, roothash, needed_shares, total_shares, size, segment_size