make content-hash-key encryption a parameter of uploading
author: Zooko O'Whielacronx <zooko@zooko.com>
Wed, 30 Jan 2008 19:24:50 +0000 (12:24 -0700)
committer: Zooko O'Whielacronx <zooko@zooko.com>
Wed, 30 Jan 2008 19:24:50 +0000 (12:24 -0700)
fixes #293

src/allmydata/test/test_system.py
src/allmydata/test/test_upload.py
src/allmydata/upload.py

index 538d7f6ddda76cddfd3817ac278eecddca7fe755..842adb2268d0003a3c389c92c0232ae4a03a2d46 100644 (file)
@@ -178,7 +178,15 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
     # reinstate this test until it does.
     del test_connections
 
-    def test_upload_and_download(self):
+    def test_upload_and_download_random_key(self):
+        return self._test_upload_and_download(False)
+    test_upload_and_download_random_key.timeout = 4800
+
+    def test_upload_and_download_content_hash_key(self):
+        return self._test_upload_and_download(True)
+    test_upload_and_download_content_hash_key.timeout = 4800
+
+    def _test_upload_and_download(self, contenthashkey):
         self.basedir = "system/SystemTest/test_upload_and_download"
         # we use 4000 bytes of data, which will result in about 400k written
         # to disk among all our simulated nodes
@@ -203,7 +211,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
             # tail segment is not the same length as the others. This actualy
             # gets rounded up to 1025 to be a multiple of the number of
             # required shares (since we use 25 out of 100 FEC).
-            up = upload.Data(DATA)
+            up = upload.Data(DATA, contenthashkey=contenthashkey)
             up.max_segment_size = 1024
             d1 = u.upload(up)
             return d1
@@ -216,12 +224,12 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
         d.addCallback(_upload_done)
 
         def _upload_again(res):
-            # upload again. This ought to be short-circuited, however with
-            # the way we currently generate URIs (i.e. because they include
-            # the roothash), we have to do all of the encoding work, and only
-            # get to save on the upload part.
+            # Upload again. If contenthashkey then this ought to be
+            # short-circuited, however with the way we currently generate URIs
+            # (i.e. because they include the roothash), we have to do all of the
+            # encoding work, and only get to save on the upload part.
             log.msg("UPLOADING AGAIN")
-            up = upload.Data(DATA)
+            up = upload.Data(DATA, contenthashkey=contenthashkey)
             up.max_segment_size = 1024
             d1 = self.uploader.upload(up)
         d.addCallback(_upload_again)
@@ -283,7 +291,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
 
         def _upload_with_helper(res):
             DATA = "Data that needs help to upload" * 1000
-            u = upload.Data(DATA)
+            u = upload.Data(DATA, contenthashkey=contenthashkey)
             d = self.extra_node.upload(u)
             def _uploaded(uri):
                 return self.downloader.download_to_data(uri)
@@ -296,8 +304,8 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
 
         def _upload_resumable(res):
             DATA = "Data that needs help to upload and gets interrupted" * 1000
-            u1 = upload.Data(DATA)
-            u2 = upload.Data(DATA)
+            u1 = upload.Data(DATA, contenthashkey=contenthashkey)
+            u2 = upload.Data(DATA, contenthashkey=contenthashkey)
 
             # tell the upload to drop the connection after about 5kB
             u1.debug_interrupt = 5000
@@ -370,30 +378,45 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
                 log.msg("Second upload complete", level=log.NOISY,
                         facility="tahoe.test.test_system")
                 reu = u2.debug_RemoteEncryptedUploadable
-                # make sure we didn't read the whole file the second time
-                # around
-                self.failUnless(reu._bytes_sent < len(DATA),
+
+                # We currently don't support resumption of upload if the data is
+                # encrypted with a random key.  (Because that would require us
+                # to store the key locally and re-use it on the next upload of
+                # this file, which isn't a bad thing to do, but we currently
+                # don't do it.)
+                if contenthashkey:
+                    # Make sure we did not have to read the whole file the
+                    # second time around.
+                    self.failUnless(reu._bytes_sent < len(DATA),
                                 "resumption didn't save us any work:"
                                 " read %d bytes out of %d total" %
                                 (reu._bytes_sent, len(DATA)))
+                else:
+                    # Make sure we did have to read the whole file the second
+                    # time around -- because the one that we partially uploaded
+                    # earlier was encrypted with a different random key.
+                    self.failIf(reu._bytes_sent < len(DATA),
+                                "resumption saved us some work even though we were using random keys:"
+                                " read %d bytes out of %d total" %
+                                (reu._bytes_sent, len(DATA)))
                 return self.downloader.download_to_data(uri)
             d.addCallback(_uploaded)
 
             def _check(newdata):
                 self.failUnlessEqual(newdata, DATA)
-                # also check that the helper has removed the temp file from
-                # its directories
-                basedir = os.path.join(self.getdir("client0"), "helper")
-                files = os.listdir(os.path.join(basedir, "CHK_encoding"))
-                self.failUnlessEqual(files, [])
-                files = os.listdir(os.path.join(basedir, "CHK_incoming"))
-                self.failUnlessEqual(files, [])
+                # If using a content hash key, then also check that the helper
+                # has removed the temp file from its directories.
+                if contenthashkey:
+                    basedir = os.path.join(self.getdir("client0"), "helper")
+                    files = os.listdir(os.path.join(basedir, "CHK_encoding"))
+                    self.failUnlessEqual(files, [])
+                    files = os.listdir(os.path.join(basedir, "CHK_incoming"))
+                    self.failUnlessEqual(files, [])
             d.addCallback(_check)
             return d
         d.addCallback(_upload_resumable)
 
         return d
-    test_upload_and_download.timeout = 4800
 
     def _find_shares(self, basedir):
         shares = []
index df5d12a2d5ba5c0d345a247ab7df6df96afbd3e2..52d84863b5569ad9065afe2ebc9f284e77dbc194 100644 (file)
@@ -21,9 +21,15 @@ class Uploadable(unittest.TestCase):
         s = "".join(data)
         self.failUnlessEqual(s, expected)
 
-    def test_filehandle(self):
+    def test_filehandle_random_key(self):
+        return self._test_filehandle(contenthashkey=False)  # fresh random key per upload
+
+    def test_filehandle_content_hash_key(self):
+        return self._test_filehandle(contenthashkey=True)  # key derived from the content
+
+    def _test_filehandle(self, contenthashkey):  # same flag name/polarity as upload.FileHandle
         s = StringIO("a"*41)
-        u = upload.FileHandle(s)
+        u = upload.FileHandle(s, contenthashkey=contenthashkey)  # by keyword, so polarity cannot flip
         d = u.get_size()
         d.addCallback(self.failUnlessEqual, 41)
         d.addCallback(lambda res: u.read(1))
index 35d86a19835f683da50b8cd546e2056a5adcda5a..32d2ecc28a0ba467e74d76445e7af90efc4cfce1 100644 (file)
@@ -863,13 +863,15 @@ class NoParameterPreferencesMixin:
     def get_encoding_parameters(self):
         return defer.succeed(self.encoding_parameters)
 
-class ConvergentUploadMixin:
-    # to use this, the class it is mixed in to must have a seekable
-    # filehandle named self._filehandle
-    _params = None
-    _key = None
+class FileHandle(NoParameterPreferencesMixin):
+    implements(IUploadable)
 
-    def get_encryption_key(self):
+    def __init__(self, filehandle, contenthashkey=True):
+        self._filehandle = filehandle
+        self._key = None
+        self._contenthashkey = contenthashkey
+
+    def _get_encryption_key_content_hash(self):
         if self._key is None:
             f = self._filehandle
             enckey_hasher = key_hasher()
@@ -886,20 +888,16 @@ class ConvergentUploadMixin:
 
         return defer.succeed(self._key)
 
-class NonConvergentUploadMixin:
-    _key = None
-
-    def get_encryption_key(self):
+    def _get_encryption_key_random(self):
         if self._key is None:
             self._key = os.urandom(16)
         return defer.succeed(self._key)
 
-
-class FileHandle(ConvergentUploadMixin, NoParameterPreferencesMixin):
-    implements(IUploadable)
-
-    def __init__(self, filehandle):
-        self._filehandle = filehandle
+    def get_encryption_key(self):
+        if self._contenthashkey:
+            return self._get_encryption_key_content_hash()
+        else:
+            return self._get_encryption_key_random()
 
     def get_size(self):
         self._filehandle.seek(0,2)
@@ -915,15 +913,15 @@ class FileHandle(ConvergentUploadMixin, NoParameterPreferencesMixin):
         pass
 
 class FileName(FileHandle):
-    def __init__(self, filename):
-        FileHandle.__init__(self, open(filename, "rb"))
+    def __init__(self, filename, contenthashkey=True):
+        FileHandle.__init__(self, open(filename, "rb"), contenthashkey=contenthashkey)
     def close(self):
         FileHandle.close(self)
         self._filehandle.close()
 
 class Data(FileHandle):
-    def __init__(self, data):
-        FileHandle.__init__(self, StringIO(data))
+    def __init__(self, data, contenthashkey=False):
+        FileHandle.__init__(self, StringIO(data), contenthashkey=contenthashkey)
 
 class Uploader(service.MultiService):
     """I am a service that allows file uploading.
@@ -977,9 +975,9 @@ class Uploader(service.MultiService):
         return d
 
     # utility functions
-    def upload_data(self, data):
-        return self.upload(Data(data))
-    def upload_filename(self, filename):
-        return self.upload(FileName(filename))
-    def upload_filehandle(self, filehandle):
-        return self.upload(FileHandle(filehandle))
+    def upload_data(self, data, contenthashkey=True):
+        return self.upload(Data(data, contenthashkey=contenthashkey))
+    def upload_filename(self, filename, contenthashkey=True):
+        return self.upload(FileName(filename, contenthashkey=contenthashkey))
+    def upload_filehandle(self, filehandle, contenthashkey=True):
+        return self.upload(FileHandle(filehandle, contenthashkey=contenthashkey))