From: Zooko O'Whielacronx Date: Wed, 30 Jan 2008 19:24:50 +0000 (-0700) Subject: make content-hash-key encryption a parameter of uploading X-Git-Url: https://git.rkrishnan.org/(%5B%5E?a=commitdiff_plain;h=ca971559e67dc187c09c7a2af33117ac3029de51;p=tahoe-lafs%2Ftahoe-lafs.git make content-hash-key encryption a parameter of uploading fixes #293 --- diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 538d7f6d..842adb22 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -178,7 +178,15 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase): # reinstate this test until it does. del test_connections - def test_upload_and_download(self): + def test_upload_and_download_random_key(self): + return self._test_upload_and_download(False) + test_upload_and_download_random_key.timeout = 4800 + + def test_upload_and_download_content_hash_key(self): + return self._test_upload_and_download(True) + test_upload_and_download_content_hash_key.timeout = 4800 + + def _test_upload_and_download(self, contenthashkey): self.basedir = "system/SystemTest/test_upload_and_download" # we use 4000 bytes of data, which will result in about 400k written # to disk among all our simulated nodes @@ -203,7 +211,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase): # tail segment is not the same length as the others. This actualy # gets rounded up to 1025 to be a multiple of the number of # required shares (since we use 25 out of 100 FEC). - up = upload.Data(DATA) + up = upload.Data(DATA, contenthashkey=contenthashkey) up.max_segment_size = 1024 d1 = u.upload(up) return d1 @@ -216,12 +224,12 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase): d.addCallback(_upload_done) def _upload_again(res): - # upload again. This ought to be short-circuited, however with - # the way we currently generate URIs (i.e. 
because they include - # the roothash), we have to do all of the encoding work, and only - # get to save on the upload part. + # Upload again. If contenthashkey then this ought to be + # short-circuited, however with the way we currently generate URIs + # (i.e. because they include the roothash), we have to do all of the + # encoding work, and only get to save on the upload part. log.msg("UPLOADING AGAIN") - up = upload.Data(DATA) + up = upload.Data(DATA, contenthashkey=contenthashkey) up.max_segment_size = 1024 d1 = self.uploader.upload(up) d.addCallback(_upload_again) @@ -283,7 +291,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase): def _upload_with_helper(res): DATA = "Data that needs help to upload" * 1000 - u = upload.Data(DATA) + u = upload.Data(DATA, contenthashkey=contenthashkey) d = self.extra_node.upload(u) def _uploaded(uri): return self.downloader.download_to_data(uri) @@ -296,8 +304,8 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase): def _upload_resumable(res): DATA = "Data that needs help to upload and gets interrupted" * 1000 - u1 = upload.Data(DATA) - u2 = upload.Data(DATA) + u1 = upload.Data(DATA, contenthashkey=contenthashkey) + u2 = upload.Data(DATA, contenthashkey=contenthashkey) # tell the upload to drop the connection after about 5kB u1.debug_interrupt = 5000 @@ -370,30 +378,45 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase): log.msg("Second upload complete", level=log.NOISY, facility="tahoe.test.test_system") reu = u2.debug_RemoteEncryptedUploadable - # make sure we didn't read the whole file the second time - # around - self.failUnless(reu._bytes_sent < len(DATA), + + # We currently don't support resumption of upload if the data is + # encrypted with a random key. (Because that would require us + # to store the key locally and re-use it on the next upload of + # this file, which isn't a bad thing to do, but we currently + # don't do it.) 
+ if contenthashkey: + # Make sure we did not have to read the whole file the + # second time around. + self.failUnless(reu._bytes_sent < len(DATA), "resumption didn't save us any work:" " read %d bytes out of %d total" % (reu._bytes_sent, len(DATA))) + else: + # Make sure we did have to read the whole file the second + # time around -- because the one that we partially uploaded + # earlier was encrypted with a different random key. + self.failIf(reu._bytes_sent < len(DATA), + "resumption saved us some work even though we were using random keys:" + " read %d bytes out of %d total" % + (reu._bytes_sent, len(DATA))) return self.downloader.download_to_data(uri) d.addCallback(_uploaded) def _check(newdata): self.failUnlessEqual(newdata, DATA) - # also check that the helper has removed the temp file from - # its directories - basedir = os.path.join(self.getdir("client0"), "helper") - files = os.listdir(os.path.join(basedir, "CHK_encoding")) - self.failUnlessEqual(files, []) - files = os.listdir(os.path.join(basedir, "CHK_incoming")) - self.failUnlessEqual(files, []) + # If using a content hash key, then also check that the helper + # has removed the temp file from its directories. 
+ if contenthashkey: + basedir = os.path.join(self.getdir("client0"), "helper") + files = os.listdir(os.path.join(basedir, "CHK_encoding")) + self.failUnlessEqual(files, []) + files = os.listdir(os.path.join(basedir, "CHK_incoming")) + self.failUnlessEqual(files, []) d.addCallback(_check) return d d.addCallback(_upload_resumable) return d - test_upload_and_download.timeout = 4800 def _find_shares(self, basedir): shares = [] diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py index df5d12a2..52d84863 100644 --- a/src/allmydata/test/test_upload.py +++ b/src/allmydata/test/test_upload.py @@ -21,9 +21,15 @@ class Uploadable(unittest.TestCase): s = "".join(data) self.failUnlessEqual(s, expected) - def test_filehandle(self): + def test_filehandle_random_key(self): + return self._test_filehandle(True) + + def test_filehandle_content_hash_key(self): + return self._test_filehandle(False) + + def _test_filehandle(self, randomkey): s = StringIO("a"*41) - u = upload.FileHandle(s) + u = upload.FileHandle(s, randomkey) d = u.get_size() d.addCallback(self.failUnlessEqual, 41) d.addCallback(lambda res: u.read(1)) diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py index 35d86a19..32d2ecc2 100644 --- a/src/allmydata/upload.py +++ b/src/allmydata/upload.py @@ -863,13 +863,15 @@ class NoParameterPreferencesMixin: def get_encoding_parameters(self): return defer.succeed(self.encoding_parameters) -class ConvergentUploadMixin: - # to use this, the class it is mixed in to must have a seekable - # filehandle named self._filehandle - _params = None - _key = None +class FileHandle(NoParameterPreferencesMixin): + implements(IUploadable) - def get_encryption_key(self): + def __init__(self, filehandle, contenthashkey=True): + self._filehandle = filehandle + self._key = None + self._contenthashkey = contenthashkey + + def _get_encryption_key_content_hash(self): if self._key is None: f = self._filehandle enckey_hasher = key_hasher() @@ -886,20 +888,16 @@ 
class ConvergentUploadMixin: return defer.succeed(self._key) -class NonConvergentUploadMixin: - _key = None - - def get_encryption_key(self): + def _get_encryption_key_random(self): if self._key is None: self._key = os.urandom(16) return defer.succeed(self._key) - -class FileHandle(ConvergentUploadMixin, NoParameterPreferencesMixin): - implements(IUploadable) - - def __init__(self, filehandle): - self._filehandle = filehandle + def get_encryption_key(self): + if self._contenthashkey: + return self._get_encryption_key_content_hash() + else: + return self._get_encryption_key_random() def get_size(self): self._filehandle.seek(0,2) @@ -915,15 +913,15 @@ class FileHandle(ConvergentUploadMixin, NoParameterPreferencesMixin): pass class FileName(FileHandle): - def __init__(self, filename): - FileHandle.__init__(self, open(filename, "rb")) + def __init__(self, filename, contenthashkey=True): + FileHandle.__init__(self, open(filename, "rb"), contenthashkey=contenthashkey) def close(self): FileHandle.close(self) self._filehandle.close() class Data(FileHandle): - def __init__(self, data): - FileHandle.__init__(self, StringIO(data)) + def __init__(self, data, contenthashkey=False): + FileHandle.__init__(self, StringIO(data), contenthashkey=contenthashkey) class Uploader(service.MultiService): """I am a service that allows file uploading. 
@@ -977,9 +975,9 @@ class Uploader(service.MultiService): return d # utility functions - def upload_data(self, data): - return self.upload(Data(data)) - def upload_filename(self, filename): - return self.upload(FileName(filename)) - def upload_filehandle(self, filehandle): - return self.upload(FileHandle(filehandle)) + def upload_data(self, data, contenthashkey=True): + return self.upload(Data(data, contenthashkey=contenthashkey)) + def upload_filename(self, filename, contenthashkey=True): + return self.upload(FileName(filename, contenthashkey=contenthashkey)) + def upload_filehandle(self, filehandle, contenthashkey=True): + return self.upload(FileHandle(filehandle, contenthashkey=contenthashkey))