From: Zooko O'Whielacronx <zooko@zooko.com> Date: Mon, 24 Mar 2008 16:46:06 +0000 (-0700) Subject: use added secret to protect convergent encryption X-Git-Tag: allmydata-tahoe-1.0.0~17 X-Git-Url: https://git.rkrishnan.org/components/%22news.html//%22?a=commitdiff_plain;h=fc3bd0c9871c76744d962a75fd3763d041d006cc;p=tahoe-lafs%2Ftahoe-lafs.git use added secret to protect convergent encryption Now upload or encode methods take a required argument named "convergence" which can be either None, indicating no convergent encryption at all, or a string, which is the "added secret" to be mixed into the content hash key. If you want traditional convergent encryption behavior, set the added secret to be the empty string. This patch also renames "content hash key" to "convergent encryption" in argument names and variable names. (A different and larger renaming is needed in order to clarify that Tahoe supports immutable files which are not encrypted with content-hash-key (a.k.a. convergent) encryption.) This patch also changes a few unit tests to use non-convergent encryption, because it doesn't matter for what they are testing and non-convergent encryption is slightly faster. 
--- diff --git a/src/allmydata/client.py b/src/allmydata/client.py index ba857d77..94880ec2 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -34,6 +34,9 @@ PiB=1024*TiB class StubClient(Referenceable): implements(RIStubClient) +def _make_secret(): + return base32.b2a(os.urandom(hashutil.CRYPTO_VAL_SIZE)) + "\n" + class Client(node.Node, testutil.PollMixin): PORTNUMFILE = "client.port" STOREDIR = 'storage' @@ -103,9 +106,7 @@ class Client(node.Node, testutil.PollMixin): self.stats_provider = None def init_lease_secret(self): - def make_secret(): - return base32.b2a(os.urandom(hashutil.CRYPTO_VAL_SIZE)) + "\n" - secret_s = self.get_or_create_private_config("secret", make_secret) + secret_s = self.get_or_create_private_config("secret", _make_secret) self._lease_secret = base32.a2b(secret_s) def init_storage(self): @@ -151,6 +152,8 @@ class Client(node.Node, testutil.PollMixin): def init_client(self): helper_furl = self.get_config("helper.furl") + convergence_s = self.get_or_create_private_config('convergence', _make_secret) + self.convergence = base32.a2b(convergence_s) self.add_service(Uploader(helper_furl)) self.add_service(Downloader()) self.add_service(Checker()) diff --git a/src/allmydata/control.py b/src/allmydata/control.py index cd9d3cee..99a3139b 100644 --- a/src/allmydata/control.py +++ b/src/allmydata/control.py @@ -42,9 +42,9 @@ class ControlServer(Referenceable, service.Service, testutil.PollMixin): def remote_wait_for_client_connections(self, num_clients): return self.parent.debug_wait_for_client_connections(num_clients) - def remote_upload_from_file_to_uri(self, filename): + def remote_upload_from_file_to_uri(self, filename, convergence): uploader = self.parent.getServiceNamed("uploader") - u = upload.FileName(filename) + u = upload.FileName(filename, convergence=convergence) d = uploader.upload(u) d.addCallback(lambda results: results.uri) return d @@ -161,7 +161,7 @@ class SpeedTest: d1 = self._n.overwrite(data) 
d1.addCallback(lambda res: self._n.get_uri()) else: - up = upload.FileName(fn) + up = upload.FileName(fn, convergence=None) d1 = self.parent.upload(up) d1.addCallback(lambda results: results.uri) d1.addCallback(_record_uri, i) diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index d438de7b..190288a0 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -1578,11 +1578,14 @@ class RIControlClient(RemoteInterface): storage servers. """ - def upload_from_file_to_uri(filename=str): + def upload_from_file_to_uri(filename=str, convergence=ChoiceOf(None, StringConstraint(2**20))): """Upload a file to the grid. This accepts a filename (which must be - absolute) that points to a file on the node's local disk. The node - will read the contents of this file, upload it to the grid, then - return the URI at which it was uploaded. + absolute) that points to a file on the node's local disk. The node will + read the contents of this file, upload it to the grid, then return the + URI at which it was uploaded. If convergence is None then a random + encryption key will be used, else the plaintext will be hashed, then + that hash will be mixed together with the "convergence" string to form + the encryption key. """ return URI diff --git a/src/allmydata/test/check_memory.py b/src/allmydata/test/check_memory.py index de65de51..c01769e7 100644 --- a/src/allmydata/test/check_memory.py +++ b/src/allmydata/test/check_memory.py @@ -367,7 +367,7 @@ this file are ignored. 
if self.mode in ("upload", "upload-self"): files[name] = self.create_data(name, size) d = self.control_rref.callRemote("upload_from_file_to_uri", - files[name]) + files[name], convergence=None) def _done(uri): os.remove(files[name]) del files[name] diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py index 9d053dc8..dafc67af 100644 --- a/src/allmydata/test/test_dirnode.py +++ b/src/allmydata/test/test_dirnode.py @@ -120,7 +120,7 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin): def test_readonly(self): fileuri = make_chk_file_uri(1234) filenode = self.client.create_node_from_uri(fileuri) - uploadable = upload.Data("some data") + uploadable = upload.Data("some data", convergence="some convergence string") d = self.client.create_empty_dirnode() def _created(rw_dn): @@ -338,7 +338,7 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin): # hundrededths of a second. d.addCallback(self.stall, 0.1) d.addCallback(lambda res: n.add_file(u"timestamps", - upload.Data("stamp me"))) + upload.Data("stamp me", convergence="some convergence string"))) d.addCallback(self.stall, 0.1) def _stop(res): self._stop_timestamp = time.time() @@ -393,7 +393,7 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin): self.failUnlessEqual(sorted(children.keys()), sorted([u"child"]))) - uploadable = upload.Data("some data") + uploadable = upload.Data("some data", convergence="some convergence string") d.addCallback(lambda res: n.add_file(u"newfile", uploadable)) d.addCallback(lambda newnode: self.failUnless(IFileNode.providedBy(newnode))) @@ -406,7 +406,7 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin): self.failUnlessEqual(sorted(metadata.keys()), ["ctime", "mtime"])) - uploadable = upload.Data("some data") + uploadable = upload.Data("some data", convergence="some convergence string") d.addCallback(lambda res: n.add_file(u"newfile-metadata", uploadable, {"key": "value"})) diff --git a/src/allmydata/test/test_encode.py 
b/src/allmydata/test/test_encode.py index 4b729456..fea99e5a 100644 --- a/src/allmydata/test/test_encode.py +++ b/src/allmydata/test/test_encode.py @@ -168,7 +168,7 @@ class Encode(unittest.TestCase): data = make_data(datalen) # force use of multiple segments e = encode.Encoder() - u = upload.Data(data) + u = upload.Data(data, convergence="some convergence string") u.max_segment_size = max_segment_size u.encoding_param_k = 25 u.encoding_param_happy = 75 @@ -303,7 +303,7 @@ class Roundtrip(unittest.TestCase): if AVAILABLE_SHARES is None: AVAILABLE_SHARES = NUM_SHARES e = encode.Encoder() - u = upload.Data(data) + u = upload.Data(data, convergence="some convergence string") # force use of multiple segments by using a low max_segment_size u.max_segment_size = max_segment_size u.encoding_param_k = k diff --git a/src/allmydata/test/test_helper.py b/src/allmydata/test/test_helper.py index 9b0d0b0c..9470a0dc 100644 --- a/src/allmydata/test/test_helper.py +++ b/src/allmydata/test/test_helper.py @@ -72,8 +72,8 @@ def flush_but_dont_ignore(res): d.addCallback(_done) return d -def upload_data(uploader, data): - u = upload.Data(data) +def upload_data(uploader, data, convergence): + u = upload.Data(data, convergence=convergence) return uploader.upload(u) class AssistedUpload(unittest.TestCase): @@ -116,7 +116,7 @@ class AssistedUpload(unittest.TestCase): def _ready(res): assert u._helper - return upload_data(u, DATA) + return upload_data(u, DATA, convergence="some convergence string") d.addCallback(_ready) def _uploaded(results): uri = results.uri @@ -149,7 +149,7 @@ class AssistedUpload(unittest.TestCase): # this must be a multiple of 'required_shares'==k segsize = mathutil.next_multiple(segsize, k) - key = hashutil.content_hash_key_hash(k, n, segsize, DATA) + key = hashutil.convergence_hash(k, n, segsize, DATA, "test convergence string") assert len(key) == 16 encryptor = AES(key) SI = hashutil.storage_index_hash(key) @@ -169,7 +169,7 @@ class 
AssistedUpload(unittest.TestCase): def _ready(res): assert u._helper - return upload_data(u, DATA) + return upload_data(u, DATA, convergence="test convergence string") d.addCallback(_ready) def _uploaded(results): uri = results.uri @@ -200,7 +200,7 @@ class AssistedUpload(unittest.TestCase): def _ready(res): assert u._helper - return upload_data(u, DATA) + return upload_data(u, DATA, convergence="some convergence string") d.addCallback(_ready) def _uploaded(results): uri = results.uri diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 33a4bc06..77092d80 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -256,15 +256,15 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): def test_upload_and_download_random_key(self): self.basedir = "system/SystemTest/test_upload_and_download_random_key" - return self._test_upload_and_download(False) + return self._test_upload_and_download(convergence=None) test_upload_and_download_random_key.timeout = 4800 - def test_upload_and_download_content_hash_key(self): - self.basedir = "system/SystemTest/test_upload_and_download_CHK" - return self._test_upload_and_download(True) - test_upload_and_download_content_hash_key.timeout = 4800 + def test_upload_and_download_convergent(self): + self.basedir = "system/SystemTest/test_upload_and_download_convergent" + return self._test_upload_and_download(convergence="some convergence string") + test_upload_and_download_convergent.timeout = 4800 - def _test_upload_and_download(self, contenthashkey): + def _test_upload_and_download(self, convergence): # we use 4000 bytes of data, which will result in about 400k written # to disk among all our simulated nodes DATA = "Some data to upload\n" * 200 @@ -287,7 +287,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): # tail segment is not the same length as the others. 
This actualy # gets rounded up to 1025 to be a multiple of the number of # required shares (since we use 25 out of 100 FEC). - up = upload.Data(DATA, contenthashkey=contenthashkey) + up = upload.Data(DATA, convergence=convergence) up.max_segment_size = 1024 d1 = u.upload(up) return d1 @@ -301,12 +301,12 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): d.addCallback(_upload_done) def _upload_again(res): - # Upload again. If contenthashkey then this ought to be + # Upload again. If using convergent encryption then this ought to be # short-circuited, however with the way we currently generate URIs # (i.e. because they include the roothash), we have to do all of the # encoding work, and only get to save on the upload part. log.msg("UPLOADING AGAIN") - up = upload.Data(DATA, contenthashkey=contenthashkey) + up = upload.Data(DATA, convergence=convergence) up.max_segment_size = 1024 d1 = self.uploader.upload(up) d.addCallback(_upload_again) @@ -372,7 +372,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): HELPER_DATA = "Data that needs help to upload" * 1000 def _upload_with_helper(res): - u = upload.Data(HELPER_DATA, contenthashkey=contenthashkey) + u = upload.Data(HELPER_DATA, convergence=convergence) d = self.extra_node.upload(u) def _uploaded(results): uri = results.uri @@ -385,7 +385,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): d.addCallback(_upload_with_helper) def _upload_duplicate_with_helper(res): - u = upload.Data(HELPER_DATA, contenthashkey=contenthashkey) + u = upload.Data(HELPER_DATA, convergence=convergence) u.debug_stash_RemoteEncryptedUploadable = True d = self.extra_node.upload(u) def _uploaded(results): @@ -398,13 +398,13 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): "uploadable started uploading, should have been avoided") d.addCallback(_check) return d - if contenthashkey: + if convergence is not None: 
d.addCallback(_upload_duplicate_with_helper) def _upload_resumable(res): DATA = "Data that needs help to upload and gets interrupted" * 1000 - u1 = CountingDataUploadable(DATA, contenthashkey=contenthashkey) - u2 = CountingDataUploadable(DATA, contenthashkey=contenthashkey) + u1 = CountingDataUploadable(DATA, convergence=convergence) + u2 = CountingDataUploadable(DATA, convergence=convergence) # we interrupt the connection after about 5kB by shutting down # the helper, then restartingit. @@ -490,7 +490,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): # to store the key locally and re-use it on the next upload of # this file, which isn't a bad thing to do, but we currently # don't do it.) - if contenthashkey: + if convergence is not None: # Make sure we did not have to read the whole file the # second time around . self.failUnless(bytes_sent < len(DATA), @@ -510,9 +510,9 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): def _check(newdata): self.failUnlessEqual(newdata, DATA) - # If using a content hash key, then also check that the helper - # has removed the temp file from its directories. - if contenthashkey: + # If using convergent encryption, then also check that the + # helper has removed the temp file from its directories. 
+ if convergence is not None: basedir = os.path.join(self.getdir("client0"), "helper") files = os.listdir(os.path.join(basedir, "CHK_encoding")) self.failUnlessEqual(files, []) @@ -890,7 +890,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): return d def _do_publish1(self, res): - ut = upload.Data(self.data) + ut = upload.Data(self.data, convergence=None) c0 = self.clients[0] d = c0.create_empty_dirnode() def _made_root(new_dirnode): @@ -910,7 +910,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): return d def _do_publish2(self, res): - ut = upload.Data(self.data) + ut = upload.Data(self.data, convergence=None) d = self._subdir1_node.create_empty_directory(u"subdir2") d.addCallback(lambda subdir2: subdir2.add_file(u"mydata992", ut)) return d @@ -927,7 +927,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): def _do_publish_private(self, res): self.smalldata = "sssh, very secret stuff" - ut = upload.Data(self.smalldata) + ut = upload.Data(self.smalldata, convergence=None) d = self.clients[0].create_empty_dirnode() d.addCallback(self.log, "GOT private directory") def _got_new_dir(privnode): @@ -1009,7 +1009,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): d1.addCallback(lambda res: self.shouldFail2(NotMutableError, "mkdir(nope)", None, dirnode.create_empty_directory, u"nope")) d1.addCallback(self.log, "doing add_file(ro)") - ut = upload.Data("I will disappear, unrecorded and unobserved. The tragedy of my demise is made more poignant by its silence, but this beauty is not for you to ever know.") + ut = upload.Data("I will disappear, unrecorded and unobserved. 
The tragedy of my demise is made more poignant by its silence, but this beauty is not for you to ever know.", convergence="99i-p1x4-xd4-18yc-ywt-87uu-msu-zo -- completely and totally unguessable string (unless you read this)") d1.addCallback(lambda res: self.shouldFail2(NotMutableError, "add_file(nope)", None, dirnode.add_file, u"hope", ut)) d1.addCallback(self.log, "doing get(ro)") @@ -1345,7 +1345,7 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, unittest.TestCase): d.addCallback(self._test_control2, control_furl_file) return d def _test_control2(self, rref, filename): - d = rref.callRemote("upload_from_file_to_uri", filename) + d = rref.callRemote("upload_from_file_to_uri", filename, convergence=None) downfile = os.path.join(self.basedir, "control.downfile") d.addCallback(lambda uri: rref.callRemote("download_from_uri_to_file", diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py index 91f0e5a5..e15731a3 100644 --- a/src/allmydata/test/test_upload.py +++ b/src/allmydata/test/test_upload.py @@ -25,14 +25,14 @@ class Uploadable(unittest.TestCase): self.failUnlessEqual(s, expected) def test_filehandle_random_key(self): - return self._test_filehandle(True) + return self._test_filehandle(convergence=None) - def test_filehandle_content_hash_key(self): - return self._test_filehandle(False) + def test_filehandle_convergent_encryption(self): + return self._test_filehandle(convergence="some convergence string") - def _test_filehandle(self, randomkey): + def _test_filehandle(self, convergence): s = StringIO("a"*41) - u = upload.FileHandle(s, randomkey) + u = upload.FileHandle(s, convergence=convergence) d = u.get_size() d.addCallback(self.failUnlessEqual, 41) d.addCallback(lambda res: u.read(1)) @@ -50,7 +50,7 @@ class Uploadable(unittest.TestCase): f = open(fn, "w") f.write("a"*41) f.close() - u = upload.FileName(fn) + u = upload.FileName(fn, convergence=None) d = u.get_size() d.addCallback(self.failUnlessEqual, 41) 
d.addCallback(lambda res: u.read(1)) @@ -62,7 +62,7 @@ class Uploadable(unittest.TestCase): def test_data(self): s = "a"*41 - u = upload.Data(s) + u = upload.Data(s, convergence=None) d = u.get_size() d.addCallback(self.failUnlessEqual, 41) d.addCallback(lambda res: u.read(1)) @@ -169,13 +169,13 @@ SIZE_SMALL = 16 SIZE_LARGE = len(DATA) def upload_data(uploader, data): - u = upload.Data(data) + u = upload.Data(data, convergence=None) return uploader.upload(u) def upload_filename(uploader, filename): - u = upload.FileName(filename) + u = upload.FileName(filename, convergence=None) return uploader.upload(u) def upload_filehandle(uploader, fh): - u = upload.FileHandle(fh) + u = upload.FileHandle(fh, convergence=None) return uploader.upload(u) class GoodServer(unittest.TestCase): @@ -444,38 +444,57 @@ class PeerSelection(unittest.TestCase): class StorageIndex(unittest.TestCase): def test_params_must_matter(self): DATA = "I am some data" - u = upload.Data(DATA) + u = upload.Data(DATA, convergence="") eu = upload.EncryptAnUploadable(u) d1 = eu.get_storage_index() # CHK means the same data should encrypt the same way - u = upload.Data(DATA) + u = upload.Data(DATA, convergence="") eu = upload.EncryptAnUploadable(u) d1a = eu.get_storage_index() - # but if we change the encoding parameters, it should be different - u = upload.Data(DATA) + # but if we use a different convergence string it should be different + u = upload.Data(DATA, convergence="wheee!") + eu = upload.EncryptAnUploadable(u) + d1salt1 = eu.get_storage_index() + + # and if we add yet a different convergence it should be different again + u = upload.Data(DATA, convergence="NOT wheee!") + eu = upload.EncryptAnUploadable(u) + d1salt2 = eu.get_storage_index() + + # and if we use the first string again it should be the same as last time + u = upload.Data(DATA, convergence="wheee!") + eu = upload.EncryptAnUploadable(u) + d1salt1a = eu.get_storage_index() + + # and if we change the encoding parameters, it should be 
different (from the same convergence string with different encoding parameters) + u = upload.Data(DATA, convergence="") u.encoding_param_k = u.default_encoding_param_k + 1 eu = upload.EncryptAnUploadable(u) d2 = eu.get_storage_index() # and if we use a random key, it should be different than the CHK - u = upload.Data(DATA, contenthashkey=False) + u = upload.Data(DATA, convergence=None) eu = upload.EncryptAnUploadable(u) d3 = eu.get_storage_index() # and different from another instance - u = upload.Data(DATA, contenthashkey=False) + u = upload.Data(DATA, convergence=None) eu = upload.EncryptAnUploadable(u) d4 = eu.get_storage_index() - d = DeferredListShouldSucceed([d1,d1a,d2,d3,d4]) + d = DeferredListShouldSucceed([d1,d1a,d1salt1,d1salt2,d1salt1a,d2,d3,d4]) def _done(res): - si1, si1a, si2, si3, si4 = res + si1, si1a, si1salt1, si1salt2, si1salt1a, si2, si3, si4 = res self.failUnlessEqual(si1, si1a) self.failIfEqual(si1, si2) self.failIfEqual(si1, si3) self.failIfEqual(si1, si4) self.failIfEqual(si3, si4) + self.failIfEqual(si1salt1, si1) + self.failIfEqual(si1salt1, si1salt2) + self.failIfEqual(si1salt2, si1) + self.failUnlessEqual(si1salt1, si1salt1a) d.addCallback(_done) return d diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py index d80f0903..98d6da71 100644 --- a/src/allmydata/test/test_util.py +++ b/src/allmydata/test/test_util.py @@ -408,8 +408,8 @@ class HashUtilTests(unittest.TestCase): self.failUnlessEqual(h1, h2) def test_chk(self): - h1 = hashutil.content_hash_key_hash(3, 10, 1000, "data") - h2 = hashutil.content_hash_key_hasher(3, 10, 1000) + h1 = hashutil.convergence_hash(3, 10, 1000, "data", "secret") + h2 = hashutil.convergence_hasher(3, 10, 1000, "secret") h2.update("data") h2 = h2.digest() self.failUnlessEqual(h1, h2) diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index 5711af92..a55b9efb 100644 --- a/src/allmydata/test/test_web.py +++ b/src/allmydata/test/test_web.py @@ -35,6 +35,7 @@ 
class FakeClient(service.MultiService): introducer_client = FakeIntroducerClient() _all_upload_status = [upload.UploadStatus()] _all_download_status = [download.DownloadStatus()] + convergence = "some random string" def connected_to_introducer(self): return False diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py index a44ecdb8..384aea2b 100644 --- a/src/allmydata/upload.py +++ b/src/allmydata/upload.py @@ -11,7 +11,7 @@ from foolscap.logging import log from allmydata.util.hashutil import file_renewal_secret_hash, \ file_cancel_secret_hash, bucket_renewal_secret_hash, \ bucket_cancel_secret_hash, plaintext_hasher, \ - storage_index_hash, plaintext_segment_hasher, content_hash_key_hasher + storage_index_hash, plaintext_segment_hasher, convergence_hasher from allmydata import encode, storage, hashtree, uri from allmydata.util import base32, idlib, mathutil from allmydata.util.assertutil import precondition @@ -1084,13 +1084,20 @@ class BaseUploadable: class FileHandle(BaseUploadable): implements(IUploadable) - def __init__(self, filehandle, contenthashkey=True): + def __init__(self, filehandle, convergence): + """ + Upload the data from the filehandle. If convergence is None then a + random encryption key will be used, else the plaintext will be hashed, + then the hash will be hashed together with the string in the + "convergence" argument to form the encryption key." 
+ """ + assert convergence is None or isinstance(convergence, str), (convergence, type(convergence)) self._filehandle = filehandle self._key = None - self._contenthashkey = contenthashkey + self.convergence = convergence self._size = None - def _get_encryption_key_content_hash(self): + def _get_encryption_key_convergent(self): if self._key is not None: return defer.succeed(self._key) @@ -1100,7 +1107,7 @@ class FileHandle(BaseUploadable): def _got(params): k, happy, n, segsize = params f = self._filehandle - enckey_hasher = content_hash_key_hasher(k, n, segsize) + enckey_hasher = convergence_hasher(k, n, segsize, self.convergence) f.seek(0) BLOCKSIZE = 64*1024 bytes_read = 0 @@ -1131,8 +1138,8 @@ class FileHandle(BaseUploadable): return defer.succeed(self._key) def get_encryption_key(self): - if self._contenthashkey: - return self._get_encryption_key_content_hash() + if self.convergence is not None: + return self._get_encryption_key_convergent() else: return self._get_encryption_key_random() @@ -1153,15 +1160,29 @@ class FileHandle(BaseUploadable): pass class FileName(FileHandle): - def __init__(self, filename, contenthashkey=True): - FileHandle.__init__(self, open(filename, "rb"), contenthashkey=contenthashkey) + def __init__(self, filename, convergence): + """ + Upload the data from the filename. If convergence is None then a + random encryption key will be used, else the plaintext will be hashed, + then the hash will be hashed together with the string in the + "convergence" argument to form the encryption key." 
+ """ + assert convergence is None or isinstance(convergence, str), (convergence, type(convergence)) + FileHandle.__init__(self, open(filename, "rb"), convergence=convergence) def close(self): FileHandle.close(self) self._filehandle.close() class Data(FileHandle): - def __init__(self, data, contenthashkey=True): - FileHandle.__init__(self, StringIO(data), contenthashkey=contenthashkey) + def __init__(self, data, convergence): + """ + Upload the data from the data argument. If convergence is None then a + random encryption key will be used, else the plaintext will be hashed, + then the hash will be hashed together with the string in the + "convergence" argument to form the encryption key." + """ + assert convergence is None or isinstance(convergence, str), (convergence, type(convergence)) + FileHandle.__init__(self, StringIO(data), convergence=convergence) class Uploader(service.MultiService): """I am a service that allows file uploading. I am a service-child of the diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py index add0a0f1..4e762ecf 100644 --- a/src/allmydata/util/hashutil.py +++ b/src/allmydata/util/hashutil.py @@ -68,7 +68,7 @@ PLAINTEXT_TAG = "allmydata_plaintext_v1" CIPHERTEXT_TAG = "allmydata_crypttext_v1" CIPHERTEXT_SEGMENT_TAG = "allmydata_crypttext_segment_v1" PLAINTEXT_SEGMENT_TAG = "allmydata_plaintext_segment_v1" -CONTENT_HASH_KEY_TAG = "allmydata_immutable_content_to_key_v1+" +CONVERGENT_ENCRYPTION_TAG = "allmydata_immutable_content_to_key_with_added_secret_v1+" CLIENT_RENEWAL_TAG = "allmydata_client_renewal_secret_v1" CLIENT_CANCEL_TAG = "allmydata_client_cancel_secret_v1" @@ -91,9 +91,9 @@ DIRNODE_CHILD_WRITECAP_TAG = "allmydata_mutable_writekey_and_salt_to_dirnode_chi def storage_index_hash(key): # storage index is truncated to 128 bits (16 bytes). We're only hashing a - # 16-byte value to get it, so there's no point in using a larger value. 
- # We use this same tagged hash to go from encryption key to storage index - # for random-keyed immutable files and content-hash-keyed immutabie + # 16-byte value to get it, so there's no point in using a larger value. We + # use this same tagged hash to go from encryption key to storage index for + # random-keyed immutable files and convergent-encryption immutabie # files. Mutable files use ssk_storage_index_hash(). return tagged_hash(STORAGE_INDEX_TAG, key, 16) @@ -129,15 +129,14 @@ def plaintext_segment_hasher(): KEYLEN = 16 -def content_hash_key_hash(k, n, segsize, data): - # This is defined to return a 16-byte AES key. +def convergence_hash(k, n, segsize, data, convergence): + h = convergence_hasher(k, n, segsize, convergence) + h.update(data) + return h.digest() +def convergence_hasher(k, n, segsize, convergence): + assert isinstance(convergence, str) param_tag = netstring("%d,%d,%d" % (k, n, segsize)) - tag = CONTENT_HASH_KEY_TAG + param_tag - h = tagged_hash(tag, data, KEYLEN) - return h -def content_hash_key_hasher(k, n, segsize): - param_tag = netstring("%d,%d,%d" % (k, n, segsize)) - tag = CONTENT_HASH_KEY_TAG + param_tag + tag = CONVERGENT_ENCRYPTION_TAG + netstring(convergence) + param_tag return tagged_hasher(tag, KEYLEN) def random_key(): diff --git a/src/allmydata/web/unlinked.py b/src/allmydata/web/unlinked.py index 696ee847..7e4ae655 100644 --- a/src/allmydata/web/unlinked.py +++ b/src/allmydata/web/unlinked.py @@ -14,8 +14,10 @@ class UnlinkedPUTCHKUploader(rend.Page): # "PUT /uri", to create an unlinked file. This is like PUT but # without the associated set_uri. 
- uploadable = FileHandle(req.content) - d = IClient(ctx).upload(uploadable) + client = IClient(ctx) + + uploadable = FileHandle(req.content, client.convergence) + d = client.upload(uploadable) d.addCallback(lambda results: results.uri) # that fires with the URI of the new file return d @@ -52,7 +54,7 @@ class UnlinkedPOSTCHKUploader(status.UploadResultsRendererMixin, rend.Page): assert req.method == "POST" self._done = observer.OneShotObserverList() fileobj = req.fields["file"].file - uploadable = FileHandle(fileobj) + uploadable = FileHandle(fileobj, client.convergence) d = client.upload(uploadable) d.addBoth(self._done.fire) diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py index 35834a29..3b6485d1 100644 --- a/src/allmydata/webish.py +++ b/src/allmydata/webish.py @@ -867,7 +867,7 @@ class POSTHandler(rend.Page): return d2 d.addCallback(_checked) else: - uploadable = FileHandle(contents.file) + uploadable = FileHandle(contents.file, convergence=client.convergence) d = self._check_replacement(name) d.addCallback(lambda res: self._node.add_file(name, uploadable)) def _done(newnode): @@ -1047,6 +1047,7 @@ class PUTHandler(rend.Page): self._replace = replace def renderHTTP(self, ctx): + client = IClient(ctx) req = inevow.IRequest(ctx) t = self._t localfile = self._localfile @@ -1063,18 +1064,18 @@ class PUTHandler(rend.Page): d.addCallback(self._check_replacement, name, self._replace) if t == "upload": if localfile: - d.addCallback(self._upload_localfile, localfile, name) + d.addCallback(self._upload_localfile, localfile, name, convergence=client.convergence) else: # localdir # take the last step d.addCallback(self._get_or_create_directories, self._path[-1:]) - d.addCallback(self._upload_localdir, localdir) + d.addCallback(self._upload_localdir, localdir, convergence=client.convergence) elif t == "uri": d.addCallback(self._attach_uri, req.content, name) elif t == "mkdir": d.addCallback(self._mkdir, name) else: - d.addCallback(self._upload_file, 
req.content, name) + d.addCallback(self._upload_file, req.content, name, convergence=client.convergence) def _transform_error(f): errors = {BlockingFileError: http.BAD_REQUEST, @@ -1126,8 +1127,8 @@ class PUTHandler(rend.Page): d.addCallback(_done) return d - def _upload_file(self, node, contents, name): - uploadable = FileHandle(contents) + def _upload_file(self, node, contents, name, convergence): + uploadable = FileHandle(contents, convergence=convergence) d = node.add_file(name, uploadable) def _done(filenode): log.msg("webish upload complete", @@ -1136,8 +1137,8 @@ class PUTHandler(rend.Page): d.addCallback(_done) return d - def _upload_localfile(self, node, localfile, name): - uploadable = FileName(localfile) + def _upload_localfile(self, node, localfile, name, convergence): + uploadable = FileName(localfile, convergence=convergence) d = node.add_file(name, uploadable) d.addCallback(lambda filenode: filenode.get_uri()) return d @@ -1150,7 +1151,7 @@ class PUTHandler(rend.Page): d.addCallback(_done) return d - def _upload_localdir(self, node, localdir): + def _upload_localdir(self, node, localdir, convergence): # build up a list of files to upload. TODO: for now, these files and # directories must have UTF-8 encoded filenames: anything else will # cause the upload to break. @@ -1179,7 +1180,7 @@ class PUTHandler(rend.Page): if dir: d.addCallback(self._makedir, node, dir) for f in all_files: - d.addCallback(self._upload_one_file, node, localdir, f) + d.addCallback(self._upload_one_file, node, localdir, f, convergence=convergence) return d def _makedir(self, res, node, dir): @@ -1191,12 +1192,12 @@ class PUTHandler(rend.Page): d.addCallback(lambda parent: parent.create_empty_directory(dir[-1])) return d - def _upload_one_file(self, res, node, localdir, f): + def _upload_one_file(self, res, node, localdir, f, convergence): # get the parent. We can be sure this exists because we already # went through and created all the directories we require. 
localfile = os.path.join(localdir, *f) d = node.get_child_at_path(f[:-1]) - d.addCallback(self._upload_localfile, localfile, f[-1]) + d.addCallback(self._upload_localfile, localfile, f[-1], convergence=convergence) return d