From: Brian Warner Date: Fri, 25 Jan 2008 04:51:34 +0000 (-0700) Subject: offloaded upload: avoid tail-recursion problem that would break large files X-Git-Tag: allmydata-tahoe-0.8.0~229 X-Git-Url: https://git.rkrishnan.org/simplejson/...?a=commitdiff_plain;h=09a99ce8220ebfe77bbb45376c06981abcde1d83;p=tahoe-lafs%2Ftahoe-lafs.git offloaded upload: avoid tail-recursion problem that would break large files --- diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py index 51a4c118..b1ea5702 100644 --- a/src/allmydata/upload.py +++ b/src/allmydata/upload.py @@ -5,6 +5,7 @@ from twisted.python import failure from twisted.internet import defer from twisted.application import service from foolscap import Referenceable +from foolscap import eventual from foolscap.logging import log from allmydata.util.hashutil import file_renewal_secret_hash, \ @@ -452,24 +453,52 @@ class EncryptAnUploadable: offset += this_segment + def read_encrypted(self, length, hash_only): # make sure our parameters have been set up first d = self.get_all_encoding_parameters() d.addCallback(lambda ignored: self._get_encryptor()) - # then fetch the plaintext - remaining = length + # then fetch and encrypt the plaintext. The unusual structure here + # (passing a Deferred *into* a function) is needed to avoid + # overflowing the stack: Deferreds don't optimize out tail recursion. + # We also pass in a list, to which _read_encrypted will append + # ciphertext. ciphertext = [] - while remaining: - # tolerate large length= values without consuming a lot of RAM - chunksize = min(remaining, self.CHUNKSIZE) - remaining -= chunksize - d.addCallback(lambda ignored: self.original.read(chunksize)) + d2 = defer.Deferred() + d.addCallback(lambda ignored: + self._read_encrypted(length, ciphertext, hash_only, d2)) + d.addCallback(lambda ignored: d2) + return d + + def _read_encrypted(self, remaining, ciphertext, hash_only, fire_when_done): + if not remaining: + fire_when_done.callback(ciphertext) + return None + # tolerate large length= values without consuming a lot of RAM by + # reading just a chunk (say 50kB) at a time. This only really matters + # when hash_only==True (i.e. resuming an interrupted upload), since + # that's the case where we will be skipping over a lot of data. + size = min(remaining, self.CHUNKSIZE) + remaining = remaining - size + # read a chunk of plaintext.. + d = defer.maybeDeferred(self.original.read, size) + # N.B.: if read() is synchronous, then since everything else is + # actually synchronous too, we'd blow the stack unless we stall for a + # tick. Once you accept a Deferred from IUploadable.read(), you must + # be prepared to have it fire immediately too. + d.addCallback(eventual.fireEventually) + def _good(plaintext): # and encrypt it.. # o/' over the fields we go, hashing all the way, sHA! sHA! sHA! o/' - d.addCallback(self._hash_and_encrypt_plaintext, hash_only) - d.addCallback(ciphertext.extend) - d.addCallback(lambda res: ciphertext) - return d + ct = self._hash_and_encrypt_plaintext(plaintext, hash_only) + ciphertext.extend(ct) + self._read_encrypted(remaining, ciphertext, hash_only, + fire_when_done) + def _err(why): + fire_when_done.errback(why) + d.addCallback(_good) + d.addErrback(_err) + return None def _hash_and_encrypt_plaintext(self, data, hash_only): assert isinstance(data, (tuple, list)), type(data) @@ -496,6 +525,7 @@ class EncryptAnUploadable: del chunk return cryptdata + def get_plaintext_hashtree_leaves(self, first, last, num_segments): if len(self._plaintext_segment_hashes) < num_segments: # close out the last one