From: Brian Warner Date: Thu, 7 Feb 2008 01:39:03 +0000 (-0700) Subject: upload: rework passing of default encoding parameters: move more responsibility into... X-Git-Tag: allmydata-tahoe-0.8.0~116 X-Git-Url: https://git.rkrishnan.org/provisioning?a=commitdiff_plain;h=81c5ceae1624a8aebbdee09450be0ac1c9e2ab87;p=tahoe-lafs%2Ftahoe-lafs.git upload: rework passing of default encoding parameters: move more responsibility into BaseUploadable --- diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index fcd773df..7b34365e 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -1085,6 +1085,9 @@ class IEncryptedUploadable(Interface): segment_size is larger than filesize, the difference must be stored as padding). + This usually passes through to the IUploadable method of the same + name. + The encoder strictly obeys the values returned by this method. To make an upload use non-default encoding parameters, you must arrange to control the values that this method returns. @@ -1132,34 +1135,46 @@ class IEncryptedUploadable(Interface): """Just like IUploadable.close().""" class IUploadable(Interface): + def set_default_encoding_parameters(params): + """Set the default encoding parameters, which must be a dict mapping + strings to ints. The meaningful keys are 'k', 'happy', 'n', and + 'max_segment_size'. These might have an influence on the final + encoding parameters returned by get_all_encoding_parameters(), if the + Uploadable doesn't have more specific preferences. + + This call is optional: if it is not used, the Uploadable will use + some built-in defaults. If used, this method must be called before + any other IUploadable methods to have any effect. + """ + def get_size(): """Return a Deferred that will fire with the length of the data to be uploaded, in bytes. This will be called before the data is actually used, to compute encoding parameters. """ - def get_maximum_segment_size(): - """Return a Deferred that fires with None or an integer. None - indicates that the Uploadable doesn't care about segment size, and - the IEncryptedUploadable wrapper will use a default of probably 1MB. - If provided, the integer will be used as the maximum segment size. - Larger values reduce hash overhead, smaller values reduce memory - footprint and cause data to be delivered in smaller pieces (which may - provide a smoother and more predictable download experience). + def get_all_encoding_parameters(): + """Return a Deferred that fires with a tuple of + (k,happy,n,segment_size). The segment_size will be used as-is, and + must match the following constraints: it must be a multiple of k, and + it shouldn't be unreasonably larger than the file size (if + segment_size is larger than filesize, the difference must be stored + as padding). - There are other constraints on the segment size (see - IEncryptedUploadable.get_encoding_parameters), so the final segment - size may be smaller than the one returned by this method. - """ + The relative values of k and n allow some IUploadables to request + better redundancy than others (in exchange for consuming more space + in the grid). - def get_encoding_parameters(): - """Return a Deferred that either fires with None or with a tuple of - (k,happy,n). None indicates that the Uploadable doesn't care how it - is encoded, causing the Uploader to use default k/happy/n (either - hard-coded or provided by the Introducer). + Larger values of segment_size reduce hash overhead, while smaller + values reduce memory footprint and cause data to be delivered in + smaller pieces (which may provide a smoother and more predictable + download experience). - This allows some IUploadables to request better redundancy than - others. + The encoder strictly obeys the values returned by this method. To + make an upload use non-default encoding parameters, you must arrange + to control the values that this method returns. One way to influence + them may be to call set_encoding_parameters() before calling + get_all_encoding_parameters(). """ def get_encryption_key(): diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py index 5fec02ad..eb3d50df 100644 --- a/src/allmydata/test/test_encode.py +++ b/src/allmydata/test/test_encode.py @@ -166,9 +166,11 @@ class Encode(unittest.TestCase): # force use of multiple segments e = encode.Encoder() u = upload.Data(data) - params = {"k": 25, "happy": 75, "n": 100, - "max_segment_size": max_segment_size} - eu = upload.EncryptAnUploadable(u, params) + u.max_segment_size = max_segment_size + u.encoding_param_k = 25 + u.encoding_param_happy = 75 + u.encoding_param_n = 100 + eu = upload.EncryptAnUploadable(u) d = e.set_encrypted_uploadable(eu) all_shareholders = [] @@ -300,9 +302,11 @@ class Roundtrip(unittest.TestCase): e = encode.Encoder() u = upload.Data(data) # force use of multiple segments by using a low max_segment_size - params = {"k": k, "happy": happy, "n": n, - "max_segment_size": max_segment_size} - eu = upload.EncryptAnUploadable(u, params) + u.max_segment_size = max_segment_size + u.encoding_param_k = k + u.encoding_param_happy = happy + u.encoding_param_n = n + eu = upload.EncryptAnUploadable(u) d = e.set_encrypted_uploadable(eu) shareholders = {} diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py index 8e43e02c..23fe41e6 100644 --- a/src/allmydata/upload.py +++ b/src/allmydata/upload.py @@ -331,10 +331,8 @@ class EncryptAnUploadable: implements(IEncryptedUploadable) CHUNKSIZE = 50*1000 - def __init__(self, original, default_encoding_parameters): + def __init__(self, original): self.original = IUploadable(original) - assert isinstance(default_encoding_parameters, dict) - self._default_encoding_parameters = default_encoding_parameters self._encryptor = None self._plaintext_hasher = plaintext_hasher() self._plaintext_segment_hasher = None @@ -360,38 +358,15 @@ class EncryptAnUploadable: def get_all_encoding_parameters(self): if self._encoding_parameters is not None: return defer.succeed(self._encoding_parameters) - d1 = self.get_size() - d2 = self.original.get_maximum_segment_size() - d3 = self.original.get_encoding_parameters() - d = defer.DeferredList([d1, d2, d3], - fireOnOneErrback=True, consumeErrors=True) - def _got_pieces(res): - file_size = res[0][1] - max_segsize = res[1][1] - params = res[2][1] - - defaults = self._default_encoding_parameters - if max_segsize is None: - max_segsize = defaults["max_segment_size"] - - if params is None: - k = defaults["k"] - happy = defaults["happy"] - n = defaults["n"] - else: - precondition(isinstance(params, tuple), params) - (k, happy, n) = params - - # for small files, shrink the segment size to avoid wasting space - segsize = min(max_segsize, file_size) - # this must be a multiple of 'required_shares'==k - segsize = mathutil.next_multiple(segsize, k) + d = self.original.get_all_encoding_parameters() + def _got(encoding_parameters): + (k, happy, n, segsize) = encoding_parameters self._segment_size = segsize # used by segment hashers - self._encoding_parameters = (k, happy, n, segsize) - self.log("my encoding parameters: %s" % - (self._encoding_parameters,), level=log.NOISY) - return self._encoding_parameters - d.addCallback(_got_pieces) + self._encoding_parameters = encoding_parameters + self.log("my encoding parameters: %s" % (encoding_parameters,), + level=log.NOISY) + return encoding_parameters + d.addCallback(_got) return d def _get_encryptor(self): @@ -555,10 +530,8 @@ class EncryptAnUploadable: class CHKUploader: peer_selector_class = Tahoe2PeerSelector - def __init__(self, client, default_encoding_parameters): + def __init__(self, client): self._client = client - assert isinstance(default_encoding_parameters, dict) - self._default_encoding_parameters = default_encoding_parameters self._log_number = self._client.log("CHKUploader starting") self._encoder = None self._results = UploadResults() @@ -580,7 +553,7 @@ class CHKUploader: uploadable = IUploadable(uploadable) self.log("starting upload of %s" % uploadable) - eu = EncryptAnUploadable(uploadable, self._default_encoding_parameters) + eu = EncryptAnUploadable(uploadable) d = self.start_encrypted(eu) def _uploaded(res): d1 = uploadable.get_encryption_key() @@ -708,9 +681,6 @@ class LiteralUploader: self._client = client self._results = UploadResults() - def set_params(self, encoding_parameters): - pass - def start(self, uploadable): uploadable = IUploadable(uploadable) d = uploadable.get_size() @@ -803,10 +773,8 @@ class RemoteEncryptedUploadable(Referenceable): class AssistedUploader: - def __init__(self, helper, default_encoding_parameters): + def __init__(self, helper): self._helper = helper - assert isinstance(default_encoding_parameters, dict) - self._default_encoding_parameters = default_encoding_parameters self._log_number = log.msg("AssistedUploader starting") def log(self, msg, parent=None, **kwargs): @@ -817,7 +785,7 @@ class AssistedUploader: def start(self, uploadable): self._started = time.time() u = IUploadable(uploadable) - eu = EncryptAnUploadable(u, self._default_encoding_parameters) + eu = EncryptAnUploadable(u) self._encuploadable = eu d = eu.get_size() d.addCallback(self._got_size) @@ -851,6 +819,7 @@ class AssistedUploader: def _got_storage_index(self, storage_index): self._storage_index = storage_index + def _contact_helper(self, res): now = self._time_contacting_helper_start = time.time() self._storage_index_elapsed = now - self._started @@ -858,6 +827,7 @@ class AssistedUploader: d = self._helper.callRemote("upload_chk", self._storage_index) d.addCallback(self._contacted_helper) return d + def _contacted_helper(self, (upload_results, upload_helper)): now = time.time() elapsed = now - self._time_contacting_helper_start @@ -918,15 +888,55 @@ class AssistedUploader: r.timings["total"] = now - self._started return r -class NoParameterPreferencesMixin: +class BaseUploadable: + default_max_segment_size = 1*MiB # overridden by max_segment_size + default_encoding_param_k = 3 # overridden by encoding_parameters + default_encoding_param_happy = 7 + default_encoding_param_n = 10 + max_segment_size = None - encoding_parameters = None - def get_maximum_segment_size(self): - return defer.succeed(self.max_segment_size) - def get_encoding_parameters(self): - return defer.succeed(self.encoding_parameters) + encoding_param_k = None + encoding_param_happy = None + encoding_param_n = None + + _all_encoding_parameters = None + + def set_default_encoding_parameters(self, default_params): + assert isinstance(default_params, dict) + for k,v in default_params.items(): + precondition(isinstance(k, str), k, v) + precondition(isinstance(v, int), k, v) + if "k" in default_params: + self.default_encoding_param_k = default_params["k"] + if "happy" in default_params: + self.default_encoding_param_happy = default_params["happy"] + if "n" in default_params: + self.default_encoding_param_n = default_params["n"] + if "max_segment_size" in default_params: + self.default_max_segment_size = default_params["max_segment_size"] + + def get_all_encoding_parameters(self): + if self._all_encoding_parameters: + return defer.succeed(self._all_encoding_parameters) + + max_segsize = self.max_segment_size or self.default_max_segment_size + k = self.encoding_param_k or self.default_encoding_param_k + happy = self.encoding_param_happy or self.default_encoding_param_happy + n = self.encoding_param_n or self.default_encoding_param_n + + d = self.get_size() + def _got_size(file_size): + # for small files, shrink the segment size to avoid wasting space + segsize = min(max_segsize, file_size) + # this must be a multiple of 'required_shares'==k + segsize = mathutil.next_multiple(segsize, k) + encoding_parameters = (k, happy, n, segsize) + self._all_encoding_parameters = encoding_parameters + return encoding_parameters + d.addCallback(_got_size) + return d -class FileHandle(NoParameterPreferencesMixin): +class FileHandle(BaseUploadable): implements(IUploadable) def __init__(self, filehandle, contenthashkey=True): @@ -1023,12 +1033,13 @@ class Uploader(service.MultiService): default_params = self.parent.get_encoding_parameters() precondition(isinstance(default_params, dict), default_params) precondition("max_segment_size" in default_params, default_params) + uploadable.set_default_encoding_parameters(default_params) if size <= self.URI_LIT_SIZE_THRESHOLD: uploader = LiteralUploader(self.parent) elif self._helper: - uploader = AssistedUploader(self._helper, default_params) + uploader = AssistedUploader(self._helper) else: - uploader = self.uploader_class(self.parent, default_params) + uploader = self.uploader_class(self.parent) return uploader.start(uploadable) d.addCallback(_got_size) def _done(res):