From: Brian Warner Date: Fri, 12 Jan 2007 03:51:27 +0000 (-0700) Subject: use the word 'codec' for erasure coding, for now. 'encode' is used for file-level... X-Git-Tag: tahoe_v0.1.0-0-UNSTABLE~377 X-Git-Url: https://git.rkrishnan.org/?a=commitdiff_plain;h=417c17755bdea0452088ff2cf4e2847e3e0ee6c2;p=tahoe-lafs%2Ftahoe-lafs.git use the word 'codec' for erasure coding, for now. 'encode' is used for file-level segmentation/hashing --- diff --git a/src/allmydata/codec.py b/src/allmydata/codec.py new file mode 100644 index 00000000..d337689c --- /dev/null +++ b/src/allmydata/codec.py @@ -0,0 +1,219 @@ +# -*- test-case-name: allmydata.test.test_encode_share -*- + +from zope.interface import implements +from twisted.internet import defer +import sha +from allmydata.util import idlib, mathutil +from allmydata.interfaces import IEncoder, IDecoder +from allmydata.py_ecc import rs_code + +def netstring(s): + return "%d:%s," % (len(s), s) + +class ReplicatingEncoder(object): + implements(IEncoder) + ENCODER_TYPE = 0 + + def set_params(self, data_size, required_shares, total_shares): + self.data_size = data_size + self.required_shares = required_shares + self.total_shares = total_shares + + def get_encoder_type(self): + return self.ENCODER_TYPE + + def get_serialized_params(self): + return "%d" % self.required_shares + + def get_share_size(self): + return self.data_size + + def encode(self, data): + shares = [(i,data) for i in range(self.total_shares)] + return defer.succeed(shares) + +class ReplicatingDecoder(object): + implements(IDecoder) + + def set_serialized_params(self, params): + self.required_shares = int(params) + + def decode(self, some_shares): + assert len(some_shares) >= self.required_shares + data = some_shares[0][1] + return defer.succeed(data) + + +class Encoder(object): + def __init__(self, infile, m): + self.infile = infile + self.k = 2 + self.m = m + + def do_upload(self, landlords): + dl = [] + data = self.infile.read() + for (peerid, bucket_num, remotebucket) in landlords: + dl.append(remotebucket.callRemote('write', data)) + dl.append(remotebucket.callRemote('close')) + + return defer.DeferredList(dl) + +class Decoder(object): + def __init__(self, outfile, k, m, verifierid): + self.outfile = outfile + self.k = 2 + self.m = m + self._verifierid = verifierid + + def start(self, buckets): + assert len(buckets) >= self.k + dl = [] + for bucketnum, bucket in buckets[:self.k]: + d = bucket.callRemote("read") + dl.append(d) + d2 = defer.DeferredList(dl) + d2.addCallback(self._got_all_data) + return d2 + + def _got_all_data(self, resultslist): + shares = [results for success,results in resultslist if success] + assert len(shares) >= self.k + # here's where the Reed-Solomon magic takes place + self.outfile.write(shares[0]) + hasher = sha.new(netstring("allmydata_v1_verifierid")) + hasher.update(shares[0]) + vid = hasher.digest() + if self._verifierid: + assert self._verifierid == vid, "%s != %s" % (idlib.b2a(self._verifierid), idlib.b2a(vid)) + + +class PyRSEncoder(object): + ENCODER_TYPE = 1 + + # we will break the data into vectors in which each element is a single + # byte (i.e. a single number from 0 to 255), and the length of the vector + # is equal to the number of required_shares. We use padding to make the + # last chunk of data long enough to match, and we record the data_size in + # the serialized parameters to strip this padding out on the receiving + # end. + + # TODO: this will write a 733kB file called 'ffield.lut.8' in the current + # directory the first time it is run, to cache the lookup table for later + # use. It appears to take about 15 seconds to create this the first time, + # and about 0.5s to load it in each time afterwards. Make sure this file + # winds up somewhere reasonable. + + # TODO: the encoder/decoder RSCode object depends upon the number of + # required/total shares, but not upon the data. We could probably save a + # lot of initialization time by caching a single instance and using it + # any time we use the same required/total share numbers (which will + # probably be always). + + # on my workstation (fluxx, a 3.5GHz Athlon), this encodes data at a rate + # of 6.7kBps. Zooko's mom's 1.8GHz G5 got 2.2kBps . slave3 took 40s to + # construct the LUT and encodes at 1.5kBps, and for some reason took more + # than 20 minutes to run the test_encode_share tests, so I disabled most + # of them. (uh, hello, it's running figleaf) + + def set_params(self, data_size, required_shares, total_shares): + assert required_shares <= total_shares + self.data_size = data_size + self.required_shares = required_shares + self.total_shares = total_shares + self.chunk_size = required_shares + self.num_chunks = mathutil.div_ceil(data_size, self.chunk_size) + self.last_chunk_padding = mathutil.pad_size(data_size, required_shares) + self.share_size = self.num_chunks + self.encoder = rs_code.RSCode(total_shares, required_shares, 8) + + def get_encoder_type(self): + return self.ENCODER_TYPE + + def get_serialized_params(self): + return "%d:%d:%d" % (self.data_size, self.required_shares, + self.total_shares) + + def get_share_size(self): + return self.share_size + + def encode(self, data): + share_data = [ [] for i in range(self.total_shares)] + for i in range(self.num_chunks): + # we take self.chunk_size bytes from the input string, and + # turn it into self.total_shares bytes. + offset = i*self.chunk_size + # Note string slices aren't an efficient way to use memory, so + # when we upgrade from the unusably slow py_ecc prototype to a + # fast ECC we should also fix up this memory usage (by using the + # array module). + chunk = data[offset:offset+self.chunk_size] + if i == self.num_chunks-1: + chunk = chunk + "\x00"*self.last_chunk_padding + assert len(chunk) == self.chunk_size + input_vector = [ord(x) for x in chunk] + assert len(input_vector) == self.required_shares + output_vector = self.encoder.Encode(input_vector) + assert len(output_vector) == self.total_shares + for i2,out in enumerate(output_vector): + share_data[i2].append(chr(out)) + + shares = [ (i, "".join(share_data[i])) + for i in range(self.total_shares) ] + return defer.succeed(shares) + +class PyRSDecoder(object): + + def set_serialized_params(self, params): + pieces = params.split(":") + self.data_size = int(pieces[0]) + self.required_shares = int(pieces[1]) + self.total_shares = int(pieces[2]) + + self.chunk_size = self.required_shares + self.num_chunks = mathutil.div_ceil(self.data_size, self.chunk_size) + self.last_chunk_padding = mathutil.pad_size(self.data_size, + self.required_shares) + self.share_size = self.num_chunks + self.encoder = rs_code.RSCode(self.total_shares, self.required_shares, + 8) + if False: + print "chunk_size: %d" % self.chunk_size + print "num_chunks: %d" % self.num_chunks + print "last_chunk_padding: %d" % self.last_chunk_padding + print "share_size: %d" % self.share_size + print "total_shares: %d" % self.total_shares + print "required_shares: %d" % self.required_shares + + def decode(self, some_shares): + chunk_size = self.chunk_size + assert len(some_shares) >= self.required_shares + chunks = [] + have_shares = {} + for share_num, share_data in some_shares: + have_shares[share_num] = share_data + for i in range(self.share_size): + # this takes one byte from each share, and turns the combination + # into a single chunk + received_vector = [] + for j in range(self.total_shares): + share = have_shares.get(j) + if share is not None: + received_vector.append(ord(share[i])) + else: + received_vector.append(None) + decoded_vector = self.encoder.DecodeImmediate(received_vector) + assert len(decoded_vector) == self.chunk_size + chunk = "".join([chr(x) for x in decoded_vector]) + chunks.append(chunk) + data = "".join(chunks) + if self.last_chunk_padding: + data = data[:-self.last_chunk_padding] + assert len(data) == self.data_size + return defer.succeed(data) + + +all_encoders = { + ReplicatingEncoder.ENCODER_TYPE: (ReplicatingEncoder, ReplicatingDecoder), + PyRSEncoder.ENCODER_TYPE: (PyRSEncoder, PyRSDecoder), + } diff --git a/src/allmydata/download.py b/src/allmydata/download.py index a344d039..8646f788 100644 --- a/src/allmydata/download.py +++ b/src/allmydata/download.py @@ -6,7 +6,7 @@ from twisted.internet import defer from twisted.application import service from allmydata.util import idlib -from allmydata import encode +from allmydata import codec class NotEnoughPeersError(Exception): pass @@ -34,8 +34,8 @@ class FileDownloader: n = self._shares = 4 k = self._desired_shares = 2 self._target.open() - self._decoder = encode.Decoder(self._target, k, n, - self._verifierid) + self._decoder = codec.Decoder(self._target, k, n, + self._verifierid) def start(self): log.msg("starting download") diff --git a/src/allmydata/encode.py b/src/allmydata/encode.py deleted file mode 100644 index d337689c..00000000 --- a/src/allmydata/encode.py +++ /dev/null @@ -1,219 +0,0 @@ -# -*- test-case-name: allmydata.test.test_encode_share -*- - -from zope.interface import implements -from twisted.internet import defer -import sha -from allmydata.util import idlib, mathutil -from allmydata.interfaces import IEncoder, IDecoder -from allmydata.py_ecc import rs_code - -def netstring(s): - return "%d:%s," % (len(s), s) - -class ReplicatingEncoder(object): - implements(IEncoder) - ENCODER_TYPE = 0 - - def set_params(self, data_size, required_shares, total_shares): - self.data_size = data_size - self.required_shares = required_shares - self.total_shares = total_shares - - def get_encoder_type(self): - return self.ENCODER_TYPE - - def get_serialized_params(self): - return "%d" % self.required_shares - - def get_share_size(self): - return self.data_size - - def encode(self, data): - shares = [(i,data) for i in range(self.total_shares)] - return defer.succeed(shares) - -class ReplicatingDecoder(object): - implements(IDecoder) - - def set_serialized_params(self, params): - self.required_shares = int(params) - - def decode(self, some_shares): - assert len(some_shares) >= self.required_shares - data = some_shares[0][1] - return defer.succeed(data) - - -class Encoder(object): - def __init__(self, infile, m): - self.infile = infile - self.k = 2 - self.m = m - - def do_upload(self, landlords): - dl = [] - data = self.infile.read() - for (peerid, bucket_num, remotebucket) in landlords: - dl.append(remotebucket.callRemote('write', data)) - dl.append(remotebucket.callRemote('close')) - - return defer.DeferredList(dl) - -class Decoder(object): - def __init__(self, outfile, k, m, verifierid): - self.outfile = outfile - self.k = 2 - self.m = m - self._verifierid = verifierid - - def start(self, buckets): - assert len(buckets) >= self.k - dl = [] - for bucketnum, bucket in buckets[:self.k]: - d = bucket.callRemote("read") - dl.append(d) - d2 = defer.DeferredList(dl) - d2.addCallback(self._got_all_data) - return d2 - - def _got_all_data(self, resultslist): - shares = [results for success,results in resultslist if success] - assert len(shares) >= self.k - # here's where the Reed-Solomon magic takes place - self.outfile.write(shares[0]) - hasher = sha.new(netstring("allmydata_v1_verifierid")) - hasher.update(shares[0]) - vid = hasher.digest() - if self._verifierid: - assert self._verifierid == vid, "%s != %s" % (idlib.b2a(self._verifierid), idlib.b2a(vid)) - - -class PyRSEncoder(object): - ENCODER_TYPE = 1 - - # we will break the data into vectors in which each element is a single - # byte (i.e. a single number from 0 to 255), and the length of the vector - # is equal to the number of required_shares. We use padding to make the - # last chunk of data long enough to match, and we record the data_size in - # the serialized parameters to strip this padding out on the receiving - # end. - - # TODO: this will write a 733kB file called 'ffield.lut.8' in the current - # directory the first time it is run, to cache the lookup table for later - # use. It appears to take about 15 seconds to create this the first time, - # and about 0.5s to load it in each time afterwards. Make sure this file - # winds up somewhere reasonable. - - # TODO: the encoder/decoder RSCode object depends upon the number of - # required/total shares, but not upon the data. We could probably save a - # lot of initialization time by caching a single instance and using it - # any time we use the same required/total share numbers (which will - # probably be always). - - # on my workstation (fluxx, a 3.5GHz Athlon), this encodes data at a rate - # of 6.7kBps. Zooko's mom's 1.8GHz G5 got 2.2kBps . slave3 took 40s to - # construct the LUT and encodes at 1.5kBps, and for some reason took more - # than 20 minutes to run the test_encode_share tests, so I disabled most - # of them. (uh, hello, it's running figleaf) - - def set_params(self, data_size, required_shares, total_shares): - assert required_shares <= total_shares - self.data_size = data_size - self.required_shares = required_shares - self.total_shares = total_shares - self.chunk_size = required_shares - self.num_chunks = mathutil.div_ceil(data_size, self.chunk_size) - self.last_chunk_padding = mathutil.pad_size(data_size, required_shares) - self.share_size = self.num_chunks - self.encoder = rs_code.RSCode(total_shares, required_shares, 8) - - def get_encoder_type(self): - return self.ENCODER_TYPE - - def get_serialized_params(self): - return "%d:%d:%d" % (self.data_size, self.required_shares, - self.total_shares) - - def get_share_size(self): - return self.share_size - - def encode(self, data): - share_data = [ [] for i in range(self.total_shares)] - for i in range(self.num_chunks): - # we take self.chunk_size bytes from the input string, and - # turn it into self.total_shares bytes. - offset = i*self.chunk_size - # Note string slices aren't an efficient way to use memory, so - # when we upgrade from the unusably slow py_ecc prototype to a - # fast ECC we should also fix up this memory usage (by using the - # array module). - chunk = data[offset:offset+self.chunk_size] - if i == self.num_chunks-1: - chunk = chunk + "\x00"*self.last_chunk_padding - assert len(chunk) == self.chunk_size - input_vector = [ord(x) for x in chunk] - assert len(input_vector) == self.required_shares - output_vector = self.encoder.Encode(input_vector) - assert len(output_vector) == self.total_shares - for i2,out in enumerate(output_vector): - share_data[i2].append(chr(out)) - - shares = [ (i, "".join(share_data[i])) - for i in range(self.total_shares) ] - return defer.succeed(shares) - -class PyRSDecoder(object): - - def set_serialized_params(self, params): - pieces = params.split(":") - self.data_size = int(pieces[0]) - self.required_shares = int(pieces[1]) - self.total_shares = int(pieces[2]) - - self.chunk_size = self.required_shares - self.num_chunks = mathutil.div_ceil(self.data_size, self.chunk_size) - self.last_chunk_padding = mathutil.pad_size(self.data_size, - self.required_shares) - self.share_size = self.num_chunks - self.encoder = rs_code.RSCode(self.total_shares, self.required_shares, - 8) - if False: - print "chunk_size: %d" % self.chunk_size - print "num_chunks: %d" % self.num_chunks - print "last_chunk_padding: %d" % self.last_chunk_padding - print "share_size: %d" % self.share_size - print "total_shares: %d" % self.total_shares - print "required_shares: %d" % self.required_shares - - def decode(self, some_shares): - chunk_size = self.chunk_size - assert len(some_shares) >= self.required_shares - chunks = [] - have_shares = {} - for share_num, share_data in some_shares: - have_shares[share_num] = share_data - for i in range(self.share_size): - # this takes one byte from each share, and turns the combination - # into a single chunk - received_vector = [] - for j in range(self.total_shares): - share = have_shares.get(j) - if share is not None: - received_vector.append(ord(share[i])) - else: - received_vector.append(None) - decoded_vector = self.encoder.DecodeImmediate(received_vector) - assert len(decoded_vector) == self.chunk_size - chunk = "".join([chr(x) for x in decoded_vector]) - chunks.append(chunk) - data = "".join(chunks) - if self.last_chunk_padding: - data = data[:-self.last_chunk_padding] - assert len(data) == self.data_size - return defer.succeed(data) - - -all_encoders = { - ReplicatingEncoder.ENCODER_TYPE: (ReplicatingEncoder, ReplicatingDecoder), - PyRSEncoder.ENCODER_TYPE: (PyRSEncoder, PyRSDecoder), - } diff --git a/src/allmydata/encode_new.py b/src/allmydata/encode_new.py index ec6fd66c..0da54501 100644 --- a/src/allmydata/encode_new.py +++ b/src/allmydata/encode_new.py @@ -5,7 +5,7 @@ from allmydata.chunk import HashTree, roundup_pow2 from allmydata.Crypto.Cipher import AES import sha from allmydata.util import mathutil -from allmydata.encode import PyRSEncoder +from allmydata.codec import PyRSEncoder def hash(data): return sha.new(data).digest() diff --git a/src/allmydata/test/test_codec.py b/src/allmydata/test/test_codec.py new file mode 100644 index 00000000..cec61bb3 --- /dev/null +++ b/src/allmydata/test/test_codec.py @@ -0,0 +1,153 @@ + +import os, time +from twisted.trial import unittest +from twisted.internet import defer +from twisted.python import log +from allmydata.codec import PyRSEncoder, PyRSDecoder, ReplicatingEncoder, ReplicatingDecoder +import random + +class Tester: + #enc_class = PyRSEncoder + #dec_class = PyRSDecoder + + def do_test(self, size, required_shares, total_shares): + data0 = os.urandom(size) + enc = self.enc_class() + enc.set_params(size, required_shares, total_shares) + serialized_params = enc.get_serialized_params() + log.msg("serialized_params: %s" % serialized_params) + d = enc.encode(data0) + def _done(shares): + self.failUnlessEqual(len(shares), total_shares) + self.shares = shares + d.addCallback(_done) + + def _decode(shares): + dec = self.dec_class() + dec.set_serialized_params(serialized_params) + d1 = dec.decode(shares) + return d1 + + def _check_data(data1): + self.failUnlessEqual(len(data1), len(data0)) + self.failUnless(data1 == data0) + + def _decode_all_ordered(res): + log.msg("_decode_all_ordered") + # can we decode using all of the shares? + return _decode(self.shares) + d.addCallback(_decode_all_ordered) + d.addCallback(_check_data) + + def _decode_all_shuffled(res): + log.msg("_decode_all_shuffled") + # can we decode, using all the shares, but in random order? + shuffled_shares = self.shares[:] + random.shuffle(shuffled_shares) + return _decode(shuffled_shares) + d.addCallback(_decode_all_shuffled) + d.addCallback(_check_data) + + def _decode_some(res): + log.msg("_decode_some") + # decode with a minimal subset of the shares + some_shares = self.shares[:required_shares] + return _decode(some_shares) + d.addCallback(_decode_some) + d.addCallback(_check_data) + + def _decode_some_random(res): + log.msg("_decode_some_random") + # use a randomly-selected minimal subset + some_shares = random.sample(self.shares, required_shares) + return _decode(some_shares) + d.addCallback(_decode_some_random) + d.addCallback(_check_data) + + def _decode_multiple(res): + log.msg("_decode_multiple") + # make sure we can re-use the decoder object + shares1 = random.sample(self.shares, required_shares) + shares2 = random.sample(self.shares, required_shares) + dec = self.dec_class() + dec.set_serialized_params(serialized_params) + d1 = dec.decode(shares1) + d1.addCallback(_check_data) + d1.addCallback(lambda res: dec.decode(shares2)) + d1.addCallback(_check_data) + return d1 + d.addCallback(_decode_multiple) + + return d + + def test_encode(self): + if os.uname()[1] == "slave3" and self.enc_class == PyRSEncoder: + raise unittest.SkipTest("slave3 is really slow") + return self.do_test(1000, 25, 100) + + def test_encode1(self): + return self.do_test(8, 8, 16) + + def test_encode2(self): + if os.uname()[1] == "slave3" and self.enc_class == PyRSEncoder: + raise unittest.SkipTest("slave3 is really slow") + return self.do_test(123, 25, 100) + + def test_sizes(self): + raise unittest.SkipTest("omg this would take forever") + d = defer.succeed(None) + for i in range(1, 100): + d.addCallback(lambda res,size: self.do_test(size, 4, 10), i) + return d + +class PyRS(unittest.TestCase, Tester): + enc_class = PyRSEncoder + dec_class = PyRSDecoder + +class Replicating(unittest.TestCase, Tester): + enc_class = ReplicatingEncoder + dec_class = ReplicatingDecoder + + +class BenchPyRS(unittest.TestCase): + enc_class = PyRSEncoder + def test_big(self): + size = 10000 + required_shares = 25 + total_shares = 100 + # this lets us use a persistent lookup table, stored outside the + # _trial_temp directory (which is deleted each time trial is run) + os.symlink("../ffield.lut.8", "ffield.lut.8") + enc = self.enc_class() + self.start() + enc.set_params(size, required_shares, total_shares) + serialized_params = enc.get_serialized_params() + print "encoder ready", self.stop() + self.start() + data0 = os.urandom(size) + print "data ready", self.stop() + self.start() + d = enc.encode(data0) + def _done(shares): + now_shares = time.time() + print "shares ready", self.stop() + self.start() + self.failUnlessEqual(len(shares), total_shares) + d.addCallback(_done) + d.addCallback(lambda res: enc.encode(data0)) + d.addCallback(_done) + d.addCallback(lambda res: enc.encode(data0)) + d.addCallback(_done) + return d + + def start(self): + self.start_time = time.time() + + def stop(self): + self.end_time = time.time() + return (self.end_time - self.start_time) + + +# to benchmark the encoder, delete this line +del BenchPyRS +# and then run 'make test TEST=allmydata.test.test_encode_share.BenchPyRS' diff --git a/src/allmydata/test/test_encode_share.py b/src/allmydata/test/test_encode_share.py deleted file mode 100644 index bf59d72b..00000000 --- a/src/allmydata/test/test_encode_share.py +++ /dev/null @@ -1,153 +0,0 @@ - -import os, time -from twisted.trial import unittest -from twisted.internet import defer -from twisted.python import log -from allmydata.encode import PyRSEncoder, PyRSDecoder, ReplicatingEncoder, ReplicatingDecoder -import random - -class Tester: - #enc_class = PyRSEncoder - #dec_class = PyRSDecoder - - def do_test(self, size, required_shares, total_shares): - data0 = os.urandom(size) - enc = self.enc_class() - enc.set_params(size, required_shares, total_shares) - serialized_params = enc.get_serialized_params() - log.msg("serialized_params: %s" % serialized_params) - d = enc.encode(data0) - def _done(shares): - self.failUnlessEqual(len(shares), total_shares) - self.shares = shares - d.addCallback(_done) - - def _decode(shares): - dec = self.dec_class() - dec.set_serialized_params(serialized_params) - d1 = dec.decode(shares) - return d1 - - def _check_data(data1): - self.failUnlessEqual(len(data1), len(data0)) - self.failUnless(data1 == data0) - - def _decode_all_ordered(res): - log.msg("_decode_all_ordered") - # can we decode using all of the shares? - return _decode(self.shares) - d.addCallback(_decode_all_ordered) - d.addCallback(_check_data) - - def _decode_all_shuffled(res): - log.msg("_decode_all_shuffled") - # can we decode, using all the shares, but in random order? - shuffled_shares = self.shares[:] - random.shuffle(shuffled_shares) - return _decode(shuffled_shares) - d.addCallback(_decode_all_shuffled) - d.addCallback(_check_data) - - def _decode_some(res): - log.msg("_decode_some") - # decode with a minimal subset of the shares - some_shares = self.shares[:required_shares] - return _decode(some_shares) - d.addCallback(_decode_some) - d.addCallback(_check_data) - - def _decode_some_random(res): - log.msg("_decode_some_random") - # use a randomly-selected minimal subset - some_shares = random.sample(self.shares, required_shares) - return _decode(some_shares) - d.addCallback(_decode_some_random) - d.addCallback(_check_data) - - def _decode_multiple(res): - log.msg("_decode_multiple") - # make sure we can re-use the decoder object - shares1 = random.sample(self.shares, required_shares) - shares2 = random.sample(self.shares, required_shares) - dec = self.dec_class() - dec.set_serialized_params(serialized_params) - d1 = dec.decode(shares1) - d1.addCallback(_check_data) - d1.addCallback(lambda res: dec.decode(shares2)) - d1.addCallback(_check_data) - return d1 - d.addCallback(_decode_multiple) - - return d - - def test_encode(self): - if os.uname()[1] == "slave3" and self.enc_class == PyRSEncoder: - raise unittest.SkipTest("slave3 is really slow") - return self.do_test(1000, 25, 100) - - def test_encode1(self): - return self.do_test(8, 8, 16) - - def test_encode2(self): - if os.uname()[1] == "slave3" and self.enc_class == PyRSEncoder: - raise unittest.SkipTest("slave3 is really slow") - return self.do_test(123, 25, 100) - - def test_sizes(self): - raise unittest.SkipTest("omg this would take forever") - d = defer.succeed(None) - for i in range(1, 100): - d.addCallback(lambda res,size: self.do_test(size, 4, 10), i) - return d - -class PyRS(unittest.TestCase, Tester): - enc_class = PyRSEncoder - dec_class = PyRSDecoder - -class Replicating(unittest.TestCase, Tester): - enc_class = ReplicatingEncoder - dec_class = ReplicatingDecoder - - -class BenchPyRS(unittest.TestCase): - enc_class = PyRSEncoder - def test_big(self): - size = 10000 - required_shares = 25 - total_shares = 100 - # this lets us use a persistent lookup table, stored outside the - # _trial_temp directory (which is deleted each time trial is run) - os.symlink("../ffield.lut.8", "ffield.lut.8") - enc = self.enc_class() - self.start() - enc.set_params(size, required_shares, total_shares) - serialized_params = enc.get_serialized_params() - print "encoder ready", self.stop() - self.start() - data0 = os.urandom(size) - print "data ready", self.stop() - self.start() - d = enc.encode(data0) - def _done(shares): - now_shares = time.time() - print "shares ready", self.stop() - self.start() - self.failUnlessEqual(len(shares), total_shares) - d.addCallback(_done) - d.addCallback(lambda res: enc.encode(data0)) - d.addCallback(_done) - d.addCallback(lambda res: enc.encode(data0)) - d.addCallback(_done) - return d - - def start(self): - self.start_time = time.time() - - def stop(self): - self.end_time = time.time() - return (self.end_time - self.start_time) - - -# to benchmark the encoder, delete this line -del BenchPyRS -# and then run 'make test TEST=allmydata.test.test_encode_share.BenchPyRS' diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py index 5b8588ab..28908318 100644 --- a/src/allmydata/upload.py +++ b/src/allmydata/upload.py @@ -6,7 +6,7 @@ from twisted.application import service from foolscap import Referenceable from allmydata.util import idlib -from allmydata import encode +from allmydata import codec from cStringIO import StringIO import sha @@ -37,7 +37,7 @@ class FileUploader: def make_encoder(self): self._needed_shares = 4 self._shares = 4 - self._encoder = encode.Encoder(self._filehandle, self._shares) + self._encoder = codec.Encoder(self._filehandle, self._shares) self._share_size = self._size def set_verifierid(self, vid):