From: Brian Warner Date: Fri, 5 Jan 2007 07:06:42 +0000 (-0700) Subject: encoding: fix the last py_ecc problem, tests pass now X-Git-Tag: tahoe_v0.1.0-0-UNSTABLE~399 X-Git-Url: https://git.rkrishnan.org/components/%22news.html/...?a=commitdiff_plain;h=e1c6ee9dcf30bfe4be50376b2a5b34dc6ac76c61;p=tahoe-lafs%2Ftahoe-lafs.git encoding: fix the last py_ecc problem, tests pass now --- diff --git a/src/allmydata/encode.py b/src/allmydata/encode.py index b508eabc..80c01aad 100644 --- a/src/allmydata/encode.py +++ b/src/allmydata/encode.py @@ -98,6 +98,17 @@ class PyRSEncoder(object): # the serialized parameters to strip this padding out on the receiving # end. + # TODO: this will write a 733kB file called 'ffield.lut.8' in the current + # directory the first time it is run, to cache the lookup table for later + # use. It appears to take about 15 seconds to create this the first time. + # Make sure this file winds up somewhere reasonable. + + # TODO: the encoder/decoder RSCode object depends upon the number of + # required/total shares, but not upon the data. We could probably save a + # lot of initialization time by caching a single instance and using it + # any time we use the same required/total share numbers (which will + # probably be always). + def set_params(self, data_size, required_shares, total_shares): assert required_shares <= total_shares self.data_size = data_size @@ -159,12 +170,13 @@ class PyRSDecoder(object): self.share_size = self.num_chunks self.encoder = rs_code.RSCode(self.total_shares, self.required_shares, 8) - #print "chunk_size: %d" % self.chunk_size - #print "num_chunks: %d" % self.num_chunks - #print "last_chunk_padding: %d" % self.last_chunk_padding - #print "share_size: %d" % self.share_size - #print "total_shares: %d" % self.total_shares - #print "required_shares: %d" % self.required_shares + if False: + print "chunk_size: %d" % self.chunk_size + print "num_chunks: %d" % self.num_chunks + print "last_chunk_padding: %d" % self.last_chunk_padding + print "share_size: %d" % self.share_size + print "total_shares: %d" % self.total_shares + print "required_shares: %d" % self.required_shares def decode(self, some_shares): chunk_size = self.chunk_size @@ -176,7 +188,6 @@ class PyRSDecoder(object): for i in range(self.share_size): # this takes one byte from each share, and turns the combination # into a single chunk - #print "PULLING" received_vector = [] for j in range(self.total_shares): share = have_shares.get(j) @@ -186,16 +197,12 @@ class PyRSDecoder(object): received_vector.append(None) decoded_vector = self.encoder.DecodeImmediate(received_vector) assert len(decoded_vector) == self.chunk_size - #print "DECODED: %d" % len(decoded_vector) chunk = "".join([chr(x) for x in decoded_vector]) - #print "appending %d bytes" % len(chunk) chunks.append(chunk) data = "".join(chunks) - #print "pre-stripped length: %d" % len(data) if self.last_chunk_padding: data = data[:-self.last_chunk_padding] - #print "post-stripped length: %d" % len(data) - assert len(data) == chunk_size + assert len(data) == self.data_size return defer.succeed(data) diff --git a/src/allmydata/test/test_encode_share.py b/src/allmydata/test/test_encode_share.py index 41afd6bc..1f887351 100644 --- a/src/allmydata/test/test_encode_share.py +++ b/src/allmydata/test/test_encode_share.py @@ -2,6 +2,7 @@ import os from twisted.trial import unittest from twisted.internet import defer +from twisted.python import log from allmydata.encode import PyRSEncoder, PyRSDecoder, ReplicatingEncoder, ReplicatingDecoder import random @@ -14,6 +15,7 @@ class Tester: enc = self.enc_class() enc.set_params(size, required_shares, total_shares) serialized_params = enc.get_serialized_params() + log.msg("serialized_params: %s" % serialized_params) d = enc.encode(data0) def _done(shares): self.failUnlessEqual(len(shares), total_shares) @@ -31,20 +33,23 @@ class Tester: self.failUnless(data1 == data0) def _decode_all_ordered(res): + log.msg("_decode_all_ordered") # can we decode using all of the shares? return _decode(self.shares) d.addCallback(_decode_all_ordered) d.addCallback(_check_data) def _decode_all_shuffled(res): + log.msg("_decode_all_shuffled") # can we decode, using all the shares, but in random order? shuffled_shares = self.shares[:] random.shuffle(shuffled_shares) return _decode(shuffled_shares) d.addCallback(_decode_all_shuffled) d.addCallback(_check_data) - + def _decode_some(res): + log.msg("_decode_some") # decode with a minimal subset of the shares some_shares = self.shares[:required_shares] return _decode(some_shares) @@ -52,6 +57,7 @@ class Tester: d.addCallback(_check_data) def _decode_some_random(res): + log.msg("_decode_some_random") # use a randomly-selected minimal subset some_shares = random.sample(self.shares, required_shares) return _decode(some_shares) @@ -59,6 +65,7 @@ class Tester: d.addCallback(_check_data) def _decode_multiple(res): + log.msg("_decode_multiple") # make sure we can re-use the decoder object shares1 = random.sample(self.shares, required_shares) shares2 = random.sample(self.shares, required_shares) @@ -79,6 +86,9 @@ class Tester: def test_encode1(self): return self.do_test(8, 8, 16) + def test_encode2(self): + return self.do_test(123, 25, 100) + def test_sizes(self): raise unittest.SkipTest("omg this would take forever") d = defer.succeed(None)