encoding: fix the last py_ecc problem, tests pass now
author Brian Warner <warner@allmydata.com>
Fri, 5 Jan 2007 07:06:42 +0000 (00:06 -0700)
committer Brian Warner <warner@allmydata.com>
Fri, 5 Jan 2007 07:06:42 +0000 (00:06 -0700)
src/allmydata/encode.py
src/allmydata/test/test_encode_share.py

index b508eabc0aff4669e2cb79c9b3f52bff90a3f2ad..80c01aadc9bb8339d9a70c2ac7efeb332d5e5052 100644 (file)
@@ -98,6 +98,17 @@ class PyRSEncoder(object):
     # the serialized parameters to strip this padding out on the receiving
     # end.
 
+    # TODO: this will write a 733kB file called 'ffield.lut.8' in the current
+    # directory the first time it is run, to cache the lookup table for later
+    # use. It appears to take about 15 seconds to create this the first time.
+    # Make sure this file winds up somewhere reasonable.
+
+    # TODO: the encoder/decoder RSCode object depends upon the number of
+    # required/total shares, but not upon the data. We could probably save a
+    # lot of initialization time by caching a single instance and using it
+    # any time we use the same required/total share numbers (which will
+    # probably be always).
+
     def set_params(self, data_size, required_shares, total_shares):
         assert required_shares <= total_shares
         self.data_size = data_size
@@ -159,12 +170,13 @@ class PyRSDecoder(object):
         self.share_size = self.num_chunks
         self.encoder = rs_code.RSCode(self.total_shares, self.required_shares,
                                       8)
-        #print "chunk_size: %d" % self.chunk_size
-        #print "num_chunks: %d" % self.num_chunks
-        #print "last_chunk_padding: %d" % self.last_chunk_padding
-        #print "share_size: %d" % self.share_size
-        #print "total_shares: %d" % self.total_shares
-        #print "required_shares: %d" % self.required_shares
+        if False:
+            print "chunk_size: %d" % self.chunk_size
+            print "num_chunks: %d" % self.num_chunks
+            print "last_chunk_padding: %d" % self.last_chunk_padding
+            print "share_size: %d" % self.share_size
+            print "total_shares: %d" % self.total_shares
+            print "required_shares: %d" % self.required_shares
 
     def decode(self, some_shares):
         chunk_size = self.chunk_size
@@ -176,7 +188,6 @@ class PyRSDecoder(object):
         for i in range(self.share_size):
             # this takes one byte from each share, and turns the combination
             # into a single chunk
-            #print "PULLING"
             received_vector = []
             for j in range(self.total_shares):
                 share = have_shares.get(j)
@@ -186,16 +197,12 @@ class PyRSDecoder(object):
                     received_vector.append(None)
             decoded_vector = self.encoder.DecodeImmediate(received_vector)
             assert len(decoded_vector) == self.chunk_size
-            #print "DECODED: %d" % len(decoded_vector)
             chunk = "".join([chr(x) for x in decoded_vector])
-            #print "appending %d bytes" % len(chunk)
             chunks.append(chunk)
         data = "".join(chunks)
-        #print "pre-stripped length: %d" % len(data)
         if self.last_chunk_padding:
             data = data[:-self.last_chunk_padding]
-        #print "post-stripped length: %d" % len(data)
-        assert len(data) == chunk_size
+        assert len(data) == self.data_size
         return defer.succeed(data)
 
 
index 41afd6bceba96f821883015973aab76f12a81596..1f887351093240119b9ec5f5068107ceecb8a786 100644 (file)
@@ -2,6 +2,7 @@
 import os
 from twisted.trial import unittest
 from twisted.internet import defer
+from twisted.python import log
 from allmydata.encode import PyRSEncoder, PyRSDecoder, ReplicatingEncoder, ReplicatingDecoder
 import random
 
@@ -14,6 +15,7 @@ class Tester:
         enc = self.enc_class()
         enc.set_params(size, required_shares, total_shares)
         serialized_params = enc.get_serialized_params()
+        log.msg("serialized_params: %s" % serialized_params)
         d = enc.encode(data0)
         def _done(shares):
             self.failUnlessEqual(len(shares), total_shares)
@@ -31,20 +33,23 @@ class Tester:
             self.failUnless(data1 == data0)
 
         def _decode_all_ordered(res):
+            log.msg("_decode_all_ordered")
             # can we decode using all of the shares?
             return _decode(self.shares)
         d.addCallback(_decode_all_ordered)
         d.addCallback(_check_data)
 
         def _decode_all_shuffled(res):
+            log.msg("_decode_all_shuffled")
             # can we decode, using all the shares, but in random order?
             shuffled_shares = self.shares[:]
             random.shuffle(shuffled_shares)
             return _decode(shuffled_shares)
         d.addCallback(_decode_all_shuffled)
         d.addCallback(_check_data)
-        
+
         def _decode_some(res):
+            log.msg("_decode_some")
             # decode with a minimal subset of the shares
             some_shares = self.shares[:required_shares]
             return _decode(some_shares)
@@ -52,6 +57,7 @@ class Tester:
         d.addCallback(_check_data)
 
         def _decode_some_random(res):
+            log.msg("_decode_some_random")
             # use a randomly-selected minimal subset
             some_shares = random.sample(self.shares, required_shares)
             return _decode(some_shares)
@@ -59,6 +65,7 @@ class Tester:
         d.addCallback(_check_data)
 
         def _decode_multiple(res):
+            log.msg("_decode_multiple")
             # make sure we can re-use the decoder object
             shares1 = random.sample(self.shares, required_shares)
             shares2 = random.sample(self.shares, required_shares)
@@ -79,6 +86,9 @@ class Tester:
     def test_encode1(self):
         return self.do_test(8, 8, 16)
 
+    def test_encode2(self):
+        return self.do_test(123, 25, 100)
+
     def test_sizes(self):
         raise unittest.SkipTest("omg this would take forever")
         d = defer.succeed(None)