]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/commitdiff
encoding: fix the last py_ecc problem, tests pass now
author: Brian Warner <warner@allmydata.com>
Fri, 5 Jan 2007 07:06:42 +0000 (00:06 -0700)
committer: Brian Warner <warner@allmydata.com>
Fri, 5 Jan 2007 07:06:42 +0000 (00:06 -0700)
src/allmydata/encode.py
src/allmydata/test/test_encode_share.py

index b508eabc0aff4669e2cb79c9b3f52bff90a3f2ad..80c01aadc9bb8339d9a70c2ac7efeb332d5e5052 100644 (file)
@@ -98,6 +98,17 @@ class PyRSEncoder(object):
     # the serialized parameters to strip this padding out on the receiving
     # end.
 
+    # TODO: this will write a 733kB file called 'ffield.lut.8' in the current
+    # directory the first time it is run, to cache the lookup table for later
+    # use. It appears to take about 15 seconds to create this the first time.
+    # Make sure this file winds up somewhere reasonable.
+
+    # TODO: the encoder/decoder RSCode object depends upon the number of
+    # required/total shares, but not upon the data. We could probably save a
+    # lot of initialization time by caching a single instance and using it
+    # any time we use the same required/total share numbers (which will
+    # probably be always).
+
     def set_params(self, data_size, required_shares, total_shares):
         assert required_shares <= total_shares
         self.data_size = data_size
@@ -159,12 +170,13 @@ class PyRSDecoder(object):
         self.share_size = self.num_chunks
         self.encoder = rs_code.RSCode(self.total_shares, self.required_shares,
                                       8)
-        #print "chunk_size: %d" % self.chunk_size
-        #print "num_chunks: %d" % self.num_chunks
-        #print "last_chunk_padding: %d" % self.last_chunk_padding
-        #print "share_size: %d" % self.share_size
-        #print "total_shares: %d" % self.total_shares
-        #print "required_shares: %d" % self.required_shares
+        if False:
+            print "chunk_size: %d" % self.chunk_size
+            print "num_chunks: %d" % self.num_chunks
+            print "last_chunk_padding: %d" % self.last_chunk_padding
+            print "share_size: %d" % self.share_size
+            print "total_shares: %d" % self.total_shares
+            print "required_shares: %d" % self.required_shares
 
     def decode(self, some_shares):
         chunk_size = self.chunk_size
@@ -176,7 +188,6 @@ class PyRSDecoder(object):
         for i in range(self.share_size):
             # this takes one byte from each share, and turns the combination
             # into a single chunk
-            #print "PULLING"
             received_vector = []
             for j in range(self.total_shares):
                 share = have_shares.get(j)
@@ -186,16 +197,12 @@ class PyRSDecoder(object):
                     received_vector.append(None)
             decoded_vector = self.encoder.DecodeImmediate(received_vector)
             assert len(decoded_vector) == self.chunk_size
-            #print "DECODED: %d" % len(decoded_vector)
             chunk = "".join([chr(x) for x in decoded_vector])
-            #print "appending %d bytes" % len(chunk)
             chunks.append(chunk)
         data = "".join(chunks)
-        #print "pre-stripped length: %d" % len(data)
         if self.last_chunk_padding:
             data = data[:-self.last_chunk_padding]
-        #print "post-stripped length: %d" % len(data)
-        assert len(data) == chunk_size
+        assert len(data) == self.data_size
         return defer.succeed(data)
 
 
index 41afd6bceba96f821883015973aab76f12a81596..1f887351093240119b9ec5f5068107ceecb8a786 100644 (file)
@@ -2,6 +2,7 @@
 import os
 from twisted.trial import unittest
 from twisted.internet import defer
+from twisted.python import log
 from allmydata.encode import PyRSEncoder, PyRSDecoder, ReplicatingEncoder, ReplicatingDecoder
 import random
 
@@ -14,6 +15,7 @@ class Tester:
         enc = self.enc_class()
         enc.set_params(size, required_shares, total_shares)
         serialized_params = enc.get_serialized_params()
+        log.msg("serialized_params: %s" % serialized_params)
         d = enc.encode(data0)
         def _done(shares):
             self.failUnlessEqual(len(shares), total_shares)
@@ -31,20 +33,23 @@ class Tester:
             self.failUnless(data1 == data0)
 
         def _decode_all_ordered(res):
+            log.msg("_decode_all_ordered")
             # can we decode using all of the shares?
             return _decode(self.shares)
         d.addCallback(_decode_all_ordered)
         d.addCallback(_check_data)
 
         def _decode_all_shuffled(res):
+            log.msg("_decode_all_shuffled")
             # can we decode, using all the shares, but in random order?
             shuffled_shares = self.shares[:]
             random.shuffle(shuffled_shares)
             return _decode(shuffled_shares)
         d.addCallback(_decode_all_shuffled)
         d.addCallback(_check_data)
-        
+
         def _decode_some(res):
+            log.msg("_decode_some")
             # decode with a minimal subset of the shares
             some_shares = self.shares[:required_shares]
             return _decode(some_shares)
@@ -52,6 +57,7 @@ class Tester:
         d.addCallback(_check_data)
 
         def _decode_some_random(res):
+            log.msg("_decode_some_random")
             # use a randomly-selected minimal subset
             some_shares = random.sample(self.shares, required_shares)
             return _decode(some_shares)
@@ -59,6 +65,7 @@ class Tester:
         d.addCallback(_check_data)
 
         def _decode_multiple(res):
+            log.msg("_decode_multiple")
             # make sure we can re-use the decoder object
             shares1 = random.sample(self.shares, required_shares)
             shares2 = random.sample(self.shares, required_shares)
@@ -79,6 +86,9 @@ class Tester:
     def test_encode1(self):
         return self.do_test(8, 8, 16)
 
+    def test_encode2(self):
+        return self.do_test(123, 25, 100)
+
     def test_sizes(self):
         raise unittest.SkipTest("omg this would take forever")
         d = defer.succeed(None)