From: Brian Warner <warner@allmydata.com>
Date: Fri, 5 Jan 2007 06:50:21 +0000 (-0700)
Subject: fix our use of py_ecc (set log2FieldSize=8 explicitly)
X-Git-Tag: tahoe_v0.1.0-0-UNSTABLE~401
X-Git-Url: https://git.rkrishnan.org/pf/reliability?a=commitdiff_plain;h=c91d14dca8a49d3133b82a0024dcaf69a82a736e;p=tahoe-lafs%2Ftahoe-lafs.git

fix our use of py_ecc (set log2FieldSize=8 explicitly)
---

diff --git a/src/allmydata/encode.py b/src/allmydata/encode.py
index 28775ebc..b508eabc 100644
--- a/src/allmydata/encode.py
+++ b/src/allmydata/encode.py
@@ -99,6 +99,7 @@ class PyRSEncoder(object):
     # end.
 
     def set_params(self, data_size, required_shares, total_shares):
+        assert required_shares <= total_shares
         self.data_size = data_size
         self.required_shares = required_shares
         self.total_shares = total_shares
@@ -106,7 +107,7 @@ class PyRSEncoder(object):
         self.num_chunks = mathutil.div_ceil(data_size, self.chunk_size)
         self.last_chunk_padding = mathutil.pad_size(data_size, required_shares)
         self.share_size = self.num_chunks
-        self.encoder = rs_code.RSCode(total_shares, required_shares)
+        self.encoder = rs_code.RSCode(total_shares, required_shares, 8)
 
     def get_encoder_type(self):
         return self.ENCODER_TYPE
@@ -121,18 +122,23 @@ class PyRSEncoder(object):
     def encode(self, data):
         share_data = [ [] for i in range(self.total_shares)]
         for i in range(self.num_chunks):
+            # we take self.chunk_size bytes from the input string, and
+            # turn it into self.total_shares bytes.
             offset = i*self.chunk_size
+            # Note string slices aren't an efficient way to use memory, so
+            # when we upgrade from the unusably slow py_ecc prototype to a
+            # fast ECC we should also fix up this memory usage (by using the
+            # array module).
             chunk = data[offset:offset+self.chunk_size]
             if i == self.num_chunks-1:
                 chunk = chunk + "\x00"*self.last_chunk_padding
             assert len(chunk) == self.chunk_size
             input_vector = [ord(x) for x in chunk]
+            assert len(input_vector) == self.required_shares
             output_vector = self.encoder.Encode(input_vector)
             assert len(output_vector) == self.total_shares
             for i2,out in enumerate(output_vector):
-                out_chars = [chr(x) for x in out]
-                out_string = "".join(out_chars)
-                share_data[i2].append(out_string)
+                share_data[i2].append(chr(out))
 
         shares = [ (i, "".join(share_data[i]))
                    for i in range(self.total_shares) ]
@@ -151,31 +157,45 @@ class PyRSDecoder(object):
         self.last_chunk_padding = mathutil.pad_size(self.data_size,
                                                     self.required_shares)
         self.share_size = self.num_chunks
-        self.encoder = rs_code.RSCode(self.total_shares, self.required_shares)
+        self.encoder = rs_code.RSCode(self.total_shares, self.required_shares,
+                                      8)
+        #print "chunk_size: %d" % self.chunk_size
+        #print "num_chunks: %d" % self.num_chunks
+        #print "last_chunk_padding: %d" % self.last_chunk_padding
+        #print "share_size: %d" % self.share_size
+        #print "total_shares: %d" % self.total_shares
+        #print "required_shares: %d" % self.required_shares
 
     def decode(self, some_shares):
         chunk_size = self.chunk_size
         assert len(some_shares) >= self.required_shares
-        chunks = [ [] for i in range(self.num_chunks) ]
+        chunks = []
         have_shares = {}
         for share_num, share_data in some_shares:
             have_shares[share_num] = share_data
-        for i in range(self.num_chunks):
-            offset = i*chunk_size
+        for i in range(self.share_size):
+            # this takes one byte from each share, and turns the combination
+            # into a single chunk
+            #print "PULLING"
             received_vector = []
             for j in range(self.total_shares):
                 share = have_shares.get(j)
                 if share is not None:
-                    v1 = [ord(x) for x in share[offset:offset+chunk_size]]
-                    received_vector.append(v1)
+                    received_vector.append(ord(share[i]))
                 else:
                     received_vector.append(None)
             decoded_vector = self.encoder.DecodeImmediate(received_vector)
-            if i == self.num_chunks-1:
-                decoded_vector = decoded_vector[:-self.last_chunk_padding]
+            assert len(decoded_vector) == self.chunk_size
+            #print "DECODED: %d" % len(decoded_vector)
             chunk = "".join([chr(x) for x in decoded_vector])
+            #print "appending %d bytes" % len(chunk)
             chunks.append(chunk)
         data = "".join(chunks)
+        #print "pre-stripped length: %d" % len(data)
+        if self.last_chunk_padding:
+            data = data[:-self.last_chunk_padding]
+        #print "post-stripped length: %d" % len(data)
+        assert len(data) == chunk_size
         return defer.succeed(data)
 
 
diff --git a/src/allmydata/test/test_encode_share.py b/src/allmydata/test/test_encode_share.py
index a8516f62..41afd6bc 100644
--- a/src/allmydata/test/test_encode_share.py
+++ b/src/allmydata/test/test_encode_share.py
@@ -76,7 +76,11 @@ class Tester:
     def test_encode(self):
         return self.do_test(1000, 25, 100)
 
+    def test_encode1(self):
+        return self.do_test(8, 8, 16)
+
     def test_sizes(self):
+        raise unittest.SkipTest("omg this would take forever")
         d = defer.succeed(None)
         for i in range(1, 100):
             d.addCallback(lambda res,size: self.do_test(size, 4, 10), i)