From: zooko <zooko@zooko.com>
Date: Fri, 9 Nov 2007 16:43:59 +0000 (+0530)
Subject: zfec: add unit tests for easyfec, fix bug in easyfec padding, add docstrings explaining how to use easyfec padding correctly
X-Git-Url: https://git.rkrishnan.org/%5B/%5D%20/uri/%22file:/FOOURL?a=commitdiff_plain;h=d665f17e15ce1a4d0f86fc359dceaf572d49954c;p=tahoe-lafs%2Fzfec.git

zfec: add unit tests for easyfec, fix bug in easyfec padding, add docstrings explaining how to use easyfec padding correctly

darcs-hash:f7eb0510f00bd2003292fb324854e72b9b7cc683
---

diff --git a/zfec/zfec/easyfec.py b/zfec/zfec/easyfec.py
index 8b5e896..db39f5b 100644
--- a/zfec/zfec/easyfec.py
+++ b/zfec/zfec/easyfec.py
@@ -17,26 +17,31 @@ class Encoder(object):
     def encode(self, data):
         """
         @param data: string
+
+        @return: a sequence of m blocks -- any k of which suffice to
+            reconstruct the input data
         """
         chunksize = div_ceil(len(data), self.fec.k)
-        l = [ data[i*chunksize:(i+1)*chunksize] for i in range(self.fec.k) ]
-        # padding
-        if l and (len(l[-1]) != chunksize):
-            l[-1] = l[-1] + ('\x00'*(chunksize-len(l[-1])))
+        l = [ data[i*chunksize:(i+1)*chunksize] + "\x00" * min(chunksize, (((i+1)*chunksize)-len(data))) for i in range(self.fec.k) ]
         assert len(l) == self.fec.k, (len(l), self.fec.k,)
-        res = self.fec.encode(l)
-        return res
+        assert (not l) or (not [ x for x in l if len(x) != len(l[0]) ], (len(l), [ ab(x) for x in l ], chunksize, self.fec.k, len(data),))
+        return self.fec.encode(l)
         
 class Decoder(object):
     def __init__(self, k, m):
         self.fec = zfec.Decoder(k, m)
 
-    def decode(self, blocks, sharenums, padlen=0):
-        blocks = self.fec.decode(blocks, sharenums)
-        data = ''.join(blocks)
+    def decode(self, blocks, sharenums, padlen):
+        """
+        @param padlen: the number of bytes of padding to strip off;  Note that
+            the padlen is always equal to (blocksize times k) minus the length
+            of data.  (Therefore, padlen can be 0.)
+        """
+        data = ''.join(self.fec.decode(blocks, sharenums))
         if padlen:
-            data = data[:-padlen]
-        return data
+            return data[:-padlen]
+        else:
+            return data
 
 # zfec -- fast forward error correction library with Python interface
 # 
diff --git a/zfec/zfec/test/test_zfec.py b/zfec/zfec/test/test_zfec.py
index 56ca276..9a376a1 100755
--- a/zfec/zfec/test/test_zfec.py
+++ b/zfec/zfec/test/test_zfec.py
@@ -60,9 +60,10 @@ def _h_easy(k, m, s):
     blocks = [ x[1] for x in nums_and_blocks ]
     nums = [ x[0] for x in nums_and_blocks ]
     decer = zfec.easyfec.Decoder(k, m)
-    decodeds = decer.decode(blocks, nums)
-    assert len(decodeds) == len(s), (len(decodeds), len(s),)
-    assert decodeds == s, (decodeds, s,)
+    
+    decodeds = decer.decode(blocks, nums, padlen=k*len(blocks[0]) - len(s))
+    assert len(decodeds) == len(s), (ab(decodeds), ab(s), k, m)
+    assert decodeds == s, (ab(decodeds), ab(s),)
 
 def _help_test_random_easy():
     m = random.randrange(1, 257)
@@ -94,7 +95,7 @@ class ZFecTest(unittest.TestCase):
         decer = zfec.Decoder(2, 4)
 
         try:
-            decer.decode(98) # first argument is not a sequence
+            decer.decode(98, []) # first argument is not a sequence
         except TypeError, e:
             assert "First argument was not a sequence" in str(e), e
         else:
@@ -122,7 +123,7 @@ class EasyFecTest(unittest.TestCase):
             print "%d randomized tests pass." % (i+1)
 
     def test_random(self):
-        for i in range(2**8):
+        for i in range(2**10):
             _help_test_random_easy()
         if VERBOSE:
             print "%d randomized tests pass." % (i+1)
@@ -131,21 +132,21 @@ class EasyFecTest(unittest.TestCase):
         decer = zfec.easyfec.Decoder(2, 4)
 
         try:
-            decer.decode(98, [0, 1]) # first argument is not a sequence
+            decer.decode(98, [0, 1], 0) # first argument is not a sequence
         except TypeError, e:
             assert "First argument was not a sequence" in str(e), e
         else:
             raise "Should have gotten TypeError for wrong type of second argument."
 
         try:
-            decer.decode("ab", ["c", "d",])
+            decer.decode("ab", ["c", "d",], 0)
         except zfec.Error, e:
             assert "Precondition violation: second argument is required to contain int" in str(e), e
         else:
             raise "Should have gotten zfec.Error for wrong type of second argument."
 
         try:
-            decer.decode("ab", 98) # not a sequence at all
+            decer.decode("ab", 98, 0) # not a sequence at all
         except TypeError, e:
             assert "Second argument was not a sequence" in str(e), e
         else:
@@ -182,6 +183,7 @@ class FileFec(unittest.TestCase):
         try:
             tempf = tempdir.file(TESTFNAME, 'w+b')
             tempf.write(teststr)
+            tempf.flush()
             tempf.seek(0)
 
             # encode the file
@@ -200,9 +202,10 @@ class FileFec(unittest.TestCase):
             # decode from the share files
             outf = tempdir.file('recovered-testfile.txt', 'w+b')
             zfec.filefec.decode_from_files(outf, sharefs, verbose=VERBOSE)
+            outf.flush()
             outf.seek(0)
             recovereddata = outf.read()
-            assert recovereddata == teststr
+            assert recovereddata == teststr, (ab(recovereddata), ab(teststr),)
         finally:
             tempdir.shutdown()