pyfec: add easyfec wrapper which takes a single string and splits it into input share...

author zooko <zooko@zooko.com>

Wed, 28 Mar 2007 03:14:30 +0000 (08:44 +0530)

committer zooko <zooko@zooko.com>

Wed, 28 Mar 2007 03:14:30 +0000 (08:44 +0530)
author zooko <zooko@zooko.com>
Wed, 28 Mar 2007 03:14:30 +0000 (08:44 +0530)
committer zooko <zooko@zooko.com>
Wed, 28 Mar 2007 03:14:30 +0000 (08:44 +0530)
diff --git a/pyfec/fec/easyfec.py b/pyfec/fec/easyfec.py

new file mode 100644 (file)

index 0000000..3c05813
--- /dev/null
+++ b/pyfec/fec/easyfec.py
@@ -0,0 +1,28 @@
+import fec
+
+# div_ceil() was copied from the pyutil library.
+def div_ceil(n, d):
+    """
+    Return the smallest integer k such that k*d >= n, i.e. n/d rounded up.
+
+    @param n: the integer numerator
+    @param d: the integer denominator; the result stated above holds for d > 0
+    @raise ZeroDivisionError: if d is zero
+    """
+    # Floor division keeps the quotient integral even where "/" means true
+    # division; the boolean adds 1 exactly when there is a remainder.
+    return (n//d) + (n%d != 0)
+
+
+class Encoder(object):
+    """
+    Convenience wrapper around fec.Encoder which accepts a single string
+    instead of a list of equal-length input shares.
+    """
+    def __init__(self, k, m):
+        """
+        @param k: passed through to fec.Encoder
+        @param m: passed through to fec.Encoder
+        """
+        self.fec = fec.Encoder(k, m)
+
+    def encode(self, data):
+        """
+        Split data into self.fec.k consecutive chunks of equal length and
+        return the result of passing that list to self.fec.encode().
+
+        Each chunk is div_ceil(len(data), self.fec.k) bytes long; wherever
+        data runs out, the chunk is padded with NUL (zero) bytes.  The
+        original length is not recorded here, so a caller that needs to strip
+        the padding later has to remember it.
+
+        @param data: string
+        @raise ValueError: if data is empty
+        """
+        if not data:
+            # Previously this surfaced as a ZeroDivisionError, because the
+            # chunk size computed for empty input is zero.
+            raise ValueError("data must not be empty")
+        k = self.fec.k
+        chunksize = div_ceil(len(data), k)
+        # Chunk i covers data[i*chunksize:(i+1)*chunksize].  Slices at or past
+        # the end of data come back short (or empty) and are padded, so the
+        # list always holds exactly k entries of chunksize bytes each.
+        l = [ data[i*chunksize:(i+1)*chunksize].ljust(chunksize, '\x00') for i in range(k) ]
+        return self.fec.encode(l)
+
+    def decode(self, shares):
+        """
+        Pass shares straight through to the decode() method of self.fec.
+
+        NOTE(review): self.fec is a fec.Encoder; confirm that it actually
+        provides decode().  Nothing here removes padding added by encode().
+        """
+        return self.fec.decode(shares)
+        
diff --git a/pyfec/fec/filefec.py b/pyfec/fec/filefec.py

index a4736f5e36d857e41e024a879434efec6af9a60a..c857427098f30742d83ec3b72af03f1e4afdf93e 100644 (file)
--- a/pyfec/fec/filefec.py
+++ b/pyfec/fec/filefec.py
@@ -23,10 +23,34 @@
  # along with this program; if not, write to the Free Software
  # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  
-import fec
+import easyfec, fec
  
  import array, random
  
+def encode_to_files_easyfec(inf, prefix, k, m):
+    """
+    Encode inf with encode_file_stringy_easyfec(), writing the shares to
+    files named $prefix+$shareid.
+
+    @param inf the file object from which to read the data
+    @param prefix the string to which each shareid (0 through m-1) is
+        appended to form the name of that share's output file
+    @param k passed through to encode_file_stringy_easyfec()
+    @param m the number of share files written
+    """
+    l = []
+    try:
+        # Open inside the try block so that files which were already opened
+        # still get closed if a later open() fails.
+        for shareid in range(m):
+            l.append(open(prefix+str(shareid), "wb"))
+
+        def cb(shares, length):
+            # Append share i of this segment to output file i; the length
+            # argument is not needed here.
+            assert len(shares) == len(l)
+            for f, share in zip(l, shares):
+                f.write(share)
+
+        encode_file_stringy_easyfec(inf, cb, k, m, chunksize=4096)
+    finally:
+        # Close (and thereby flush) every share file before returning.
+        for f in l:
+            f.close()
+ 
+def encode_to_files_stringy(inf, prefix, k, m):
+    """
+    Encode inf with encode_file_stringy(), writing the shares to files
+    named $prefix+$shareid.
+
+    @param inf the file object from which to read the data
+    @param prefix the string to which each shareid (0 through m-1) is
+        appended to form the name of that share's output file
+    @param k passed through to encode_file_stringy()
+    @param m the number of share files written
+    """
+    l = []
+    try:
+        # Open inside the try block so that files which were already opened
+        # still get closed if a later open() fails.
+        for shareid in range(m):
+            l.append(open(prefix+str(shareid), "wb"))
+
+        def cb(shares, length):
+            # Append share i of this segment to output file i; the length
+            # argument is not needed here.
+            assert len(shares) == len(l)
+            for f, share in zip(l, shares):
+                f.write(share)
+
+        encode_file_stringy(inf, cb, k, m, chunksize=4096)
+    finally:
+        # Close (and thereby flush) every share file before returning.
+        for f in l:
+            f.close()
+ 
  def encode_to_files(inf, prefix, k, m):
      """
      Encode inf, writing the shares to named $prefix+$shareid.
@@ -214,3 +238,34 @@ def encode_file_not_really(inf, cb, k, m, chunksize=4096):
          # res = enc.encode(l)
          # print "...finished to encode()"
          cb(l, indatasize)
+
+def encode_file_stringy_easyfec(inf, cb, k, m, chunksize=4096):
+    """
+    Read in the contents of inf, encode, and call cb with the results.
+
+    First, up to chunksize*k bytes will be read from inf and handed to
+    easyfec.Encoder.encode() as a single string.  Then cb will be invoked,
+    passing the value returned by encode() as its first argument, and the
+    number of bytes that were read for this iteration as its second
+    argument.  (That number is always equal to k*chunksize, until the last
+    iteration, when the end of the file has been reached and less than
+    k*chunksize bytes could be read from the file.)  This procedure is
+    iterated until a read returns no more data.  Padding of a short final
+    read is left to easyfec.Encoder.
+
+    @param inf the file object from which to read the data
+    @param cb the callback to be invoked with the results
+    @param k the number of shares required to reconstruct the file
+    @param m the total number of shares created
+    @param chunksize how much data to read from inf for each of the k input 
+        shares
+    """
+    enc = easyfec.Encoder(k, m)
+
+    readsize = k*chunksize
+    indata = inf.read(readsize)
+    while indata:
+        res = enc.encode(indata)
+        # Report the number of bytes actually read: the final read may
+        # return fewer than k*chunksize bytes.
+        cb(res, len(indata))
+        indata = inf.read(readsize)
+
diff --git a/pyfec/fec/test/bench_pyfec.py b/pyfec/fec/test/bench_pyfec.py

index 25967ee8bab41c661a2bdcb6d41364653c6d2a83..afa1dc0c3c8448aed9cdaa021a0a03d69ae797cb 100644 (file)
--- a/pyfec/fec/test/bench_pyfec.py
+++ b/pyfec/fec/test/bench_pyfec.py
@@ -27,14 +27,22 @@ import fec
  
  import array, random
  
-def bench_encode_to_files_shuffle_decode_from_files(verbose=False):
-    FILESIZE=1000000
+def f_easyfec(filesize):
+    """
+    Run bench_encode_to_files_shuffle_decode_from_files() non-verbosely for
+    the given filesize, encoding with fec.filefec.encode_to_files_easyfec,
+    and return its result.
+    """
+    encodefunc = fec.filefec.encode_to_files_easyfec
+    return bench_encode_to_files_shuffle_decode_from_files(filesize, encodefunc=encodefunc, verbose=False)
+    
+def f_fec_stringy(filesize):
+    """
+    Run bench_encode_to_files_shuffle_decode_from_files() non-verbosely for
+    the given filesize, encoding with fec.filefec.encode_to_files_stringy,
+    and return its result.
+    """
+    encodefunc = fec.filefec.encode_to_files_stringy
+    return bench_encode_to_files_shuffle_decode_from_files(filesize, encodefunc=encodefunc, verbose=False)
+    
+def f_fec(filesize):
+    """
+    Run bench_encode_to_files_shuffle_decode_from_files() non-verbosely for
+    the given filesize, encoding with fec.filefec.encode_to_files, and
+    return its result.
+    """
+    encodefunc = fec.filefec.encode_to_files
+    return bench_encode_to_files_shuffle_decode_from_files(filesize, encodefunc=encodefunc, verbose=False)
+    
+def bench_encode_to_files_shuffle_decode_from_files(filesize=1000000, verbose=False, encodefunc=fec.filefec.encode_to_files):
      CHUNKSIZE=4096
      PREFIX="testshare"
      K=25
      M=100
      import os, time
-    left=FILESIZE
+    left=filesize
      outfile = open("tmpranddata", "wb")
      try:
          while left:
@@ -45,10 +53,10 @@ def bench_encode_to_files_shuffle_decode_from_files(verbose=False):
          outfile = None
          infile = open("tmpranddata", "rb")
          st = time.time()
-        fec.filefec.encode_to_files(infile, PREFIX, K, M)
+        encodefunc(infile, PREFIX, K, M)
          so = time.time()
          if verbose:
-            print "Encoded %s byte file into %d share files in %0.2f seconds, or %0.2f million bytes per second" % (FILESIZE, M, so-st, FILESIZE/((so-st)*1000000),)
+            print "Encoded %s byte file into %d share files in %0.2f seconds, or %0.2f million bytes per second" % (filesize, M, so-st, filesize/((so-st)*1000000),)  # 1000000 scales bytes/second to millions of bytes/second
          enctime = so-st
          # Now delete m-k of the tempfiles at random.
          tempfs = [ f for f in os.listdir(".") if f.startswith(PREFIX) ]
@@ -57,10 +65,10 @@ def bench_encode_to_files_shuffle_decode_from_files(verbose=False):
              os.remove(victimtempf)
          recoveredfile = open("tmpranddata-recovered", "wb")
          st = time.time()
-        fec.filefec.decode_from_files(recoveredfile, 1000000, PREFIX, K, M)
+        fec.filefec.decode_from_files(recoveredfile, filesize, PREFIX, K, M)
          so = time.time()
          if verbose:
-            print "Decoded %s byte file from %d share files in %0.2f seconds, or %0.2f million bytes per second" % (FILESIZE, K, so-st, FILESIZE/((so-st)*1000000),)
+            print "Decoded %s byte file from %d share files in %0.2f seconds, or %0.2f million bytes per second" % (filesize, K, so-st, filesize/((so-st)*1000000),)  # 1000000 scales bytes/second to millions of bytes/second
          return enctime + (so-st)
      finally:
          # os.remove("tmpranddata")
author	zooko <zooko@zooko.com>
	Wed, 28 Mar 2007 03:14:30 +0000 (08:44 +0530)
committer	zooko <zooko@zooko.com>
	Wed, 28 Mar 2007 03:14:30 +0000 (08:44 +0530)
pyfec/fec/easyfec.py	[new file with mode: 0644]	patch \| blob
pyfec/fec/filefec.py		patch \| blob \| history
pyfec/fec/test/bench_pyfec.py		patch \| blob \| history