zfec/zunfec: make the cmdline programs take input from stdin  [14-stdin-streaming.0]
author     Ramakrishnan Muthukrishnan <ram@rkrishnan.org>
           Sun, 24 Jan 2016 07:03:26 +0000 (12:33 +0530)
committer  Ramakrishnan Muthukrishnan <ram@rkrishnan.org>
           Sun, 24 Jan 2016 07:03:26 +0000 (12:33 +0530)
Patch from Eric Pollmann: https://tahoe-lafs.org/trac/zfec/ticket/14

zfec/cmdline_zfec.py
zfec/cmdline_zunfec.py
zfec/filefec.py
zfec/test/test_zfec.py
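
For orientation, a minimal sketch of how the two new code paths are meant to be driven from Python; it assumes the patched zfec.filefec module and the signatures shown in the hunks below, and the wrapper function names are illustrative only:

    import sys
    from zfec import filefec

    def encode_stdin_to_shares(dirname, prefix, k, m):
        # fsize=None tells encode_to_files that the input size is unknown;
        # the share headers are rewritten with the real pad length at EOF.
        filefec.encode_to_files(sys.stdin, None, dirname, prefix, k, m,
                                suffix=".fec", overwrite=True, verbose=False)

    def decode_shares_to_stdout(sharepaths):
        sharefs = [open(fn, 'rb') for fn in sharepaths]
        try:
            # is_stream=True keeps progress chatter on stderr so it cannot
            # be mixed into the reconstructed data written to stdout.
            filefec.decode_from_files(sys.stdout, sharefs, verbose=False,
                                      is_stream=True)
        finally:
            for f in sharefs:
                f.close()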

zfec/cmdline_zfec.py
index 2eee2b0e2726e9e754021472ac41e6e69184627e..e56dc5b5b9d99d8c052a340031bd169425c80e13 100755 (executable)
@@ -57,9 +57,12 @@ def main():
         if args.requiredshares == args.totalshares:
             print "warning: silly parameters: requiredshares == totalshares, which means that all shares will be required in order to reconstruct the file.  You could use \"split\" for the same effect.  But proceeding to do it anyway..."
 
-    args.inputfile.seek(0, 2)
-    fsize = args.inputfile.tell()
-    args.inputfile.seek(0, 0)
+    if args.inputfile.name == '<stdin>':
+        fsize = None
+    else:
+        args.inputfile.seek(0, 2)
+        fsize = args.inputfile.tell()
+        args.inputfile.seek(0, 0)
     return filefec.encode_to_files(args.inputfile, fsize, args.output_dir, args.prefix, args.requiredshares, args.totalshares, args.suffix, args.force, args.verbose)
 
 # zfec -- fast forward error correction library with Python interface
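
The '<stdin>' check above relies on the name attribute of the parsed input file; assuming the inputfile option is declared with argparse.FileType (consistent with args.inputfile being a file object in this hunk), passing '-' under Python 2 yields sys.stdin, whose name is the pseudo-path '<stdin>'. A quick illustration, not part of the patch:

    import argparse, sys

    p = argparse.ArgumentParser()
    p.add_argument('inputfile', type=argparse.FileType('rb'))
    args = p.parse_args(['-'])

    assert args.inputfile is sys.stdin          # '-' maps to stdin
    assert args.inputfile.name == '<stdin>'     # the name the patch keys on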
zfec/cmdline_zunfec.py
index 8fc0c040dfd7a325ff2a6fe1cba0b093852aab0a..514b468daad08ac60bac2bb9da3b15804387c9e5 100755 (executable)
@@ -30,7 +30,9 @@ def main():
         print "At least two sharefiles are required."
         return 1
 
-    if args.force:
+    if args.outputfile == '-':
+        outf = sys.stdout
+    elif args.force:
         outf = open(args.outputfile, 'wb')
     else:
         try:
@@ -49,7 +51,7 @@ def main():
     for fn in args.sharefiles:
         sharefs.append(open(fn, 'rb'))
     try:
-        ret = filefec.decode_from_files(outf, sharefs, args.verbose)
+        ret = filefec.decode_from_files(outf, sharefs, args.verbose, sys.stdout==outf)
     except filefec.InsufficientShareFilesError, e:
         print str(e)
         return 3
zfec/filefec.py
index 48f3d9d790edac6b15b9cf433f99d2af9b1d274f..942096eed1d2a9220bdbccc026c3b7a1dc5a921b 100644 (file)
@@ -2,7 +2,7 @@ import easyfec, zfec
 from pyutil import fileutil
 from pyutil.mathutil import pad_size, log_ceil
 
-import array, os, struct
+import array, os, struct, sys
 
 CHUNKSIZE = 4096
 
@@ -164,14 +164,21 @@ def encode_to_files(inf, fsize, dirname, prefix, k, m, suffix=".fec", overwrite=
     Encode inf, writing the shares to specially named, newly created files.
 
     @param fsize: calling read() on inf must yield fsize bytes of data and
-        then raise an EOFError
+        then raise an EOFError.  If None, the input is being read as a
+        stream and its size is not known in advance; the share headers
+        must be rewritten with the real pad length once reading completes.
     @param dirname: the name of the directory into which the sharefiles will
         be written
     """
     mlen = len(str(m))
     format = FORMAT_FORMAT % (mlen, mlen,)
 
-    padbytes = pad_size(fsize, k)
+    if fsize is None:
+        # Streaming case: the file size is unknown, so use a placeholder pad;
+        # the real pad is recomputed and written into the headers at EOF.
+        padbytes = pad_size(0, k)
+    else:
+        padbytes = pad_size(fsize, k)
 
     fns = []
     fs = []
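
The streaming branch above writes a placeholder pad of pad_size(0, k), which is always 0; the real pad is filled in later. A short worked example of pad_size as I understand the pyutil helper (not part of the patch):

    from pyutil.mathutil import pad_size

    # pad_size(n, k): number of bytes needed to round n up to a multiple of k.
    assert pad_size(0, 3) == 0    # placeholder while the input size is unknown
    assert pad_size(10, 3) == 2   # 10 + 2 == 12, a multiple of 3
    assert pad_size(12, 3) == 0   # already a multiple, no padding needed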
@@ -197,15 +204,30 @@ def encode_to_files(inf, fsize, dirname, prefix, k, m, suffix=".fec", overwrite=
             oldsumlen = sumlen[0]
             sumlen[0] += length
             if verbose:
-                if int((float(oldsumlen) / fsize) * 10) != int((float(sumlen[0]) / fsize) * 10):
-                    print str(int((float(sumlen[0]) / fsize) * 10) * 10) + "% ...",
-
-            if sumlen[0] > fsize:
+                if fsize is None:
+                    # 30MB takes a short while to write on a normal drive.
+                    interval = MILLION_BYTES * 30
+                    if int((float(oldsumlen) / interval)) != int((float(sumlen[0]) / interval)):
+                        print str(int((float(sumlen[0]) / MILLION_BYTES))) + "MB ...",
+                else:
+                    if int((float(oldsumlen) / fsize) * 10) != int((float(sumlen[0]) / fsize) * 10):
+                        print str(int((float(sumlen[0]) / fsize) * 10) * 10) + "% ...",
+                sys.stdout.flush()
+
+            if fsize is not None and sumlen[0] > fsize:
                 raise IOError("Wrong file size -- possibly the size of the file changed during encoding.  Original size: %d, observed size at least: %s" % (fsize, sumlen[0],))
-            for i in range(len(blocks)):
-                data = blocks[i]
-                fs[i].write(data)
-                length -= len(data)
+            if length != 0:
+                for i in range(len(blocks)):
+                    data = blocks[i]
+                    fs[i].write(data)
+                    length -= len(data)
+            # A zero-length call signals EOF: with fsize unknown, fix up the pad in each header.
+            elif fsize is None:
+                padbytes = pad_size(oldsumlen, k)
+                for shnum in range(len(fs)):
+                    hdr = _build_header(m, k, padbytes, shnum)
+                    fs[shnum].seek(0)
+                    fs[shnum].write(hdr)
 
         encode_file_stringy_easyfec(inf, cb, k, m, chunksize=4096)
     except EnvironmentError, le:
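
The EOF branch rewrites each share header in place once the input length is known, which relies on the header built by _build_header having a fixed length for a given m and k. A stripped-down sketch of that pattern, assuming it sits in filefec.py next to _build_header and pad_size (the function names are mine):

    def start_share_header(f, m, k, shnum):
        # Placeholder header: the pad is unknown while streaming, so write 0 for now.
        f.write(_build_header(m, k, pad_size(0, k), shnum))

    def finish_share_header(f, m, k, databytes, shnum):
        # databytes is the total number of input bytes consumed (sumlen[0] in
        # the patch); recompute the pad and overwrite the placeholder in place.
        f.seek(0)
        f.write(_build_header(m, k, pad_size(databytes, k), shnum))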
@@ -232,7 +254,7 @@ def encode_to_files(inf, fsize, dirname, prefix, k, m, suffix=".fec", overwrite=
 # Thanks.
 MILLION_BYTES=10**6
 
-def decode_from_files(outf, infiles, verbose=False):
+def decode_from_files(outf, infiles, verbose=False, is_stream=False):
     """
     Decode from the first k files in infiles, writing the results to outf.
     """
@@ -277,16 +299,20 @@ def decode_from_files(outf, infiles, verbose=False):
             outf.write(resultdata)
             byteswritten += len(resultdata)
             if verbose:
-                if ((byteswritten - len(resultdata)) / (10*MILLION_BYTES)) != (byteswritten / (10*MILLION_BYTES)):
-                    print str(byteswritten / MILLION_BYTES) + " MB ...",
+                if ((byteswritten - len(resultdata)) / (30*MILLION_BYTES)) != (byteswritten / (30*MILLION_BYTES)):
+                    message = str(byteswritten / MILLION_BYTES) + "MB ..."
+                    outpipe = sys.stderr if is_stream else sys.stdout
+                    print >> outpipe, message,
+                    outpipe.flush()
         else:
             # Then this was a short read, so we've reached the end of the sharefiles.
             resultdata = dec.decode(chunks, shnums, padlen)
             outf.write(resultdata)
-            return # Done.
+            break # Done.
     if verbose:
-        print
-        print "Done!"
+        outpipe = sys.stderr if is_stream else sys.stdout
+        print >> outpipe
+        print >> outpipe, "Done!"
 
 def encode_file(inf, cb, k, m, chunksize=4096):
     """
@@ -493,6 +519,8 @@ def encode_file_stringy_easyfec(inf, cb, k, m, chunksize=4096):
         res = enc.encode(indata)
         cb(res, len(indata))
         indata = inf.read(readsize)
+    # Final zero-length callback signals EOF so callers can finish up (e.g. rewrite headers when streaming).
+    cb([''] * m, 0)
 
 # zfec -- fast forward error correction library with Python interface
 #
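
The extra zero-length callback lets any consumer of encode_file_stringy_easyfec do end-of-stream bookkeeping; in encode_to_files it is what triggers the header fix-up when fsize is None. A callback only has to treat length == 0 as the EOF marker, e.g. (a sketch, not part of the patch):

    def make_progress_cb():
        state = {'inbytes': 0}
        def cb(blocks, length):
            if length == 0:
                # Zero-length sentinel: end of input; finalize here.
                print "input finished after", state['inbytes'], "bytes"
                return
            state['inbytes'] += length
            # ... normal case: write each entry of blocks to its share file ...
        return cb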
zfec/test/test_zfec.py
index 46bbb1cb6bbdeaf8c12a2982a5c326ceafcf02f4..b2d9a5b9e923bba1d2f982b1d540410550cf4df1 100755 (executable)
@@ -246,6 +246,10 @@ class FileFec(unittest.TestCase):
                         assert (rm, rk, rpad, rsh,) == (m, k, pad, sh,), h
 
     def _help_test_filefec(self, teststr, k, m, numshs=None):
+        self._do_help_test_filefec(teststr, k, m, numshs=numshs, withsize=False)
+        self._do_help_test_filefec(teststr, k, m, numshs=numshs, withsize=True)
+
+    def _do_help_test_filefec(self, teststr, k, m, numshs=None, withsize=True):
         if numshs == None:
             numshs = m
 
@@ -253,7 +257,10 @@ class FileFec(unittest.TestCase):
         PREFIX = "test"
         SUFFIX = ".fec"
 
-        fsize = len(teststr)
+        if withsize:
+            fsize = len(teststr)
+        else:
+            fsize = None
 
         tempdir = fileutil.NamedTemporaryDirectory(cleanup=True)
         try:
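
The helper now runs every case both with a known size and with fsize=None. For a quick manual check of the streaming path outside the unittest harness, a round trip can be driven from an in-memory stream; this sketch assumes the patched module behaves as the new withsize=False tests expect (StringIO stands in for stdin):

    from StringIO import StringIO
    import os, shutil, tempfile
    from zfec import filefec

    teststr = "streaming round trip " * 100
    tempdir = tempfile.mkdtemp()
    try:
        # fsize=None exercises the new streaming branch of encode_to_files.
        filefec.encode_to_files(StringIO(teststr), None, tempdir, "t", 3, 8,
                                suffix=".fec", overwrite=True, verbose=False)
        shares = [open(os.path.join(tempdir, fn), 'rb')
                  for fn in sorted(os.listdir(tempdir)) if fn.endswith(".fec")]
        out = StringIO()
        filefec.decode_from_files(out, shares, verbose=False, is_stream=False)
        assert out.getvalue() == teststr
    finally:
        shutil.rmtree(tempdir)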