From ee66b029898d18b8ba179d100afe275428814519 Mon Sep 17 00:00:00 2001
From: Zooko O'Whielacronx <zooko@zooko.com>
Date: Sat, 14 Apr 2007 12:00:10 -0700
Subject: [PATCH] pyfec: add bin/fec and bin/unfec, do better handling and
 reporting of various errors

---
 pyfec/bin/fec                | 37 ++++++++++++++++++++
 pyfec/bin/unfec              | 25 ++++++++++++++
 pyfec/fec/filefec.py         | 65 ++++++++++++++++++++----------------
 pyfec/fec/test/test_pyfec.py | 12 +++----
 4 files changed, 103 insertions(+), 36 deletions(-)
 create mode 100644 pyfec/bin/fec
 create mode 100644 pyfec/bin/unfec

diff --git a/pyfec/bin/fec b/pyfec/bin/fec
new file mode 100644
index 00000000..170933ce
--- /dev/null
+++ b/pyfec/bin/fec
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+
+# Command-line front end for fec.filefec.encode_to_files: encode one input file
+# into a set of share files, a subset of which can later recover the original.
+
+import sys
+
+from fec.util import argparse
+from fec import filefec
+
+parser = argparse.ArgumentParser(description="Encode a file into a set of share files, a subset of which can later be used to recover the original file.")
+# -m and -k need type=int: without it, values given on the command line stay strings and the range check below misfires.
+parser.add_argument('inputfile', help='file to encode or "-" for stdin', type=argparse.FileType('rb'), metavar='INF')
+parser.add_argument('-d', '--output-dir', help='directory in which share file names will be created', default='.', metavar='D')
+parser.add_argument('-p', '--prefix', help='prefix for share file names; If omitted, the name of the input file will be used.', metavar='P')
+parser.add_argument('-s', '--suffix', help='suffix for share file names', default='.fec', metavar='S')
+parser.add_argument('-m', '--totalshares', help='the total number of share files created', default=16, type=int, metavar='M')
+parser.add_argument('-k', '--requiredshares', help='the number of share files required to reconstruct', default=4, type=int, metavar='K')
+parser.add_argument('-v', '--verbose', help='print out messages about progress', action='store_true')
+args = parser.parse_args()
+
+# Default the prefix to the input file's name; when that name is "<stdin>", use an empty prefix instead.
+if args.prefix is None:
+    args.prefix = args.inputfile.name
+    if args.prefix == "<stdin>":
+        args.prefix = ""
+
+if args.totalshares < 3 or args.totalshares > 256 or args.requiredshares < 2 or args.requiredshares >= args.totalshares:
+    print "Invalid parameters, requiredshares: %s, totalshares:%s\nPlease see the accompanying documentation." % (args.requiredshares, args.totalshares,)
+    sys.exit(1)
+
+# Measure the input by seeking to its end, then rewind so encoding starts at byte 0.
+args.inputfile.seek(0, 2)
+fsize = args.inputfile.tell()
+args.inputfile.seek(0, 0)
+ret = filefec.encode_to_files(args.inputfile, fsize, args.output_dir, args.prefix, args.requiredshares, args.totalshares, args.suffix, args.verbose)
+sys.exit(ret)
diff --git a/pyfec/bin/unfec b/pyfec/bin/unfec
new file mode 100644
index 00000000..15a87ae0
--- /dev/null
+++ b/pyfec/bin/unfec
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+
+# Command-line front end for fec.filefec.decode_from_files: decode data from the
+# given share files and write it to the named output file ("-" for stdout).
+
+import sys
+
+from fec.util import argparse
+
+from fec import filefec
+
+argparser = argparse.ArgumentParser(description="Decode data from share files.")
+argparser.add_argument('outputfile', help='file to write the resulting data to, or "-" for stdout', type=argparse.FileType('wb'), metavar='OUTF')
+argparser.add_argument('sharefiles', nargs='+', help='shares file to read the encoded data from', type=argparse.FileType('rb'), metavar='SHAREFILE')
+argparser.add_argument('-v', '--verbose', help='print out messages about progress', action='store_true')
+opts = argparser.parse_args()
+
+# nargs='+' accepts a single share file, so the two-file minimum is checked here.
+if len(opts.sharefiles) <= 1:
+    print "At least two sharefiles are required."
+    sys.exit(1)
+
+# The value returned by decode_from_files becomes the process exit status.
+exit_status = filefec.decode_from_files(opts.outputfile, opts.sharefiles, opts.verbose)
+sys.exit(exit_status)
diff --git a/pyfec/fec/filefec.py b/pyfec/fec/filefec.py
index b2279628..f86abce8 100644
--- a/pyfec/fec/filefec.py
+++ b/pyfec/fec/filefec.py
@@ -104,14 +104,20 @@ def _parse_header(inf):
         bytes of inf will be read
     """
     # The first 8 bits always encode m.
-    byte = ord(inf.read(1))
+    ch = inf.read(1)
+    if not ch:
+        raise fec.Error("Share files were corrupted -- share file %r didn't have a complete metadata header at the front.  Perhaps the file was truncated." % (inf.name,))
+    byte = ord(ch)
     m = byte + 3
 
     # The next few bits encode k.
     kbits = log_ceil(m-2, 2) # num bits needed to store all possible values of k
     b2_bits_left = 8-kbits
     kbitmask = MASK(kbits) << b2_bits_left
-    byte = ord(inf.read(1))
+    ch = inf.read(1)
+    if not ch:
+        raise fec.Error("Share files were corrupted -- share file %r didn't have a complete metadata header at the front.  Perhaps the file was truncated." % (inf.name,))
+    byte = ord(ch)
     k = ((byte & kbitmask) >> b2_bits_left) + 2
 
     shbits = log_ceil(m, 2) # num bits needed to store all possible values of shnum
@@ -121,7 +127,10 @@ def _parse_header(inf):
 
     needed_padbits = padbits - b2_bits_left
     if needed_padbits > 0:
-        byte = struct.unpack(">B", inf.read(1))[0]
+        ch = inf.read(1)
+        if not ch:
+            raise fec.Error("Share files were corrupted -- share file %r didn't have a complete metadata header at the front.  Perhaps the file was truncated." % (inf.name,))
+        byte = struct.unpack(">B", ch)[0]
         val <<= 8
         val |= byte 
         needed_padbits -= 8
@@ -132,7 +141,10 @@ def _parse_header(inf):
 
     needed_shbits = shbits - extrabits
     if needed_shbits > 0:
-        byte = struct.unpack(">B", inf.read(1))[0]
+        ch = inf.read(1)
+        if not ch:
+            raise fec.Error("Share files were corrupted -- share file %r didn't have a complete metadata header at the front.  Perhaps the file was truncated." % (inf.name,))
+        byte = struct.unpack(">B", ch)[0]
         val <<= 8
         val |= byte 
         needed_shbits -= 8
@@ -205,39 +217,34 @@ def encode_to_files(inf, fsize, dirname, prefix, k, m, suffix=".fec", verbose=Fa
         print "Done!"
     return 0
 
-def decode_from_files(outf, dirname, prefix, suffix=".fec", verbose=False):
+def decode_from_files(outf, infiles, verbose=False):
     """
-    Decode from the first k files in the directory whose names match the
-    pattern, writing the results to outf.
+    Decode from the first k files in infiles, writing the results to outf.
     """
-    RE=re.compile(RE_FORMAT % (prefix, suffix,))
-
+    assert len(infiles) >= 2
     infs = []
     shnums = []
     m = None
     k = None
     padlen = None
 
-    for fn in os.listdir(dirname):
-        if RE.match(fn):
-            f = open(os.path.join(dirname, fn), "rb")
-
-            (nm, nk, npadlen, shnum,) = _parse_header(f)
-            if not (m is None or m == nm):
-                raise fec.Error("Share files were corrupted -- share file %s said that m was %s but another share file previously said that m was %s" % (f, nm, m,))
-            m = nm
-            if not (k is None or k == nk):
-                raise fec.Error("Share files were corrupted -- share file %s said that k was %s but another share file previously said that k was %s" % (f, nk, k,))
-            k = nk
-            if not (padlen is None or padlen == npadlen):
-                raise fec.Error("Share files were corrupted -- share file %s said that pad length was %s but another share file previously said that pad length was %s" % (f, npadlen, padlen,))
-            padlen = npadlen
-
-            infs.append(f)
-            shnums.append(shnum)
-
-            if len(infs) == k:
-                break
+    for f in infiles:
+        (nm, nk, npadlen, shnum,) = _parse_header(f)
+        if not (m is None or m == nm):
+            raise fec.Error("Share files were corrupted -- share file %r said that m was %s but another share file previously said that m was %s" % (f.name, nm, m,))
+        m = nm
+        if not (k is None or k == nk):
+            raise fec.Error("Share files were corrupted -- share file %r said that k was %s but another share file previously said that k was %s" % (f.name, nk, k,))
+        k = nk
+        if not (padlen is None or padlen == npadlen):
+            raise fec.Error("Share files were corrupted -- share file %r said that pad length was %s but another share file previously said that pad length was %s" % (f.name, npadlen, padlen,))
+        padlen = npadlen
+
+        infs.append(f)
+        shnums.append(shnum)
+
+        if len(infs) == k:
+            break
 
     dec = easyfec.Decoder(k, m)
 
diff --git a/pyfec/fec/test/test_pyfec.py b/pyfec/fec/test/test_pyfec.py
index 7731e28e..6d282e90 100644
--- a/pyfec/fec/test/test_pyfec.py
+++ b/pyfec/fec/test/test_pyfec.py
@@ -168,18 +168,16 @@ class FileFec(unittest.TestCase):
             # encode the file
             fec.filefec.encode_to_files(open(tempfn, 'rb'), fsize, tempdir.name, PREFIX, k, m, SUFFIX, verbose=VERBOSE)
 
-            # delete some share files
-            fns = os.listdir(tempdir.name)
+            # select some share files
             RE=re.compile(fec.filefec.RE_FORMAT % (PREFIX, SUFFIX,))
-            sharefs = [ fn for fn in fns if RE.match(fn) ]
+            fns = os.listdir(tempdir.name)
+            sharefs = [ open(os.path.join(tempdir.name, fn), "rb") for fn in fns if RE.match(fn) ]
             random.shuffle(sharefs)
-            while len(sharefs) > numshs:
-                shfn = sharefs.pop()
-                fec.util.fileutil.remove(os.path.join(tempdir.name, shfn))
+            del sharefs[numshs:]
 
             # decode from the share files
             outf = open(os.path.join(tempdir.name, 'recovered-testfile.txt'), 'wb')
-            fec.filefec.decode_from_files(outf, tempdir.name, PREFIX, SUFFIX, verbose=VERBOSE)
+            fec.filefec.decode_from_files(outf, sharefs, verbose=VERBOSE)
             outf.close()
 
             tempfn = open(os.path.join(tempdir.name, 'recovered-testfile.txt'), 'rb')
-- 
2.45.2