From: Zooko O'Whielacronx <zooko@zooko.com>
Date: Sat, 14 Apr 2007 19:00:10 +0000 (+0530)
Subject: pyfec: add bin/fec and bin/unfec, do better handling and reporting of various errors
X-Git-Url: https://git.rkrishnan.org/%5B/%5D%20/file/URI:LIT:krugkidfnzsc4/@@named=/simplejson/?a=commitdiff_plain;h=4280e20ad49eb1ab6dc1406671dc22726b996717;p=tahoe-lafs%2Fzfec.git

pyfec: add bin/fec and bin/unfec, do better handling and reporting of various errors

darcs-hash:41c8ab3594866cae6ea3a80a2211b2e36cc02c0e
---

diff --git a/pyfec/bin/fec b/pyfec/bin/fec
new file mode 100755
index 0000000..170933c
--- /dev/null
+++ b/pyfec/bin/fec
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+
+# import bindann
+# import bindann.monkeypatch.all
+
+import sys
+
+from fec.util import argparse
+
+from fec import filefec
+
+parser = argparse.ArgumentParser(description="Encode a file into a set of share files, a subset of which can later be used to recover the original file.")
+
+parser.add_argument('inputfile', help='file to encode or "-" for stdin', type=argparse.FileType('rb'), metavar='INF')
+parser.add_argument('-d', '--output-dir', help='directory in which share file names will be created', default='.', metavar='D')
+parser.add_argument('-p', '--prefix', help='prefix for share file names; If omitted, the name of the input file will be used.', metavar='P')
+parser.add_argument('-s', '--suffix', help='suffix for share file names', default='.fec', metavar='S')
+# -m and -k are parsed as integers: they are range-checked below and then passed to filefec.encode_to_files.
+parser.add_argument('-m', '--totalshares', help='the total number of share files created', default=16, type=int, metavar='M')
+parser.add_argument('-k', '--requiredshares', help='the number of share files required to reconstruct', default=4, type=int, metavar='K')
+parser.add_argument('-v', '--verbose', help='print out messages about progress', action='store_true')
+args = parser.parse_args()
+
+# Default the prefix to the input file's name, except for stdin, whose file object is named "<stdin>".
+if args.prefix is None:
+    args.prefix = args.inputfile.name
+    if args.prefix == "<stdin>":
+        args.prefix = ""
+
+# Reject out-of-range k and m before doing any work.
+if args.totalshares < 3 or args.totalshares > 256 or args.requiredshares < 2 or args.requiredshares >= args.totalshares:
+    print "Invalid parameters, requiredshares: %s, totalshares:%s\nPlease see the accompanying documentation." % (args.requiredshares, args.totalshares,)
+    sys.exit(1)
+
+# Measure the input by seeking to its end, then rewind so encoding starts at the first byte.
+args.inputfile.seek(0, 2)
+fsize = args.inputfile.tell()
+args.inputfile.seek(0, 0)
+ret = filefec.encode_to_files(args.inputfile, fsize, args.output_dir, args.prefix, args.requiredshares, args.totalshares, args.suffix, args.verbose)
+
+sys.exit(ret)
diff --git a/pyfec/bin/unfec b/pyfec/bin/unfec
new file mode 100755
index 0000000..15a87ae
--- /dev/null
+++ b/pyfec/bin/unfec
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+
+# import bindann
+# import bindann.monkeypatch.all
+
+import sys
+
+from fec.util import argparse
+
+from fec import filefec
+
+parser = argparse.ArgumentParser(description="Decode data from share files.")
+
+parser.add_argument('outputfile', help='file to write the resulting data to, or "-" for stdout', type=argparse.FileType('wb'), metavar='OUTF')
+parser.add_argument('sharefiles', nargs='+', help='shares file to read the encoded data from', type=argparse.FileType('rb'), metavar='SHAREFILE')
+parser.add_argument('-v', '--verbose', help='print out messages about progress', action='store_true')
+args = parser.parse_args()
+
+if len(args.sharefiles) < 2:
+    print "At least two sharefiles are required."
+    sys.exit(1)
+
+ret = filefec.decode_from_files(args.outputfile, args.sharefiles, args.verbose)
+
+sys.exit(ret)
diff --git a/pyfec/fec/filefec.py b/pyfec/fec/filefec.py index b227962..f86abce 100644 --- a/pyfec/fec/filefec.py +++ b/pyfec/fec/filefec.py @@ -104,14 +104,20 @@ def _parse_header(inf): bytes of inf will be read """ # The first 8 bits always encode m. - byte = ord(inf.read(1)) + ch = inf.read(1) + if not ch: + raise fec.Error("Share files were corrupted -- share file %r didn't have a complete metadata header at the front. Perhaps the file was truncated." 
% (inf.name,)) + byte = ord(ch) m = byte + 3 # The next few bits encode k. kbits = log_ceil(m-2, 2) # num bits needed to store all possible values of k b2_bits_left = 8-kbits kbitmask = MASK(kbits) << b2_bits_left - byte = ord(inf.read(1)) + ch = inf.read(1) + if not ch: + raise fec.Error("Share files were corrupted -- share file %r didn't have a complete metadata header at the front. Perhaps the file was truncated." % (inf.name,)) + byte = ord(ch) k = ((byte & kbitmask) >> b2_bits_left) + 2 shbits = log_ceil(m, 2) # num bits needed to store all possible values of shnum @@ -121,7 +127,10 @@ def _parse_header(inf): needed_padbits = padbits - b2_bits_left if needed_padbits > 0: - byte = struct.unpack(">B", inf.read(1))[0] + ch = inf.read(1) + if not ch: + raise fec.Error("Share files were corrupted -- share file %r didn't have a complete metadata header at the front. Perhaps the file was truncated." % (inf.name,)) + byte = struct.unpack(">B", ch)[0] val <<= 8 val |= byte needed_padbits -= 8 @@ -132,7 +141,10 @@ def _parse_header(inf): needed_shbits = shbits - extrabits if needed_shbits > 0: - byte = struct.unpack(">B", inf.read(1))[0] + ch = inf.read(1) + if not ch: + raise fec.Error("Share files were corrupted -- share file %r didn't have a complete metadata header at the front. Perhaps the file was truncated." % (inf.name,)) + byte = struct.unpack(">B", ch)[0] val <<= 8 val |= byte needed_shbits -= 8 @@ -205,39 +217,34 @@ def encode_to_files(inf, fsize, dirname, prefix, k, m, suffix=".fec", verbose=Fa print "Done!" return 0 -def decode_from_files(outf, dirname, prefix, suffix=".fec", verbose=False): +def decode_from_files(outf, infiles, verbose=False): """ - Decode from the first k files in the directory whose names match the - pattern, writing the results to outf. + Decode from the first k files in infiles, writing the results to outf. 
""" - RE=re.compile(RE_FORMAT % (prefix, suffix,)) - + assert len(infiles) >= 2 infs = [] shnums = [] m = None k = None padlen = None - for fn in os.listdir(dirname): - if RE.match(fn): - f = open(os.path.join(dirname, fn), "rb") - - (nm, nk, npadlen, shnum,) = _parse_header(f) - if not (m is None or m == nm): - raise fec.Error("Share files were corrupted -- share file %s said that m was %s but another share file previously said that m was %s" % (f, nm, m,)) - m = nm - if not (k is None or k == nk): - raise fec.Error("Share files were corrupted -- share file %s said that k was %s but another share file previously said that k was %s" % (f, nk, k,)) - k = nk - if not (padlen is None or padlen == npadlen): - raise fec.Error("Share files were corrupted -- share file %s said that pad length was %s but another share file previously said that pad length was %s" % (f, npadlen, padlen,)) - padlen = npadlen - - infs.append(f) - shnums.append(shnum) - - if len(infs) == k: - break + for f in infiles: + (nm, nk, npadlen, shnum,) = _parse_header(f) + if not (m is None or m == nm): + raise fec.Error("Share files were corrupted -- share file %r said that m was %s but another share file previously said that m was %s" % (f.name, nm, m,)) + m = nm + if not (k is None or k == nk): + raise fec.Error("Share files were corrupted -- share file %r said that k was %s but another share file previously said that k was %s" % (f.name, nk, k,)) + k = nk + if not (padlen is None or padlen == npadlen): + raise fec.Error("Share files were corrupted -- share file %r said that pad length was %s but another share file previously said that pad length was %s" % (f.name, npadlen, padlen,)) + padlen = npadlen + + infs.append(f) + shnums.append(shnum) + + if len(infs) == k: + break dec = easyfec.Decoder(k, m) diff --git a/pyfec/fec/test/test_pyfec.py b/pyfec/fec/test/test_pyfec.py index 7731e28..6d282e9 100755 --- a/pyfec/fec/test/test_pyfec.py +++ b/pyfec/fec/test/test_pyfec.py @@ -168,18 +168,16 @@ class 
FileFec(unittest.TestCase): # encode the file fec.filefec.encode_to_files(open(tempfn, 'rb'), fsize, tempdir.name, PREFIX, k, m, SUFFIX, verbose=VERBOSE) - # delete some share files - fns = os.listdir(tempdir.name) + # select some share files RE=re.compile(fec.filefec.RE_FORMAT % (PREFIX, SUFFIX,)) - sharefs = [ fn for fn in fns if RE.match(fn) ] + fns = os.listdir(tempdir.name) + sharefs = [ open(os.path.join(tempdir.name, fn), "rb") for fn in fns if RE.match(fn) ] random.shuffle(sharefs) - while len(sharefs) > numshs: - shfn = sharefs.pop() - fec.util.fileutil.remove(os.path.join(tempdir.name, shfn)) + del sharefs[numshs:] # decode from the share files outf = open(os.path.join(tempdir.name, 'recovered-testfile.txt'), 'wb') - fec.filefec.decode_from_files(outf, tempdir.name, PREFIX, SUFFIX, verbose=VERBOSE) + fec.filefec.decode_from_files(outf, sharefs, verbose=VERBOSE) outf.close() tempfn = open(os.path.join(tempdir.name, 'recovered-testfile.txt'), 'rb')