don't need a separate compressor in that case.
- * Performance Measurements
+ * Performance
On my Athlon 64 2.4 GHz workstation (running Linux), the "zfec" command-line
tool encoded a 160 MB file with m=100, k=94 (about 6% redundancy) in 3.9 seconds.
On my old PowerPC G4 867 MHz Mac laptop, it encoded from a file at about 1.3
million bytes per second.
+Here is a paper analyzing the performance of various erasure codes and their
+implementations, including zfec:
+
+http://www.usenix.org/events/fast09/tech/full_papers/plank/plank.pdf
+
+Zfec shows good performance on different machines and with different values of
+K and M. It also has a nice small memory footprint.
+
* API
def _encode_file(N):
filefec.encode_file(open(FNAME, "rb"), donothing, K, M)
-
+
def _encode_file_stringy(N):
filefec.encode_file_stringy(open(FNAME, "rb"), donothing, K, M)
-
+
def _encode_file_stringy_easyfec(N):
filefec.encode_file_stringy_easyfec(open(FNAME, "rb"), donothing, K, M)
#!/usr/bin/env python
# zfec -- fast forward error correction library with Python interface
-#
-# Copyright (C) 2007-2009 Allmydata, Inc.
+#
+# Copyright (C) 2007-2010 Allmydata, Inc.
# Author: Zooko Wilcox-O'Hearn
-#
+#
# This file is part of zfec.
#
# See README.txt for licensing information.
trove_classifiers=[
"Development Status :: 5 - Production/Stable",
"Environment :: Console",
- "License :: OSI Approved :: GNU General Public License (GPL)",
+ "License :: OSI Approved :: GNU General Public License (GPL)",
"License :: DFSG approved",
"License :: Other/Proprietary License",
- "Intended Audience :: Developers",
+ "Intended Audience :: Developers",
"Intended Audience :: End Users/Desktop",
"Intended Audience :: System Administrators",
"Operating System :: Microsoft",
"Operating System :: POSIX",
"Operating System :: MacOS :: MacOS X",
"Operating System :: Microsoft :: Windows :: Windows NT/2000",
- "Operating System :: OS Independent",
- "Natural Language :: English",
- "Programming Language :: C",
+ "Operating System :: OS Independent",
+ "Natural Language :: English",
+ "Programming Language :: C",
"Programming Language :: Python",
"Programming Language :: Python :: 2",
"Programming Language :: Python :: 2.4",
"Topic :: System :: Distributed Computing",
"Topic :: Software Development :: Libraries",
"Topic :: Communications :: Usenet News",
- "Topic :: System :: Archiving :: Backup",
- "Topic :: System :: Archiving :: Mirroring",
- "Topic :: System :: Archiving",
+ "Topic :: System :: Archiving :: Backup",
+ "Topic :: System :: Archiving :: Mirroring",
+ "Topic :: System :: Archiving",
]
PKG = "zfec"
"""
zfec -- fast forward error correction library with Python interface
-maintainer web site: U{http://allmydata.com/source/zfec}
+maintainer web site: U{http://tahoe-lafs.org/source/zfec}
-zfec web site: U{http://allmydata.com/source/zfec}
+zfec web site: U{http://tahoe-lafs.org/source/zfec}
"""
__version__ = "unknown"
quiet_pyflakes=[__version__, Error, Encoder, Decoder, cmdline_zunfec, filefec, cmdline_zfec, easyfec]
# zfec -- fast forward error correction library with Python interface
-#
-# Copyright (C) 2007 Allmydata, Inc.
+#
+# Copyright (C) 2007-2010 Allmydata, Inc.
# Author: Zooko Wilcox-O'Hearn
# mailto:zooko@zooko.com
-#
+#
# This file is part of zfec.
#
# See README.txt for licensing information.
}
oldsz = sz;
}
-
+
/* Allocate space for all of the check blocks. */
for (i=0; i<num_desired_blocks; i++) {
/**
* zfec -- fast forward error correction library with Python interface
- *
- * Copyright (C) 2007 Allmydata, Inc.
+ *
+ * Copyright (C) 2007-2010 Zooko Wilcox-O'Hearn
* Author: Zooko Wilcox-O'Hearn
- *
+ *
* This file is part of zfec.
- *
+ *
* See README.txt for licensing information.
*/
* Robert Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari
* Thirumoorthy (harit@spectra.eng.hawaii.edu), Aug 1995
*
- * Modifications by Dan Rubenstein (see Modifications.txt for
+ * Modifications by Dan Rubenstein (see Modifications.txt for
* their description.
* Modifications (C) 1998 Dan Rubenstein (drubenst@cs.umass.edu)
*
raise CorruptedShareFilesError("Share files were corrupted -- share file %r didn't have a complete metadata header at the front. Perhaps the file was truncated." % (inf.name,))
byte = struct.unpack(">B", ch)[0]
val <<= 8
- val |= byte
+ val |= byte
needed_padbits -= 8
assert needed_padbits <= 0
extrabits = -needed_padbits
raise CorruptedShareFilesError("Share files were corrupted -- share file %r didn't have a complete metadata header at the front. Perhaps the file was truncated." % (inf.name,))
byte = struct.unpack(">B", ch)[0]
val <<= 8
- val |= byte
+ val |= byte
needed_shbits -= 8
assert needed_shbits <= 0
"""
Encode inf, writing the shares to specially named, newly created files.
- @param fsize: calling read() on inf must yield fsize bytes of data and
+ @param fsize: calling read() on inf must yield fsize bytes of data and
then raise an EOFError
@param dirname: the name of the directory into which the sharefiles will
be written
if verbose:
if int((float(oldsumlen) / fsize) * 10) != int((float(sumlen[0]) / fsize) * 10):
print str(int((float(sumlen[0]) / fsize) * 10) * 10) + "% ...",
-
+
if sumlen[0] > fsize:
raise IOError("Wrong file size -- possibly the size of the file changed during encoding. Original size: %d, observed size at least: %s" % (fsize, sumlen[0],))
for i in range(len(blocks)):
fileutil.remove_if_possible(fn)
return 1
if verbose:
- print
+ print
print "Done!"
return 0
"""
Read in the contents of inf, encode, and call cb with the results.
- First, k "input blocks" will be read from inf, each input block being of
- size chunksize. Then these k blocks will be encoded into m "result
- blocks". Then cb will be invoked, passing a list of the m result blocks
- as its first argument, and the length of the encoded data as its second
- argument. (The length of the encoded data is always equal to k*chunksize,
- until the last iteration, when the end of the file has been reached and
- less than k*chunksize bytes could be read from the file.) This procedure
- is iterated until the end of the file is reached, in which case the space
+ First, k "input blocks" will be read from inf, each input block being of
+ size chunksize. Then these k blocks will be encoded into m "result
+ blocks". Then cb will be invoked, passing a list of the m result blocks
+ as its first argument, and the length of the encoded data as its second
+ argument. (The length of the encoded data is always equal to k*chunksize,
+ until the last iteration, when the end of the file has been reached and
+ less than k*chunksize bytes could be read from the file.) This procedure
+ is iterated until the end of the file is reached, in which case the space
of the input blocks that is unused is filled with zeroes before encoding.
Note that the sequence passed in calls to cb() contains mutable array
- objects in its first k elements whose contents will be overwritten when
- the next segment is read from the input file. Therefore the
- implementation of cb() has to either be finished with those first k arrays
- before returning, or if it wants to keep the contents of those arrays for
- subsequent use after it has returned then it must make a copy of them to
+ objects in its first k elements whose contents will be overwritten when
+ the next segment is read from the input file. Therefore the
+ implementation of cb() has to either be finished with those first k arrays
+ before returning, or if it wants to keep the contents of those arrays for
+ subsequent use after it has returned then it must make a copy of them to
keep.
@param inf the file object from which to read the data
@param cb the callback to be invoked with the results
@param k the number of shares required to reconstruct the file
@param m the total number of shares created
- @param chunksize how much data to read from inf for each of the k input
+ @param chunksize how much data to read from inf for each of the k input
blocks
"""
enc = zfec.Encoder(k, m)
except EOFError:
eof = True
indatasize = i*chunksize + len(a)
-
+
# padding
a.fromstring("\x00" * (chunksize-len(a)))
i += 1
except EOFError:
eof = True
indatasize = i*chunksize + len(a)
-
+
# padding
a.fromstring("\x00" * (chunksize-len(a)))
i += 1
except EOFError:
eof = True
indatasize = i*chunksize + len(a)
-
+
# padding
a.fromstring("\x00" * (chunksize-len(a)))
i += 1
"""
Read in the contents of inf, encode, and call cb with the results.
- First, k "input blocks" will be read from inf, each input block being of
- size chunksize. Then these k blocks will be encoded into m "result
- blocks". Then cb will be invoked, passing a list of the m result blocks
- as its first argument, and the length of the encoded data as its second
- argument. (The length of the encoded data is always equal to k*chunksize,
- until the last iteration, when the end of the file has been reached and
- less than k*chunksize bytes could be read from the file.) This procedure
- is iterated until the end of the file is reached, in which case the part
+ First, k "input blocks" will be read from inf, each input block being of
+ size chunksize. Then these k blocks will be encoded into m "result
+ blocks". Then cb will be invoked, passing a list of the m result blocks
+ as its first argument, and the length of the encoded data as its second
+ argument. (The length of the encoded data is always equal to k*chunksize,
+ until the last iteration, when the end of the file has been reached and
+ less than k*chunksize bytes could be read from the file.) This procedure
+ is iterated until the end of the file is reached, in which case the part
of the input shares that is unused is filled with zeroes before encoding.
@param inf the file object from which to read the data
@param cb the callback to be invoked with the results
@param k the number of shares required to reconstruct the file
@param m the total number of shares created
- @param chunksize how much data to read from inf for each of the k input
+ @param chunksize how much data to read from inf for each of the k input
blocks
"""
enc = zfec.Encoder(k, m)
l.append(inf.read(chunksize))
if len(l[-1]) < chunksize:
indatasize = i*chunksize + len(l[-1])
-
+
# padding
l[-1] = l[-1] + "\x00" * (chunksize-len(l[-1]))
while i<k:
@param cb the callback to be invoked with the results
@param k the number of shares required to reconstruct the file
@param m the total number of shares created
- @param chunksize how much data to read from inf for each of the k input
+ @param chunksize how much data to read from inf for each of the k input
blocks
"""
enc = easyfec.Encoder(k, m)
indata = inf.read(readsize)
# zfec -- fast forward error correction library with Python interface
-#
-# Copyright (C) 2007 Allmydata, Inc.
+#
+# Copyright (C) 2007-2010 Allmydata, Inc.
# Author: Zooko Wilcox-O'Hearn
-#
+#
# This file is part of zfec.
#
# See README.txt for licensing information.
blocks = [ x[1] for x in nums_and_blocks ]
nums = [ x[0] for x in nums_and_blocks ]
decer = zfec.easyfec.Decoder(k, m)
-
+
decodeds = decer.decode(blocks, nums, padlen=k*len(blocks[0]) - len(s))
assert len(decodeds) == len(s), (ab(decodeds), ab(s), k, m)
assert decodeds == s, (ab(decodeds), ab(s),)
k = random.randrange(1, m+1)
s = randstr(l)
_h_easy(k, m, s)
-
+
class ZFecTest(unittest.TestCase):
def test_from_agl_c(self):
self.failUnless(zfec._fec.test_from_agl())
d = zfec.Decoder(3, 5)
r0, r1, r2 = d.decode((b2, b3, b4), (1, 2, 3))
-
+
# print "after decoding:"
# print "b0: %s, b1: %s" % tuple(base64.b16encode(x) for x in [b0, b1])
def test_filefec_min_shares_with_crcrlflf(self, noisy=VERBOSE):
return self._help_test_filefec("Yellow Whirled!A\r\r\n\n", 3, 8, numshs=3)
-
+
class Cmdline(unittest.TestCase):
def test_basic(self, noisy=VERBOSE):
tempdir = fileutil.NamedTemporaryDirectory(cleanup=True)
DEFAULT_M=8
DEFAULT_K=3
sys.argv = ["zfec", os.path.join(tempdir.name, "test.data"),]
-
+
retcode = zfec.cmdline_zfec.main()
assert retcode == 0, retcode
sys.argv = ["zunfec",]
sys.argv.extend(sharefns)
sys.argv.extend(['-o', os.path.join(tempdir.name, 'test.data-recovered'),])
-
+
retcode = zfec.cmdline_zunfec.main()
assert retcode == 0, retcode
import filecmp