From: zooko Date: Tue, 4 May 2010 09:57:58 +0000 (+0530) Subject: whitespace, docstrings, copyright statements X-Git-Url: https://git.rkrishnan.org/simplejson/components//%22news.html/%22?a=commitdiff_plain;h=faff7ee7dc90dcb277cff233c74fa4b31d750b52;p=tahoe-lafs%2Fzfec.git whitespace, docstrings, copyright statements Ignore-this: afed20b517f8d3f8a229a52dfa185085 darcs-hash:85f4eed17c792be5d90b60778381a2ec229503b1 --- diff --git a/zfec/README.txt b/zfec/README.txt index 49fac55..ac2959a 100644 --- a/zfec/README.txt +++ b/zfec/README.txt @@ -118,7 +118,7 @@ Note that if 7z is used for archiving then it also does compression, so you don't need a separate compressor in that case. - * Performance Measurements + * Performance On my Athlon 64 2.4 GHz workstation (running Linux), the "zfec" command-line tool encoded a 160 MB file with m=100, k=94 (about 6% redundancy) in 3.9 @@ -139,6 +139,14 @@ at about 6.8 million bytes per second. On my old PowerPC G4 867 MHz Mac laptop, it encoded from a file at about 1.3 million bytes per second. +Here is a paper analyzing the performance of various erasure codes and their +implementations, including zfec: + +http://www.usenix.org/events/fast09/tech/full_papers/plank/plank.pdf + +Zfec shows good performance on different machines and with different values of +K and M. It also has a nice small memory footprint. + * API diff --git a/zfec/bench/bench_zfec.py b/zfec/bench/bench_zfec.py index 30b847c..c3b25ca 100644 --- a/zfec/bench/bench_zfec.py +++ b/zfec/bench/bench_zfec.py @@ -40,10 +40,10 @@ def hashem(results, reslenthing): def _encode_file(N): filefec.encode_file(open(FNAME, "rb"), donothing, K, M) - + def _encode_file_stringy(N): filefec.encode_file_stringy(open(FNAME, "rb"), donothing, K, M) - + def _encode_file_stringy_easyfec(N): filefec.encode_file_stringy_easyfec(open(FNAME, "rb"), donothing, K, M) diff --git a/zfec/setup.py b/zfec/setup.py index 3684b6d..21e9739 100755 --- a/zfec/setup.py +++ b/zfec/setup.py @@ -1,10 +1,10 @@ #!/usr/bin/env python # zfec -- fast forward error correction library with Python interface -# -# Copyright (C) 2007-2009 Allmydata, Inc. +# +# Copyright (C) 2007-2010 Allmydata, Inc. # Author: Zooko Wilcox-O'Hearn -# +# # This file is part of zfec. # # See README.txt for licensing information. @@ -63,10 +63,10 @@ if DEBUGMODE: trove_classifiers=[ "Development Status :: 5 - Production/Stable", "Environment :: Console", - "License :: OSI Approved :: GNU General Public License (GPL)", + "License :: OSI Approved :: GNU General Public License (GPL)", "License :: DFSG approved", "License :: Other/Proprietary License", - "Intended Audience :: Developers", + "Intended Audience :: Developers", "Intended Audience :: End Users/Desktop", "Intended Audience :: System Administrators", "Operating System :: Microsoft", @@ -76,9 +76,9 @@ trove_classifiers=[ "Operating System :: POSIX", "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows :: Windows NT/2000", - "Operating System :: OS Independent", - "Natural Language :: English", - "Programming Language :: C", + "Operating System :: OS Independent", + "Natural Language :: English", + "Programming Language :: C", "Programming Language :: Python", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.4", @@ -89,9 +89,9 @@ trove_classifiers=[ "Topic :: System :: Distributed Computing", "Topic :: Software Development :: Libraries", "Topic :: Communications :: Usenet News", - "Topic :: System :: Archiving :: Backup", - "Topic :: System :: Archiving :: Mirroring", - "Topic :: System :: Archiving", + "Topic :: System :: Archiving :: Backup", + "Topic :: System :: Archiving :: Mirroring", + "Topic :: System :: Archiving", ] PKG = "zfec" diff --git a/zfec/zfec/__init__.py b/zfec/zfec/__init__.py index a5716c0..d3e742f 100644 --- a/zfec/zfec/__init__.py +++ b/zfec/zfec/__init__.py @@ -1,9 +1,9 @@ """ zfec -- fast forward error correction library with Python interface -maintainer web site: U{http://allmydata.com/source/zfec} +maintainer web site: U{http://tahoe-lafs.org/source/zfec} -zfec web site: U{http://allmydata.com/source/zfec} +zfec web site: U{http://tahoe-lafs.org/source/zfec} """ __version__ = "unknown" @@ -21,11 +21,11 @@ import easyfec, filefec, cmdline_zfec, cmdline_zunfec quiet_pyflakes=[__version__, Error, Encoder, Decoder, cmdline_zunfec, filefec, cmdline_zfec, easyfec] # zfec -- fast forward error correction library with Python interface -# -# Copyright (C) 2007 Allmydata, Inc. +# +# Copyright (C) 2007-2010 Allmydata, Inc. # Author: Zooko Wilcox-O'Hearn # mailto:zooko@zooko.com -# +# # This file is part of zfec. # # See README.txt for licensing information. diff --git a/zfec/zfec/_fecmodule.c b/zfec/zfec/_fecmodule.c index e9449c9..9d04dc4 100644 --- a/zfec/zfec/_fecmodule.c +++ b/zfec/zfec/_fecmodule.c @@ -168,7 +168,7 @@ Encoder_encode(Encoder *self, PyObject *args) { } oldsz = sz; } - + /* Allocate space for all of the check blocks. */ for (i=0; iB", ch)[0] val <<= 8 - val |= byte + val |= byte needed_padbits -= 8 assert needed_padbits <= 0 extrabits = -needed_padbits @@ -147,7 +147,7 @@ def _parse_header(inf): raise CorruptedShareFilesError("Share files were corrupted -- share file %r didn't have a complete metadata header at the front. Perhaps the file was truncated." % (inf.name,)) byte = struct.unpack(">B", ch)[0] val <<= 8 - val |= byte + val |= byte needed_shbits -= 8 assert needed_shbits <= 0 @@ -163,7 +163,7 @@ def encode_to_files(inf, fsize, dirname, prefix, k, m, suffix=".fec", overwrite= """ Encode inf, writing the shares to specially named, newly created files. - @param fsize: calling read() on inf must yield fsize bytes of data and + @param fsize: calling read() on inf must yield fsize bytes of data and then raise an EOFError @param dirname: the name of the directory into which the sharefiles will be written @@ -199,7 +199,7 @@ def encode_to_files(inf, fsize, dirname, prefix, k, m, suffix=".fec", overwrite= if verbose: if int((float(oldsumlen) / fsize) * 10) != int((float(sumlen[0]) / fsize) * 10): print str(int((float(sumlen[0]) / fsize) * 10) * 10) + "% ...", - + if sumlen[0] > fsize: raise IOError("Wrong file size -- possibly the size of the file changed during encoding. Original size: %d, observed size at least: %s" % (fsize, sumlen[0],)) for i in range(len(blocks)): @@ -222,7 +222,7 @@ def encode_to_files(inf, fsize, dirname, prefix, k, m, suffix=".fec", overwrite= fileutil.remove_if_possible(fn) return 1 if verbose: - print + print print "Done!" return 0 @@ -292,29 +292,29 @@ def encode_file(inf, cb, k, m, chunksize=4096): """ Read in the contents of inf, encode, and call cb with the results. - First, k "input blocks" will be read from inf, each input block being of - size chunksize. Then these k blocks will be encoded into m "result - blocks". Then cb will be invoked, passing a list of the m result blocks - as its first argument, and the length of the encoded data as its second - argument. (The length of the encoded data is always equal to k*chunksize, - until the last iteration, when the end of the file has been reached and - less than k*chunksize bytes could be read from the file.) This procedure - is iterated until the end of the file is reached, in which case the space + First, k "input blocks" will be read from inf, each input block being of + size chunksize. Then these k blocks will be encoded into m "result + blocks". Then cb will be invoked, passing a list of the m result blocks + as its first argument, and the length of the encoded data as its second + argument. (The length of the encoded data is always equal to k*chunksize, + until the last iteration, when the end of the file has been reached and + less than k*chunksize bytes could be read from the file.) This procedure + is iterated until the end of the file is reached, in which case the space of the input blocks that is unused is filled with zeroes before encoding. Note that the sequence passed in calls to cb() contains mutable array - objects in its first k elements whose contents will be overwritten when - the next segment is read from the input file. Therefore the - implementation of cb() has to either be finished with those first k arrays - before returning, or if it wants to keep the contents of those arrays for - subsequent use after it has returned then it must make a copy of them to + objects in its first k elements whose contents will be overwritten when + the next segment is read from the input file. Therefore the + implementation of cb() has to either be finished with those first k arrays + before returning, or if it wants to keep the contents of those arrays for + subsequent use after it has returned then it must make a copy of them to keep. @param inf the file object from which to read the data @param cb the callback to be invoked with the results @param k the number of shares required to reconstruct the file @param m the total number of shares created - @param chunksize how much data to read from inf for each of the k input + @param chunksize how much data to read from inf for each of the k input blocks """ enc = zfec.Encoder(k, m) @@ -335,7 +335,7 @@ def encode_file(inf, cb, k, m, chunksize=4096): except EOFError: eof = True indatasize = i*chunksize + len(a) - + # padding a.fromstring("\x00" * (chunksize-len(a))) i += 1 @@ -374,7 +374,7 @@ def encode_file_not_really(inf, cb, k, m, chunksize=4096): except EOFError: eof = True indatasize = i*chunksize + len(a) - + # padding a.fromstring("\x00" * (chunksize-len(a))) i += 1 @@ -406,7 +406,7 @@ def encode_file_not_really_and_hash(inf, cb, k, m, chunksize=4096): except EOFError: eof = True indatasize = i*chunksize + len(a) - + # padding a.fromstring("\x00" * (chunksize-len(a))) i += 1 @@ -424,21 +424,21 @@ def encode_file_stringy(inf, cb, k, m, chunksize=4096): """ Read in the contents of inf, encode, and call cb with the results. - First, k "input blocks" will be read from inf, each input block being of - size chunksize. Then these k blocks will be encoded into m "result - blocks". Then cb will be invoked, passing a list of the m result blocks - as its first argument, and the length of the encoded data as its second - argument. (The length of the encoded data is always equal to k*chunksize, - until the last iteration, when the end of the file has been reached and - less than k*chunksize bytes could be read from the file.) This procedure - is iterated until the end of the file is reached, in which case the part + First, k "input blocks" will be read from inf, each input block being of + size chunksize. Then these k blocks will be encoded into m "result + blocks". Then cb will be invoked, passing a list of the m result blocks + as its first argument, and the length of the encoded data as its second + argument. (The length of the encoded data is always equal to k*chunksize, + until the last iteration, when the end of the file has been reached and + less than k*chunksize bytes could be read from the file.) This procedure + is iterated until the end of the file is reached, in which case the part of the input shares that is unused is filled with zeroes before encoding. @param inf the file object from which to read the data @param cb the callback to be invoked with the results @param k the number of shares required to reconstruct the file @param m the total number of shares created - @param chunksize how much data to read from inf for each of the k input + @param chunksize how much data to read from inf for each of the k input blocks """ enc = zfec.Encoder(k, m) @@ -454,7 +454,7 @@ def encode_file_stringy(inf, cb, k, m, chunksize=4096): l.append(inf.read(chunksize)) if len(l[-1]) < chunksize: indatasize = i*chunksize + len(l[-1]) - + # padding l[-1] = l[-1] + "\x00" * (chunksize-len(l[-1])) while i