From: zooko
Date: Wed, 28 Mar 2007 03:14:30 +0000 (+0530)
Subject: pyfec: add easyfec wrapper which takes a single string and splits it into input share...
X-Git-Url: https://git.rkrishnan.org/(%5B%5E?a=commitdiff_plain;h=c68daf73ab8a35737c51c0f40f96ac1f6b817391;p=tahoe-lafs%2Fzfec.git

pyfec: add easyfec wrapper which takes a single string and splits it into input shares and pads, then passes it on to the inner fec object

darcs-hash:ff2a8043e87d06451b910f760316aa8967062f69
---

Review notes (not part of the commit message):

 * Line breaks and indentation were restored from a whitespace-collapsed copy
   of this patch.  Blank and indented context lines are inferred from the hunk
   line counts, and the "index" blob ids below are those of the original
   patch, not of the corrected content.
 * easyfec.Encoder.encode() sliced data[i:i+chunksize], advancing one byte per
   share instead of chunksize bytes, and could build fewer than k shares.  It
   now always builds k zero-padded shares at stride chunksize.
 * encode_file_stringy_easyfec() now passes the number of bytes actually read
   to the callback instead of always passing k*chunksize.
 * encode_to_files_easyfec() and encode_to_files_stringy() now close the share
   files they open.
 * The benchmark throughput figures divide by 1000000 again rather than by
   filesize.

diff --git a/pyfec/fec/easyfec.py b/pyfec/fec/easyfec.py
new file mode 100644
index 0000000..3c05813
--- /dev/null
+++ b/pyfec/fec/easyfec.py
@@ -0,0 +1,53 @@
+import fec
+
+# div_ceil() was copied from the pyutil library.
+def div_ceil(n, d):
+    """
+    The smallest integer k such that k*d >= n.
+    """
+    # Use floor division explicitly so the result stays an integer even when
+    # true division is in effect.
+    return (n//d) + (n%d != 0)
+
+
+class Encoder(object):
+    """
+    Wrap fec.Encoder so that callers can pass a single string instead of a
+    list of k equal-length input shares.
+    """
+    def __init__(self, k, m):
+        """
+        @param k: the number of shares required to reconstruct the data
+        @param m: the total number of shares produced
+        """
+        self.fec = fec.Encoder(k, m)
+
+    def encode(self, data):
+        """
+        Split data into k equal-length input shares, zero-padding the tail,
+        and encode them with the inner fec object.
+
+        @param data: string
+        @return: whatever the inner fec.Encoder.encode() returns for the k
+            input shares
+        """
+        k = self.fec.k
+        chunksize = div_ceil(len(data), k)
+        # Always build exactly k shares: the i'th share starts at offset
+        # i*chunksize. Slices that run past the end of data come back short
+        # (or empty) and are padded with zero bytes up to chunksize.
+        l = []
+        for i in range(k):
+            chunk = data[i*chunksize:(i+1)*chunksize]
+            l.append(chunk + '\x00'*(chunksize-len(chunk)))
+        return self.fec.encode(l)
+
+    def decode(self, shares):
+        """
+        Pass shares straight through to the inner fec object's decode().
+
+        NOTE(review): self.fec is a fec.Encoder -- confirm that it really
+        provides decode() with this signature.
+        """
+        return self.fec.decode(shares)
+
diff --git a/pyfec/fec/filefec.py b/pyfec/fec/filefec.py
index a4736f5..c857427 100644
--- a/pyfec/fec/filefec.py
+++ b/pyfec/fec/filefec.py
@@ -23,10 +23,60 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
-import fec
+import easyfec, fec
 
 import array, random
 
+def encode_to_files_easyfec(inf, prefix, k, m):
+    """
+    Encode inf, writing the shares to named $prefix+$shareid.
+
+    The shares are produced by encode_file_stringy_easyfec().
+
+    @param inf the file object from which to read the data
+    @param prefix the filename prefix; share number i goes to prefix+str(i)
+    @param k the number of shares required to reconstruct the file
+    @param m the total number of shares created
+    """
+    l = [ open(prefix+str(shareid), "wb") for shareid in range(m) ]
+    def cb(shares, length):
+        assert len(shares) == len(l)
+        for i in range(len(shares)):
+            l[i].write(shares[i])
+
+    try:
+        encode_file_stringy_easyfec(inf, cb, k, m, chunksize=4096)
+    finally:
+        # Close (and so flush) every share file, even if encoding fails, so
+        # that the shares are complete on disk when this function returns.
+        for f in l:
+            f.close()
+
+def encode_to_files_stringy(inf, prefix, k, m):
+    """
+    Encode inf, writing the shares to named $prefix+$shareid.
+
+    The shares are produced by encode_file_stringy().
+
+    @param inf the file object from which to read the data
+    @param prefix the filename prefix; share number i goes to prefix+str(i)
+    @param k the number of shares required to reconstruct the file
+    @param m the total number of shares created
+    """
+    l = [ open(prefix+str(shareid), "wb") for shareid in range(m) ]
+    def cb(shares, length):
+        assert len(shares) == len(l)
+        for i in range(len(shares)):
+            l[i].write(shares[i])
+
+    try:
+        encode_file_stringy(inf, cb, k, m, chunksize=4096)
+    finally:
+        # Close (and so flush) every share file, even if encoding fails, so
+        # that the shares are complete on disk when this function returns.
+        for f in l:
+            f.close()
+
 def encode_to_files(inf, prefix, k, m):
     """
     Encode inf, writing the shares to named $prefix+$shareid.
@@ -214,3 +264,36 @@ def encode_file_not_really(inf, cb, k, m, chunksize=4096):
     # res = enc.encode(l)
     # print "...finished to encode()"
     cb(l, indatasize)
+
+def encode_file_stringy_easyfec(inf, cb, k, m, chunksize=4096):
+    """
+    Read in the contents of inf, encode, and call cb with the results.
+
+    First, chunksize*k bytes will be read from inf, then encoded into m
+    "result shares". Then cb will be invoked, passing a list of the m result
+    shares as its first argument, and the length of the encoded data as its
+    second argument. (The length of the encoded data is always equal to
+    k*chunksize, until the last iteration, when the end of the file has been
+    reached and less than k*chunksize bytes could be read from the file.)
+    This procedure is iterated until the end of the file is reached, in which
+    case the space of the input that is unused is filled with zeroes before
+    encoding.
+
+    @param inf the file object from which to read the data
+    @param cb the callback to be invoked with the results
+    @param k the number of shares required to reconstruct the file
+    @param m the total number of shares created
+    @param chunksize how much data to read from inf for each of the k input
+        shares
+    """
+    enc = easyfec.Encoder(k, m)
+
+    readsize = k*chunksize
+    indata = inf.read(readsize)
+    while indata:
+        res = enc.encode(indata)
+        # Report the number of bytes actually read, which is shorter than
+        # readsize on the final iteration.
+        cb(res, len(indata))
+        indata = inf.read(readsize)
+
diff --git a/pyfec/fec/test/bench_pyfec.py b/pyfec/fec/test/bench_pyfec.py
index 25967ee..afa1dc0 100644
--- a/pyfec/fec/test/bench_pyfec.py
+++ b/pyfec/fec/test/bench_pyfec.py
@@ -27,14 +27,22 @@ import fec
 
 import array, random
 
-def bench_encode_to_files_shuffle_decode_from_files(verbose=False):
-    FILESIZE=1000000
+def f_easyfec(filesize):
+    return bench_encode_to_files_shuffle_decode_from_files(filesize, verbose=False, encodefunc=fec.filefec.encode_to_files_easyfec)
+
+def f_fec_stringy(filesize):
+    return bench_encode_to_files_shuffle_decode_from_files(filesize, verbose=False, encodefunc=fec.filefec.encode_to_files_stringy)
+
+def f_fec(filesize):
+    return bench_encode_to_files_shuffle_decode_from_files(filesize, verbose=False, encodefunc=fec.filefec.encode_to_files)
+
+def bench_encode_to_files_shuffle_decode_from_files(filesize=1000000, verbose=False, encodefunc=fec.filefec.encode_to_files):
     CHUNKSIZE=4096
     PREFIX="testshare"
     K=25
     M=100
     import os, time
-    left=FILESIZE
+    left=filesize
     outfile = open("tmpranddata", "wb")
     try:
         while left:
@@ -45,10 +53,10 @@ def bench_encode_to_files_shuffle_decode_from_files(verbose=False):
         outfile = None
         infile = open("tmpranddata", "rb")
         st = time.time()
-        fec.filefec.encode_to_files(infile, PREFIX, K, M)
+        encodefunc(infile, PREFIX, K, M)
         so = time.time()
         if verbose:
-            print "Encoded %s byte file into %d share files in %0.2f seconds, or %0.2f million bytes per second" % (FILESIZE, M, so-st, FILESIZE/((so-st)*1000000),)
+            print "Encoded %s byte file into %d share files in %0.2f seconds, or %0.2f million bytes per second" % (filesize, M, so-st, filesize/((so-st)*1000000),)
         enctime = so-st
         # Now delete m-k of the tempfiles at random.
         tempfs = [ f for f in os.listdir(".") if f.startswith(PREFIX) ]
@@ -57,10 +65,10 @@ def bench_encode_to_files_shuffle_decode_from_files(verbose=False):
             os.remove(victimtempf)
         recoveredfile = open("tmpranddata-recovered", "wb")
         st = time.time()
-        fec.filefec.decode_from_files(recoveredfile, 1000000, PREFIX, K, M)
+        fec.filefec.decode_from_files(recoveredfile, filesize, PREFIX, K, M)
         so = time.time()
         if verbose:
-            print "Decoded %s byte file from %d share files in %0.2f seconds, or %0.2f million bytes per second" % (FILESIZE, K, so-st, FILESIZE/((so-st)*1000000),)
+            print "Decoded %s byte file from %d share files in %0.2f seconds, or %0.2f million bytes per second" % (filesize, K, so-st, filesize/((so-st)*1000000),)
         return enctime + (so-st)
     finally:
         # os.remove("tmpranddata")