From: Zooko O'Whielacronx Date: Thu, 1 Feb 2007 23:07:00 +0000 (-0700) Subject: use pyfec instead of py_ecc for erasure coding and update API to codec X-Git-Url: https://git.rkrishnan.org/components/module-simplejson._speedups.html?a=commitdiff_plain;h=dd4ad3d542ed0dc0bf8ab31280776ae079eba1c2;p=tahoe-lafs%2Ftahoe-lafs.git use pyfec instead of py_ecc for erasure coding and update API to codec --- diff --git a/setup.py b/setup.py index 1f51cdc7..16f34be9 100644 --- a/setup.py +++ b/setup.py @@ -11,10 +11,8 @@ setup( version="0.0.1", packages=["allmydata", "allmydata.test", "allmydata.util", "allmydata.filetree", "allmydata.scripts", - "allmydata.py_ecc", ], package_dir={ "allmydata": "src/allmydata", - "allmydata.py_ecc": "src/py_ecc", }, scripts = ["bin/allmydata-tahoe"], package_data={ 'allmydata': ['web/*.xhtml'] }, diff --git a/src/allmydata/codec.py b/src/allmydata/codec.py index ac610834..84be2631 100644 --- a/src/allmydata/codec.py +++ b/src/allmydata/codec.py @@ -4,12 +4,25 @@ from zope.interface import implements from twisted.internet import defer import sha from allmydata.util import idlib, mathutil +from allmydata.util.assertutil import _assert, precondition from allmydata.interfaces import ICodecEncoder, ICodecDecoder -from allmydata.py_ecc import rs_code +import fec def netstring(s): return "%d:%s," % (len(s), s) +from base64 import b32encode +def ab(x): # debuggery + if len(x) >= 3: + return "%s:%s" % (len(x), b32encode(x[-3:]),) + elif len(x) == 2: + return "%s:%s" % (len(x), b32encode(x[-2:]),) + elif len(x) == 1: + return "%s:%s" % (len(x), b32encode(x[-1:]),) + elif len(x) == 0: + return "%s:%s" % (len(x), "--empty--",) + + class ReplicatingEncoder(object): implements(ICodecEncoder) ENCODER_TYPE = "rep" @@ -28,12 +41,11 @@ class ReplicatingEncoder(object): def get_share_size(self): return self.data_size - def encode(self, data, num_shares=None): - if num_shares is None: - num_shares = self.max_shares - assert num_shares <= self.max_shares - shares = [(i,data) for i in range(num_shares)] - return defer.succeed(shares) + def encode(self, data, desired_shareids=None): + if desired_shareids is None: + desired_shareids = range(self.max_shares) + shares = [data for i in desired_shareids] + return defer.succeed((shares, desired_shareids)) class ReplicatingDecoder(object): implements(ICodecDecoder) @@ -44,10 +56,10 @@ class ReplicatingDecoder(object): def get_required_shares(self): return self.required_shares - def decode(self, some_shares): - assert len(some_shares) >= self.required_shares - data = some_shares[0][1] - return defer.succeed(data) + def decode(self, some_shares, their_shareids): + assert len(some_shares) == self.required_shares + assert len(some_shares) == len(their_shareids) + return defer.succeed(some_shares[0]) class Encoder(object): @@ -68,7 +80,7 @@ class Encoder(object): class Decoder(object): def __init__(self, outfile, k, m, verifierid): self.outfile = outfile - self.k = 2 + self.k = k self.m = m self._verifierid = verifierid @@ -94,107 +106,57 @@ class Decoder(object): assert self._verifierid == vid, "%s != %s" % (idlib.b2a(self._verifierid), idlib.b2a(vid)) -class PyRSEncoder(object): +class CRSEncoder(object): implements(ICodecEncoder) - ENCODER_TYPE = "pyrs" - - # we will break the data into vectors in which each element is a single - # byte (i.e. a single number from 0 to 255), and the length of the vector - # is equal to the number of required_shares. We use padding to make the - # last chunk of data long enough to match, and we record the data_size in - # the serialized parameters to strip this padding out on the receiving - # end. - - # TODO: this will write a 733kB file called 'ffield.lut.8' in the current - # directory the first time it is run, to cache the lookup table for later - # use. It appears to take about 15 seconds to create this the first time, - # and about 0.5s to load it in each time afterwards. Make sure this file - # winds up somewhere reasonable. - - # TODO: the encoder/decoder RSCode object depends upon the number of - # required/total shares, but not upon the data. We could probably save a - # lot of initialization time by caching a single instance and using it - # any time we use the same required/total share numbers (which will - # probably be always). - - # on my workstation (fluxx, a 3.5GHz Athlon), this encodes data at a rate - # of 6.7kBps. Zooko's mom's 1.8GHz G5 got 2.2kBps . slave3 took 40s to - # construct the LUT and encodes at 1.5kBps, and for some reason took more - # than 20 minutes to run the test_encode_share tests, so I disabled most - # of them. (uh, hello, it's running figleaf) + ENCODER_TYPE = 2 def set_params(self, data_size, required_shares, max_shares): assert required_shares <= max_shares self.data_size = data_size self.required_shares = required_shares self.max_shares = max_shares - self.chunk_size = required_shares - self.num_chunks = mathutil.div_ceil(data_size, self.chunk_size) - self.last_chunk_padding = mathutil.pad_size(data_size, required_shares) - self.share_size = self.num_chunks - self.encoder = rs_code.RSCode(max_shares, required_shares, 8) + self.share_size = mathutil.div_ceil(data_size, required_shares) + self.last_share_padding = mathutil.pad_size(self.share_size, required_shares) + self.encoder = fec.Encoder(required_shares, max_shares) def get_encoder_type(self): return self.ENCODER_TYPE def get_serialized_params(self): - return "%d-%d-%d" % (self.data_size, self.required_shares, + return "%d:%d:%d" % (self.data_size, self.required_shares, self.max_shares) def get_share_size(self): return self.share_size - def encode(self, data, num_shares=None): - if num_shares is None: - num_shares = self.max_shares - assert num_shares <= self.max_shares - # we create self.max_shares shares, then throw out any extra ones - # so that we always return exactly num_shares shares. - - share_data = [ [] for i in range(self.max_shares)] - for i in range(self.num_chunks): - # we take self.chunk_size bytes from the input string, and - # turn it into self.max_shares bytes. - offset = i*self.chunk_size - # Note string slices aren't an efficient way to use memory, so - # when we upgrade from the unusably slow py_ecc prototype to a - # fast ECC we should also fix up this memory usage (by using the - # array module). - chunk = data[offset:offset+self.chunk_size] - if i == self.num_chunks-1: - chunk = chunk + "\x00"*self.last_chunk_padding - assert len(chunk) == self.chunk_size - input_vector = [ord(x) for x in chunk] - assert len(input_vector) == self.required_shares - output_vector = self.encoder.Encode(input_vector) - assert len(output_vector) == self.max_shares - for i2,out in enumerate(output_vector): - share_data[i2].append(chr(out)) - - shares = [ (i, "".join(share_data[i])) - for i in range(num_shares) ] - return defer.succeed(shares) - -class PyRSDecoder(object): + def encode(self, inshares, desired_share_ids=None): + precondition(desired_share_ids is None or len(desired_share_ids) <= self.max_shares, desired_share_ids, self.max_shares) + + if desired_share_ids is None: + desired_share_ids = range(self.max_shares) + + for inshare in inshares: + assert len(inshare) == self.share_size, (len(inshare), self.share_size, self.data_size, self.required_shares) + shares = self.encoder.encode(inshares, desired_share_ids) + + return defer.succeed((shares, desired_share_ids)) + +class CRSDecoder(object): implements(ICodecDecoder) def set_serialized_params(self, params): - pieces = params.split("-") + pieces = params.split(":") self.data_size = int(pieces[0]) self.required_shares = int(pieces[1]) self.max_shares = int(pieces[2]) self.chunk_size = self.required_shares self.num_chunks = mathutil.div_ceil(self.data_size, self.chunk_size) - self.last_chunk_padding = mathutil.pad_size(self.data_size, - self.required_shares) self.share_size = self.num_chunks - self.encoder = rs_code.RSCode(self.max_shares, self.required_shares, - 8) + self.decoder = fec.Decoder(self.required_shares, self.max_shares) if False: print "chunk_size: %d" % self.chunk_size print "num_chunks: %d" % self.num_chunks - print "last_chunk_padding: %d" % self.last_chunk_padding print "share_size: %d" % self.share_size print "max_shares: %d" % self.max_shares print "required_shares: %d" % self.required_shares @@ -202,37 +164,15 @@ class PyRSDecoder(object): def get_required_shares(self): return self.required_shares - def decode(self, some_shares): - chunk_size = self.chunk_size - assert len(some_shares) >= self.required_shares - chunks = [] - have_shares = {} - for share_num, share_data in some_shares: - have_shares[share_num] = share_data - for i in range(self.share_size): - # this takes one byte from each share, and turns the combination - # into a single chunk - received_vector = [] - for j in range(self.max_shares): - share = have_shares.get(j) - if share is not None: - received_vector.append(ord(share[i])) - else: - received_vector.append(None) - decoded_vector = self.encoder.DecodeImmediate(received_vector) - assert len(decoded_vector) == self.chunk_size - chunk = "".join([chr(x) for x in decoded_vector]) - chunks.append(chunk) - data = "".join(chunks) - if self.last_chunk_padding: - data = data[:-self.last_chunk_padding] - assert len(data) == self.data_size - return defer.succeed(data) + def decode(self, some_shares, their_shareids): + precondition(len(some_shares) == len(their_shareids), len(some_shares), len(their_shareids)) + precondition(len(some_shares) == self.required_shares, len(some_shares), self.required_shares) + return defer.succeed(self.decoder.decode(some_shares, their_shareids)) all_encoders = { ReplicatingEncoder.ENCODER_TYPE: (ReplicatingEncoder, ReplicatingDecoder), - PyRSEncoder.ENCODER_TYPE: (PyRSEncoder, PyRSDecoder), + CRSEncoder.ENCODER_TYPE: (CRSEncoder, CRSDecoder), } def get_decoder_by_name(name): diff --git a/src/allmydata/encode_new.py b/src/allmydata/encode_new.py index 0afb9d1f..1f03e364 100644 --- a/src/allmydata/encode_new.py +++ b/src/allmydata/encode_new.py @@ -5,7 +5,7 @@ from allmydata.chunk import HashTree, roundup_pow2 from allmydata.Crypto.Cipher import AES import sha from allmydata.util import mathutil -from allmydata.codec import PyRSEncoder +from allmydata.codec import CRSEncoder def hash(data): return sha.new(data).digest() @@ -90,15 +90,13 @@ class Encoder(object): self.num_segments = mathutil.div_ceil(self.file_size, self.segment_size) def setup_encoder(self): - self.encoder = PyRSEncoder() + self.encoder = CRSEncoder() self.encoder.set_params(self.segment_size, self.required_shares, self.num_shares) - self.share_size = self.encoder.get_share_size() - def get_reservation_size(self): self.num_shares = 100 - self.share_size = self.file_size / self.required_shares + self.share_size = mathutil.div_ceil(self.file_size, self.required_shares) overhead = self.compute_overhead() return self.share_size + overhead @@ -126,52 +124,57 @@ class Encoder(object): return d def do_segment(self, segnum): - segment_plaintext = self.infile.read(self.segment_size) - segment_crypttext = self.cryptor.encrypt(segment_plaintext) - del segment_plaintext - assert self.encoder.max_shares == self.num_shares - d = self.encoder.encode(segment_crypttext) + chunks = [] + subsharesize = self.encoder.get_share_size() + for i in range(self.required_shares): + d = self.infile.read(subsharesize) + if len(d) < subsharesize: + # padding + d += ('\x00' * (subsharesize - len(d))) + d = self.cryptor.encrypt(d) + chunks.append(d) + d = self.encoder.encode(chunks) d.addCallback(self._encoded_segment) return d - def _encoded_segment(self, subshare_tuples): + def _encoded_segment(self, (shares, shareids)): dl = [] - for share_num,subshare in subshare_tuples: - d = self.send_subshare(share_num, self.segment_num, subshare) + for shareid,subshare in zip(shareids, shares): + d = self.send_subshare(shareid, self.segment_num, subshare) dl.append(d) - self.subshare_hashes[share_num].append(hash(subshare)) + self.subshare_hashes[shareid].append(hash(subshare)) self.segment_num += 1 return defer.DeferredList(dl) - def send_subshare(self, share_num, segment_num, subshare): + def send_subshare(self, shareid, segment_num, subshare): #if False: # offset = hash_size + segment_num * segment_size - # return self.send(share_num, "write", subshare, offset) - return self.send(share_num, "put_subshare", segment_num, subshare) + # return self.send(shareid, "write", subshare, offset) + return self.send(shareid, "put_subshare", segment_num, subshare) - def send(self, share_num, methname, *args, **kwargs): - ll = self.landlords[share_num] + def send(self, shareid, methname, *args, **kwargs): + ll = self.landlords[shareid] return ll.callRemote(methname, *args, **kwargs) def send_all_subshare_hash_trees(self): dl = [] - for share_num,hashes in enumerate(self.subshare_hashes): + for shareid,hashes in enumerate(self.subshare_hashes): # hashes is a list of the hashes of all subshares that were sent - # to shareholder[share_num]. - dl.append(self.send_one_subshare_hash_tree(share_num, hashes)) + # to shareholder[shareid]. + dl.append(self.send_one_subshare_hash_tree(shareid, hashes)) return defer.DeferredList(dl) - def send_one_subshare_hash_tree(self, share_num, subshare_hashes): + def send_one_subshare_hash_tree(self, shareid, subshare_hashes): t = HashTree(subshare_hashes) all_hashes = list(t) # all_hashes[0] is the root hash, == hash(ah[1]+ah[2]) # all_hashes[1] is the left child, == hash(ah[3]+ah[4]) # all_hashes[n] == hash(all_hashes[2*n+1] + all_hashes[2*n+2]) - self.share_root_hashes[share_num] = t[0] + self.share_root_hashes[shareid] = t[0] if False: block = "".join(all_hashes) - return self.send(share_num, "write", block, offset=0) - return self.send(share_num, "put_subshare_hashes", all_hashes) + return self.send(shareid, "write", block, offset=0) + return self.send(shareid, "put_subshare_hashes", all_hashes) def send_all_share_hash_trees(self): dl = [] @@ -192,13 +195,13 @@ class Encoder(object): dl.append(self.send_one_share_hash_tree(i, hashes)) return defer.DeferredList(dl) - def send_one_share_hash_tree(self, share_num, needed_hashes): - return self.send(share_num, "put_share_hashes", needed_hashes) + def send_one_share_hash_tree(self, shareid, needed_hashes): + return self.send(shareid, "put_share_hashes", needed_hashes) def close_all_shareholders(self): dl = [] - for share_num in range(self.num_shares): - dl.append(self.send(share_num, "close")) + for shareid in range(self.num_shares): + dl.append(self.send(shareid, "close")) return defer.DeferredList(dl) def done(self): diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index 08a7ce13..73cc3c36 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -112,48 +112,35 @@ class ICodecEncoder(Interface): """ def encode(inshares, desired_share_ids=None): - """Encode a chunk of data. This may be called multiple times. Each - call is independent. + """Encode some data. This may be called multiple times. Each call is + independent. - The data is required to be a string with a length that exactly - matches the data_size promised by set_params(). + inshares is a sequence of length required_shares, containing buffers, + where each buffer contains the next contiguous non-overlapping + segment of the input data. Each buffer is required to be the same + length, and the sum of the lengths of the buffers is required to be + exactly the data_size promised by set_params(). (This implies that + the data has to be padded before being passed to encode(), unless of + course it already happens to be an even multiple of required_shares in + length.) - 'num_shares', if provided, is required to be equal or less than the - 'max_shares' set in set_params. If 'num_shares' is left at None, - this method will produce 'max_shares' shares. This can be used to - minimize the work that the encoder needs to do if we initially - thought that we would need, say, 100 shares, but now that it is time - to actually encode the data we only have 75 peers to send data to. + 'desired_share_ids', if provided, is required to be a sequence of ints, + each of which is required to be >= 0 and < max_shares. For each call, encode() will return a Deferred that fires with two - lists, one containing shares and the other containing the sharenums, - which is an int from 0 to num_shares-1. The get_share_size() method - can be used to determine the length of the 'sharedata' strings - returned by encode(). + lists, one containing shares and the other containing the shareids. + The get_share_size() method can be used to determine the length of the + share strings returned by encode(). - The sharedatas and their corresponding sharenums are required to be - kept together during storage and retrieval. Specifically, the share - data is useless by itself: the decoder needs to be told which share is - which by providing it with both the share number and the actual - share data. + The shares and their corresponding shareids are required to be kept + together during storage and retrieval. Specifically, the share data is + useless by itself: the decoder needs to be told which share is which + by providing it with both the shareid and the actual share data. The memory usage of this function is expected to be on the order of - total_shares * get_share_size(). + + (max_shares - required_shares) * get_share_size(). """ - # design note: we could embed the share number in the sharedata by - # returning bencode((sharenum,sharedata)). The advantage would be - # making it easier to keep these two pieces together, and probably - # avoiding a round trip when reading the remote bucket (although this - # could be achieved by changing RIBucketReader.read to - # read_data_and_metadata). The disadvantage is that the share number - # wants to be exposed to the storage/bucket layer (specifically to - # handle the next stage of peer-selection algorithm in which we - # propose to keep share#A on a given peer and they are allowed to - # tell us that they already have share#B). Also doing this would make - # the share size somewhat variable (one-digit sharenumbers will be a - # byte shorter than two-digit sharenumbers), unless we zero-pad the - # sharenumbers based upon the max_total_shares declared in - # set_params. class ICodecDecoder(Interface): def set_serialized_params(params): @@ -167,17 +154,21 @@ class ICodecDecoder(Interface): def decode(some_shares, their_shareids): """Decode a partial list of shares into data. - 'some_shares' is required to be a list of buffers of sharedata, a + 'some_shares' is required to be a sequence of buffers of sharedata, a subset of the shares returned by ICodecEncode.encode(). Each share is required to be of the same length. The i'th element of their_shareids - is required to be the share id (or "share num") of the i'th buffer in - some_shares. + is required to be the shareid of the i'th buffer in some_shares. This returns a Deferred which fires with a sequence of buffers. This sequence will contain all of the segments of the original data, in order. The sum of the lengths of all of the buffers will be the 'data_size' value passed into the original ICodecEncode.set_params() - call. + call. Note that some of the elements in the result sequence may be + references to the elements of the some_shares input sequence. In + particular, this means that if those share objects are mutable (e.g. + arrays) and if they are changed then both the input (the 'some_shares' + parameter) and the output (the value given when the deferred is + triggered) will change. The length of 'some_shares' is required to be exactly the value of 'required_shares' passed into the original ICodecEncode.set_params() diff --git a/src/allmydata/test/test_codec.py b/src/allmydata/test/test_codec.py index f7e84cef..2df53675 100644 --- a/src/allmydata/test/test_codec.py +++ b/src/allmydata/test/test_codec.py @@ -3,71 +3,57 @@ import os, time from twisted.trial import unittest from twisted.internet import defer from twisted.python import log -from allmydata.codec import PyRSEncoder, PyRSDecoder, ReplicatingEncoder, ReplicatingDecoder +from allmydata.codec import ReplicatingEncoder, ReplicatingDecoder, CRSEncoder, CRSDecoder import random +from allmydata.util import mathutil class Tester: - #enc_class = PyRSEncoder - #dec_class = PyRSDecoder - def do_test(self, size, required_shares, max_shares, fewer_shares=None): - data0 = os.urandom(size) + data0s = [os.urandom(mathutil.div_ceil(size, required_shares)) for i in range(required_shares)] enc = self.enc_class() enc.set_params(size, required_shares, max_shares) serialized_params = enc.get_serialized_params() log.msg("serialized_params: %s" % serialized_params) - d = enc.encode(data0) - def _done_encoding_all(shares): + d = enc.encode(data0s) + def _done_encoding_all((shares, shareids)): self.failUnlessEqual(len(shares), max_shares) self.shares = shares + self.shareids = shareids d.addCallback(_done_encoding_all) if fewer_shares is not None: - # also validate that the num_shares= parameter works - d.addCallback(lambda res: enc.encode(data0, fewer_shares)) - def _check_fewer_shares(some_shares): - self.failUnlessEqual(len(some_shares), fewer_shares) + # also validate that the desired_shareids= parameter works + desired_shareids = random.sample(range(max_shares), fewer_shares) + d.addCallback(lambda res: enc.encode(data0s, desired_shareids)) + def _check_fewer_shares((some_shares, their_shareids)): + self.failUnlessEqual(tuple(their_shareids), tuple(desired_shareids)) d.addCallback(_check_fewer_shares) - def _decode(shares): + def _decode((shares, shareids)): dec = self.dec_class() dec.set_serialized_params(serialized_params) - d1 = dec.decode(shares) + d1 = dec.decode(shares, shareids) return d1 def _check_data(decoded_shares): - data1 = "".join(decoded_shares) - self.failUnlessEqual(len(data1), len(data0)) - self.failUnless(data1 == data0) - - def _decode_all_ordered(res): - log.msg("_decode_all_ordered") - # can we decode using all of the shares? - return _decode(self.shares) - d.addCallback(_decode_all_ordered) - d.addCallback(_check_data) - - def _decode_all_shuffled(res): - log.msg("_decode_all_shuffled") - # can we decode, using all the shares, but in random order? - shuffled_shares = self.shares[:] - random.shuffle(shuffled_shares) - return _decode(shuffled_shares) - d.addCallback(_decode_all_shuffled) - d.addCallback(_check_data) + self.failUnlessEqual(len(decoded_shares), len(data0s)) + self.failUnless(tuple(decoded_shares) == tuple(data0s)) def _decode_some(res): log.msg("_decode_some") # decode with a minimal subset of the shares some_shares = self.shares[:required_shares] - return _decode(some_shares) + some_shareids = self.shareids[:required_shares] + return _decode((some_shares, some_shareids)) d.addCallback(_decode_some) d.addCallback(_check_data) def _decode_some_random(res): log.msg("_decode_some_random") # use a randomly-selected minimal subset - some_shares = random.sample(self.shares, required_shares) - return _decode(some_shares) + l = random.sample(zip(self.shares, self.shareids), required_shares) + some_shares = [ x[0] for x in l ] + some_shareids = [ x[1] for x in l ] + return _decode((some_shares, some_shareids)) d.addCallback(_decode_some_random) d.addCallback(_check_data) @@ -75,12 +61,17 @@ class Tester: log.msg("_decode_multiple") # make sure we can re-use the decoder object shares1 = random.sample(self.shares, required_shares) - shares2 = random.sample(self.shares, required_shares) + sharesl1 = random.sample(zip(self.shares, self.shareids), required_shares) + shares1 = [ x[0] for x in sharesl1 ] + shareids1 = [ x[1] for x in sharesl1 ] + sharesl2 = random.sample(zip(self.shares, self.shareids), required_shares) + shares2 = [ x[0] for x in sharesl2 ] + shareids2 = [ x[1] for x in sharesl2 ] dec = self.dec_class() dec.set_serialized_params(serialized_params) - d1 = dec.decode(shares1) + d1 = dec.decode(shares1, shareids1) d1.addCallback(_check_data) - d1.addCallback(lambda res: dec.decode(shares2)) + d1.addCallback(lambda res: dec.decode(shares2, shareids2)) d1.addCallback(_check_data) return d1 d.addCallback(_decode_multiple) @@ -88,73 +79,25 @@ class Tester: return d def test_encode(self): - if os.uname()[1] == "slave3" and self.enc_class == PyRSEncoder: - raise unittest.SkipTest("slave3 is really slow") return self.do_test(1000, 25, 100) def test_encode1(self): return self.do_test(8, 8, 16) def test_encode2(self): - if os.uname()[1] == "slave3" and self.enc_class == PyRSEncoder: - raise unittest.SkipTest("slave3 is really slow") return self.do_test(123, 25, 100, 90) def test_sizes(self): - raise unittest.SkipTest("omg this would take forever") d = defer.succeed(None) for i in range(1, 100): d.addCallback(lambda res,size: self.do_test(size, 4, 10), i) return d -class PyRS(unittest.TestCase, Tester): - enc_class = PyRSEncoder - dec_class = PyRSDecoder - class Replicating(unittest.TestCase, Tester): enc_class = ReplicatingEncoder dec_class = ReplicatingDecoder +class CRS(unittest.TestCase, Tester): + enc_class = CRSEncoder + dec_class = CRSDecoder -class BenchPyRS(unittest.TestCase): - enc_class = PyRSEncoder - def test_big(self): - size = 10000 - required_shares = 25 - max_shares = 100 - # this lets us use a persistent lookup table, stored outside the - # _trial_temp directory (which is deleted each time trial is run) - os.symlink("../ffield.lut.8", "ffield.lut.8") - enc = self.enc_class() - self.start() - enc.set_params(size, required_shares, max_shares) - serialized_params = enc.get_serialized_params() - print "encoder ready", self.stop() - self.start() - data0 = os.urandom(size) - print "data ready", self.stop() - self.start() - d = enc.encode(data0) - def _done(shares): - now_shares = time.time() - print "shares ready", self.stop() - self.start() - self.failUnlessEqual(len(shares), max_shares) - d.addCallback(_done) - d.addCallback(lambda res: enc.encode(data0)) - d.addCallback(_done) - d.addCallback(lambda res: enc.encode(data0)) - d.addCallback(_done) - return d - - def start(self): - self.start_time = time.time() - - def stop(self): - self.end_time = time.time() - return (self.end_time - self.start_time) - - -# to benchmark the encoder, delete this line -del BenchPyRS -# and then run 'make test TEST=allmydata.test.test_encode_share.BenchPyRS' diff --git a/src/allmydata/test/trial_figleaf.py b/src/allmydata/test/trial_figleaf.py index 57859f12..f614926b 100644 --- a/src/allmydata/test/trial_figleaf.py +++ b/src/allmydata/test/trial_figleaf.py @@ -60,11 +60,8 @@ from twisted.trial.reporter import TreeReporter, VerboseTextReporter # finish in printSummary. from allmydata.util import figleaf -# don't cover py_ecc, it takes forever -from allmydata.py_ecc import rs_code import os -py_ecc_dir = os.path.realpath(os.path.dirname(rs_code.__file__)) -figleaf.start(ignore_prefixes=[py_ecc_dir]) +figleaf.start() class FigleafReporter(TreeReporter): diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py index 7b39c1cc..23da1bc6 100644 --- a/src/allmydata/upload.py +++ b/src/allmydata/upload.py @@ -216,7 +216,7 @@ class FileUploader: assert sorted(self.sharemap.keys()) == range(len(landlords)) # encode all the data at once: this class does not use segmentation data = self._filehandle.read() - d = self._encoder.encode(data, len(landlords)) + d = self._encoder.encode(data, self.sharemap.keys()) d.addCallback(self._send_all_shares) d.addCallback(lambda res: self._encoder.get_serialized_params()) return d @@ -229,17 +229,16 @@ class FileUploader: bucket.callRemote("close")) return d - def _send_all_shares(self, shares): + def _send_all_shares(self, (shares, shareids)): dl = [] - for share in shares: - (sharenum,sharedata) = share + for (shareid, share) in zip(shareids, shares): if self.debug: - log.msg(" writing share %d" % sharenum) - metadata = bencode.bencode(sharenum) - assert len(sharedata) == self._share_size - assert isinstance(sharedata, str) - bucket = self.sharemap[sharenum] - d = self._send_one_share(bucket, sharedata, metadata) + log.msg(" writing share %d" % shareid) + metadata = bencode.bencode(shareid) + assert len(share) == self._share_size + assert isinstance(share, str) + bucket = self.sharemap[shareid] + d = self._send_one_share(bucket, share, metadata) dl.append(d) return DeferredListShouldSucceed(dl) diff --git a/src/py_ecc/__init__.py b/src/py_ecc/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/py_ecc/ffield.py b/src/py_ecc/ffield.py deleted file mode 100644 index a17a3594..00000000 --- a/src/py_ecc/ffield.py +++ /dev/null @@ -1,764 +0,0 @@ - -# Copyright Emin Martinian 2002. See below for license terms. -# Version Control Info: $Id: ffield.py,v 1.10 2003/10/28 21:19:43 emin Exp $ - -""" -This package contains the FField class designed to perform calculations -in finite fields of characteristic two. The following docstrings provide -detailed information on various topics: - - FField.__doc__ Describes the methods of the FField class and how - to use them. - - FElement.__doc__ Describes the FElement class and how to use it. - - fields_doc Briefly describes what a finite field is and - establishes notation for further documentation. - - design_doc Discusses the design of the FField class and attempts - to justify why certain decisions were made. - - license_doc Describes the license and lack of warranty for - this code. - - testing_doc Describes some tests to make sure the code is working - as well as some of the built in testing routines. - -""" - -import string, random, os, os.path, cPickle - - -# The following list of primitive polynomials are the Conway Polynomials -# from the list at -# http://www.math.rwth-aachen.de/~Frank.Luebeck/ConwayPol/cp2.html - -gPrimitivePolys = {} -gPrimitivePolysCondensed = { - 1 : (1,0), - 2 : (2,1,0), - 3 : (3,1,0), - 4 : (4,1,0), - 5 : (5,2,0), - 6 : (6,4,3,1,0), - 7 : (7,1,0), - 8 : (8,4,3,2,0), - 9 : (9,4,0), - 10 : (10,6,5,3,2,1,0), - 11 : (11,2,0), - 12 : (12,7,6,5,3,1,0), - 13 : (13,4,3,1,0), - 14 : (14,7,5,3,0), - 15 : (15,5,4,2,0), - 16 : (16,5,3,2,0), - 17 : (17,3,0), - 18 : (18,12,10,1,0), - 19 : (19,5,2,1,0), - 20 : (20,10,9,7,6,5,4,1,0), - 21 : (21,6,5,2,0), - 22 : (22,12,11,10,9,8,6,5,0), - 23 : (23,5,0), - 24 : (24,16,15,14,13,10,9,7,5,3,0), - 25 : (25,8,6,2,0), - 26 : (26,14,10,8,7,6,4,1,0), - 27 : (27,12,10,9,7,5,3,2,0), - 28 : (28,13,7,6,5,2,0), - 29 : (29,2,0), - 30 : (30,17,16,13,11,7,5,3,2,1,0), - 31 : (31,3,0), - 32 : (32,15,9,7,4,3,0), - 33 : (33,13,12,11,10,8,6,3,0), - 34 : (34,16,15,12,11,8,7,6,5,4,2,1,0), - 35 : (35, 11, 10, 7, 5, 2, 0), - 36 : (36, 23, 22, 20, 19, 17, 14, 13, 8, 6, 5, 1, 0), - 37 : (37, 5, 4, 3, 2, 1, 0), - 38 : (38, 14, 10, 9, 8, 5, 2, 1, 0), - 39 : (39, 15, 12, 11, 10, 9, 7, 6, 5, 2 , 0), - 40 : (40, 23, 21, 18, 16, 15, 13, 12, 8, 5, 3, 1, 0), - 97 : (97,6,0), - 100 : (100,15,0) - } - -for n in gPrimitivePolysCondensed.keys(): - gPrimitivePolys[n] = [0]*(n+1) - if (n < 16): - unity = 1 - else: - unity = long(1) - for index in gPrimitivePolysCondensed[n]: - gPrimitivePolys[n][index] = unity - - -class FField: - """ - The FField class implements a finite field calculator. The - following functions are provided: - - __init__ - Add - Subtract - Multiply - Inverse - Divide - FindDegree - MultiplyWithoutReducing - ExtendedEuclid - FullDivision - ShowCoefficients - ShowPolynomial - GetRandomElement - ConvertListToElement - TestFullDivision - TestInverse - - Most of these methods take integers or longs representing field - elements as arguments and return integers representing the desired - field elements as output. See ffield.fields_doc for an explanation - of the integer representation of field elements. - - Example of how to use the FField class: - ->>> import ffield ->>> F = ffield.FField(5) # create the field GF(2^5) ->>> a = 7 # field elements are denoted as integers from 0 to 2^5-1 ->>> b = 15 ->>> F.ShowPolynomial(a) # show the polynomial representation of a -'x^2 + x^1 + 1' ->>> F.ShowPolynomial(b) -'x^3 + x^2 + x^1 + 1' ->>> c = F.Multiply(a,b) # multiply a and b modulo the field generator ->>> c -4 ->>> F.ShowPolynomial(c) -'x^2' ->>> F.Multiply(c,F.Inverse(a)) == b # verify multiplication works -1 ->>> F.Multiply(c,F.Inverse(b)) == a # verify multiplication works -1 ->>> d = F.Divide(c,b) # since c = F.Multiply(a,b), d should give a ->>> d -7 - - See documentation on the appropriate method for further details. - """ - - def __init__(self,n,gen=0,useLUT=-1): - """ - This method constructs the field GF(2^p). It takes one - required argument, n = p, and two optional arguments, gen, - representing the coefficients of the generator polynomial - (of degree n) to use and useLUT describing whether to use - a lookup table. If no gen argument is provided, the - Conway Polynomial of degree n is obtained from the table - gPrimitivePolys. - - If useLUT = 1 then a lookup table is used for - computing finite field multiplies and divides. - If useLUT = 0 then no lookup table is used. - If useLUT = -1 (the default), then the code - decides when a lookup table should be used. - - Note that you can look at the generator for the field object - F by looking at F.generator. - """ - - self.n = n - if (gen): - self.generator = gen - else: - self.generator = self.ConvertListToElement(gPrimitivePolys[n]) - - - if (useLUT == 1 or (useLUT == -1 and self.n < 10)): # use lookup table - self.unity = 1 - self.Inverse = self.DoInverseForSmallField - self.PrepareLUT() - self.Multiply = self.LUTMultiply - self.Divide = self.LUTDivide - self.Inverse = lambda x: self.LUTDivide(1,x) - elif (self.n < 15): - self.unity = 1 - self.Inverse = self.DoInverseForSmallField - self.Multiply = self.DoMultiply - self.Divide = self.DoDivide - else: # Need to use longs for larger fields - self.unity = long(1) - self.Inverse = self.DoInverseForBigField - self.Multiply = lambda a,b: self.DoMultiply(long(a),long(b)) - self.Divide = lambda a,b: self.DoDivide(long(a),long(b)) - - - - def PrepareLUT(self): - fieldSize = 1 << self.n - lutName = 'ffield.lut.' + `self.n` - if (os.path.exists(lutName)): - fd = open(lutName,'r') - self.lut = cPickle.load(fd) - fd.close() - else: - self.lut = LUT() - self.lut.mulLUT = range(fieldSize) - self.lut.divLUT = range(fieldSize) - self.lut.mulLUT[0] = [0]*fieldSize - self.lut.divLUT[0] = ['NaN']*fieldSize - for i in range(1,fieldSize): - self.lut.mulLUT[i] = map(lambda x: self.DoMultiply(i,x), - range(fieldSize)) - self.lut.divLUT[i] = map(lambda x: self.DoDivide(i,x), - range(fieldSize)) - fd = open(lutName,'w') - cPickle.dump(self.lut,fd) - fd.close() - - - def LUTMultiply(self,i,j): - return self.lut.mulLUT[i][j] - - def LUTDivide(self,i,j): - return self.lut.divLUT[i][j] - - def Add(self,x,y): - """ - Adds two field elements and returns the result. - """ - - return x ^ y - - def Subtract(self,x,y): - """ - Subtracts the second argument from the first and returns - the result. In fields of characteristic two this is the same - as the Add method. - """ - return self.Add(x,y) - - def DoMultiply(self,f,v): - """ - Multiplies two field elements (modulo the generator - self.generator) and returns the result. - - See MultiplyWithoutReducing if you don't want multiplication - modulo self.generator. - """ - m = self.MultiplyWithoutReducing(f,v) - return self.FullDivision(m,self.generator, - self.FindDegree(m),self.n)[1] - - def DoInverseForSmallField(self,f): - """ - Computes the multiplicative inverse of its argument and - returns the result. - """ - return self.ExtendedEuclid(1,f,self.generator, - self.FindDegree(f),self.n)[1] - - def DoInverseForBigField(self,f): - """ - Computes the multiplicative inverse of its argument and - returns the result. - """ - return self.ExtendedEuclid(self.unity,long(f),self.generator, - self.FindDegree(long(f)),self.n)[1] - - def DoDivide(self,f,v): - """ - Divide(f,v) returns f * v^-1. - """ - return self.DoMultiply(f,self.Inverse(v)) - - def FindDegree(self,v): - """ - Find the degree of the polynomial representing the input field - element v. This takes O(degree(v)) operations. - - A faster version requiring only O(log(degree(v))) - could be written using binary search... - """ - - if (v): - result = -1 - while(v): - v = v >> 1 - result = result + 1 - return result - else: - return 0 - - def MultiplyWithoutReducing(self,f,v): - """ - Multiplies two field elements and does not take the result - modulo self.generator. You probably should not use this - unless you know what you are doing; look at Multiply instead. - - NOTE: If you are using fields larger than GF(2^15), you should - make sure that f and v are longs not integers. - """ - - result = 0 - mask = self.unity - i = 0 - while (i <= self.n): - if (mask & v): - result = result ^ f - f = f << 1 - mask = mask << 1 - i = i + 1 - return result - - - def ExtendedEuclid(self,d,a,b,aDegree,bDegree): - """ - Takes arguments (d,a,b,aDegree,bDegree) where d = gcd(a,b) - and returns the result of the extended Euclid algorithm - on (d,a,b). - """ - if (b == 0): - return (a,self.unity,0) - else: - (floorADivB, aModB) = self.FullDivision(a,b,aDegree,bDegree) - (d,x,y) = self.ExtendedEuclid(d, b, aModB, bDegree, - self.FindDegree(aModB)) - return (d,y,self.Subtract(x,self.DoMultiply(floorADivB,y))) - - def FullDivision(self,f,v,fDegree,vDegree): - """ - Takes four arguments, f, v, fDegree, and vDegree where - fDegree and vDegree are the degrees of the field elements - f and v represented as a polynomials. - This method returns the field elements a and b such that - - f(x) = a(x) * v(x) + b(x). - - That is, a is the divisor and b is the remainder, or in - other words a is like floor(f/v) and b is like f modulo v. - """ - - result = 0 - i = fDegree - mask = self.unity << i - while (i >= vDegree): - if (mask & f): - result = result ^ (self.unity << (i - vDegree)) - f = self.Subtract(f, v << (i - vDegree)) - i = i - 1 - mask = mask >> self.unity - return (result,f) - - - def ShowCoefficients(self,f): - """ - Show coefficients of input field element represented as a - polynomial in decreasing order. - """ - - fDegree = self.n - - result = [] - for i in range(fDegree,-1,-1): - if ((self.unity << i) & f): - result.append(1) - else: - result.append(0) - - return result - - def ShowPolynomial(self,f): - """ - Show input field element represented as a polynomial. - """ - - fDegree = self.FindDegree(f) - result = '' - - if (f == 0): - return '0' - - for i in range(fDegree,0,-1): - if ((1 << i) & f): - result = result + (' x^' + `i`) - if (1 & f): - result = result + ' ' + `1` - return string.replace(string.strip(result),' ',' + ') - - def GetRandomElement(self,nonZero=0,maxDegree=None): - """ - Return an element from the field chosen uniformly at random - or, if the optional argument nonZero is true, chosen uniformly - at random from the non-zero elements, or, if the optional argument - maxDegree is provided, ensure that the result has degree less - than maxDegree. - """ - - if (None == maxDegree): - maxDegree = self.n - if (maxDegree <= 1 and nonZero): - return 1 - if (maxDegree < 31): - return random.randint(nonZero != 0,(1<= to the degree of the - generator for the field, then you will have to call take the - result modulo the generator to get a proper element in the - field. - """ - - temp = map(lambda a, b: a << b, l, range(len(l)-1,-1,-1)) - return reduce(lambda a, b: a | b, temp) - - def TestFullDivision(self): - """ - Test the FullDivision function by generating random polynomials - a(x) and b(x) and checking whether (c,d) == FullDivision(a,b) - satsifies b*c + d == a - """ - f = 0 - - a = self.GetRandomElement(nonZero=1) - b = self.GetRandomElement(nonZero=1) - aDegree = self.FindDegree(a) - bDegree = self.FindDegree(b) - - (c,d) = self.FullDivision(a,b,aDegree,bDegree) - recon = self.Add(d, self.Multiply(c,b)) - assert (recon == a), ('TestFullDivision failed: a=' - + `a` + ', b=' + `b` + ', c=' - + `c` + ', d=' + `d` + ', recon=', recon) - - def TestInverse(self): - """ - This function tests the Inverse function by generating - a random non-zero polynomials a(x) and checking if - a * Inverse(a) == 1. - """ - - a = self.GetRandomElement(nonZero=1) - aInv = self.Inverse(a) - prod = self.Multiply(a,aInv) - assert 1 == prod, ('TestInverse failed:' + 'a=' + `a` + ', aInv=' - + `aInv` + ', prod=' + `prod`) - -class LUT: - """ - Lookup table used to speed up some finite field operations. - """ - pass - - -class FElement: - """ - This class provides field elements which overload the - +,-,*,%,//,/ operators to be the appropriate field operation. - Note that before creating FElement objects you must first - create an FField object. For example, - ->>> import ffield ->>> F = FField(5) ->>> e1 = FElement(F,7) ->>> e1 -x^2 + x^1 + 1 ->>> e2 = FElement(F,19) ->>> e2 -x^4 + x^1 + 1 ->>> e3 = e1 + e2 ->>> e3 -x^4 + x^2 ->>> e4 = e3 / e2 ->>> e4 -x^4 + x^3 + x^2 + x^1 + 1 ->>> e4 * e2 == (e3) -1 - - """ - - def __init__(self,field,e): - """ - The constructor takes two arguments, field, and e where - field is an FField object and e is an integer representing - an element in FField. - - The result is a new FElement instance. - """ - self.f = e - self.field = field - - def __add__(self,other): - assert self.field == other.field - return FElement(self.field,self.field.Add(self.f,other.f)) - - def __mul__(self,other): - assert self.field == other.field - return FElement(self.field,self.field.Multiply(self.f,other.f)) - - def __mod__(self,o): - assert self.field == o.field - return FElement(self.field, - self.field.FullDivision(self.f,o.f, - self.field.FindDegree(self.f), - self.field.FindDegree(o.f))[1]) - - def __floordiv__(self,o): - assert self.field == o.field - return FElement(self.field, - self.field.FullDivision(self.f,o.f, - self.field.FindDegree(self.f), - self.field.FindDegree(o.f))[0]) - - def __div__(self,other): - assert self.field == other.field - return FElement(self.field,self.field.Divide(self.f,other.f)) - - def __str__(self): - return self.field.ShowPolynomial(self.f) - - def __repr__(self): - return self.__str__() - - def __eq__(self,other): - assert self.field == other.field - return self.f == other.f - -def FullTest(testsPerField=10,sizeList=None): - """ - This function runs TestInverse and TestFullDivision for testsPerField - random field elements for each field size in sizeList. For example, - if sizeList = (1,5,7), then thests are run on GF(2), GF(2^5), and - GF(2^7). If sizeList == None (which is the default), then every - field is tested. - """ - - if (None == sizeList): - sizeList = gPrimitivePolys.keys() - for i in sizeList: - F = FField(i) - for j in range(testsPerField): - F.TestInverse() - F.TestFullDivision() - - -fields_doc = """ -Roughly speaking a finite field is a finite collection of elements -where most of the familiar rules of math work. Specifically, you -can add, subtract, multiply, and divide elements of a field and -continue to get elements in the field. This is useful because -computers usually store and send information in fixed size chunks. -Thus many useful algorithms can be described as elementary operations -(e.g. addition, subtract, multiplication, and division) of these chunks. - -Currently this package only deals with fields of characteristic 2. That -is all fields we consider have exactly 2^p elements for some integer p. -We denote such fields as GF(2^p) and work with the elements represented -as p-1 degree polynomials in the indeterminate x. That is an element of -the field GF(2^p) looks something like - - f(x) = c_{p-1} x^{p-1} + c_{p-2} x^{p-2} + ... + c_0 - -where the coefficients c_i are in binary. - -Addition is performed by simply adding coefficients of degree i -modulo 2. For example, if we have two field elements f and v -represented as f(x) = x^2 + 1 and v(x) = x + 1 then s = f + v -is given by (x^2 + 1) + (x + 1) = x^2 + x. Multiplication is -performed modulo a p degree generator polynomial g(x). -For example, if f and v are as in the above example, then s = s * v -is given by (x^2 + 1) + (x + 1) mod g(x). Subtraction turns out -to be the same as addition for fields of characteristic 2. Division -is defined as f / v = f * v^-1 where v^-1 is the multiplicative -inverse of v. Multiplicative inverses in groups and fields -can be calculated using the extended Euclid algorithm. - -Roughly speaking the intuition for why multiplication is -performed modulo g(x), is because we want to make sure s * v -returns an element in the field. Elements of the field are -polynomials of degree p-1, but regular multiplication could -yield terms of degree greater than p-1. Therefore we need a -rule for 'reducing' terms of degree p or greater back down -to terms of degree at most p-1. The 'reduction rule' is -taking things modulo g(x). - -For another way to think of -taking things modulo g(x) as a 'reduction rule', imagine -g(x) = x^7 + x + 1 and we want to take some polynomial, -f(x) = x^8 + x^3 + x, modulo g(x). We can think of g(x) -as telling us that we can replace every occurence of -x^7 with x + 1. Thus f(x) becomes x * x^7 + x^3 + x which -becomes x * (x + 1) + x^3 + x = x^3 + x^2 . Essentially, taking -polynomials mod x^7 by replacing all x^7 terms with x + 1 will -force down the degree of f(x) until it is below 7 (the leading power -of g(x). See a book on abstract algebra for more details. -""" - -design_doc = """ -The FField class implements a finite field calculator for fields of -characteristic two. This uses a representation of field elements -as integers and has various methods to calculate the result of -adding, subtracting, multiplying, dividing, etc. field elements -represented AS INTEGERS OR LONGS. - -The FElement class provides objects which act like a new kind of -numeric type (i.e. they overload the +,-,*,%,//,/ operators, and -print themselves as polynomials instead of integers). - -Use the FField class for efficient storage and calculation. -Use the FElement class if you want to play around with finite -field math the way you would in something like Matlab or -Mathematica. - --------------------------------------------------------------------- - WHY PYTHON? - -You may wonder why a finite field calculator written in Python would -be useful considering all the C/C++/Java code already written to do -the same thing (and probably faster too). The goals of this project -are as follows, please keep them in mind if you make changes: - -o Provide an easy to understand implementation of field operations. - Python lends itself well to comments and documentation. Hence, - we hope that in addition to being useful by itself, this project - will make it easier for people to implement finite field - computations in other languages. If you've ever looked at some - of the highly optimized finite field code written in C, you will - understand the need for a clear reference implementation of such - operations. - -o Provide easy access to a finite field calculator. - Since you can just start up the Python interpreter and do - computations, a finite field calculator in Python lets you try - things out, check your work for other algorithms, etc. - Furthermore since a wealth of numerical packages exist for python, - you can easily write simulations or algorithms which draw upon - such routines with finite fields. - -o Provide a platform independent framework for coding in Python. - Many useful error control codes can be implemented based on - finite fields. Some examples include error/erasure correction, - cyclic redundancy checks (CRCs), and secret sharing. Since - Python has a number of other useful Internet features being able - to implement these kinds of codes makes Python a better framework - for network programming. - -o Leverages Python arbitrary precision code for large fields. - If you want to do computations over very large fields, for example - GF(2^p) with p > 31 you have to write lots of ugly bit field - code in most languages. Since Python has built in support for - arbitrary precision integers, you can make this code work for - arbitrary field sizes provided you operate on longs instead of - ints. That is if you give as input numbers like - 0L, 1L, 1L << 55, etc., most of the code should work. - --------------------------------------------------------------------- - BASIC DESIGN - - -The basic idea is to index entries in the finite field of interest -using integers and design the class methods to work properly on this -representation. Using integers is efficient since integers are easy -to store and manipulate and allows us to handle arbitrary field sizes -without changing the code if we instead switch to using longs. - -Specifically, an integer represents a bit string - - c = c_{p-1} c_{p-2} ... c_0. - -which we interpret as the coefficients of a polynomial representing a -field element - - f(x) = c_{p-1} x^{p-1} + c_{p-2} x^{p-2} + ... + c_0. - --------------------------------------------------------------------- - FUTURE -In the future, support for fields of other -characteristic may be added (if people want them). Since computers -have built in parallelized operations for fields of characteristic -two (i.e. bitwise and, or, xor, etc.), this implementation uses -such operations to make most of the computations efficient. - -""" - - -license_doc = """ - This code was originally written by Emin Martinian (emin@allegro.mit.edu). - You may copy, modify, redistribute in source or binary form as long - as credit is given to the original author. Specifically, please - include some kind of comment or docstring saying that Emin Martinian - was one of the original authors. Also, if you publish anything based - on this work, it would be nice to cite the original author and any - other contributers. - - There is NO WARRANTY for this software just as there is no warranty - for GNU software (although this is not GNU software). Specifically - we adopt the same policy towards warranties as the GNU project: - - BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM 'AS IS' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. -""" - -testing_doc = """ -The FField class has a number of built in testing functions such as -TestFullDivision, TestInverse. The simplest thing to -do is to call the FullTest method. - ->>> import ffield ->>> ffield.FullTest(sizeList=None,testsPerField=100) - -# To decrease the testing time you can either decrease the testsPerField -# or you can only test the field sizes you care about by doing something -# like sizeList = [2,7,20] in the ffield.FullTest command above. - -If any problems occur, assertion errors are raised. Otherwise -nothing is returned. Note that you can also use the doctest -package to test all the python examples in the documentation -by typing 'python ffield.py' or 'python -v ffield.py' at the -command line. -""" - - -# The following code is used to make the doctest package -# check examples in docstrings. - -__test__ = { - 'testing_doc' : testing_doc -} - -def _test(): - import doctest, ffield - return doctest.testmod(ffield) - -if __name__ == "__main__": - print 'Starting automated tests (this may take a while)' - _test() - print 'Tests passed.' - diff --git a/src/py_ecc/file_ecc.py b/src/py_ecc/file_ecc.py deleted file mode 100644 index 66d9b65b..00000000 --- a/src/py_ecc/file_ecc.py +++ /dev/null @@ -1,218 +0,0 @@ - -# Copyright Emin Martinian 2002. See below for license terms. -# Version Control Info: $Id: file_ecc.py,v 1.4 2003/10/28 21:28:29 emin Exp $ - -__doc__ = """ -This package implements an erasure correction code for files. -Specifically it lets you take a file F and break it into N -pieces (which are named F.p_0, F.p_1, ..., F.p_N-1) such that -F can be recovered from any K pieces. Since the size of each -piece is F/K (plus some small header information). - -How is this better than simply repeating copies of a file? - -Firstly, this package lets you get finer grained -redunancy control since producing a duplicate copy of a file -requires at least 100% redundancy while this package lets you -expand the redunancy by n/k (e.g. if n=11, k=10 only 10% -redundancy is added). - -Secondly, using a Reed-Solomon code as is done in this package, -allows better loss resistance. For example, assume you just -divided a file F into 4 pieces, F.1, F.2, ..., F.4, duplicated -each piece and placed them each on a different disk. If the -two disks containing a copy of F.1 go down then you can no longer -recover F. - -With the Reed-Solomon code used in this package, if you use n=8, k=4 -you divide F into 8 pieces such that as long as at least 4 pieces are -available recovery can occur. Thus if you placed each piece on a -seprate disk, you could recover data as if any combination of 4 or -less disks fail. - -The docstrings for the functions EncodeFile and DecodeFiles -provide detailed information on usage and the docstring -license_doc describes the license and lack of warranty. - -The following is an example of how to use this file: - ->>> import file_ecc ->>> testFile = '/bin/ls' # A reasonable size file for testing. ->>> prefix = '/tmp/ls_backup' # Prefix for shares of file. ->>> names = file_ecc.EncodeFile(testFile,prefix,15,11) # break into N=15 pieces - -# Imagine that only pieces [0,1,5,4,13,8,9,10,11,12,14] are available. ->>> decList = map(lambda x: prefix + '.p_' + `x`,[0,1,5,4,13,8,9,10,11,12,14]) - ->>> decodedFile = '/tmp/ls.r' # Choose where we want reconstruction to go. ->>> file_ecc.DecodeFiles(decList,decodedFile) ->>> fd1 = open(testFile,'rb') ->>> fd2 = open(decodedFile,'rb') ->>> fd1.read() == fd2.read() -1 -""" - - - -from rs_code import RSCode -from array import array - -import os, struct, string - -headerSep = '|' - -def GetFileSize(fname): - return os.stat(fname)[6] - -def MakeHeader(fname,n,k,size): - return string.join(['RS_PARITY_PIECE_HEADER','FILE',fname, - 'n',`n`,'k',`k`,'size',`size`,'piece'], - headerSep) + headerSep - -def ParseHeader(header): - return string.split(header,headerSep) - -def ReadEncodeAndWriteBlock(readSize,inFD,outFD,code): - buffer = array('B') - buffer.fromfile(inFD,readSize) - for i in range(readSize,code.k): - buffer.append(0) - codeVec = code.Encode(buffer) - for j in range(code.n): - outFD[j].write(struct.pack('B',codeVec[j])) - -def EncodeFile(fname,prefix,n,k): - """ - Function: EncodeFile(fname,prefix,n,k) - Description: Encodes the file named by fname into n pieces named - prefix.p_0, prefix.p_1, ..., prefix.p_n-1. At least - k of these pieces are needed for recovering fname. - Each piece is roughly the size of fname / k (there - is a very small overhead due to some header information). - - Returns a list containing names of files for the pieces. - - Note n and k must satisfy 0 < k < n < 257. - Use the DecodeFiles function for decoding. - """ - fileList = [] - if (n > 256 or k >= n or k <= 0): - raise Exception, 'Invalid (n,k), need 0 < k < n < 257.' - inFD = open(fname,'rb') - inSize = GetFileSize(fname) - header = MakeHeader(fname,n,k,inSize) - code = RSCode(n,k,8,shouldUseLUT=-(k!=1)) - outFD = range(n) - for i in range(n): - outFileName = prefix + '.p_' + `i` - fileList.append(outFileName) - outFD[i] = open(outFileName,'wb') - outFD[i].write(header + `i` + '\n') - - if (k == 1): # just doing repetition coding - str = inFD.read(1024) - while (str): - map( lambda x: x.write(str), outFD) - str = inFD.read(256) - else: # do the full blown RS encodding - for i in range(0, (inSize/k)*k,k): - ReadEncodeAndWriteBlock(k,inFD,outFD,code) - - if ((inSize % k) > 0): - ReadEncodeAndWriteBlock(inSize % k,inFD,outFD,code) - - return fileList - -def ExtractPieceNums(fnames,headers): - l = range(len(fnames)) - pieceNums = range(len(fnames)) - for i in range(len(fnames)): - l[i] = ParseHeader(headers[i]) - for i in range(len(fnames)): - if (l[i][0] != 'RS_PARITY_PIECE_HEADER' or - l[i][2] != l[0][2] or l[i][4] != l[0][4] or - l[i][6] != l[0][6] or l[i][8] != l[0][8]): - raise Exception, 'File ' + `fnames[i]` + ' has incorrect header.' - pieceNums[i] = int(l[i][10]) - (n,k,size) = (int(l[0][4]),int(l[0][6]),long(l[0][8])) - if (len(pieceNums) < k): - raise Exception, ('Not enough parity for decoding; needed ' - + `l[0][6]` + ' got ' + `len(fnames)` + '.') - return (n,k,size,pieceNums) - -def ReadDecodeAndWriteBlock(writeSize,inFDs,outFD,code): - buffer = array('B') - for j in range(code.k): - buffer.fromfile(inFDs[j],1) - result = code.Decode(buffer.tolist()) - for j in range(writeSize): - outFD.write(struct.pack('B',result[j])) - - -def DecodeFiles(fnames,outName): - """ - Function: DecodeFiles(fnames,outName) - Description: Takes pieces of a file created using EncodeFiles and - recovers the original file placing it in outName. - The argument fnames must be a list of at least k - file names generated using EncodeFiles. - """ - inFDs = range(len(fnames)) - headers = range(len(fnames)) - for i in range(len(fnames)): - inFDs[i] = open(fnames[i],'rb') - headers[i] = inFDs[i].readline() - (n,k,inSize,pieceNums) = ExtractPieceNums(fnames,headers) - outFD = open(outName,'wb') - code = RSCode(n,k,8) - decList = pieceNums[0:k] - code.PrepareDecoder(decList) - for i in range(0, (inSize/k)*k,k): - ReadDecodeAndWriteBlock(k,inFDs,outFD,code) - if ((inSize%k)>0): - ReadDecodeAndWriteBlock(inSize%k,inFDs,outFD,code) - -license_doc = """ - This code was originally written by Emin Martinian (emin@allegro.mit.edu). - You may copy, modify, redistribute in source or binary form as long - as credit is given to the original author. Specifically, please - include some kind of comment or docstring saying that Emin Martinian - was one of the original authors. Also, if you publish anything based - on this work, it would be nice to cite the original author and any - other contributers. - - There is NO WARRANTY for this software just as there is no warranty - for GNU software (although this is not GNU software). Specifically - we adopt the same policy towards warranties as the GNU project: - - BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM 'AS IS' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. -""" - -# The following code is used to make the doctest package -# check examples in docstrings. - -def _test(): - import doctest, file_ecc - return doctest.testmod(file_ecc) - -if __name__ == "__main__": - _test() - print 'Tests passed' diff --git a/src/py_ecc/genericmatrix.py b/src/py_ecc/genericmatrix.py deleted file mode 100644 index 1b38b4f0..00000000 --- a/src/py_ecc/genericmatrix.py +++ /dev/null @@ -1,869 +0,0 @@ - -# Copyright Emin Martinian 2002. See below for license terms. -# Version Control Info: $Id: genericmatrix.py,v 1.7 2003/10/28 21:18:41 emin Exp $ - -""" -This package implements the GenericMatrix class to provide matrix -operations for any type that supports the multiply, add, subtract, -and divide operators. For example, this package can be used to -do matrix calculations over finite fields using the ffield package -available at http://martinian.com. - -The following docstrings provide detailed information on various topics: - - GenericMatrix.__doc__ Describes the methods of the GenericMatrix - class and how to use them. - - license_doc Describes the license and lack of warranty - for this code. - - testing_doc Describes some tests to make sure the code works. - -""" - -import operator - -class GenericMatrix: - - """ - The GenericMatrix class implements a matrix with works with - any generic type supporting addition, subtraction, multiplication, - and division. Matrix multiplication, addition, and subtraction - are implemented as are methods for finding inverses, - LU (actually LUP) decompositions, and determinants. A complete - list of user callable methods is: - - __init__ - __repr__ - __mul__ - __add__ - __sub__ - __setitem__ - __getitem__ - Size - SetRow - GetRow - GetColumn - Copy - MakeSimilarMatrix - SwapRows - MulRow - AddRow - AddCol - MulAddRow - LeftMulColumnVec - LowerGaussianElim - Inverse - Determinant - LUP - - A quick and dirty example of how to use the GenericMatrix class - for matricies of floats is provided below. - ->>> import genericmatrix ->>> v = genericmatrix.GenericMatrix((3,3)) ->>> v.SetRow(0,[0.0, -1.0, 1.0]) ->>> v.SetRow(1,[1.0, 1.0, 1.0]) ->>> v.SetRow(2,[1.0, 1.0, -1.0]) ->>> v - ->>> vi = v.Inverse() ->>> vi - ->>> (vi * v) - v.MakeSimilarMatrix(v.Size(),'i') - - -# See what happens when we try to invert a non-invertible matrix - ->>> v[0,1] = 0.0 ->>> v - ->>> abs(v.Determinant()) -0.0 ->>> v.Inverse() -Traceback (most recent call last): - ... -ValueError: matrix not invertible - -# LUP decomposition will still work even if Inverse() won't. - ->>> (l,u,p) = v.LUP() ->>> l - ->>> u - ->>> p - ->>> p * v - l * u - - -# Operate on some column vectors using v. -# The LeftMulColumnVec methods lets us do this without having -# to construct a new GenericMatrix to represent each column vector. ->>> v.LeftMulColumnVec([1.0,2.0,3.0]) -[3.0, 6.0, 0.0] ->>> v.LeftMulColumnVec([1.0,-2.0,1.0]) -[1.0, 0.0, -2.0] - -# Most of the stuff above could be done with something like matlab. -# But, with this package you can do matrix ops for finite fields. ->>> XOR = lambda x,y: x^y ->>> AND = lambda x,y: x&y ->>> DIV = lambda x,y: x ->>> m = GenericMatrix(size=(3,4),zeroElement=0,identityElement=1,add=XOR,mul=AND,sub=XOR,div=DIV) ->>> m.SetRow(0,[0,1,0,0]) ->>> m.SetRow(1,[0,1,0,1]) ->>> m.SetRow(2,[0,0,1,0]) ->>> # You can't invert m since it isn't square, but you can still ->>> # get the LUP decomposition or solve a system of equations. ->>> (l,u,p) = v.LUP() ->>> p*v-l*u - ->>> b = [1,0,1] ->>> x = m.Solve(b) ->>> b == m.LeftMulColumnVec(x) -1 - - """ - - - def __init__(self, size=(2,2), zeroElement=0.0, identityElement=1.0, - add=operator.__add__, sub=operator.__sub__, - mul=operator.__mul__, div = operator.__div__, - eq = operator.__eq__, str=lambda x:`x`, - equalsZero = None,fillMode='z'): - """ - Function: __init__(size,zeroElement,identityElement, - add,sub,mul,div,eq,str,equalsZero fillMode) - - Description: This is the constructor for the GenericMatrix - class. All arguments are optional and default - to producing a 2-by-2 zero matrix for floats. - A detailed description of arguments follows: - - size: A tuple of the form (numRows, numColumns) - zeroElement: An object representing the additive - identity (i.e. 'zero') for the data - type of interest. - - identityElement: An object representing the multiplicative - identity (i.e. 'one') for the data - type of interest. - - add,sub,mul,div: Functions implementing basic arithmetic - operations for the type of interest. - - eq: A function such that eq(x,y) == 1 if and only if x == y. - - str: A function used to produce a string representation of - the type of interest. - - equalsZero: A function used to decide if an element is - essentially zero. For floats, you could use - lambda x: abs(x) < 1e-6. - - fillMode: This can either be 'e' in which case the contents - of the matrix are left empty, 'z', in which case - the matrix is filled with zeros, 'i' in which - case an identity matrix is created, or a two - argument function which is called with the row - and column of each index and produces the value - for that entry. Default is 'z'. - """ - if (None == equalsZero): - equalsZero = lambda x: self.eq(self.zeroElement,x) - - self.equalsZero = equalsZero - self.add = add - self.sub = sub - self.mul = mul - self.div = div - self.eq = eq - self.str = str - self.zeroElement = zeroElement - self.identityElement = identityElement - self.rows, self.cols = size - self.data = [] - - - def q(x,y,z): - if (x): - return y - else: - return z - - if (fillMode == 'e'): - return - elif (fillMode == 'z'): - fillMode = lambda x,y: self.zeroElement - elif (fillMode == 'i'): - fillMode = lambda x,y: q(self.eq(x,y),self.identityElement, - self.zeroElement) - - for i in range(self.rows): - self.data.append(map(fillMode,[i]*self.cols,range(self.cols))) - - def MakeSimilarMatrix(self,size,fillMode): - """ - MakeSimilarMatrix(self,size,fillMode) - - Return a matrix of the given size filled according to fillMode - with the same zeroElement, identityElement, add, sub, etc. - as self. - - For example, self.MakeSimilarMatrix(self.Size(),'i') returns - an identity matrix of the same shape as self. - """ - return GenericMatrix(size=size,zeroElement=self.zeroElement, - identityElement=self.identityElement, - add=self.add,sub=self.sub, - mul=self.mul,div=self.div,eq=self.eq, - str=self.str,equalsZero=self.equalsZero, - fillMode=fillMode) - - - def __repr__(self): - m = 0 - # find the fattest element - for r in self.data: - for c in r: - l = len(self.str(c)) - if l > m: - m = l - f = '%%%ds' % (m+1) - s = '' - return s - - def __mul__(self,other): - if (self.cols != other.rows): - raise ValueError, "dimension mismatch" - result = self.MakeSimilarMatrix((self.rows,other.cols),'z') - - for i in range(self.rows): - for j in range(other.cols): - result.data[i][j] = reduce(self.add, - map(self.mul,self.data[i], - other.GetColumn(j))) - return result - - def __add__(self,other): - if (self.cols != other.rows): - raise ValueError, "dimension mismatch" - result = self.MakeSimilarMatrix(size=self.Size(),fillMode='z') - for i in range(self.rows): - for j in range(other.cols): - result.data[i][j] = self.add(self.data[i][j],other.data[i][j]) - return result - - def __sub__(self,other): - if (self.cols != other.cols or self.rows != other.rows): - raise ValueError, "dimension mismatch" - result = self.MakeSimilarMatrix(size=self.Size(),fillMode='z') - for i in range(self.rows): - for j in range(other.cols): - result.data[i][j] = self.sub(self.data[i][j], - other.data[i][j]) - return result - - def __setitem__ (self, (x,y), data): - "__setitem__((x,y),data) sets item row x and column y to data." - self.data[x][y] = data - - def __getitem__ (self, (x,y)): - "__getitem__((x,y)) gets item at row x and column y." - return self.data[x][y] - - def Size (self): - "returns (rows, columns)" - return (len(self.data), len(self.data[0])) - - def SetRow(self,r,result): - "SetRow(r,result) sets row r to result." - - assert len(result) == self.cols, ('Wrong # columns in row: ' + - 'expected ' + `self.cols` + ', got ' - + `len(result)`) - self.data[r] = list(result) - - def GetRow(self,r): - "GetRow(r) returns a copy of row r." - return list(self.data[r]) - - def GetColumn(self,c): - "GetColumn(c) returns a copy of column c." - if (c >= self.cols): - raise ValueError, 'matrix does not have that many columns' - result = [] - for r in self.data: - result.append(r[c]) - return result - - def Transpose(self): - oldData = self.data - self.data = [] - for r in range(self.cols): - self.data.append([]) - for c in range(self.rows): - self.data[r].append(oldData[c][r]) - rows = self.rows - self.rows = self.cols - self.cols = rows - - def Copy(self): - result = self.MakeSimilarMatrix(size=self.Size(),fillMode='e') - - for r in self.data: - result.data.append(list(r)) - return result - - def SubMatrix(self,rowStart,rowEnd,colStart=0,colEnd=None): - """ - SubMatrix(self,rowStart,rowEnd,colStart,colEnd) - Create and return a sub matrix containg rows - rowStart through rowEnd (inclusive) and columns - colStart through colEnd (inclusive). - """ - if (not colEnd): - colEnd = self.cols-1 - if (rowEnd >= self.rows): - raise ValueError, 'rowEnd too big: rowEnd >= self.rows' - result = self.MakeSimilarMatrix((rowEnd-rowStart+1,colEnd-colStart+1), - 'e') - - for i in range(rowStart,rowEnd+1): - result.data.append(list(self.data[i][colStart:(colEnd+1)])) - - return result - - def UnSubMatrix(self,rowStart,rowEnd,colStart,colEnd): - """ - UnSubMatrix(self,rowStart,rowEnd,colStart,colEnd) - Create and return a sub matrix containg everything except - rows rowStart through rowEnd (inclusive) - and columns colStart through colEnd (inclusive). - """ - result = self.MakeSimilarMatrix((self.rows-(rowEnd-rowStart), - self.cols-(colEnd-colStart)),'e') - - for i in range(0,rowStart) + range(rowEnd,self.rows): - result.data.append(list(self.data[i][0:colStart] + - self.data[i][colEnd:])) - - return result - - - def SwapRows(self,i,j): - temp = list(self.data[i]) - self.data[i] = list(self.data[j]) - self.data[j] = temp - - def MulRow(self,r,m,start=0): - """ - Function: MulRow(r,m,start=0) - Multiply row r by m starting at optional column start (default 0). - """ - row = self.data[r] - for i in range(start,self.cols): - row[i] = self.mul(row[i],m) - - def AddRow(self,i,j): - """ - Add row i to row j. - """ - self.data[j] = map(self.add,self.data[i],self.data[j]) - - def AddCol(self,i,j): - """ - Add column i to column j. - """ - for r in range(self.rows): - self.data[r][j] = self.add(self.data[r][i],self.data[r][j]) - - def MulAddRow(self,m,i,j): - """ - Multiply row i by m and add to row j. - """ - self.data[j] = map(self.add, - map(self.mul,[m]*self.cols,self.data[i]), - self.data[j]) - - def LeftMulColumnVec(self,colVec): - """ - Function: LeftMulColumnVec(c) - Purpose: Compute the result of self * c. - Description: This function taks as input a list c, - computes the desired result and returns it - as a list. This is sometimes more convenient - than constructed a new GenericMatrix to represent - c, computing the result and extracting c to a list. - """ - if (self.cols != len(colVec)): - raise ValueError, 'dimension mismatch' - result = range(self.rows) - for r in range(self.rows): - result[r] = reduce(self.add,map(self.mul,self.data[r],colVec)) - return result - - def FindRowLeader(self,startRow,c): - for r in range(startRow,self.rows): - if (not self.eq(self.zeroElement,self.data[r][c])): - return r - return -1 - - def FindColLeader(self,r,startCol): - for c in range(startCol,self.cols): - if (not self.equalsZero(self.data[r][c])): - return c - return -1 - - def PartialLowerGaussElim(self,rowIndex,colIndex,resultInv): - """ - Function: PartialLowerGaussElim(rowIndex,colIndex,resultInv) - - This function does partial Gaussian elimination on the part of - the matrix on and below the main diagonal starting from - rowIndex. In addition to modifying self, this function - applies the required elmentary row operations to the input - matrix resultInv. - - By partial, what we mean is that if this function encounters - an element on the diagonal which is 0, it stops and returns - the corresponding rowIndex. The caller can then permute - self or apply some other operation to eliminate the zero - and recall PartialLowerGaussElim. - - This function is meant to be combined with UpperInverse - to compute inverses and LU decompositions. - """ - - lastRow = self.rows-1 - while (rowIndex < lastRow): - if (colIndex >= self.cols): - return (rowIndex, colIndex) - if (self.eq(self.zeroElement,self.data[rowIndex][colIndex])): - # self[rowIndex,colIndex] = 0 so quit. - return (rowIndex, colIndex) - divisor = self.div(self.identityElement, - self.data[rowIndex][colIndex]) - for k in range(rowIndex+1,self.rows): - nextTerm = self.data[k][colIndex] - if (self.zeroElement != nextTerm): - multiple = self.mul(divisor,self.sub(self.zeroElement, - nextTerm)) - self.MulAddRow(multiple,rowIndex,k) - resultInv.MulAddRow(multiple,rowIndex,k) - rowIndex = rowIndex + 1 - colIndex = colIndex + 1 - return (rowIndex, colIndex) - - def LowerGaussianElim(self,resultInv=''): - """ - Function: LowerGaussianElim(r) - Purpose: Perform Gaussian elimination on self to eliminate - all terms below the diagonal. - Description: This method modifies self via Gaussian elimination - and applies the elementary row operations used in - this transformation to the input matrix, r - (if one is provided, otherwise a matrix with - identity elements on the main diagonal is - created to serve the role of r). - - Thus if the input, r, is an identity matrix, after - the call it will represent the transformation - made to perform Gaussian elimination. - - The matrix r is returned. - """ - if (resultInv == ''): - resultInv = self.MakeSimilarMatrix(self.Size(),'i') - - (rowIndex,colIndex) = (0,0) - lastRow = min(self.rows - 1,self.cols) - lastCol = self.cols - 1 - while( rowIndex < lastRow and colIndex < lastCol): - leader = self.FindRowLeader(rowIndex,colIndex) - if (leader < 0): - colIndex = colIndex + 1 - continue - if (leader != rowIndex): - resultInv.AddRow(leader,rowIndex) - self.AddRow(leader,rowIndex) - (rowIndex,colIndex) = ( - self.PartialLowerGaussElim(rowIndex,colIndex,resultInv)) - return resultInv - - def UpperInverse(self,resultInv=''): - """ - Function: UpperInverse(resultInv) - - Assumes that self is an upper triangular matrix like - - [a b c ... ] - [0 d e ... ] - [0 0 f ... ] - [. . ] - [. . ] - [. . ] - - and performs Gaussian elimination to transform self into - the identity matrix. The required elementary row operations - are applied to the matrix resultInv passed as input. For - example, if the identity matrix is passed as input, then the - value returned is the inverse of self before the function - was called. - - If no matrix, resultInv, is provided as input then one is - created with identity elements along the main diagonal. - In either case, resultInv is returned as output. - """ - if (resultInv == ''): - resultInv = self.MakeSimilarMatrix(self.Size(),'i') - lastCol = min(self.rows,self.cols) - for colIndex in range(0,lastCol): - if (self.zeroElement == self.data[colIndex][colIndex]): - raise ValueError, 'matrix not invertible' - divisor = self.div(self.identityElement, - self.data[colIndex][colIndex]) - if (self.identityElement != divisor): - self.MulRow(colIndex,divisor,colIndex) - resultInv.MulRow(colIndex,divisor) - for rowToElim in range(0,colIndex): - multiple = self.sub(self.zeroElement, - self.data[rowToElim][colIndex]) - self.MulAddRow(multiple,colIndex,rowToElim) - resultInv.MulAddRow(multiple,colIndex,rowToElim) - return resultInv - - def Inverse(self): - """ - Function: Inverse - Description: Returns the inverse of self without modifying - self. An exception is raised if the matrix - is not invertable. - """ - - workingCopy = self.Copy() - result = self.MakeSimilarMatrix(self.Size(),'i') - workingCopy.LowerGaussianElim(result) - workingCopy.UpperInverse(result) - return result - - def Determinant(self): - """ - Function: Determinant - Description: Returns the determinant of the matrix or raises - a ValueError if the matrix is not square. - """ - if (self.rows != self.cols): - raise ValueError, 'matrix not square' - workingCopy = self.Copy() - result = self.MakeSimilarMatrix(self.Size(),'i') - workingCopy.LowerGaussianElim(result) - det = self.identityElement - for i in range(self.rows): - det = det * workingCopy.data[i][i] - return det - - def LUP(self): - """ - Function: (l,u,p) = self.LUP() - Purpose: Compute the LUP decomposition of self. - Description: This function returns three matrices - l, u, and p such that p * self = l * u - where l, u, and p have the following properties: - - l is lower triangular with ones on the diagonal - u is upper triangular - p is a permutation matrix. - - The idea behind the algorithm is to first - do Gaussian elimination to obtain an upper - triangular matrix u and lower triangular matrix - r such that r * self = u, then by inverting r to - get l = r ^-1 we obtain self = r^-1 * u = l * u. - Note tha since r is lower triangular its - inverse must also be lower triangular. - - Where does the p come in? Well, with some - matrices our technique doesn't work due to - zeros appearing on the diagonal of r. So we - apply some permutations to the orginal to - prevent this. - - """ - upper = self.Copy() - resultInv = self.MakeSimilarMatrix(self.Size(),'i') - perm = self.MakeSimilarMatrix((self.rows,self.rows),'i') - - (rowIndex,colIndex) = (0,0) - lastRow = self.rows - 1 - lastCol = self.cols - 1 - while( rowIndex < lastRow and colIndex < lastCol ): - leader = upper.FindRowLeader(rowIndex,colIndex) - if (leader < 0): - colIndex = colIndex+1 - continue - if (leader != rowIndex): - upper.SwapRows(leader,rowIndex) - resultInv.SwapRows(leader,rowIndex) - perm.SwapRows(leader,rowIndex) - (rowIndex,colIndex) = ( - upper.PartialLowerGaussElim(rowIndex,colIndex,resultInv)) - - lower = self.MakeSimilarMatrix((self.rows,self.rows),'i') - resultInv.LowerGaussianElim(lower) - resultInv.UpperInverse(lower) - # possible optimization: due perm*lower explicitly without - # relying on the * operator. - return (perm*lower, upper, perm) - - def Solve(self,b): - """ - Solve(self,b): - - b: A list. - - Returns the values of x such that Ax = b. - - This is done using the LUP decomposition by - noting that Ax = b implies PAx = Pb implies LUx = Pb. - First we solve for Ly = Pb and then we solve Ux = y. - The following is an example of how to use Solve: - ->>> # Floating point example ->>> import genericmatrix ->>> A = genericmatrix.GenericMatrix(size=(2,5),str=lambda x: '%.4f' % x) ->>> A.SetRow(0,[0.0, 0.0, 0.160, 0.550, 0.280]) ->>> A.SetRow(1,[0.0, 0.0, 0.745, 0.610, 0.190]) ->>> A - ->>> b = [0.975, 0.350] ->>> x = A.Solve(b) ->>> z = A.LeftMulColumnVec(x) ->>> diff = reduce(lambda xx,yy: xx+yy,map(lambda aa,bb: abs(aa-bb),b,z)) ->>> diff > 1e-6 -0 ->>> # Boolean example ->>> XOR = lambda x,y: x^y ->>> AND = lambda x,y: x&y ->>> DIV = lambda x,y: x ->>> m=GenericMatrix(size=(3,6),zeroElement=0,identityElement=1,add=XOR,mul=AND,sub=XOR,div=DIV) ->>> m.SetRow(0,[1,0,0,1,0,1]) ->>> m.SetRow(1,[0,1,1,0,1,0]) ->>> m.SetRow(2,[0,1,0,1,1,0]) ->>> b = [0, 1, 1] ->>> x = m.Solve(b) ->>> z = m.LeftMulColumnVec(x) ->>> z -[0, 1, 1] - - """ - assert self.cols >= self.rows - - (L,U,P) = self.LUP() - Pb = P.LeftMulColumnVec(b) - y = [0]*len(Pb) - for row in range(L.rows): - y[row] = Pb[row] - for i in range(row+1,L.rows): - Pb[i] = L.sub(Pb[i],L.mul(L[i,row],Pb[row])) - x = [0]*self.cols - curRow = self.rows-1 - - for curRow in range(len(y)-1,-1,-1): - col = U.FindColLeader(curRow,0) - assert col > -1 - x[col] = U.div(y[curRow],U[curRow,col]) - y[curRow] = x[col] - for i in range(0,curRow): - y[i] = U.sub(y[i],U.mul(U[i,col],y[curRow])) - return x - - -def DotProduct(mul,add,x,y): - """ - Function: DotProduct(mul,add,x,y) - Description: Return the dot product of lists x and y using mul and - add as the multiplication and addition operations. - """ - assert len(x) == len(y), 'sizes do not match' - return reduce(add,map(mul,x,y)) - -class GenericMatrixTester: - def DoTests(self,numTests,sizeList): - """ - Function: DoTests(numTests,sizeList) - - Description: For each test, run numTests tests for square - matrices with the sizes in sizeList. - """ - - for size in sizeList: - self.RandomInverseTest(size,numTests) - self.RandomLUPTest(size,numTests) - self.RandomSolveTest(size,numTests) - self.RandomDetTest(size,numTests) - - - def MakeRandom(self,s): - import random - r = GenericMatrix(size=s,fillMode=lambda x,y: random.random(), - equalsZero = lambda x: abs(x) < 1e-6) - return r - - def MatAbs(self,m): - r = -1 - (N,M) = m.Size() - for i in range(0,N): - for j in range(0,M): - if (abs(m[i,j]) > r): - r = abs(m[i,j]) - return r - - def RandomInverseTest(self,s,n): - ident = GenericMatrix(size=(s,s),fillMode='i') - for i in range(n): - m = self.MakeRandom((s,s)) - assert self.MatAbs(ident - m * m.Inverse()) < 1e-6, ( - 'offender = ' + `m`) - - def RandomLUPTest(self,s,n): - ident = GenericMatrix(size=(s,s),fillMode='i') - for i in range(n): - m = self.MakeRandom((s,s)) - (l,u,p) = m.LUP() - assert self.MatAbs(p*m - l*u) < 1e-6, 'offender = ' + `m` - - def RandomSolveTest(self,s,n): - import random - if (s <= 1): - return - extraEquations=3 - - for i in range(n): - m = self.MakeRandom((s,s+extraEquations)) - for j in range(extraEquations): - colToKill = random.randrange(s+extraEquations) - for r in range(m.rows): - m[r,colToKill] = 0.0 - b = map(lambda x: random.random(), range(s)) - x = m.Solve(b) - z = m.LeftMulColumnVec(x) - diff = reduce(lambda xx,yy:xx+yy, map(lambda aa,bb:abs(aa-bb),b,z)) - assert diff < 1e-6, ('offenders: m = ' + `m` + '\nx = ' + `x` - + '\nb = ' + `b` + '\ndiff = ' + `diff`) - - def RandomDetTest(self,s,n): - for i in range(n): - m1 = self.MakeRandom((s,s)) - m2 = self.MakeRandom((s,s)) - prod = m1 * m2 - assert (abs(m1.Determinant() * m2.Determinant() - - prod.Determinant() ) - < 1e-6), 'offenders = ' + `m1` + `m2` - - -license_doc = """ - This code was originally written by Emin Martinian (emin@allegro.mit.edu). - You may copy, modify, redistribute in source or binary form as long - as credit is given to the original author. Specifically, please - include some kind of comment or docstring saying that Emin Martinian - was one of the original authors. Also, if you publish anything based - on this work, it would be nice to cite the original author and any - other contributers. - - There is NO WARRANTY for this software just as there is no warranty - for GNU software (although this is not GNU software). Specifically - we adopt the same policy towards warranties as the GNU project: - - BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM 'AS IS' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. -""" - -testing_doc = """ -The GenericMatrixTester class contains some simple -testing functions such as RandomInverseTest, RandomLUPTest, -RandomSolveTest, and RandomDetTest which generate random floating -point values and test the appropriate routines. The simplest way to -run these tests is via - ->>> import genericmatrix ->>> t = genericmatrix.GenericMatrixTester() ->>> t.DoTests(100,[1,2,3,4,5,10]) - -# runs 100 tests each for sizes 1-5, 10 -# note this may take a few minutes - -If any problems occur, assertion errors are raised. Otherwise -nothing is returned. Note that you can also use the doctest -package to test all the python examples in the documentation -by typing 'python genericmatrix.py' or 'python -v genericmatrix.py' at the -command line. -""" - - -# The following code is used to make the doctest package -# check examples in docstrings when you enter - -__test__ = { - 'testing_doc' : testing_doc -} - -def _test(): - import doctest, genericmatrix - return doctest.testmod(genericmatrix) - -if __name__ == "__main__": - _test() - print 'Tests Passed.' diff --git a/src/py_ecc/rs_code.py b/src/py_ecc/rs_code.py deleted file mode 100644 index e0703143..00000000 --- a/src/py_ecc/rs_code.py +++ /dev/null @@ -1,246 +0,0 @@ - -# Copyright Emin Martinian 2002. See below for license terms. -# Version Control Info: $Id: rs_code.py,v 1.5 2003/07/04 01:30:05 emin Exp $ - -""" -This package implements the RSCode class designed to do -Reed-Solomon encoding and (erasure) decoding. The following -docstrings provide detailed information on various topics. - - RSCode.__doc__ Describes the RSCode class and how to use it. - - license_doc Describes the license and lack of warranty. - -""" - -import ffield -import genericmatrix -import math - - -class RSCode: - """ - The RSCode class implements a Reed-Solomon code - (currently, only erasure decoding not error decoding is - implemented). The relevant methods are: - - __init__ - Encode - DecodeImmediate - Decode - PrepareDecoder - RandomTest - - A breif example of how to use the code follows: - ->>> import rs_code - -# Create a coder for an (n,k) = (16,8) code and test -# decoding for a simple erasure pattern. - ->>> C = rs_code.RSCode(16,8) ->>> inVec = range(8) ->>> codedVec = C.Encode(inVec) ->>> receivedVec = list(codedVec) - -# now erase some entries in the encoded vector by setting them to None ->>> receivedVec[3] = None; receivedVec[9] = None; receivedVec[12] = None ->>> receivedVec -[0, 1, 2, None, 4, 5, 6, 7, 8, None, 10, 11, None, 13, 14, 15] ->>> decVec = C.DecodeImmediate(receivedVec) ->>> decVec -[0, 1, 2, 3, 4, 5, 6, 7] - -# Now try the random testing method for more complete coverage. -# Note this will take a while. ->>> for k in range(1,8): -... for p in range(1,12): -... C = rs_code.RSCode(k+p,k) -... C.RandomTest(25) ->>> for k in range(1,8): -... for p in range(1,12): -... C = rs_code.RSCode(k+p,k,systematic=0) -... C.RandomTest(25) -""" - - def __init__(self,n,k,log2FieldSize=-1,systematic=1,shouldUseLUT=-1): - """ - Function: __init__(n,k,log2FieldSize,systematic,shouldUseLUT) - Purpose: Create a Reed-Solomon coder for an (n,k) code. - Notes: The last parameters, log2FieldSize, systematic - and shouldUseLUT are optional. - - The log2FieldSize parameter - represents the base 2 logarithm of the field size. - If it is omitted, the field GF(2^p) is used where - p is the smalles integer where 2^p >= n. - - If systematic is true then a systematic encoder - is created (i.e. one where the first k symbols - of the encoded result always match the data). - - If shouldUseLUT = 1 then a lookup table is used for - computing finite field multiplies and divides. - If shouldUseLUT = 0 then no lookup table is used. - If shouldUseLUT = -1 (the default), then the code - decides when a lookup table should be used. - """ - if (log2FieldSize < 0): - log2FieldSize = int(math.ceil(math.log(n)/math.log(2))) - self.field = ffield.FField(log2FieldSize,useLUT=shouldUseLUT) - self.n = n - self.k = k - self.fieldSize = 1 << log2FieldSize - self.CreateEncoderMatrix() - if (systematic): - self.encoderMatrix.Transpose() - self.encoderMatrix.LowerGaussianElim() - self.encoderMatrix.UpperInverse() - self.encoderMatrix.Transpose() - - def __repr__(self): - rep = ('') - return rep - - def CreateEncoderMatrix(self): - self.encoderMatrix = genericmatrix.GenericMatrix( - (self.n,self.k),0,1,self.field.Add,self.field.Subtract, - self.field.Multiply,self.field.Divide) - self.encoderMatrix[0,0] = 1 - for i in range(0,self.n): - term = 1 - for j in range(0, self.k): - self.encoderMatrix[i,j] = term - term = self.field.Multiply(term,i) - - - def Encode(self,data): - """ - Function: Encode(data) - Purpose: Encode a list of length k into length n. - """ - assert len(data)==self.k, 'Encode: input data must be size k list.' - - return self.encoderMatrix.LeftMulColumnVec(data) - - def PrepareDecoder(self,unErasedLocations): - """ - Function: PrepareDecoder(erasedTerms) - Description: The input unErasedLocations is a list of the first - self.k elements of the codeword which were - NOT erased. For example, if the 0th, 5th, - and 7th symbols of a (16,5) code were erased, - then PrepareDecoder([1,2,3,4,6]) would - properly prepare for decoding. - """ - if (len(unErasedLocations) != self.k): - raise ValueError, 'input must be exactly length k' - - limitedEncoder = genericmatrix.GenericMatrix( - (self.k,self.k),0,1,self.field.Add,self.field.Subtract, - self.field.Multiply,self.field.Divide) - for i in range(0,self.k): - limitedEncoder.SetRow( - i,self.encoderMatrix.GetRow(unErasedLocations[i])) - self.decoderMatrix = limitedEncoder.Inverse() - - def Decode(self,unErasedTerms): - """ - Function: Decode(unErasedTerms) - Purpose: Use the - Description: - """ - return self.decoderMatrix.LeftMulColumnVec(unErasedTerms) - - def DecodeImmediate(self,data): - """ - Function: DecodeImmediate(data) - Description: Takes as input a data vector of length self.n - where erased symbols are set to None and - returns the decoded result provided that - at least self.k symbols are not None. - - For example, for an (n,k) = (6,4) code, a - decodable input vector would be - [2, 0, None, 1, 2, None]. - """ - - if (len(data) != self.n): - raise ValueError, 'input must be a length n list' - - unErasedLocations = [] - unErasedTerms = [] - for i in range(self.n): - if (None != data[i]): - unErasedLocations.append(i) - unErasedTerms.append(data[i]) - self.PrepareDecoder(unErasedLocations[0:self.k]) - return self.Decode(unErasedTerms[0:self.k]) - - def RandomTest(self,numTests): - import random - - maxErasures = self.n-self.k - for i in range(numTests): - inVec = range(self.k) - for j in range(self.k): - inVec[j] = random.randint(0, (1<