From: Zooko O'Whielacronx Date: Fri, 29 Dec 2006 20:50:53 +0000 (-0700) Subject: make encode_new use py_ecc for real live erasure coding X-Git-Tag: tahoe_v0.1.0-0-UNSTABLE~416 X-Git-Url: https://git.rkrishnan.org/%5B/frontends/flags/%22news.html/quickstart.html?a=commitdiff_plain;h=b0315fc549c2a91f7abd7df93e937801c0585ab0;p=tahoe-lafs%2Ftahoe-lafs.git make encode_new use py_ecc for real live erasure coding (This patch is not tested -- I'm working on a Mac which doesn't have gcc installed... (gcc is necessary for the crypto module.) I will now attempt to connect to a better development computer over my mom's staticky, failure-prone, 14.4 K modem...) --- diff --git a/src/allmydata/encode_new.py b/src/allmydata/encode_new.py index 50a4f69b..eb6172d2 100644 --- a/src/allmydata/encode_new.py +++ b/src/allmydata/encode_new.py @@ -5,6 +5,8 @@ from twisted.internet import defer from allmydata.chunk import HashTree, roundup_pow2 from Crypto.Cipher import AES import sha +from allmydata.util import mathutil +from allmydata.util.assertutil import _assert, precondition def hash(data): return sha.new(data).digest() @@ -67,14 +69,19 @@ class Encoder(object): infile.seek(0, 2) self.file_size = infile.tell() infile.seek(0, 0) - fsize = 1.0 * self.file_size - self.segment_size = 1024 - self.num_segments = int(math.ceil(fsize / self.segment_size)) self.num_shares = 100 self.required_shares = 25 + + # The segment size needs to be an even multiple of required_shares. + # (See encode_segment().) 
+ self.segment_size = mathutil.next_multiple(1024, self.required_shares) + self.num_segments = mathutil.div_ceil(self.file_size, self.segment_size) + self.share_size = self.file_size / self.required_shares + self.fecer = rs_code.RSCode(self.num_shares, self.required_shares) + def get_reservation_size(self): self.num_shares = 100 self.share_size = self.file_size / self.required_shares @@ -104,8 +111,16 @@ class Encoder(object): return d def encode_segment(self, crypttext): - shares = [crypttext] * self.num_shares - return shares + precondition((len(crypttext) % self.required_shares) == 0, len(crypttext), self.required_shares, len(crypttext) % self.required_shares) + subshares = [[] for x in range(self.num_shares)] + # Note string slices aren't an efficient way to use memory, so when we + # upgrade from the unusably slow py_ecc prototype to a fast ECC we + # should also fix up this memory usage (by using the array module). + for i in range(0, len(crypttext), self.required_shares): + words = self.fecer.Encode(crypttext[i:i+self.required_shares]) + for (subshare, word,) in zip(subshares, words): + subshare.append(word) + return [ ''.join(subshare) for subshare in subshares ] def do_segment(self, segnum): segment_plaintext = self.infile.read(self.segment_size)