--- /dev/null
+# -*- test-case-name: allmydata.test.test_encode_share -*-
+
+from zope.interface import implements
+from twisted.internet import defer
+import sha
+from allmydata.util import idlib, mathutil
+from allmydata.interfaces import IEncoder, IDecoder
+from allmydata.py_ecc import rs_code
+
+def netstring(s):
+ return "%d:%s," % (len(s), s)
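+    # e.g. netstring("abc") == "3:abc,"; the length prefix makes
+    # concatenated netstrings self-delimiting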
+
+class ReplicatingEncoder(object):
+ implements(IEncoder)
+ ENCODER_TYPE = 0
+
+ def set_params(self, data_size, required_shares, total_shares):
+ self.data_size = data_size
+ self.required_shares = required_shares
+ self.total_shares = total_shares
+
+ def get_encoder_type(self):
+ return self.ENCODER_TYPE
+
+ def get_serialized_params(self):
+ return "%d" % self.required_shares
+
+ def get_share_size(self):
+ return self.data_size
+
+ def encode(self, data):
+ shares = [(i,data) for i in range(self.total_shares)]
+ return defer.succeed(shares)
+
+class ReplicatingDecoder(object):
+ implements(IDecoder)
+
+ def set_serialized_params(self, params):
+ self.required_shares = int(params)
+
+ def decode(self, some_shares):
+ assert len(some_shares) >= self.required_shares
+ data = some_shares[0][1]
+ return defer.succeed(data)
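+
+# Minimal round-trip sketch for the replicating codec (the 2-of-4
+# parameters here are illustrative assumptions, not defaults):
+#   enc = ReplicatingEncoder()
+#   enc.set_params(len(data), 2, 4)
+#   d = enc.encode(data)   # fires with [(0, data), (1, data), ...]
+#   dec = ReplicatingDecoder()
+#   dec.set_serialized_params(enc.get_serialized_params())
+#   d2 = dec.decode(shares[:2])   # any 2 (sharenum, data) pairs suffice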
+
+
+class Encoder(object):
+ def __init__(self, infile, m):
+ self.infile = infile
+        self.k = 2  # k is hardcoded in this prototype; callers only pick m
+ self.m = m
+
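+    # This prototype pushes the entire file to every landlord (pure
+    # replication), so each remote bucket ends up holding a full copy of
+    # the data.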
+ def do_upload(self, landlords):
+ dl = []
+ data = self.infile.read()
+ for (peerid, bucket_num, remotebucket) in landlords:
+ dl.append(remotebucket.callRemote('write', data))
+ dl.append(remotebucket.callRemote('close'))
+
+ return defer.DeferredList(dl)
+
+class Decoder(object):
+ def __init__(self, outfile, k, m, verifierid):
+ self.outfile = outfile
+        self.k = k  # honor the caller-supplied k instead of hardcoding 2
+ self.m = m
+ self._verifierid = verifierid
+
+ def start(self, buckets):
+ assert len(buckets) >= self.k
+ dl = []
+ for bucketnum, bucket in buckets[:self.k]:
+ d = bucket.callRemote("read")
+ dl.append(d)
+ d2 = defer.DeferredList(dl)
+ d2.addCallback(self._got_all_data)
+ return d2
+
+ def _got_all_data(self, resultslist):
+ shares = [results for success,results in resultslist if success]
+ assert len(shares) >= self.k
+        # TODO: this is where real erasure decoding will eventually happen;
+        # with replication, every share is a complete copy, so the first
+        # share is the whole file
+ self.outfile.write(shares[0])
+ hasher = sha.new(netstring("allmydata_v1_verifierid"))
+ hasher.update(shares[0])
+ vid = hasher.digest()
+ if self._verifierid:
+ assert self._verifierid == vid, "%s != %s" % (idlib.b2a(self._verifierid), idlib.b2a(vid))
+
+
+class PyRSEncoder(object):
+ ENCODER_TYPE = 1
+
+ # we will break the data into vectors in which each element is a single
+ # byte (i.e. a single number from 0 to 255), and the length of the vector
+ # is equal to the number of required_shares. We use padding to make the
+ # last chunk of data long enough to match, and we record the data_size in
+ # the serialized parameters to strip this padding out on the receiving
+ # end.
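+    #
+    # Worked example (illustrative numbers): with data_size=10 and
+    # required_shares=3, chunk_size=3, num_chunks=div_ceil(10,3)=4, and
+    # last_chunk_padding=2, so the final 1-byte chunk gets two NUL bytes of
+    # padding; each share then carries one byte per chunk (share_size=4),
+    # and the decoder strips the padding using the recorded data_size.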
+
+ # TODO: this will write a 733kB file called 'ffield.lut.8' in the current
+ # directory the first time it is run, to cache the lookup table for later
+ # use. It appears to take about 15 seconds to create this the first time,
+ # and about 0.5s to load it in each time afterwards. Make sure this file
+ # winds up somewhere reasonable.
+
+ # TODO: the encoder/decoder RSCode object depends upon the number of
+ # required/total shares, but not upon the data. We could probably save a
+ # lot of initialization time by caching a single instance and using it
+ # any time we use the same required/total share numbers (which will
+ # probably be always).
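+    #
+    # A possible memoization sketch for that TODO (get_rscode and
+    # _rscode_cache are hypothetical names, assuming RSCode instances are
+    # stateless across encode/decode calls):
+    #   _rscode_cache = {}
+    #   def get_rscode(total_shares, required_shares):
+    #       key = (total_shares, required_shares)
+    #       if key not in _rscode_cache:
+    #           _rscode_cache[key] = rs_code.RSCode(total_shares,
+    #                                               required_shares, 8)
+    #       return _rscode_cache[key]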
+
+    # on my workstation (fluxx, a 3.5GHz Athlon), this encodes data at a
+    # rate of 6.7kBps. Zooko's mom's 1.8GHz G5 got 2.2kBps. slave3 took 40s
+    # to construct the LUT and encodes at 1.5kBps, and for some reason took
+    # more than 20 minutes to run the test_encode_share tests, so I
+    # disabled most of them there. (uh, hello, it's running figleaf)
+
+ def set_params(self, data_size, required_shares, total_shares):
+ assert required_shares <= total_shares
+ self.data_size = data_size
+ self.required_shares = required_shares
+ self.total_shares = total_shares
+ self.chunk_size = required_shares
+ self.num_chunks = mathutil.div_ceil(data_size, self.chunk_size)
+ self.last_chunk_padding = mathutil.pad_size(data_size, required_shares)
+ self.share_size = self.num_chunks
+ self.encoder = rs_code.RSCode(total_shares, required_shares, 8)
+
+ def get_encoder_type(self):
+ return self.ENCODER_TYPE
+
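+    # these params round-trip through PyRSDecoder.set_serialized_params;
+    # e.g. set_params(1000, 25, 100) serializes to "1000:25:100"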
+ def get_serialized_params(self):
+ return "%d:%d:%d" % (self.data_size, self.required_shares,
+ self.total_shares)
+
+ def get_share_size(self):
+ return self.share_size
+
+ def encode(self, data):
+ share_data = [ [] for i in range(self.total_shares)]
+ for i in range(self.num_chunks):
+ # we take self.chunk_size bytes from the input string, and
+ # turn it into self.total_shares bytes.
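+            # (toy example, assumed values: with required_shares=2 and
+            # total_shares=3, the chunk "ab" becomes input_vector [97, 98]
+            # and Encode() returns 3 bytes, one destined for each share)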
+ offset = i*self.chunk_size
+ # Note string slices aren't an efficient way to use memory, so
+ # when we upgrade from the unusably slow py_ecc prototype to a
+ # fast ECC we should also fix up this memory usage (by using the
+ # array module).
+ chunk = data[offset:offset+self.chunk_size]
+ if i == self.num_chunks-1:
+ chunk = chunk + "\x00"*self.last_chunk_padding
+ assert len(chunk) == self.chunk_size
+ input_vector = [ord(x) for x in chunk]
+ assert len(input_vector) == self.required_shares
+ output_vector = self.encoder.Encode(input_vector)
+ assert len(output_vector) == self.total_shares
+ for i2,out in enumerate(output_vector):
+ share_data[i2].append(chr(out))
+
+ shares = [ (i, "".join(share_data[i]))
+ for i in range(self.total_shares) ]
+ return defer.succeed(shares)
+
+class PyRSDecoder(object):
+
+ def set_serialized_params(self, params):
+ pieces = params.split(":")
+ self.data_size = int(pieces[0])
+ self.required_shares = int(pieces[1])
+ self.total_shares = int(pieces[2])
+
+ self.chunk_size = self.required_shares
+ self.num_chunks = mathutil.div_ceil(self.data_size, self.chunk_size)
+ self.last_chunk_padding = mathutil.pad_size(self.data_size,
+ self.required_shares)
+ self.share_size = self.num_chunks
+ self.encoder = rs_code.RSCode(self.total_shares, self.required_shares,
+ 8)
+ if False:
+ print "chunk_size: %d" % self.chunk_size
+ print "num_chunks: %d" % self.num_chunks
+ print "last_chunk_padding: %d" % self.last_chunk_padding
+ print "share_size: %d" % self.share_size
+ print "total_shares: %d" % self.total_shares
+ print "required_shares: %d" % self.required_shares
+
+ def decode(self, some_shares):
+ assert len(some_shares) >= self.required_shares
+ chunks = []
+ have_shares = {}
+ for share_num, share_data in some_shares:
+ have_shares[share_num] = share_data
+ for i in range(self.share_size):
+            # take the i'th byte from each share we hold (None where a
+            # share is missing) and decode that total_shares-long vector
+            # back into one required_shares-byte chunk: decoding is the
+            # column-wise inverse of encode
+ received_vector = []
+ for j in range(self.total_shares):
+ share = have_shares.get(j)
+ if share is not None:
+ received_vector.append(ord(share[i]))
+ else:
+ received_vector.append(None)
+ decoded_vector = self.encoder.DecodeImmediate(received_vector)
+ assert len(decoded_vector) == self.chunk_size
+ chunk = "".join([chr(x) for x in decoded_vector])
+ chunks.append(chunk)
+ data = "".join(chunks)
+ if self.last_chunk_padding:
+ data = data[:-self.last_chunk_padding]
+ assert len(data) == self.data_size
+ return defer.succeed(data)
+
+
+all_encoders = {
+ ReplicatingEncoder.ENCODER_TYPE: (ReplicatingEncoder, ReplicatingDecoder),
+ PyRSEncoder.ENCODER_TYPE: (PyRSEncoder, PyRSDecoder),
+ }
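+
+# Sketch of how a caller is expected to use this registry (the variable
+# names here are assumptions, not part of this module):
+#   enc_class, dec_class = all_encoders[encoder_type]
+#   dec = dec_class()
+#   dec.set_serialized_params(serialized_params)
+#   d = dec.decode(shares)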
from twisted.application import service
from allmydata.util import idlib
-from allmydata import encode
+from allmydata import codec
class NotEnoughPeersError(Exception):
pass
n = self._shares = 4
k = self._desired_shares = 2
self._target.open()
- self._decoder = encode.Decoder(self._target, k, n,
- self._verifierid)
+ self._decoder = codec.Decoder(self._target, k, n,
+ self._verifierid)
def start(self):
log.msg("starting download")
+++ /dev/null
-# -*- test-case-name: allmydata.test.test_encode_share -*-
-
-from zope.interface import implements
-from twisted.internet import defer
-import sha
-from allmydata.util import idlib, mathutil
-from allmydata.interfaces import IEncoder, IDecoder
-from allmydata.py_ecc import rs_code
-
-def netstring(s):
- return "%d:%s," % (len(s), s)
-
-class ReplicatingEncoder(object):
- implements(IEncoder)
- ENCODER_TYPE = 0
-
- def set_params(self, data_size, required_shares, total_shares):
- self.data_size = data_size
- self.required_shares = required_shares
- self.total_shares = total_shares
-
- def get_encoder_type(self):
- return self.ENCODER_TYPE
-
- def get_serialized_params(self):
- return "%d" % self.required_shares
-
- def get_share_size(self):
- return self.data_size
-
- def encode(self, data):
- shares = [(i,data) for i in range(self.total_shares)]
- return defer.succeed(shares)
-
-class ReplicatingDecoder(object):
- implements(IDecoder)
-
- def set_serialized_params(self, params):
- self.required_shares = int(params)
-
- def decode(self, some_shares):
- assert len(some_shares) >= self.required_shares
- data = some_shares[0][1]
- return defer.succeed(data)
-
-
-class Encoder(object):
- def __init__(self, infile, m):
- self.infile = infile
- self.k = 2
- self.m = m
-
- def do_upload(self, landlords):
- dl = []
- data = self.infile.read()
- for (peerid, bucket_num, remotebucket) in landlords:
- dl.append(remotebucket.callRemote('write', data))
- dl.append(remotebucket.callRemote('close'))
-
- return defer.DeferredList(dl)
-
-class Decoder(object):
- def __init__(self, outfile, k, m, verifierid):
- self.outfile = outfile
- self.k = 2
- self.m = m
- self._verifierid = verifierid
-
- def start(self, buckets):
- assert len(buckets) >= self.k
- dl = []
- for bucketnum, bucket in buckets[:self.k]:
- d = bucket.callRemote("read")
- dl.append(d)
- d2 = defer.DeferredList(dl)
- d2.addCallback(self._got_all_data)
- return d2
-
- def _got_all_data(self, resultslist):
- shares = [results for success,results in resultslist if success]
- assert len(shares) >= self.k
- # here's where the Reed-Solomon magic takes place
- self.outfile.write(shares[0])
- hasher = sha.new(netstring("allmydata_v1_verifierid"))
- hasher.update(shares[0])
- vid = hasher.digest()
- if self._verifierid:
- assert self._verifierid == vid, "%s != %s" % (idlib.b2a(self._verifierid), idlib.b2a(vid))
-
-
-class PyRSEncoder(object):
- ENCODER_TYPE = 1
-
- # we will break the data into vectors in which each element is a single
- # byte (i.e. a single number from 0 to 255), and the length of the vector
- # is equal to the number of required_shares. We use padding to make the
- # last chunk of data long enough to match, and we record the data_size in
- # the serialized parameters to strip this padding out on the receiving
- # end.
-
- # TODO: this will write a 733kB file called 'ffield.lut.8' in the current
- # directory the first time it is run, to cache the lookup table for later
- # use. It appears to take about 15 seconds to create this the first time,
- # and about 0.5s to load it in each time afterwards. Make sure this file
- # winds up somewhere reasonable.
-
- # TODO: the encoder/decoder RSCode object depends upon the number of
- # required/total shares, but not upon the data. We could probably save a
- # lot of initialization time by caching a single instance and using it
- # any time we use the same required/total share numbers (which will
- # probably be always).
-
- # on my workstation (fluxx, a 3.5GHz Athlon), this encodes data at a rate
- # of 6.7kBps. Zooko's mom's 1.8GHz G5 got 2.2kBps . slave3 took 40s to
- # construct the LUT and encodes at 1.5kBps, and for some reason took more
- # than 20 minutes to run the test_encode_share tests, so I disabled most
- # of them. (uh, hello, it's running figleaf)
-
- def set_params(self, data_size, required_shares, total_shares):
- assert required_shares <= total_shares
- self.data_size = data_size
- self.required_shares = required_shares
- self.total_shares = total_shares
- self.chunk_size = required_shares
- self.num_chunks = mathutil.div_ceil(data_size, self.chunk_size)
- self.last_chunk_padding = mathutil.pad_size(data_size, required_shares)
- self.share_size = self.num_chunks
- self.encoder = rs_code.RSCode(total_shares, required_shares, 8)
-
- def get_encoder_type(self):
- return self.ENCODER_TYPE
-
- def get_serialized_params(self):
- return "%d:%d:%d" % (self.data_size, self.required_shares,
- self.total_shares)
-
- def get_share_size(self):
- return self.share_size
-
- def encode(self, data):
- share_data = [ [] for i in range(self.total_shares)]
- for i in range(self.num_chunks):
- # we take self.chunk_size bytes from the input string, and
- # turn it into self.total_shares bytes.
- offset = i*self.chunk_size
- # Note string slices aren't an efficient way to use memory, so
- # when we upgrade from the unusably slow py_ecc prototype to a
- # fast ECC we should also fix up this memory usage (by using the
- # array module).
- chunk = data[offset:offset+self.chunk_size]
- if i == self.num_chunks-1:
- chunk = chunk + "\x00"*self.last_chunk_padding
- assert len(chunk) == self.chunk_size
- input_vector = [ord(x) for x in chunk]
- assert len(input_vector) == self.required_shares
- output_vector = self.encoder.Encode(input_vector)
- assert len(output_vector) == self.total_shares
- for i2,out in enumerate(output_vector):
- share_data[i2].append(chr(out))
-
- shares = [ (i, "".join(share_data[i]))
- for i in range(self.total_shares) ]
- return defer.succeed(shares)
-
-class PyRSDecoder(object):
-
- def set_serialized_params(self, params):
- pieces = params.split(":")
- self.data_size = int(pieces[0])
- self.required_shares = int(pieces[1])
- self.total_shares = int(pieces[2])
-
- self.chunk_size = self.required_shares
- self.num_chunks = mathutil.div_ceil(self.data_size, self.chunk_size)
- self.last_chunk_padding = mathutil.pad_size(self.data_size,
- self.required_shares)
- self.share_size = self.num_chunks
- self.encoder = rs_code.RSCode(self.total_shares, self.required_shares,
- 8)
- if False:
- print "chunk_size: %d" % self.chunk_size
- print "num_chunks: %d" % self.num_chunks
- print "last_chunk_padding: %d" % self.last_chunk_padding
- print "share_size: %d" % self.share_size
- print "total_shares: %d" % self.total_shares
- print "required_shares: %d" % self.required_shares
-
- def decode(self, some_shares):
- chunk_size = self.chunk_size
- assert len(some_shares) >= self.required_shares
- chunks = []
- have_shares = {}
- for share_num, share_data in some_shares:
- have_shares[share_num] = share_data
- for i in range(self.share_size):
- # this takes one byte from each share, and turns the combination
- # into a single chunk
- received_vector = []
- for j in range(self.total_shares):
- share = have_shares.get(j)
- if share is not None:
- received_vector.append(ord(share[i]))
- else:
- received_vector.append(None)
- decoded_vector = self.encoder.DecodeImmediate(received_vector)
- assert len(decoded_vector) == self.chunk_size
- chunk = "".join([chr(x) for x in decoded_vector])
- chunks.append(chunk)
- data = "".join(chunks)
- if self.last_chunk_padding:
- data = data[:-self.last_chunk_padding]
- assert len(data) == self.data_size
- return defer.succeed(data)
-
-
-all_encoders = {
- ReplicatingEncoder.ENCODER_TYPE: (ReplicatingEncoder, ReplicatingDecoder),
- PyRSEncoder.ENCODER_TYPE: (PyRSEncoder, PyRSDecoder),
- }
from allmydata.Crypto.Cipher import AES
import sha
from allmydata.util import mathutil
-from allmydata.encode import PyRSEncoder
+from allmydata.codec import PyRSEncoder
def hash(data):
return sha.new(data).digest()
--- /dev/null
+
+import os, time
+from twisted.trial import unittest
+from twisted.internet import defer
+from twisted.python import log
+from allmydata.codec import PyRSEncoder, PyRSDecoder, ReplicatingEncoder, ReplicatingDecoder
+import random
+
+class Tester:
+ #enc_class = PyRSEncoder
+ #dec_class = PyRSDecoder
+
+ def do_test(self, size, required_shares, total_shares):
+ data0 = os.urandom(size)
+ enc = self.enc_class()
+ enc.set_params(size, required_shares, total_shares)
+ serialized_params = enc.get_serialized_params()
+ log.msg("serialized_params: %s" % serialized_params)
+ d = enc.encode(data0)
+ def _done(shares):
+ self.failUnlessEqual(len(shares), total_shares)
+ self.shares = shares
+ d.addCallback(_done)
+
+ def _decode(shares):
+ dec = self.dec_class()
+ dec.set_serialized_params(serialized_params)
+ d1 = dec.decode(shares)
+ return d1
+
+ def _check_data(data1):
+ self.failUnlessEqual(len(data1), len(data0))
+ self.failUnless(data1 == data0)
+
+ def _decode_all_ordered(res):
+ log.msg("_decode_all_ordered")
+ # can we decode using all of the shares?
+ return _decode(self.shares)
+ d.addCallback(_decode_all_ordered)
+ d.addCallback(_check_data)
+
+ def _decode_all_shuffled(res):
+ log.msg("_decode_all_shuffled")
+ # can we decode, using all the shares, but in random order?
+ shuffled_shares = self.shares[:]
+ random.shuffle(shuffled_shares)
+ return _decode(shuffled_shares)
+ d.addCallback(_decode_all_shuffled)
+ d.addCallback(_check_data)
+
+ def _decode_some(res):
+ log.msg("_decode_some")
+ # decode with a minimal subset of the shares
+ some_shares = self.shares[:required_shares]
+ return _decode(some_shares)
+ d.addCallback(_decode_some)
+ d.addCallback(_check_data)
+
+ def _decode_some_random(res):
+ log.msg("_decode_some_random")
+ # use a randomly-selected minimal subset
+ some_shares = random.sample(self.shares, required_shares)
+ return _decode(some_shares)
+ d.addCallback(_decode_some_random)
+ d.addCallback(_check_data)
+
+ def _decode_multiple(res):
+ log.msg("_decode_multiple")
+ # make sure we can re-use the decoder object
+ shares1 = random.sample(self.shares, required_shares)
+ shares2 = random.sample(self.shares, required_shares)
+ dec = self.dec_class()
+ dec.set_serialized_params(serialized_params)
+ d1 = dec.decode(shares1)
+ d1.addCallback(_check_data)
+ d1.addCallback(lambda res: dec.decode(shares2))
+ d1.addCallback(_check_data)
+ return d1
+ d.addCallback(_decode_multiple)
+
+ return d
+
+ def test_encode(self):
+ if os.uname()[1] == "slave3" and self.enc_class == PyRSEncoder:
+ raise unittest.SkipTest("slave3 is really slow")
+ return self.do_test(1000, 25, 100)
+
+ def test_encode1(self):
+ return self.do_test(8, 8, 16)
+
+ def test_encode2(self):
+ if os.uname()[1] == "slave3" and self.enc_class == PyRSEncoder:
+ raise unittest.SkipTest("slave3 is really slow")
+ return self.do_test(123, 25, 100)
+
+ def test_sizes(self):
+ raise unittest.SkipTest("omg this would take forever")
+ d = defer.succeed(None)
+ for i in range(1, 100):
+ d.addCallback(lambda res,size: self.do_test(size, 4, 10), i)
+ return d
+
+class PyRS(unittest.TestCase, Tester):
+ enc_class = PyRSEncoder
+ dec_class = PyRSDecoder
+
+class Replicating(unittest.TestCase, Tester):
+ enc_class = ReplicatingEncoder
+ dec_class = ReplicatingDecoder
+
+
+class BenchPyRS(unittest.TestCase):
+ enc_class = PyRSEncoder
+ def test_big(self):
+ size = 10000
+ required_shares = 25
+ total_shares = 100
+ # this lets us use a persistent lookup table, stored outside the
+ # _trial_temp directory (which is deleted each time trial is run)
+        if not os.path.exists("ffield.lut.8"):
+            os.symlink("../ffield.lut.8", "ffield.lut.8")
+ enc = self.enc_class()
+ self.start()
+ enc.set_params(size, required_shares, total_shares)
+ serialized_params = enc.get_serialized_params()
+ print "encoder ready", self.stop()
+ self.start()
+ data0 = os.urandom(size)
+ print "data ready", self.stop()
+ self.start()
+ d = enc.encode(data0)
+ def _done(shares):
+ print "shares ready", self.stop()
+ self.start()
+ self.failUnlessEqual(len(shares), total_shares)
+ d.addCallback(_done)
+ d.addCallback(lambda res: enc.encode(data0))
+ d.addCallback(_done)
+ d.addCallback(lambda res: enc.encode(data0))
+ d.addCallback(_done)
+ return d
+
+ def start(self):
+ self.start_time = time.time()
+
+ def stop(self):
+ self.end_time = time.time()
+ return (self.end_time - self.start_time)
+
+
+# BenchPyRS is disabled by default; to benchmark the encoder, delete the
+# following line
+del BenchPyRS
+# and then run 'make test TEST=allmydata.test.test_encode_share.BenchPyRS'
+++ /dev/null
-
-import os, time
-from twisted.trial import unittest
-from twisted.internet import defer
-from twisted.python import log
-from allmydata.encode import PyRSEncoder, PyRSDecoder, ReplicatingEncoder, ReplicatingDecoder
-import random
-
-class Tester:
- #enc_class = PyRSEncoder
- #dec_class = PyRSDecoder
-
- def do_test(self, size, required_shares, total_shares):
- data0 = os.urandom(size)
- enc = self.enc_class()
- enc.set_params(size, required_shares, total_shares)
- serialized_params = enc.get_serialized_params()
- log.msg("serialized_params: %s" % serialized_params)
- d = enc.encode(data0)
- def _done(shares):
- self.failUnlessEqual(len(shares), total_shares)
- self.shares = shares
- d.addCallback(_done)
-
- def _decode(shares):
- dec = self.dec_class()
- dec.set_serialized_params(serialized_params)
- d1 = dec.decode(shares)
- return d1
-
- def _check_data(data1):
- self.failUnlessEqual(len(data1), len(data0))
- self.failUnless(data1 == data0)
-
- def _decode_all_ordered(res):
- log.msg("_decode_all_ordered")
- # can we decode using all of the shares?
- return _decode(self.shares)
- d.addCallback(_decode_all_ordered)
- d.addCallback(_check_data)
-
- def _decode_all_shuffled(res):
- log.msg("_decode_all_shuffled")
- # can we decode, using all the shares, but in random order?
- shuffled_shares = self.shares[:]
- random.shuffle(shuffled_shares)
- return _decode(shuffled_shares)
- d.addCallback(_decode_all_shuffled)
- d.addCallback(_check_data)
-
- def _decode_some(res):
- log.msg("_decode_some")
- # decode with a minimal subset of the shares
- some_shares = self.shares[:required_shares]
- return _decode(some_shares)
- d.addCallback(_decode_some)
- d.addCallback(_check_data)
-
- def _decode_some_random(res):
- log.msg("_decode_some_random")
- # use a randomly-selected minimal subset
- some_shares = random.sample(self.shares, required_shares)
- return _decode(some_shares)
- d.addCallback(_decode_some_random)
- d.addCallback(_check_data)
-
- def _decode_multiple(res):
- log.msg("_decode_multiple")
- # make sure we can re-use the decoder object
- shares1 = random.sample(self.shares, required_shares)
- shares2 = random.sample(self.shares, required_shares)
- dec = self.dec_class()
- dec.set_serialized_params(serialized_params)
- d1 = dec.decode(shares1)
- d1.addCallback(_check_data)
- d1.addCallback(lambda res: dec.decode(shares2))
- d1.addCallback(_check_data)
- return d1
- d.addCallback(_decode_multiple)
-
- return d
-
- def test_encode(self):
- if os.uname()[1] == "slave3" and self.enc_class == PyRSEncoder:
- raise unittest.SkipTest("slave3 is really slow")
- return self.do_test(1000, 25, 100)
-
- def test_encode1(self):
- return self.do_test(8, 8, 16)
-
- def test_encode2(self):
- if os.uname()[1] == "slave3" and self.enc_class == PyRSEncoder:
- raise unittest.SkipTest("slave3 is really slow")
- return self.do_test(123, 25, 100)
-
- def test_sizes(self):
- raise unittest.SkipTest("omg this would take forever")
- d = defer.succeed(None)
- for i in range(1, 100):
- d.addCallback(lambda res,size: self.do_test(size, 4, 10), i)
- return d
-
-class PyRS(unittest.TestCase, Tester):
- enc_class = PyRSEncoder
- dec_class = PyRSDecoder
-
-class Replicating(unittest.TestCase, Tester):
- enc_class = ReplicatingEncoder
- dec_class = ReplicatingDecoder
-
-
-class BenchPyRS(unittest.TestCase):
- enc_class = PyRSEncoder
- def test_big(self):
- size = 10000
- required_shares = 25
- total_shares = 100
- # this lets us use a persistent lookup table, stored outside the
- # _trial_temp directory (which is deleted each time trial is run)
- os.symlink("../ffield.lut.8", "ffield.lut.8")
- enc = self.enc_class()
- self.start()
- enc.set_params(size, required_shares, total_shares)
- serialized_params = enc.get_serialized_params()
- print "encoder ready", self.stop()
- self.start()
- data0 = os.urandom(size)
- print "data ready", self.stop()
- self.start()
- d = enc.encode(data0)
- def _done(shares):
- now_shares = time.time()
- print "shares ready", self.stop()
- self.start()
- self.failUnlessEqual(len(shares), total_shares)
- d.addCallback(_done)
- d.addCallback(lambda res: enc.encode(data0))
- d.addCallback(_done)
- d.addCallback(lambda res: enc.encode(data0))
- d.addCallback(_done)
- return d
-
- def start(self):
- self.start_time = time.time()
-
- def stop(self):
- self.end_time = time.time()
- return (self.end_time - self.start_time)
-
-
-# to benchmark the encoder, delete this line
-del BenchPyRS
-# and then run 'make test TEST=allmydata.test.test_encode_share.BenchPyRS'
from foolscap import Referenceable
from allmydata.util import idlib
-from allmydata import encode
+from allmydata import codec
from cStringIO import StringIO
import sha
def make_encoder(self):
self._needed_shares = 4
self._shares = 4
- self._encoder = encode.Encoder(self._filehandle, self._shares)
+ self._encoder = codec.Encoder(self._filehandle, self._shares)
self._share_size = self._size
def set_verifierid(self, vid):