switch upload to use encode_new, fix a few things (but not nearly all of them)

author Brian Warner <warner@allmydata.com>

Fri, 30 Mar 2007 18:53:03 +0000 (11:53 -0700)

committer Brian Warner <warner@allmydata.com>

Fri, 30 Mar 2007 18:53:03 +0000 (11:53 -0700)
author Brian Warner <warner@allmydata.com>
Fri, 30 Mar 2007 18:53:03 +0000 (11:53 -0700)
committer Brian Warner <warner@allmydata.com>
Fri, 30 Mar 2007 18:53:03 +0000 (11:53 -0700)
diff --git a/src/allmydata/codec.py b/src/allmydata/codec.py

index 7cbd9f4da29763fe3b0177c74e02c549cd03c018..2e9efa8488e92121f7dca6b47f91c243b771a453 100644 (file)
--- a/src/allmydata/codec.py
+++ b/src/allmydata/codec.py
@@ -39,9 +39,6 @@ class ReplicatingEncoder(object):
      def get_serialized_params(self):
          return "%d" % self.required_shares
  
-    def get_share_size(self):
-        return self.data_size
-
      def get_block_size(self):
          return self.data_size
  
@@ -97,9 +94,6 @@ class CRSEncoder(object):
          return "%d-%d-%d" % (self.data_size, self.required_shares,
                               self.max_shares)
  
-    def get_share_size(self):
-        return self.share_size
-
      def get_block_size(self):
          return self.share_size
  
diff --git a/src/allmydata/encode_new.py b/src/allmydata/encode_new.py

index 2105d260b7dce77346274ab0beee73dd8bdb13ef..5d0458d72a7c48aa350ac1c417ba066384b77db7 100644 (file)
--- a/src/allmydata/encode_new.py
+++ b/src/allmydata/encode_new.py
@@ -75,6 +75,8 @@ PiB=1024*TiB
  
  class Encoder(object):
      implements(IEncoder)
+    NEEDED_SHARES = 25
+    TOTAL_SHARES = 100
  
      def setup(self, infile):
          self.infile = infile
@@ -82,28 +84,37 @@ class Encoder(object):
          self.file_size = infile.tell()
          infile.seek(0, 0)
  
-        self.num_shares = 100
-        self.required_shares = 25
+        self.num_shares = self.TOTAL_SHARES
+        self.required_shares = self.NEEDED_SHARES
  
          self.segment_size = min(2*MiB, self.file_size)
+        self.setup_codec()
  
-    def get_reservation_size(self):
+    def setup_codec(self):
+        self._codec = CRSEncoder()
+        self._codec.set_params(self.segment_size, self.required_shares,
+                               self.num_shares)
+
+    def get_share_size(self):
          share_size = mathutil.div_ceil(self.file_size, self.required_shares)
          overhead = self.compute_overhead()
          return share_size + overhead
      def compute_overhead(self):
          return 0
+    def get_block_size(self):
+        return self._codec.get_block_size()
  
      def set_shareholders(self, landlords):
          self.landlords = landlords.copy()
  
      def start(self):
+        #paddedsize = self._size + mathutil.pad_size(self._size, self.needed_shares)
          self.num_segments = mathutil.div_ceil(self.file_size,
                                                self.segment_size)
          self.share_size = mathutil.div_ceil(self.file_size,
                                              self.required_shares)
          self.setup_encryption()
-        self.setup_encoder()
+        self.setup_codec()
          d = defer.succeed(None)
          for i in range(self.num_segments):
              d.addCallback(lambda res: self.do_segment(i))
@@ -124,11 +135,6 @@ class Encoder(object):
          # that we sent to that landlord.
          self.share_root_hashes = [None] * self.num_shares
  
-    def setup_encoder(self):
-        self.encoder = CRSEncoder()
-        self.encoder.set_params(self.segment_size, self.required_shares,
-                                self.num_shares)
-
      def do_segment(self, segnum):
          chunks = []
          # the ICodecEncoder API wants to receive a total of self.segment_size
@@ -137,7 +143,7 @@ class Encoder(object):
          # these pieces need to be the same size as the share which the codec
          # will generate. Therefore we must feed it with input_piece_size that
          # equals the output share size.
-        input_piece_size = self.encoder.get_share_size()
+        input_piece_size = self._codec.get_block_size()
  
          # as a result, the number of input pieces per encode() call will be
          # equal to the number of required shares with which the codec was
@@ -154,7 +160,7 @@ class Encoder(object):
                  input_piece += ('\x00' * (input_piece_size - len(input_piece)))
              encrypted_piece = self.cryptor.encrypt(input_piece)
              chunks.append(encrypted_piece)
-        d = self.encoder.encode(chunks)
+        d = self._codec.encode(chunks)
          d.addCallback(self._encoded_segment)
          return d
  
diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py

index 1cfb5dae4715eccf527e2bbe8596934e986c6187..cc235779f1e2d34491cf241f3336462fa6284c59 100644 (file)
--- a/src/allmydata/interfaces.py
+++ b/src/allmydata/interfaces.py
@@ -165,10 +165,6 @@ class ICodecEncoder(Interface):
          """Return the length of the shares that encode() will produce.
          """
  
-    def get_share_size():
-        """Return the length of the shares that encode() will produce.
-        """
-
      def encode_proposal(data, desired_share_ids=None):
          """Encode some data.
  
@@ -332,11 +328,25 @@ class IEncoder(Interface):
          before calling get_reservation_size().
          """
  
-    def get_reservation_size():
+    def get_share_size():
          """I return the size of the data that will be stored on each
-        shareholder. It is useful to determine this size before asking
-        potential shareholders whether they will grant a lease or not, since
-        their answers will depend upon how much space we need.
+        shareholder. This is aggregate amount of data that will be sent to
+        the shareholder, summed over all the put_block() calls I will ever
+        make.
+
+        TODO: this might also include some amount of overhead, like the size
+        of all the hashes. We need to decide whether this is useful or not.
+
+        It is useful to determine this size before asking potential
+        shareholders whether they will grant a lease or not, since their
+        answers will depend upon how much space we need.
+        """
+
+    def get_block_size(): # TODO: can we avoid exposing this?
+        """I return the size of the individual blocks that will be delivered
+        to a shareholder's put_block() method. By knowing this, the
+        shareholder will be able to keep all blocks in a single file and
+        still provide random access when reading them.
          """
  
      def set_shareholders(shareholders):
diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py

index aa1764342f5e14d6b78e956e292765edd1bc9c80..0eb54222f2cb6c9135c6ff5a5917dbf40e80cc66 100644 (file)
--- a/src/allmydata/test/test_encode.py
+++ b/src/allmydata/test/test_encode.py
@@ -62,6 +62,7 @@ class UpDown(unittest.TestCase):
          NUM_SHARES = 100
          assert e.num_shares == NUM_SHARES # else we'll be completely confused
          e.segment_size = 25 # force use of multiple segments
+        e.setup_codec() # need to rebuild the codec for that change
          NUM_SEGMENTS = 4
          assert (NUM_SEGMENTS-1)*e.segment_size < len(data) <= NUM_SEGMENTS*e.segment_size
          shareholders = {}
diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py

index d0f36c20e5101ca2d323c7109481a54b894079bd..2fbfd0c3398bdb2e2fd36b5d8a9d8767553379b1 100644 (file)
--- a/src/allmydata/upload.py
+++ b/src/allmydata/upload.py
@@ -5,7 +5,7 @@ from twisted.application import service
  from foolscap import Referenceable
  
  from allmydata.util import idlib, mathutil
-from allmydata import codec
+from allmydata import encode_new
  from allmydata.uri import pack_uri
  from allmydata.interfaces import IUploadable, IUploader
  
@@ -45,8 +45,6 @@ class PeerTracker:
  
  class FileUploader:
      debug = False
-    ENCODERCLASS = codec.CRSEncoder
-
  
      def __init__(self, client):
          self._client = client
@@ -83,20 +81,16 @@ class FileUploader:
          assert self.needed_shares
  
          # create the encoder, so we can know how large the shares will be
-        self._encoder = self.ENCODERCLASS()
-        self._last_seg_encoder = self.ENCODERCLASS() # This one is for encoding the final segment, which might be shorter than the others.
-        self._codec_name = self._encoder.get_encoder_type()
-        self._encoder.set_params(self.segment_size, self.needed_shares, self.total_shares)
-xyz
-
-        paddedsize = self._size + mathutil.pad_size(self._size, self.needed_shares)
-
-        self._block_size = self._encoder.get_block_size()
+        self._encoder = encode_new.Encoder()
+        self._encoder.setup(infile)
+        share_size = self._encoder.get_share_size()
+        block_size = self._encoder.get_block_size()
  
-        # first step: who should we upload to?
+        # we are responsible for locating the shareholders. self._encoder is
+        # responsible for handling the data and sending out the shares.
          peers = self._client.get_permuted_peers(self._verifierid)
          assert peers
-        trackers = [ (permutedid, PeerTracker(peerid, conn, self._share_size, self._block_size, self._verifierid),)
+        trackers = [ (permutedid, PeerTracker(peerid, conn, share_size, block_size, self._verifierid),)
                       for permutedid, peerid, conn in peers ]
          ring_things = [] # a list of (position_in_ring, whatami, x) where whatami is 0 if x is a sharenum or else 1 if x is a PeerTracker instance
          ring_things.extend([ (permutedpeerid, 1, peer,) for permutedpeerid, peer in trackers ])
@@ -196,7 +190,7 @@ xyz
  
      def _compute_uri(self, roothash):
          params = self._encoder.get_serialized_params()
-        return pack_uri(self._codec_name, params, self._verifierid, roothash, self.needed_shares, self.total_shares, self._size, self._encoder.segment_size)
+        return pack_uri(self._encoder.get_encoder_type(), params, self._verifierid, roothash, self.needed_shares, self.total_shares, self._size, self._encoder.segment_size)
  
  
  def netstring(s):
author	Brian Warner <warner@allmydata.com>
	Fri, 30 Mar 2007 18:53:03 +0000 (11:53 -0700)
committer	Brian Warner <warner@allmydata.com>
	Fri, 30 Mar 2007 18:53:03 +0000 (11:53 -0700)
src/allmydata/codec.py		patch \| blob \| history
src/allmydata/encode_new.py		patch \| blob \| history
src/allmydata/interfaces.py		patch \| blob \| history
src/allmydata/test/test_encode.py		patch \| blob \| history
src/allmydata/upload.py		patch \| blob \| history