From: Brian Warner <warner@lothar.com>
Date: Fri, 6 Apr 2007 04:17:42 +0000 (-0700)
Subject: rename encode_new.py to encode.py, now that there isn't an old one anymore
X-Git-Url: https://git.rkrishnan.org/components/com_hotproperty/reliability?a=commitdiff_plain;h=919ca3e9029f84d347eb18976aad426f5c3a5ea9;p=tahoe-lafs%2Ftahoe-lafs.git

rename encode_new.py to encode.py, now that there isn't an old one anymore
---

diff --git a/src/allmydata/encode.py b/src/allmydata/encode.py
new file mode 100644
index 00000000..a0e92922
--- /dev/null
+++ b/src/allmydata/encode.py
@@ -0,0 +1,284 @@
+# -*- test-case-name: allmydata.test.test_encode -*-
+
+from zope.interface import implements
+from twisted.internet import defer
+from twisted.python import log
+from allmydata.chunk import HashTree, roundup_pow2
+from allmydata.Crypto.Cipher import AES
+from allmydata.util import mathutil, hashutil
+from allmydata.util.assertutil import _assert
+from allmydata.codec import CRSEncoder
+from allmydata.interfaces import IEncoder
+
+"""
+
+The goal of the encoder is to turn the original file into a series of
+'shares'. Each share is going to a 'shareholder' (nominally each shareholder
+is a different host, but for small meshes there may be overlap). The number
+of shares is chosen to hit our reliability goals (more shares on more
+machines means more reliability), and is limited by overhead (proportional to
+numshares or log(numshares)) and the encoding technology in use (Reed-Solomon
+only permits 256 shares total). It is also constrained by the amount of data
+we want to send to each host. For estimating purposes, think of 100 shares
+out of which we need 25 to reconstruct the file.
+
+The encoder starts by cutting the original file into segments. All segments
+except the last are of equal size. The segment size is chosen to constrain
+the memory footprint (which will probably vary between 1x and 4x segment
+size) and to constrain the overhead (which will be proportional to either the
+number of segments or log(number of segments)).
+
+
+Each segment (A,B,C) is read into memory, encrypted, and encoded into
+blocks. The 'share' (say, share #1) that makes it out to a host is a
+collection of these blocks (block A1, B1, C1), plus some hash-tree
+information necessary to validate the data upon retrieval. Only one segment
+is handled at a time: all blocks for segment A are delivered before any
+work is begun on segment B.
+
+As blocks are created, we retain the hash of each one. The list of
+block hashes for a single share (say, hash(A1), hash(B1), hash(C1)) is
+used to form the base of a Merkle hash tree for that share (hashtrees[1]).
+This hash tree has one terminal leaf per block. The complete block hash
+tree is sent to the shareholder after all the data has been sent. At
+retrieval time, the decoder will ask for specific pieces of this tree before
+asking for blocks, whichever it needs to validate those blocks.
+
+(Note: we don't really need to generate this whole block hash tree
+ourselves. It would be sufficient to have the shareholder generate it and
+just tell us the root. This gives us an extra level of validation on the
+transfer, though, and it is relatively cheap to compute.)
+
+Each of these block hash trees has a root hash. The root hashes for all
+shares are collected into the 'share hash tree', which
+has one terminal leaf per share. After sending the blocks and the complete
+block hash tree to each shareholder, we send them the portion of the share
+hash tree that is necessary to validate their share. The root of the share
+hash tree is put into the URI.
+
+"""
+
+def pad(s, l, c='\x00'):
+    """
+    Return string s with enough chars c appended to it to make its length be
+    an even multiple of l bytes.
+
+    @param s the original string
+    @param l the unit size in bytes; the padded length is a multiple of l
+    @param c the pad char
+    """
+    return s + c * mathutil.pad_size(len(s), l)
+
+KiB=1024
+MiB=1024*KiB
+GiB=1024*MiB
+TiB=1024*GiB
+PiB=1024*TiB
+
+class Encoder(object):
+    implements(IEncoder)
+    NEEDED_SHARES = 25
+    TOTAL_SHARES = 100
+
+    def setup(self, infile):
+        self.infile = infile
+        infile.seek(0, 2)
+        self.file_size = infile.tell()
+        infile.seek(0, 0)
+
+        self.num_shares = self.TOTAL_SHARES
+        self.required_shares = self.NEEDED_SHARES
+
+        self.segment_size = min(2*MiB, self.file_size)
+        # this must be a multiple of self.required_shares
+        self.segment_size = mathutil.next_multiple(self.segment_size,
+                                                   self.required_shares)
+        self.setup_codec()
+
+    def setup_codec(self):
+        assert self.segment_size % self.required_shares == 0
+        self._codec = CRSEncoder()
+        self._codec.set_params(self.segment_size,
+                               self.required_shares, self.num_shares)
+
+        # the "tail" is the last segment. This segment may or may not be
+        # shorter than all other segments. We use the "tail codec" to handle
+        # it. If the tail is short, we use a different codec instance. In
+        # addition, the tail codec must be fed data which has been padded out
+        # to the right size.
+        self.tail_size = self.file_size % self.segment_size
+        if not self.tail_size:
+            self.tail_size = self.segment_size
+
+        # the tail codec is responsible for encoding tail_size bytes
+        padded_tail_size = mathutil.next_multiple(self.tail_size,
+                                                  self.required_shares)
+        self._tail_codec = CRSEncoder()
+        self._tail_codec.set_params(padded_tail_size,
+                                    self.required_shares, self.num_shares)
+
+    def get_share_size(self):
+        share_size = mathutil.div_ceil(self.file_size, self.required_shares)
+        overhead = self.compute_overhead()
+        return share_size + overhead
+    def compute_overhead(self):
+        return 0
+    def get_block_size(self):
+        return self._codec.get_block_size()
+
+    def set_shareholders(self, landlords):
+        assert isinstance(landlords, dict)
+        for k in landlords:
+            # it would be nice to:
+            #assert RIBucketWriter.providedBy(landlords[k])
+            pass
+        self.landlords = landlords.copy()
+
+    def start(self):
+        """Encode and send all segments; the Deferred fires with the root hash."""
+        self.num_segments = mathutil.div_ceil(self.file_size,
+                                              self.segment_size)
+        self.share_size = mathutil.div_ceil(self.file_size,
+                                            self.required_shares)
+        self.setup_encryption()
+        self.setup_codec()
+        d = defer.succeed(None)
+        # default-arg binds i now; queued callbacks otherwise all see the last i
+        for i in range(self.num_segments-1):
+            d.addCallback(lambda res, i=i: self.do_segment(i))
+        d.addCallback(lambda res: self.do_tail_segment(self.num_segments-1))
+
+        d.addCallback(lambda res: self.send_all_subshare_hash_trees())
+        d.addCallback(lambda res: self.send_all_share_hash_trees())
+        d.addCallback(lambda res: self.close_all_shareholders())
+        d.addCallback(lambda res: self.done())
+        return d
+
+    def setup_encryption(self):
+        self.key = "\x00"*16
+        self.cryptor = AES.new(key=self.key, mode=AES.MODE_CTR,
+                               counterstart="\x00"*16)
+        self.segment_num = 0
+        self.subshare_hashes = [[] for x in range(self.num_shares)]
+        # subshare_hashes[i] is a list that will be accumulated and then sent
+        # to landlord[i]. This list contains a hash of each segment_share
+        # that we sent to that landlord.
+        self.share_root_hashes = [None] * self.num_shares
+
+    def do_segment(self, segnum):
+        chunks = []
+        codec = self._codec
+        # the ICodecEncoder API wants to receive a total of self.segment_size
+        # bytes on each encode() call, broken up into a number of
+        # identically-sized pieces. Due to the way the codec algorithm works,
+        # these pieces need to be the same size as the share which the codec
+        # will generate. Therefore we must feed it with input_piece_size that
+        # equals the output share size.
+        input_piece_size = codec.get_block_size()
+
+        # as a result, the number of input pieces per encode() call will be
+        # equal to the number of required shares with which the codec was
+        # constructed. You can think of the codec as chopping up a
+        # 'segment_size' of data into 'required_shares' shares (not doing any
+        # fancy math at all, just doing a split), then creating some number
+        # of additional shares which can be substituted if the primary ones
+        # are unavailable
+
+        for i in range(self.required_shares):
+            input_piece = self.infile.read(input_piece_size)
+            # non-tail segments should be the full segment size
+            assert len(input_piece) == input_piece_size
+            encrypted_piece = self.cryptor.encrypt(input_piece)
+            chunks.append(encrypted_piece)
+        d = codec.encode(chunks)
+        d.addCallback(self._encoded_segment, segnum)
+        return d
+
+    def do_tail_segment(self, segnum):
+        chunks = []
+        codec = self._tail_codec
+        input_piece_size = codec.get_block_size()
+
+        for i in range(self.required_shares):
+            input_piece = self.infile.read(input_piece_size)
+            if len(input_piece) < input_piece_size:
+                # padding
+                input_piece += ('\x00' * (input_piece_size - len(input_piece)))
+            encrypted_piece = self.cryptor.encrypt(input_piece)
+            chunks.append(encrypted_piece)
+        d = codec.encode(chunks)
+        d.addCallback(self._encoded_segment, segnum)
+        return d
+
+    def _encoded_segment(self, (shares, shareids), segnum):
+        _assert(set(shareids) == set(self.landlords.keys()),
+                shareids=shareids, landlords=self.landlords)
+        dl = []
+        for i in range(len(shares)):
+            subshare = shares[i]
+            shareid = shareids[i]
+            d = self.send_subshare(shareid, segnum, subshare)
+            dl.append(d)
+            subshare_hash = hashutil.tagged_hash("encoded subshare", subshare)
+            self.subshare_hashes[shareid].append(subshare_hash)
+        dl = defer.DeferredList(dl)
+        def _logit(res):
+            log.msg("%s uploaded %s / %s bytes of your file." % (self, self.segment_size*(segnum+1), self.segment_size*self.num_segments))
+            return res
+        dl.addCallback(_logit)
+        return dl
+
+    def send_subshare(self, shareid, segment_num, subshare):
+        return self.send(shareid, "put_block", segment_num, subshare)
+
+    def send(self, shareid, methname, *args, **kwargs):
+        ll = self.landlords[shareid]
+        return ll.callRemote(methname, *args, **kwargs)
+
+    def send_all_subshare_hash_trees(self):
+        dl = []
+        for shareid,hashes in enumerate(self.subshare_hashes):
+            # hashes is a list of the hashes of all subshares that were sent
+            # to shareholder[shareid].
+            dl.append(self.send_one_subshare_hash_tree(shareid, hashes))
+        return defer.DeferredList(dl)
+
+    def send_one_subshare_hash_tree(self, shareid, subshare_hashes):
+        t = HashTree(subshare_hashes)
+        all_hashes = list(t)
+        # all_hashes[0] is the root hash, == hash(ah[1]+ah[2])
+        # all_hashes[1] is the left child, == hash(ah[3]+ah[4])
+        # all_hashes[n] == hash(all_hashes[2*n+1] + all_hashes[2*n+2])
+        self.share_root_hashes[shareid] = t[0]
+        return self.send(shareid, "put_block_hashes", all_hashes)
+
+    def send_all_share_hash_trees(self):
+        dl = []
+        for h in self.share_root_hashes:
+            assert h
+        # create the share hash tree
+        t = HashTree(self.share_root_hashes)
+        # the root of this hash tree goes into our URI
+        self.root_hash = t[0]
+        # now send just the necessary pieces out to each shareholder
+        for i in range(self.num_shares):
+            # the HashTree is given a list of leaves: 0,1,2,3..n .
+            # These become nodes A+0,A+1,A+2.. of the tree, where A=n-1
+            tree_width = roundup_pow2(self.num_shares)
+            base_index = i + tree_width - 1
+            needed_hash_indices = t.needed_for(base_index)
+            hashes = [(hi, t[hi]) for hi in needed_hash_indices]
+            dl.append(self.send_one_share_hash_tree(i, hashes))
+        return defer.DeferredList(dl)
+
+    def send_one_share_hash_tree(self, shareid, needed_hashes):
+        return self.send(shareid, "put_share_hashes", needed_hashes)
+
+    def close_all_shareholders(self):
+        dl = []
+        for shareid in range(self.num_shares):
+            dl.append(self.send(shareid, "close"))
+        return defer.DeferredList(dl)
+
+    def done(self):
+        return self.root_hash
diff --git a/src/allmydata/encode_new.py b/src/allmydata/encode_new.py
deleted file mode 100644
index a0e92922..00000000
--- a/src/allmydata/encode_new.py
+++ /dev/null
@@ -1,284 +0,0 @@
-# -*- test-case-name: allmydata.test.test_encode -*-
-
-from zope.interface import implements
-from twisted.internet import defer
-from twisted.python import log
-from allmydata.chunk import HashTree, roundup_pow2
-from allmydata.Crypto.Cipher import AES
-from allmydata.util import mathutil, hashutil
-from allmydata.util.assertutil import _assert
-from allmydata.codec import CRSEncoder
-from allmydata.interfaces import IEncoder
-
-"""
-
-The goal of the encoder is to turn the original file into a series of
-'shares'. Each share is going to a 'shareholder' (nominally each shareholder
-is a different host, but for small meshes there may be overlap). The number
-of shares is chosen to hit our reliability goals (more shares on more
-machines means more reliability), and is limited by overhead (proportional to
-numshares or log(numshares)) and the encoding technology in use (Reed-Solomon
-only permits 256 shares total). It is also constrained by the amount of data
-we want to send to each host. For estimating purposes, think of 100 shares
-out of which we need 25 to reconstruct the file.
-
-The encoder starts by cutting the original file into segments. All segments
-except the last are of equal size. The segment size is chosen to constrain
-the memory footprint (which will probably vary between 1x and 4x segment
-size) and to constrain the overhead (which will be proportional to either the
-number of segments or log(number of segments)).
-
-
-Each segment (A,B,C) is read into memory, encrypted, and encoded into
-blocks. The 'share' (say, share #1) that makes it out to a host is a
-collection of these blocks (block A1, B1, C1), plus some hash-tree
-information necessary to validate the data upon retrieval. Only one segment
-is handled at a time: all blocks for segment A are delivered before any
-work is begun on segment B.
-
-As blocks are created, we retain the hash of each one. The list of
-block hashes for a single share (say, hash(A1), hash(B1), hash(C1)) is
-used to form the base of a Merkle hash tree for that share (hashtrees[1]).
-This hash tree has one terminal leaf per block. The complete block hash
-tree is sent to the shareholder after all the data has been sent. At
-retrieval time, the decoder will ask for specific pieces of this tree before
-asking for blocks, whichever it needs to validate those blocks.
-
-(Note: we don't really need to generate this whole block hash tree
-ourselves. It would be sufficient to have the shareholder generate it and
-just tell us the root. This gives us an extra level of validation on the
-transfer, though, and it is relatively cheap to compute.)
-
-Each of these block hash trees has a root hash. The collection of these
-root hashes for all shares are collected into the 'share hash tree', which
-has one terminal leaf per share. After sending the blocks and the complete
-block hash tree to each shareholder, we send them the portion of the share
-hash tree that is necessary to validate their share. The root of the share
-hash tree is put into the URI.
-
-"""
-
-def pad(s, l, c='\x00'):
-    """
-    Return string s with enough chars c appended to it to make its length be
-    an even multiple of l bytes.
-
-    @param s the original string
-    @param l the length of the resulting padded string in bytes
-    @param c the pad char
-    """
-    return s + c * mathutil.pad_size(len(s), l)
-
-KiB=1024
-MiB=1024*KiB
-GiB=1024*MiB
-TiB=1024*GiB
-PiB=1024*TiB
-
-class Encoder(object):
-    implements(IEncoder)
-    NEEDED_SHARES = 25
-    TOTAL_SHARES = 100
-
-    def setup(self, infile):
-        self.infile = infile
-        infile.seek(0, 2)
-        self.file_size = infile.tell()
-        infile.seek(0, 0)
-
-        self.num_shares = self.TOTAL_SHARES
-        self.required_shares = self.NEEDED_SHARES
-
-        self.segment_size = min(2*MiB, self.file_size)
-        # this must be a multiple of self.required_shares
-        self.segment_size = mathutil.next_multiple(self.segment_size,
-                                                   self.required_shares)
-        self.setup_codec()
-
-    def setup_codec(self):
-        assert self.segment_size % self.required_shares == 0
-        self._codec = CRSEncoder()
-        self._codec.set_params(self.segment_size,
-                               self.required_shares, self.num_shares)
-
-        # the "tail" is the last segment. This segment may or may not be
-        # shorter than all other segments. We use the "tail codec" to handle
-        # it. If the tail is short, we use a different codec instance. In
-        # addition, the tail codec must be fed data which has been padded out
-        # to the right size.
-        self.tail_size = self.file_size % self.segment_size
-        if not self.tail_size:
-            self.tail_size = self.segment_size
-
-        # the tail codec is responsible for encoding tail_size bytes
-        padded_tail_size = mathutil.next_multiple(self.tail_size,
-                                                  self.required_shares)
-        self._tail_codec = CRSEncoder()
-        self._tail_codec.set_params(padded_tail_size,
-                                    self.required_shares, self.num_shares)
-
-    def get_share_size(self):
-        share_size = mathutil.div_ceil(self.file_size, self.required_shares)
-        overhead = self.compute_overhead()
-        return share_size + overhead
-    def compute_overhead(self):
-        return 0
-    def get_block_size(self):
-        return self._codec.get_block_size()
-
-    def set_shareholders(self, landlords):
-        assert isinstance(landlords, dict)
-        for k in landlords:
-            # it would be nice to:
-            #assert RIBucketWriter.providedBy(landlords[k])
-            pass
-        self.landlords = landlords.copy()
-
-    def start(self):
-        #paddedsize = self._size + mathutil.pad_size(self._size, self.needed_shares)
-        self.num_segments = mathutil.div_ceil(self.file_size,
-                                              self.segment_size)
-        self.share_size = mathutil.div_ceil(self.file_size,
-                                            self.required_shares)
-        self.setup_encryption()
-        self.setup_codec()
-        d = defer.succeed(None)
-
-        for i in range(self.num_segments-1):
-            d.addCallback(lambda res: self.do_segment(i))
-        d.addCallback(lambda res: self.do_tail_segment(self.num_segments-1))
-
-        d.addCallback(lambda res: self.send_all_subshare_hash_trees())
-        d.addCallback(lambda res: self.send_all_share_hash_trees())
-        d.addCallback(lambda res: self.close_all_shareholders())
-        d.addCallback(lambda res: self.done())
-        return d
-
-    def setup_encryption(self):
-        self.key = "\x00"*16
-        self.cryptor = AES.new(key=self.key, mode=AES.MODE_CTR,
-                               counterstart="\x00"*16)
-        self.segment_num = 0
-        self.subshare_hashes = [[] for x in range(self.num_shares)]
-        # subshare_hashes[i] is a list that will be accumulated and then send
-        # to landlord[i]. This list contains a hash of each segment_share
-        # that we sent to that landlord.
-        self.share_root_hashes = [None] * self.num_shares
-
-    def do_segment(self, segnum):
-        chunks = []
-        codec = self._codec
-        # the ICodecEncoder API wants to receive a total of self.segment_size
-        # bytes on each encode() call, broken up into a number of
-        # identically-sized pieces. Due to the way the codec algorithm works,
-        # these pieces need to be the same size as the share which the codec
-        # will generate. Therefore we must feed it with input_piece_size that
-        # equals the output share size.
-        input_piece_size = codec.get_block_size()
-
-        # as a result, the number of input pieces per encode() call will be
-        # equal to the number of required shares with which the codec was
-        # constructed. You can think of the codec as chopping up a
-        # 'segment_size' of data into 'required_shares' shares (not doing any
-        # fancy math at all, just doing a split), then creating some number
-        # of additional shares which can be substituted if the primary ones
-        # are unavailable
-
-        for i in range(self.required_shares):
-            input_piece = self.infile.read(input_piece_size)
-            # non-tail segments should be the full segment size
-            assert len(input_piece) == input_piece_size
-            encrypted_piece = self.cryptor.encrypt(input_piece)
-            chunks.append(encrypted_piece)
-        d = codec.encode(chunks)
-        d.addCallback(self._encoded_segment, segnum)
-        return d
-
-    def do_tail_segment(self, segnum):
-        chunks = []
-        codec = self._tail_codec
-        input_piece_size = codec.get_block_size()
-
-        for i in range(self.required_shares):
-            input_piece = self.infile.read(input_piece_size)
-            if len(input_piece) < input_piece_size:
-                # padding
-                input_piece += ('\x00' * (input_piece_size - len(input_piece)))
-            encrypted_piece = self.cryptor.encrypt(input_piece)
-            chunks.append(encrypted_piece)
-        d = codec.encode(chunks)
-        d.addCallback(self._encoded_segment, segnum)
-        return d
-
-    def _encoded_segment(self, (shares, shareids), segnum):
-        _assert(set(shareids) == set(self.landlords.keys()),
-                shareids=shareids, landlords=self.landlords)
-        dl = []
-        for i in range(len(shares)):
-            subshare = shares[i]
-            shareid = shareids[i]
-            d = self.send_subshare(shareid, segnum, subshare)
-            dl.append(d)
-            subshare_hash = hashutil.tagged_hash("encoded subshare", subshare)
-            self.subshare_hashes[shareid].append(subshare_hash)
-        dl = defer.DeferredList(dl)
-        def _logit(res):
-            log.msg("%s uploaded %s / %s bytes of your file." % (self, self.segment_size*(segnum+1), self.segment_size*self.num_segments))
-            return res
-        dl.addCallback(_logit)
-        return dl
-
-    def send_subshare(self, shareid, segment_num, subshare):
-        return self.send(shareid, "put_block", segment_num, subshare)
-
-    def send(self, shareid, methname, *args, **kwargs):
-        ll = self.landlords[shareid]
-        return ll.callRemote(methname, *args, **kwargs)
-
-    def send_all_subshare_hash_trees(self):
-        dl = []
-        for shareid,hashes in enumerate(self.subshare_hashes):
-            # hashes is a list of the hashes of all subshares that were sent
-            # to shareholder[shareid].
-            dl.append(self.send_one_subshare_hash_tree(shareid, hashes))
-        return defer.DeferredList(dl)
-
-    def send_one_subshare_hash_tree(self, shareid, subshare_hashes):
-        t = HashTree(subshare_hashes)
-        all_hashes = list(t)
-        # all_hashes[0] is the root hash, == hash(ah[1]+ah[2])
-        # all_hashes[1] is the left child, == hash(ah[3]+ah[4])
-        # all_hashes[n] == hash(all_hashes[2*n+1] + all_hashes[2*n+2])
-        self.share_root_hashes[shareid] = t[0]
-        return self.send(shareid, "put_block_hashes", all_hashes)
-
-    def send_all_share_hash_trees(self):
-        dl = []
-        for h in self.share_root_hashes:
-            assert h
-        # create the share hash tree
-        t = HashTree(self.share_root_hashes)
-        # the root of this hash tree goes into our URI
-        self.root_hash = t[0]
-        # now send just the necessary pieces out to each shareholder
-        for i in range(self.num_shares):
-            # the HashTree is given a list of leaves: 0,1,2,3..n .
-            # These become nodes A+0,A+1,A+2.. of the tree, where A=n-1
-            tree_width = roundup_pow2(self.num_shares)
-            base_index = i + tree_width - 1
-            needed_hash_indices = t.needed_for(base_index)
-            hashes = [(hi, t[hi]) for hi in needed_hash_indices]
-            dl.append(self.send_one_share_hash_tree(i, hashes))
-        return defer.DeferredList(dl)
-
-    def send_one_share_hash_tree(self, shareid, needed_hashes):
-        return self.send(shareid, "put_share_hashes", needed_hashes)
-
-    def close_all_shareholders(self):
-        dl = []
-        for shareid in range(self.num_shares):
-            dl.append(self.send(shareid, "close"))
-        return defer.DeferredList(dl)
-
-    def done(self):
-        return self.root_hash
diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py
index e0383759..71a91d7e 100644
--- a/src/allmydata/test/test_encode.py
+++ b/src/allmydata/test/test_encode.py
@@ -3,11 +3,11 @@
 from twisted.trial import unittest
 from twisted.internet import defer
 from foolscap import eventual
-from allmydata import encode_new, download
+from allmydata import encode, download
 from allmydata.uri import pack_uri
 from cStringIO import StringIO
 
-class MyEncoder(encode_new.Encoder):
+class MyEncoder(encode.Encoder):
     def send(self, share_num, methname, *args, **kwargs):
         if False and share_num < 10:
             print "send[%d].%s()" % (share_num, methname)
@@ -92,7 +92,7 @@ class FakeBucketWriter:
 
 class Encode(unittest.TestCase):
     def test_send(self):
-        e = encode_new.Encoder()
+        e = encode.Encoder()
         data = "happy happy joy joy" * 4
         e.setup(StringIO(data))
         NUM_SHARES = 100
@@ -131,7 +131,7 @@ class Encode(unittest.TestCase):
 
 class Roundtrip(unittest.TestCase):
     def send_and_recover(self, NUM_SHARES, NUM_PEERS, NUM_SEGMENTS=4):
-        e = encode_new.Encoder()
+        e = encode.Encoder()
         data = "happy happy joy joy" * 4
         e.setup(StringIO(data))
 
diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py
index e77cec5e..bb8b8329 100644
--- a/src/allmydata/upload.py
+++ b/src/allmydata/upload.py
@@ -5,7 +5,7 @@ from twisted.application import service
 from foolscap import Referenceable
 
 from allmydata.util import idlib
-from allmydata import encode_new
+from allmydata import encode
 from allmydata.uri import pack_uri
 from allmydata.interfaces import IUploadable, IUploader
 
@@ -92,7 +92,7 @@ class FileUploader:
         assert self.needed_shares
 
         # create the encoder, so we can know how large the shares will be
-        self._encoder = encode_new.Encoder()
+        self._encoder = encode.Encoder()
         self._encoder.setup(self._filehandle)
         share_size = self._encoder.get_share_size()
         block_size = self._encoder.get_block_size()