+++ /dev/null
-
-"""
-Given a StorageIndex, count how many shares we can find.
-
-This does no verification of the shares whatsoever. If a peer claims to
-have a share, we believe it.
-"""
-
-from zope.interface import implements
-from twisted.internet import defer
-from twisted.python import log
-from allmydata.interfaces import IVerifierURI, ICheckerResults
-from allmydata import download, storage
-from allmydata.util import hashutil, base32
-
-class Results:
- implements(ICheckerResults)
-
- def __init__(self, storage_index):
- # storage_index might be None for, say, LIT files
- self.storage_index = storage_index
- if storage_index is None:
- self.storage_index_s = "<none>"
- else:
- self.storage_index_s = base32.b2a(storage_index)[:6]
-
- def is_healthy(self):
- return self.healthy
-
- def html_summary(self):
- if self.healthy:
- return "<span>healthy</span>"
- return "<span>NOT HEALTHY</span>"
-
- def html(self):
- s = "<div>\n"
- s += "<h1>Checker Results for Immutable SI=%s</h1>\n" % self.storage_index_s
- if self.healthy:
- s += "<h2>Healthy!</h2>\n"
- else:
- s += "<h2>Not Healthy!</h2>\n"
- s += "</div>\n"
- return s
-
-
-class SimpleCHKFileChecker:
- """Return a list of (needed, total, found, sharemap), where sharemap maps
- share number to a list of (binary) nodeids of the shareholders."""
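-    # Illustrative (hypothetical) values: checking a healthy 3-of-10 file
-    # could yield stuff = (3, 10, 10, {0: [peerid_a], 1: [peerid_b], ...}),
-    # where each peerid identifies a server claiming to hold that share.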
-
- def __init__(self, peer_getter, uri_to_check):
- self.peer_getter = peer_getter
- self.found_shares = set()
- self.uri_to_check = IVerifierURI(uri_to_check)
- self.sharemap = {}
-
- '''
- def check_synchronously(self, si):
- # this is how we would write this class if we were using synchronous
- # messages (or if we used promises).
- found = set()
-        for (pmpeerid, peerid, connection) in self.peer_getter(si):
- buckets = connection.get_buckets(si)
- found.update(buckets.keys())
- return len(found)
- '''
-
- def check(self):
- d = self._get_all_shareholders(self.uri_to_check.storage_index)
- d.addCallback(self._done)
- return d
-
- def _get_all_shareholders(self, storage_index):
- dl = []
- for (peerid, ss) in self.peer_getter("storage", storage_index):
- d = ss.callRemote("get_buckets", storage_index)
- d.addCallbacks(self._got_response, self._got_error,
- callbackArgs=(peerid,))
- dl.append(d)
- return defer.DeferredList(dl)
-
- def _got_response(self, buckets, peerid):
-        # buckets is a dict: maps shnum to an rref of the server that holds it
- self.found_shares.update(buckets.keys())
- for k in buckets:
- if k not in self.sharemap:
- self.sharemap[k] = []
- self.sharemap[k].append(peerid)
-
-    def _got_error(self, f):
-        # errors (including KeyError) are logged but do not abort the check;
-        # other servers may still report their shares
-        log.err(f)
-
- def _done(self, res):
- u = self.uri_to_check
- r = Results(self.uri_to_check.storage_index)
- r.healthy = bool(len(self.found_shares) >= u.needed_shares)
- r.stuff = (u.needed_shares, u.total_shares, len(self.found_shares),
- self.sharemap)
- return r
-
-class VerifyingOutput:
- def __init__(self, total_length, results):
- self._crypttext_hasher = hashutil.crypttext_hasher()
- self.length = 0
- self.total_length = total_length
- self._segment_number = 0
- self._crypttext_hash_tree = None
- self._opened = False
- self._results = results
- results.healthy = False
-
- def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree):
- self._crypttext_hash_tree = crypttext_hashtree
-
- def write_segment(self, crypttext):
- self.length += len(crypttext)
-
- self._crypttext_hasher.update(crypttext)
- if self._crypttext_hash_tree:
- ch = hashutil.crypttext_segment_hasher()
- ch.update(crypttext)
- crypttext_leaves = {self._segment_number: ch.digest()}
- self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves)
-
- self._segment_number += 1
-
- def close(self):
- self.crypttext_hash = self._crypttext_hasher.digest()
-
- def finish(self):
- self._results.healthy = True
- return self._results
-
-
-class SimpleCHKFileVerifier(download.FileDownloader):
- # this reconstructs the crypttext, which verifies that at least 'k' of
- # the shareholders are around and have valid data. It does not check the
- # remaining shareholders, and it cannot verify the plaintext.
- check_plaintext_hash = False
-
- def __init__(self, client, u):
- self._client = client
-
- u = IVerifierURI(u)
- self._storage_index = u.storage_index
- self._uri_extension_hash = u.uri_extension_hash
- self._total_shares = u.total_shares
- self._size = u.size
- self._num_needed_shares = u.needed_shares
-
- self._si_s = storage.si_b2a(self._storage_index)
- self.init_logging()
-
- r = Results(self._storage_index)
- self._output = VerifyingOutput(self._size, r)
- self._paused = False
- self._stopped = False
-
- self._results = None
- self.active_buckets = {} # k: shnum, v: bucket
- self._share_buckets = [] # list of (sharenum, bucket) tuples
- self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets
- self._uri_extension_sources = []
-
- self._uri_extension_data = None
-
- self._fetch_failures = {"uri_extension": 0,
- "plaintext_hashroot": 0,
- "plaintext_hashtree": 0,
- "crypttext_hashroot": 0,
- "crypttext_hashtree": 0,
- }
-
- def init_logging(self):
- self._log_prefix = prefix = storage.si_b2a(self._storage_index)[:5]
- num = self._client.log("SimpleCHKFileVerifier(%s): starting" % prefix)
- self._log_number = num
-
- def log(self, msg, parent=None):
- if parent is None:
- parent = self._log_number
- return self._client.log("SimpleCHKFileVerifier(%s): %s"
- % (self._log_prefix, msg),
- parent=parent)
-
-
- def start(self):
- log.msg("starting download [%s]" % storage.si_b2a(self._storage_index)[:5])
-
- # first step: who should we download from?
- d = defer.maybeDeferred(self._get_all_shareholders)
- d.addCallback(self._got_all_shareholders)
- # now get the uri_extension block from somebody and validate it
- d.addCallback(self._obtain_uri_extension)
- d.addCallback(self._got_uri_extension)
- d.addCallback(self._get_hashtrees)
- d.addCallback(self._create_validated_buckets)
- # once we know that, we can download blocks from everybody
- d.addCallback(self._download_all_segments)
- d.addCallback(self._done)
- return d
-
import allmydata
from allmydata.storage import StorageServer
-from allmydata.upload import Uploader
-from allmydata.download import Downloader
+from allmydata.immutable.upload import Uploader
+from allmydata.immutable.download import Downloader
+from allmydata.immutable.filenode import FileNode, LiteralFileNode
from allmydata.offloaded import Helper
from allmydata.control import ControlServer
from allmydata.introducer.client import IntroducerClient
from allmydata.util import hashutil, base32, testutil
-from allmydata.filenode import FileNode, LiteralFileNode
from allmydata.uri import LiteralFileURI
from allmydata.dirnode import NewDirectoryNode
from allmydata.mutable.node import MutableFileNode, MutableWatcher
from foolscap import Referenceable
from allmydata.interfaces import RIControlClient
from allmydata.util import testutil, fileutil, mathutil
-from allmydata import upload, download
+from allmydata.immutable import upload, download
from twisted.python import log
def get_memory_usage():
+++ /dev/null
-
-import os, random, weakref, itertools, time
-from zope.interface import implements
-from twisted.internet import defer
-from twisted.internet.interfaces import IPushProducer, IConsumer
-from twisted.application import service
-from foolscap.eventual import eventually
-
-from allmydata.util import base32, mathutil, hashutil, log
-from allmydata.util.assertutil import _assert
-from allmydata import codec, hashtree, storage, uri
-from allmydata.interfaces import IDownloadTarget, IDownloader, IFileURI, \
- IDownloadStatus, IDownloadResults
-from allmydata.encode import NotEnoughSharesError
-from pycryptopp.cipher.aes import AES
-
-class HaveAllPeersError(Exception):
- # we use this to jump out of the loop
- pass
-
-class BadURIExtensionHashValue(Exception):
- pass
-class BadPlaintextHashValue(Exception):
- pass
-class BadCrypttextHashValue(Exception):
- pass
-
-class DownloadStopped(Exception):
- pass
-
-class DownloadResults:
- implements(IDownloadResults)
-
- def __init__(self):
- self.servers_used = set()
- self.server_problems = {}
- self.servermap = {}
- self.timings = {}
- self.file_size = None
-
-class Output:
- def __init__(self, downloadable, key, total_length, log_parent,
- download_status):
- self.downloadable = downloadable
- self._decryptor = AES(key)
- self._crypttext_hasher = hashutil.crypttext_hasher()
- self._plaintext_hasher = hashutil.plaintext_hasher()
- self.length = 0
- self.total_length = total_length
- self._segment_number = 0
- self._plaintext_hash_tree = None
- self._crypttext_hash_tree = None
- self._opened = False
- self._log_parent = log_parent
- self._status = download_status
- self._status.set_progress(0.0)
-
- def log(self, *args, **kwargs):
- if "parent" not in kwargs:
- kwargs["parent"] = self._log_parent
- if "facility" not in kwargs:
- kwargs["facility"] = "download.output"
- return log.msg(*args, **kwargs)
-
- def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree):
- self._plaintext_hash_tree = plaintext_hashtree
- self._crypttext_hash_tree = crypttext_hashtree
-
- def write_segment(self, crypttext):
- self.length += len(crypttext)
- self._status.set_progress( float(self.length) / self.total_length )
-
- # memory footprint: 'crypttext' is the only segment_size usage
- # outstanding. While we decrypt it into 'plaintext', we hit
- # 2*segment_size.
- self._crypttext_hasher.update(crypttext)
- if self._crypttext_hash_tree:
- ch = hashutil.crypttext_segment_hasher()
- ch.update(crypttext)
- crypttext_leaves = {self._segment_number: ch.digest()}
- self.log(format="crypttext leaf hash (%(bytes)sB) [%(segnum)d] is %(hash)s",
- bytes=len(crypttext),
- segnum=self._segment_number, hash=base32.b2a(ch.digest()),
- level=log.NOISY)
- self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves)
-
- plaintext = self._decryptor.process(crypttext)
- del crypttext
-
- # now we're back down to 1*segment_size.
-
- self._plaintext_hasher.update(plaintext)
- if self._plaintext_hash_tree:
- ph = hashutil.plaintext_segment_hasher()
- ph.update(plaintext)
- plaintext_leaves = {self._segment_number: ph.digest()}
- self.log(format="plaintext leaf hash (%(bytes)sB) [%(segnum)d] is %(hash)s",
- bytes=len(plaintext),
- segnum=self._segment_number, hash=base32.b2a(ph.digest()),
- level=log.NOISY)
- self._plaintext_hash_tree.set_hashes(leaves=plaintext_leaves)
-
- self._segment_number += 1
- # We're still at 1*segment_size. The Downloadable is responsible for
- # any memory usage beyond this.
- if not self._opened:
- self._opened = True
- self.downloadable.open(self.total_length)
- self.downloadable.write(plaintext)
-
- def fail(self, why):
- # this is really unusual, and deserves maximum forensics
- if why.check(DownloadStopped):
-            # DownloadStopped just means the consumer aborted the download,
-            # which is not so scary
- self.log("download stopped", level=log.UNUSUAL)
- else:
- self.log("download failed!", failure=why, level=log.SCARY)
- self.downloadable.fail(why)
-
- def close(self):
- self.crypttext_hash = self._crypttext_hasher.digest()
- self.plaintext_hash = self._plaintext_hasher.digest()
- self.log("download finished, closing IDownloadable", level=log.NOISY)
- self.downloadable.close()
-
- def finish(self):
- return self.downloadable.finish()
-
-class ValidatedBucket:
- """I am a front-end for a remote storage bucket, responsible for
- retrieving and validating data from that bucket.
-
- My get_block() method is used by BlockDownloaders.
- """
-
- def __init__(self, sharenum, bucket,
- share_hash_tree, roothash,
- num_blocks):
- self.sharenum = sharenum
- self.bucket = bucket
- self._share_hash = None # None means not validated yet
- self.share_hash_tree = share_hash_tree
- self._roothash = roothash
- self.block_hash_tree = hashtree.IncompleteHashTree(num_blocks)
- self.started = False
-
- def get_block(self, blocknum):
- if not self.started:
- d = self.bucket.start()
- def _started(res):
- self.started = True
- return self.get_block(blocknum)
- d.addCallback(_started)
- return d
-
- # the first time we use this bucket, we need to fetch enough elements
- # of the share hash tree to validate it from our share hash up to the
- # hashroot.
- if not self._share_hash:
- d1 = self.bucket.get_share_hashes()
- else:
- d1 = defer.succeed([])
-
- # we might need to grab some elements of our block hash tree, to
- # validate the requested block up to the share hash
- needed = self.block_hash_tree.needed_hashes(blocknum)
- if needed:
- # TODO: get fewer hashes, use get_block_hashes(needed)
- d2 = self.bucket.get_block_hashes()
- else:
- d2 = defer.succeed([])
-
- d3 = self.bucket.get_block(blocknum)
-
- d = defer.gatherResults([d1, d2, d3])
- d.addCallback(self._got_data, blocknum)
- return d
-
- def _got_data(self, res, blocknum):
- sharehashes, blockhashes, blockdata = res
- blockhash = None # to make logging it safe
-
- try:
- if not self._share_hash:
- sh = dict(sharehashes)
- sh[0] = self._roothash # always use our own root, from the URI
- sht = self.share_hash_tree
- if sht.get_leaf_index(self.sharenum) not in sh:
- raise hashtree.NotEnoughHashesError
- sht.set_hashes(sh)
- self._share_hash = sht.get_leaf(self.sharenum)
-
- blockhash = hashutil.block_hash(blockdata)
- #log.msg("checking block_hash(shareid=%d, blocknum=%d) len=%d "
- # "%r .. %r: %s" %
- # (self.sharenum, blocknum, len(blockdata),
- # blockdata[:50], blockdata[-50:], base32.b2a(blockhash)))
-
- # we always validate the blockhash
- bh = dict(enumerate(blockhashes))
- # replace blockhash root with validated value
- bh[0] = self._share_hash
- self.block_hash_tree.set_hashes(bh, {blocknum: blockhash})
-
- except (hashtree.BadHashError, hashtree.NotEnoughHashesError):
- # log.WEIRD: indicates undetected disk/network error, or more
- # likely a programming error
- log.msg("hash failure in block=%d, shnum=%d on %s" %
- (blocknum, self.sharenum, self.bucket))
- if self._share_hash:
- log.msg(""" failure occurred when checking the block_hash_tree.
- This suggests that either the block data was bad, or that the
- block hashes we received along with it were bad.""")
- else:
- log.msg(""" the failure probably occurred when checking the
- share_hash_tree, which suggests that the share hashes we
- received from the remote peer were bad.""")
- log.msg(" have self._share_hash: %s" % bool(self._share_hash))
- log.msg(" block length: %d" % len(blockdata))
- log.msg(" block hash: %s" % base32.b2a_or_none(blockhash))
- if len(blockdata) < 100:
- log.msg(" block data: %r" % (blockdata,))
- else:
- log.msg(" block data start/end: %r .. %r" %
- (blockdata[:50], blockdata[-50:]))
- log.msg(" root hash: %s" % base32.b2a(self._roothash))
- log.msg(" share hash tree:\n" + self.share_hash_tree.dump())
- log.msg(" block hash tree:\n" + self.block_hash_tree.dump())
- lines = []
- for i,h in sorted(sharehashes):
- lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
- log.msg(" sharehashes:\n" + "\n".join(lines) + "\n")
- lines = []
- for i,h in enumerate(blockhashes):
- lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
- log.msg(" blockhashes:\n" + "\n".join(lines) + "\n")
- raise
-
- # If we made it here, the block is good. If the hash trees didn't
- # like what they saw, they would have raised a BadHashError, causing
- # our caller to see a Failure and thus ignore this block (as well as
- # dropping this bucket).
- return blockdata
-
-
-
-class BlockDownloader:
- """I am responsible for downloading a single block (from a single bucket)
- for a single segment.
-
- I am a child of the SegmentDownloader.
- """
-
- def __init__(self, vbucket, blocknum, parent, results):
- self.vbucket = vbucket
- self.blocknum = blocknum
- self.parent = parent
- self.results = results
- self._log_number = self.parent.log("starting block %d" % blocknum)
-
- def log(self, msg, parent=None):
- if parent is None:
- parent = self._log_number
- return self.parent.log(msg, parent=parent)
-
- def start(self, segnum):
- lognum = self.log("get_block(segnum=%d)" % segnum)
- started = time.time()
- d = self.vbucket.get_block(segnum)
- d.addCallbacks(self._hold_block, self._got_block_error,
- callbackArgs=(started, lognum,), errbackArgs=(lognum,))
- return d
-
- def _hold_block(self, data, started, lognum):
- if self.results:
- elapsed = time.time() - started
- peerid = self.vbucket.bucket.get_peerid()
- if peerid not in self.results.timings["fetch_per_server"]:
- self.results.timings["fetch_per_server"][peerid] = []
- self.results.timings["fetch_per_server"][peerid].append(elapsed)
- self.log("got block", parent=lognum)
- self.parent.hold_block(self.blocknum, data)
-
- def _got_block_error(self, f, lognum):
- self.log("BlockDownloader[%d] got error: %s" % (self.blocknum, f),
- parent=lognum)
- if self.results:
- peerid = self.vbucket.bucket.get_peerid()
- self.results.server_problems[peerid] = str(f)
- self.parent.bucket_failed(self.vbucket)
-
-class SegmentDownloader:
- """I am responsible for downloading all the blocks for a single segment
- of data.
-
- I am a child of the FileDownloader.
- """
-
- def __init__(self, parent, segmentnumber, needed_shares, results):
- self.parent = parent
- self.segmentnumber = segmentnumber
- self.needed_blocks = needed_shares
- self.blocks = {} # k: blocknum, v: data
- self.results = results
- self._log_number = self.parent.log("starting segment %d" %
- segmentnumber)
-
- def log(self, msg, parent=None):
- if parent is None:
- parent = self._log_number
- return self.parent.log(msg, parent=parent)
-
- def start(self):
- return self._download()
-
- def _download(self):
- d = self._try()
- def _done(res):
- if len(self.blocks) >= self.needed_blocks:
- # we only need self.needed_blocks blocks
- # we want to get the smallest blockids, because they are
- # more likely to be fast "primary blocks"
- blockids = sorted(self.blocks.keys())[:self.needed_blocks]
- blocks = []
- for blocknum in blockids:
- blocks.append(self.blocks[blocknum])
- return (blocks, blockids)
- else:
- return self._download()
- d.addCallback(_done)
- return d
-
- def _try(self):
- # fill our set of active buckets, maybe raising NotEnoughSharesError
- active_buckets = self.parent._activate_enough_buckets()
- # Now we have enough buckets, in self.parent.active_buckets.
-
- # in test cases, bd.start might mutate active_buckets right away, so
- # we need to put off calling start() until we've iterated all the way
- # through it.
- downloaders = []
- for blocknum, vbucket in active_buckets.iteritems():
- bd = BlockDownloader(vbucket, blocknum, self, self.results)
- downloaders.append(bd)
- if self.results:
- self.results.servers_used.add(vbucket.bucket.get_peerid())
- l = [bd.start(self.segmentnumber) for bd in downloaders]
- return defer.DeferredList(l, fireOnOneErrback=True)
-
- def hold_block(self, blocknum, data):
- self.blocks[blocknum] = data
-
- def bucket_failed(self, vbucket):
- self.parent.bucket_failed(vbucket)
-
-class DownloadStatus:
- implements(IDownloadStatus)
- statusid_counter = itertools.count(0)
-
- def __init__(self):
- self.storage_index = None
- self.size = None
- self.helper = False
- self.status = "Not started"
- self.progress = 0.0
- self.paused = False
- self.stopped = False
- self.active = True
- self.results = None
- self.counter = self.statusid_counter.next()
- self.started = time.time()
-
- def get_started(self):
- return self.started
- def get_storage_index(self):
- return self.storage_index
- def get_size(self):
- return self.size
- def using_helper(self):
- return self.helper
- def get_status(self):
- status = self.status
- if self.paused:
- status += " (output paused)"
- if self.stopped:
- status += " (output stopped)"
- return status
- def get_progress(self):
- return self.progress
- def get_active(self):
- return self.active
- def get_results(self):
- return self.results
- def get_counter(self):
- return self.counter
-
- def set_storage_index(self, si):
- self.storage_index = si
- def set_size(self, size):
- self.size = size
- def set_helper(self, helper):
- self.helper = helper
- def set_status(self, status):
- self.status = status
- def set_paused(self, paused):
- self.paused = paused
- def set_stopped(self, stopped):
- self.stopped = stopped
- def set_progress(self, value):
- self.progress = value
- def set_active(self, value):
- self.active = value
- def set_results(self, value):
- self.results = value
-
-class FileDownloader:
- implements(IPushProducer)
- check_crypttext_hash = True
- check_plaintext_hash = True
- _status = None
-
- def __init__(self, client, u, downloadable):
- self._client = client
-
- u = IFileURI(u)
- self._storage_index = u.storage_index
- self._uri_extension_hash = u.uri_extension_hash
- self._total_shares = u.total_shares
- self._size = u.size
- self._num_needed_shares = u.needed_shares
-
- self._si_s = storage.si_b2a(self._storage_index)
- self.init_logging()
-
- self._started = time.time()
- self._status = s = DownloadStatus()
- s.set_status("Starting")
- s.set_storage_index(self._storage_index)
- s.set_size(self._size)
- s.set_helper(False)
- s.set_active(True)
-
- self._results = DownloadResults()
- s.set_results(self._results)
- self._results.file_size = self._size
- self._results.timings["servers_peer_selection"] = {}
- self._results.timings["fetch_per_server"] = {}
- self._results.timings["cumulative_fetch"] = 0.0
- self._results.timings["cumulative_decode"] = 0.0
- self._results.timings["cumulative_decrypt"] = 0.0
- self._results.timings["paused"] = 0.0
-
- if IConsumer.providedBy(downloadable):
- downloadable.registerProducer(self, True)
- self._downloadable = downloadable
- self._output = Output(downloadable, u.key, self._size, self._log_number,
- self._status)
- self._paused = False
- self._stopped = False
-
- self.active_buckets = {} # k: shnum, v: bucket
- self._share_buckets = [] # list of (sharenum, bucket) tuples
- self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets
- self._uri_extension_sources = []
-
- self._uri_extension_data = None
-
- self._fetch_failures = {"uri_extension": 0,
- "plaintext_hashroot": 0,
- "plaintext_hashtree": 0,
- "crypttext_hashroot": 0,
- "crypttext_hashtree": 0,
- }
-
- def init_logging(self):
- self._log_prefix = prefix = storage.si_b2a(self._storage_index)[:5]
- num = self._client.log(format="FileDownloader(%(si)s): starting",
- si=storage.si_b2a(self._storage_index))
- self._log_number = num
-
- def log(self, *args, **kwargs):
- if "parent" not in kwargs:
- kwargs["parent"] = self._log_number
- if "facility" not in kwargs:
- kwargs["facility"] = "tahoe.download"
- return log.msg(*args, **kwargs)
-
- def pauseProducing(self):
- if self._paused:
- return
- self._paused = defer.Deferred()
- self._paused_at = time.time()
- if self._status:
- self._status.set_paused(True)
-
- def resumeProducing(self):
- if self._paused:
- paused_for = time.time() - self._paused_at
- self._results.timings['paused'] += paused_for
- p = self._paused
- self._paused = None
- eventually(p.callback, None)
- if self._status:
- self._status.set_paused(False)
-
- def stopProducing(self):
- self.log("Download.stopProducing")
- self._stopped = True
- self.resumeProducing()
- if self._status:
- self._status.set_stopped(True)
- self._status.set_active(False)
-
- def start(self):
- self.log("starting download")
-
- # first step: who should we download from?
- d = defer.maybeDeferred(self._get_all_shareholders)
- d.addCallback(self._got_all_shareholders)
- # now get the uri_extension block from somebody and validate it
- d.addCallback(self._obtain_uri_extension)
- d.addCallback(self._got_uri_extension)
- d.addCallback(self._get_hashtrees)
- d.addCallback(self._create_validated_buckets)
- # once we know that, we can download blocks from everybody
- d.addCallback(self._download_all_segments)
- def _finished(res):
- if self._status:
- self._status.set_status("Finished")
- self._status.set_active(False)
- self._status.set_paused(False)
- if IConsumer.providedBy(self._downloadable):
- self._downloadable.unregisterProducer()
- return res
- d.addBoth(_finished)
- def _failed(why):
- if self._status:
- self._status.set_status("Failed")
- self._status.set_active(False)
- self._output.fail(why)
- return why
- d.addErrback(_failed)
- d.addCallback(self._done)
- return d
-
- def _get_all_shareholders(self):
- dl = []
- for (peerid,ss) in self._client.get_permuted_peers("storage",
- self._storage_index):
- d = ss.callRemote("get_buckets", self._storage_index)
- d.addCallbacks(self._got_response, self._got_error,
- callbackArgs=(peerid,))
- dl.append(d)
- self._responses_received = 0
- self._queries_sent = len(dl)
- if self._status:
- self._status.set_status("Locating Shares (%d/%d)" %
- (self._responses_received,
- self._queries_sent))
- return defer.DeferredList(dl)
-
- def _got_response(self, buckets, peerid):
- self._responses_received += 1
- if self._results:
- elapsed = time.time() - self._started
- self._results.timings["servers_peer_selection"][peerid] = elapsed
- if self._status:
- self._status.set_status("Locating Shares (%d/%d)" %
- (self._responses_received,
- self._queries_sent))
- for sharenum, bucket in buckets.iteritems():
- b = storage.ReadBucketProxy(bucket, peerid, self._si_s)
- self.add_share_bucket(sharenum, b)
- self._uri_extension_sources.append(b)
- if self._results:
- if peerid not in self._results.servermap:
- self._results.servermap[peerid] = set()
- self._results.servermap[peerid].add(sharenum)
-
- def add_share_bucket(self, sharenum, bucket):
- # this is split out for the benefit of test_encode.py
- self._share_buckets.append( (sharenum, bucket) )
-
- def _got_error(self, f):
- self._client.log("Somebody failed. -- %s" % (f,))
-
- def bucket_failed(self, vbucket):
- shnum = vbucket.sharenum
- del self.active_buckets[shnum]
- s = self._share_vbuckets[shnum]
- # s is a set of ValidatedBucket instances
- s.remove(vbucket)
- # ... which might now be empty
- if not s:
- # there are no more buckets which can provide this share, so
- # remove the key. This may prompt us to use a different share.
- del self._share_vbuckets[shnum]
-
- def _got_all_shareholders(self, res):
- if self._results:
- now = time.time()
- self._results.timings["peer_selection"] = now - self._started
-
- if len(self._share_buckets) < self._num_needed_shares:
- raise NotEnoughSharesError
-
- #for s in self._share_vbuckets.values():
- # for vb in s:
- # assert isinstance(vb, ValidatedBucket), \
- # "vb is %s but should be a ValidatedBucket" % (vb,)
-
- def _unpack_uri_extension_data(self, data):
- return uri.unpack_extension(data)
-
- def _obtain_uri_extension(self, ignored):
- # all shareholders are supposed to have a copy of uri_extension, and
- # all are supposed to be identical. We compute the hash of the data
- # that comes back, and compare it against the version in our URI. If
- # they don't match, ignore their data and try someone else.
- if self._status:
- self._status.set_status("Obtaining URI Extension")
-
- self._uri_extension_fetch_started = time.time()
- def _validate(proposal, bucket):
- h = hashutil.uri_extension_hash(proposal)
- if h != self._uri_extension_hash:
- self._fetch_failures["uri_extension"] += 1
- msg = ("The copy of uri_extension we received from "
- "%s was bad: wanted %s, got %s" %
- (bucket,
- base32.b2a(self._uri_extension_hash),
- base32.b2a(h)))
- self.log(msg, level=log.SCARY)
- raise BadURIExtensionHashValue(msg)
- return self._unpack_uri_extension_data(proposal)
- return self._obtain_validated_thing(None,
- self._uri_extension_sources,
- "uri_extension",
- "get_uri_extension", (), _validate)
-
- def _obtain_validated_thing(self, ignored, sources, name, methname, args,
- validatorfunc):
- if not sources:
- raise NotEnoughSharesError("started with zero peers while fetching "
- "%s" % name)
- bucket = sources[0]
- sources = sources[1:]
- #d = bucket.callRemote(methname, *args)
- d = bucket.startIfNecessary()
- d.addCallback(lambda res: getattr(bucket, methname)(*args))
- d.addCallback(validatorfunc, bucket)
- def _bad(f):
- self.log("%s from vbucket %s failed:" % (name, bucket),
- failure=f, level=log.WEIRD)
- if not sources:
- raise NotEnoughSharesError("ran out of peers, last error was %s"
- % (f,))
- # try again with a different one
- return self._obtain_validated_thing(None, sources, name,
- methname, args, validatorfunc)
- d.addErrback(_bad)
- return d
-
- def _got_uri_extension(self, uri_extension_data):
- if self._results:
- elapsed = time.time() - self._uri_extension_fetch_started
- self._results.timings["uri_extension"] = elapsed
-
- d = self._uri_extension_data = uri_extension_data
-
- self._codec = codec.get_decoder_by_name(d['codec_name'])
- self._codec.set_serialized_params(d['codec_params'])
- self._tail_codec = codec.get_decoder_by_name(d['codec_name'])
- self._tail_codec.set_serialized_params(d['tail_codec_params'])
-
- crypttext_hash = d.get('crypttext_hash', None) # optional
- if crypttext_hash:
- assert isinstance(crypttext_hash, str)
- assert len(crypttext_hash) == 32
- self._crypttext_hash = crypttext_hash
- self._plaintext_hash = d.get('plaintext_hash', None) # optional
-
- self._roothash = d['share_root_hash']
-
- self._segment_size = segment_size = d['segment_size']
- self._total_segments = mathutil.div_ceil(self._size, segment_size)
- self._current_segnum = 0
-
- self._share_hashtree = hashtree.IncompleteHashTree(d['total_shares'])
- self._share_hashtree.set_hashes({0: self._roothash})
-
- def _get_hashtrees(self, res):
- self._get_hashtrees_started = time.time()
- if self._status:
- self._status.set_status("Retrieving Hash Trees")
- d = defer.maybeDeferred(self._get_plaintext_hashtrees)
- d.addCallback(self._get_crypttext_hashtrees)
- d.addCallback(self._setup_hashtrees)
- return d
-
- def _get_plaintext_hashtrees(self):
- # plaintext hashes are optional. If the root isn't in the UEB, then
- # the share will be holding an empty list. We don't even bother
- # fetching it.
- if "plaintext_root_hash" not in self._uri_extension_data:
- self._plaintext_hashtree = None
- return
- def _validate_plaintext_hashtree(proposal, bucket):
- if proposal[0] != self._uri_extension_data['plaintext_root_hash']:
- self._fetch_failures["plaintext_hashroot"] += 1
- msg = ("The copy of the plaintext_root_hash we received from"
- " %s was bad" % bucket)
- raise BadPlaintextHashValue(msg)
- pt_hashtree = hashtree.IncompleteHashTree(self._total_segments)
- pt_hashes = dict(list(enumerate(proposal)))
- try:
- pt_hashtree.set_hashes(pt_hashes)
- except hashtree.BadHashError:
- # the hashes they gave us were not self-consistent, even
- # though the root matched what we saw in the uri_extension
- # block
- self._fetch_failures["plaintext_hashtree"] += 1
- raise
- self._plaintext_hashtree = pt_hashtree
- d = self._obtain_validated_thing(None,
- self._uri_extension_sources,
- "plaintext_hashes",
- "get_plaintext_hashes", (),
- _validate_plaintext_hashtree)
- return d
-
- def _get_crypttext_hashtrees(self, res):
- # crypttext hashes are optional too
- if "crypttext_root_hash" not in self._uri_extension_data:
- self._crypttext_hashtree = None
- return
- def _validate_crypttext_hashtree(proposal, bucket):
- if proposal[0] != self._uri_extension_data['crypttext_root_hash']:
- self._fetch_failures["crypttext_hashroot"] += 1
- msg = ("The copy of the crypttext_root_hash we received from"
- " %s was bad" % bucket)
- raise BadCrypttextHashValue(msg)
- ct_hashtree = hashtree.IncompleteHashTree(self._total_segments)
- ct_hashes = dict(list(enumerate(proposal)))
- try:
- ct_hashtree.set_hashes(ct_hashes)
- except hashtree.BadHashError:
- self._fetch_failures["crypttext_hashtree"] += 1
- raise
- self._crypttext_hashtree = ct_hashtree
- d = self._obtain_validated_thing(None,
- self._uri_extension_sources,
- "crypttext_hashes",
- "get_crypttext_hashes", (),
- _validate_crypttext_hashtree)
- return d
-
- def _setup_hashtrees(self, res):
- self._output.setup_hashtrees(self._plaintext_hashtree,
- self._crypttext_hashtree)
- if self._results:
- elapsed = time.time() - self._get_hashtrees_started
- self._results.timings["hashtrees"] = elapsed
-
- def _create_validated_buckets(self, ignored=None):
- self._share_vbuckets = {}
- for sharenum, bucket in self._share_buckets:
- vbucket = ValidatedBucket(sharenum, bucket,
- self._share_hashtree,
- self._roothash,
- self._total_segments)
- s = self._share_vbuckets.setdefault(sharenum, set())
- s.add(vbucket)
-
- def _activate_enough_buckets(self):
- """either return a mapping from shnum to a ValidatedBucket that can
- provide data for that share, or raise NotEnoughSharesError"""
-
- while len(self.active_buckets) < self._num_needed_shares:
- # need some more
- handled_shnums = set(self.active_buckets.keys())
- available_shnums = set(self._share_vbuckets.keys())
- potential_shnums = list(available_shnums - handled_shnums)
- if not potential_shnums:
- raise NotEnoughSharesError
- # choose a random share
- shnum = random.choice(potential_shnums)
- # and a random bucket that will provide it
- validated_bucket = random.choice(list(self._share_vbuckets[shnum]))
- self.active_buckets[shnum] = validated_bucket
- return self.active_buckets
-
-
- def _download_all_segments(self, res):
- # the promise: upon entry to this function, self._share_vbuckets
- # contains enough buckets to complete the download, and some extra
- # ones to tolerate some buckets dropping out or having errors.
- # self._share_vbuckets is a dictionary that maps from shnum to a set
- # of ValidatedBuckets, which themselves are wrappers around
- # RIBucketReader references.
- self.active_buckets = {} # k: shnum, v: ValidatedBucket instance
-
- self._started_fetching = time.time()
-
- d = defer.succeed(None)
- for segnum in range(self._total_segments-1):
- d.addCallback(self._download_segment, segnum)
- # this pause, at the end of write, prevents pre-fetch from
- # happening until the consumer is ready for more data.
- d.addCallback(self._check_for_pause)
- d.addCallback(self._download_tail_segment, self._total_segments-1)
- return d
-
- def _check_for_pause(self, res):
- if self._paused:
- d = defer.Deferred()
- self._paused.addCallback(lambda ignored: d.callback(res))
- return d
- if self._stopped:
- raise DownloadStopped("our Consumer called stopProducing()")
- return res
-
- def _download_segment(self, res, segnum):
- if self._status:
- self._status.set_status("Downloading segment %d of %d" %
- (segnum+1, self._total_segments))
- self.log("downloading seg#%d of %d (%d%%)"
- % (segnum, self._total_segments,
- 100.0 * segnum / self._total_segments))
- # memory footprint: when the SegmentDownloader finishes pulling down
- # all shares, we have 1*segment_size of usage.
- segmentdler = SegmentDownloader(self, segnum, self._num_needed_shares,
- self._results)
- started = time.time()
- d = segmentdler.start()
- def _finished_fetching(res):
- elapsed = time.time() - started
- self._results.timings["cumulative_fetch"] += elapsed
- return res
- if self._results:
- d.addCallback(_finished_fetching)
- # pause before using more memory
- d.addCallback(self._check_for_pause)
- # while the codec does its job, we hit 2*segment_size
- def _started_decode(res):
- self._started_decode = time.time()
- return res
- if self._results:
- d.addCallback(_started_decode)
- d.addCallback(lambda (shares, shareids):
- self._codec.decode(shares, shareids))
- # once the codec is done, we drop back to 1*segment_size, because
- # 'shares' goes out of scope. The memory usage is all in the
- # plaintext now, spread out into a bunch of tiny buffers.
- def _finished_decode(res):
- elapsed = time.time() - self._started_decode
- self._results.timings["cumulative_decode"] += elapsed
- return res
- if self._results:
- d.addCallback(_finished_decode)
-
- # pause/check-for-stop just before writing, to honor stopProducing
- d.addCallback(self._check_for_pause)
- def _done(buffers):
- # we start by joining all these buffers together into a single
- # string. This makes Output.write easier, since it wants to hash
- # data one segment at a time anyways, and doesn't impact our
- # memory footprint since we're already peaking at 2*segment_size
- # inside the codec a moment ago.
- segment = "".join(buffers)
- del buffers
- # we're down to 1*segment_size right now, but write_segment()
- # will decrypt a copy of the segment internally, which will push
- # us up to 2*segment_size while it runs.
- started_decrypt = time.time()
- self._output.write_segment(segment)
- if self._results:
- elapsed = time.time() - started_decrypt
- self._results.timings["cumulative_decrypt"] += elapsed
- d.addCallback(_done)
- return d
-
- def _download_tail_segment(self, res, segnum):
- self.log("downloading seg#%d of %d (%d%%)"
- % (segnum, self._total_segments,
- 100.0 * segnum / self._total_segments))
- segmentdler = SegmentDownloader(self, segnum, self._num_needed_shares,
- self._results)
- started = time.time()
- d = segmentdler.start()
- def _finished_fetching(res):
- elapsed = time.time() - started
- self._results.timings["cumulative_fetch"] += elapsed
- return res
- if self._results:
- d.addCallback(_finished_fetching)
- # pause before using more memory
- d.addCallback(self._check_for_pause)
- def _started_decode(res):
- self._started_decode = time.time()
- return res
- if self._results:
- d.addCallback(_started_decode)
- d.addCallback(lambda (shares, shareids):
- self._tail_codec.decode(shares, shareids))
- def _finished_decode(res):
- elapsed = time.time() - self._started_decode
- self._results.timings["cumulative_decode"] += elapsed
- return res
- if self._results:
- d.addCallback(_finished_decode)
- # pause/check-for-stop just before writing, to honor stopProducing
- d.addCallback(self._check_for_pause)
- def _done(buffers):
- # trim off any padding added by the upload side
- segment = "".join(buffers)
- del buffers
- # we never send empty segments. If the data was an exact multiple
- # of the segment size, the last segment will be full.
- pad_size = mathutil.pad_size(self._size, self._segment_size)
- tail_size = self._segment_size - pad_size
- segment = segment[:tail_size]
- started_decrypt = time.time()
- self._output.write_segment(segment)
- if self._results:
- elapsed = time.time() - started_decrypt
- self._results.timings["cumulative_decrypt"] += elapsed
- d.addCallback(_done)
- return d
-
- def _done(self, res):
- self.log("download done")
- if self._results:
- now = time.time()
- self._results.timings["total"] = now - self._started
- self._results.timings["segments"] = now - self._started_fetching
- self._output.close()
- if self.check_crypttext_hash and self._crypttext_hash:
- _assert(self._crypttext_hash == self._output.crypttext_hash,
- "bad crypttext_hash: computed=%s, expected=%s" %
- (base32.b2a(self._output.crypttext_hash),
- base32.b2a(self._crypttext_hash)))
- if self.check_plaintext_hash and self._plaintext_hash:
- _assert(self._plaintext_hash == self._output.plaintext_hash,
- "bad plaintext_hash: computed=%s, expected=%s" %
- (base32.b2a(self._output.plaintext_hash),
- base32.b2a(self._plaintext_hash)))
- _assert(self._output.length == self._size,
- got=self._output.length, expected=self._size)
- return self._output.finish()
-
- def get_download_status(self):
- return self._status
-
-
-class LiteralDownloader:
- def __init__(self, client, u, downloadable):
- self._uri = IFileURI(u)
- assert isinstance(self._uri, uri.LiteralFileURI)
- self._downloadable = downloadable
- self._status = s = DownloadStatus()
- s.set_storage_index(None)
- s.set_helper(False)
- s.set_status("Done")
- s.set_active(False)
- s.set_progress(1.0)
-
- def start(self):
- data = self._uri.data
- self._status.set_size(len(data))
- self._downloadable.open(len(data))
- self._downloadable.write(data)
- self._downloadable.close()
- return defer.maybeDeferred(self._downloadable.finish)
-
- def get_download_status(self):
- return self._status
-
-class FileName:
- implements(IDownloadTarget)
- def __init__(self, filename):
- self._filename = filename
- self.f = None
- def open(self, size):
- self.f = open(self._filename, "wb")
- return self.f
- def write(self, data):
- self.f.write(data)
- def close(self):
- if self.f:
- self.f.close()
- def fail(self, why):
- if self.f:
- self.f.close()
- os.unlink(self._filename)
- def register_canceller(self, cb):
- pass # we won't use it
- def finish(self):
- pass
-
-class Data:
- implements(IDownloadTarget)
- def __init__(self):
- self._data = []
- def open(self, size):
- pass
- def write(self, data):
- self._data.append(data)
- def close(self):
- self.data = "".join(self._data)
- del self._data
- def fail(self, why):
- del self._data
- def register_canceller(self, cb):
- pass # we won't use it
- def finish(self):
- return self.data
-
-class FileHandle:
- """Use me to download data to a pre-defined filehandle-like object. I
- will use the target's write() method. I will *not* close the filehandle:
- I leave that up to the originator of the filehandle. The download process
- will return the filehandle when it completes.
- """
- implements(IDownloadTarget)
- def __init__(self, filehandle):
- self._filehandle = filehandle
- def open(self, size):
- pass
- def write(self, data):
- self._filehandle.write(data)
- def close(self):
- # the originator of the filehandle reserves the right to close it
- pass
- def fail(self, why):
- pass
- def register_canceller(self, cb):
- pass
- def finish(self):
- return self._filehandle
-
-class Downloader(service.MultiService):
- """I am a service that allows file downloading.
- """
- implements(IDownloader)
- name = "downloader"
- MAX_DOWNLOAD_STATUSES = 10
-
- def __init__(self, stats_provider=None):
- service.MultiService.__init__(self)
- self.stats_provider = stats_provider
- self._all_downloads = weakref.WeakKeyDictionary() # for debugging
- self._all_download_statuses = weakref.WeakKeyDictionary()
- self._recent_download_statuses = []
-
- def download(self, u, t):
- assert self.parent
- assert self.running
- u = IFileURI(u)
- t = IDownloadTarget(t)
- assert t.write
- assert t.close
-
-
- if isinstance(u, uri.LiteralFileURI):
- dl = LiteralDownloader(self.parent, u, t)
- elif isinstance(u, uri.CHKFileURI):
- if self.stats_provider:
- # these counters are meant for network traffic, and don't
- # include LIT files
- self.stats_provider.count('downloader.files_downloaded', 1)
- self.stats_provider.count('downloader.bytes_downloaded', u.get_size())
- dl = FileDownloader(self.parent, u, t)
- else:
- raise RuntimeError("I don't know how to download a %s" % u)
- self._add_download(dl)
- d = dl.start()
- return d
-
- # utility functions
- def download_to_data(self, uri):
- return self.download(uri, Data())
- def download_to_filename(self, uri, filename):
- return self.download(uri, FileName(filename))
- def download_to_filehandle(self, uri, filehandle):
- return self.download(uri, FileHandle(filehandle))
-
- def _add_download(self, downloader):
- self._all_downloads[downloader] = None
- s = downloader.get_download_status()
- self._all_download_statuses[s] = None
- self._recent_download_statuses.append(s)
- while len(self._recent_download_statuses) > self.MAX_DOWNLOAD_STATUSES:
- self._recent_download_statuses.pop(0)
-
- def list_all_download_statuses(self):
- for ds in self._all_download_statuses:
- yield ds
+++ /dev/null
-# -*- test-case-name: allmydata.test.test_encode -*-
-
-import time
-from zope.interface import implements
-from twisted.internet import defer
-from foolscap import eventual
-from allmydata import storage, uri
-from allmydata.hashtree import HashTree
-from allmydata.util import mathutil, hashutil, base32, log
-from allmydata.util.assertutil import _assert, precondition
-from allmydata.codec import CRSEncoder
-from allmydata.interfaces import IEncoder, IStorageBucketWriter, \
- IEncryptedUploadable, IUploadStatus
-
-"""
-The goal of the encoder is to turn the original file into a series of
-'shares'. Each share is going to a 'shareholder' (nominally each shareholder
-is a different host, but for small grids there may be overlap). The number
-of shares is chosen to hit our reliability goals (more shares on more
-machines means more reliability), and is limited by overhead (proportional to
-numshares or log(numshares)) and the encoding technology in use (zfec permits
-only 256 shares total). It is also constrained by the amount of data
-we want to send to each host. For estimating purposes, think of 10 shares
-out of which we need 3 to reconstruct the file.
-
-The encoder starts by cutting the original file into segments. All segments
-except the last are of equal size. The segment size is chosen to constrain
-the memory footprint (which will probably vary between 1x and 4x segment
-size) and to constrain the overhead (which will be proportional to
-log(number of segments)).
-
-
-Each segment (A,B,C) is read into memory, encrypted, and encoded into
-blocks. The 'share' (say, share #1) that makes it out to a host is a
-collection of these blocks (block A1, B1, C1), plus some hash-tree
-information necessary to validate the data upon retrieval. Only one segment
-is handled at a time: all blocks for segment A are delivered before any
-work is begun on segment B.
-
-As blocks are created, we retain the hash of each one. The list of block hashes
-for a single share (say, hash(A1), hash(B1), hash(C1)) is used to form the base
-of a Merkle hash tree for that share, called the block hash tree.
-
-This hash tree has one terminal leaf per block. The complete block hash
-tree is sent to the shareholder after all the data has been sent. At
-retrieval time, the decoder will ask for whichever pieces of this tree it
-needs to validate those blocks before asking for the blocks themselves.
-
-(Note: we don't really need to generate this whole block hash tree
-ourselves. It would be sufficient to have the shareholder generate it and
-just tell us the root. This gives us an extra level of validation on the
-transfer, though, and it is relatively cheap to compute.)
-
-Each of these block hash trees has a root hash. The root hashes of all
-shares are collected into the 'share hash tree', which has one terminal
-leaf per share. After sending the blocks and the complete
-block hash tree to each shareholder, we send them the portion of the share
-hash tree that is necessary to validate their share. The root of the share
-hash tree is put into the URI.
-
-"""
-
-class NotEnoughSharesError(Exception):
- servermap = None
- pass
-
-class UploadAborted(Exception):
- pass
-
-KiB=1024
-MiB=1024*KiB
-GiB=1024*MiB
-TiB=1024*GiB
-PiB=1024*TiB
-
-class Encoder(object):
- implements(IEncoder)
- USE_PLAINTEXT_HASHES = False
-
- def __init__(self, log_parent=None, upload_status=None):
- object.__init__(self)
- self.uri_extension_data = {}
- self._codec = None
- self._status = None
- if upload_status:
- self._status = IUploadStatus(upload_status)
- precondition(log_parent is None or isinstance(log_parent, int),
- log_parent)
- self._log_number = log.msg("creating Encoder %s" % self,
- facility="tahoe.encoder", parent=log_parent)
- self._aborted = False
-
- def __repr__(self):
- if hasattr(self, "_storage_index"):
- return "<Encoder for %s>" % storage.si_b2a(self._storage_index)[:5]
- return "<Encoder for unknown storage index>"
-
- def log(self, *args, **kwargs):
- if "parent" not in kwargs:
- kwargs["parent"] = self._log_number
- if "facility" not in kwargs:
- kwargs["facility"] = "tahoe.encoder"
- return log.msg(*args, **kwargs)
-
- def set_encrypted_uploadable(self, uploadable):
- eu = self._uploadable = IEncryptedUploadable(uploadable)
- d = eu.get_size()
- def _got_size(size):
- self.log(format="file size: %(size)d", size=size)
- self.file_size = size
- d.addCallback(_got_size)
- d.addCallback(lambda res: eu.get_all_encoding_parameters())
- d.addCallback(self._got_all_encoding_parameters)
- d.addCallback(lambda res: eu.get_storage_index())
- def _done(storage_index):
- self._storage_index = storage_index
- return self
- d.addCallback(_done)
- return d
-
- def _got_all_encoding_parameters(self, params):
- assert not self._codec
- k, happy, n, segsize = params
- self.required_shares = k
- self.shares_of_happiness = happy
- self.num_shares = n
- self.segment_size = segsize
- self.log("got encoding parameters: %d/%d/%d %d" % (k,happy,n, segsize))
- self.log("now setting up codec")
-
- assert self.segment_size % self.required_shares == 0
-
- self.num_segments = mathutil.div_ceil(self.file_size,
- self.segment_size)
-
- self._codec = CRSEncoder()
- self._codec.set_params(self.segment_size,
- self.required_shares, self.num_shares)
-
- data = self.uri_extension_data
- data['codec_name'] = self._codec.get_encoder_type()
- data['codec_params'] = self._codec.get_serialized_params()
-
- data['size'] = self.file_size
- data['segment_size'] = self.segment_size
- self.share_size = mathutil.div_ceil(self.file_size,
- self.required_shares)
- data['num_segments'] = self.num_segments
- data['needed_shares'] = self.required_shares
- data['total_shares'] = self.num_shares
-
- # the "tail" is the last segment. This segment may or may not be
- # shorter than all other segments. We use the "tail codec" to handle
- # it. If the tail is short, we use a different codec instance. In
- # addition, the tail codec must be fed data which has been padded out
- # to the right size.
- self.tail_size = self.file_size % self.segment_size
- if not self.tail_size:
- self.tail_size = self.segment_size
-
- # the tail codec is responsible for encoding tail_size bytes
- padded_tail_size = mathutil.next_multiple(self.tail_size,
- self.required_shares)
- self._tail_codec = CRSEncoder()
- self._tail_codec.set_params(padded_tail_size,
- self.required_shares, self.num_shares)
- data['tail_codec_params'] = self._tail_codec.get_serialized_params()
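-        # (illustrative, hypothetical numbers: with required_shares=3,
-        #  segment_size=900, and file_size=2500, the tail holds the last
-        #  2500 % 900 = 700 bytes, padded out to 702 so the tail codec can
-        #  split them into three equal 234-byte pieces)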
-
- def _get_share_size(self):
- share_size = mathutil.div_ceil(self.file_size, self.required_shares)
- overhead = self._compute_overhead()
- return share_size + overhead
-
- def _compute_overhead(self):
- return 0
-
- def get_param(self, name):
- assert self._codec
-
- if name == "storage_index":
- return self._storage_index
- elif name == "share_counts":
- return (self.required_shares, self.shares_of_happiness,
- self.num_shares)
- elif name == "num_segments":
- return self.num_segments
- elif name == "segment_size":
- return self.segment_size
- elif name == "block_size":
- return self._codec.get_block_size()
- elif name == "share_size":
- return self._get_share_size()
- elif name == "serialized_params":
- return self._codec.get_serialized_params()
- else:
- raise KeyError("unknown parameter name '%s'" % name)
-
- def set_shareholders(self, landlords):
- assert isinstance(landlords, dict)
- for k in landlords:
- assert IStorageBucketWriter.providedBy(landlords[k])
- self.landlords = landlords.copy()
-
- def start(self):
- self.log("%s starting" % (self,))
- #paddedsize = self._size + mathutil.pad_size(self._size, self.needed_shares)
- assert self._codec
- self._crypttext_hasher = hashutil.crypttext_hasher()
- self._crypttext_hashes = []
- self.segment_num = 0
- self.subshare_hashes = [[] for x in range(self.num_shares)]
-        # subshare_hashes[i] is a list that will be accumulated and then sent
- # to landlord[i]. This list contains a hash of each segment_share
- # that we sent to that landlord.
- self.share_root_hashes = [None] * self.num_shares
-
- self._times = {
- "cumulative_encoding": 0.0,
- "cumulative_sending": 0.0,
- "hashes_and_close": 0.0,
- "total_encode_and_push": 0.0,
- }
- self._start_total_timestamp = time.time()
-
- d = eventual.fireEventually()
-
- d.addCallback(lambda res: self.start_all_shareholders())
-
- for i in range(self.num_segments-1):
- # note to self: this form doesn't work, because lambda only
- # captures the slot, not the value
- #d.addCallback(lambda res: self.do_segment(i))
- # use this form instead:
- d.addCallback(lambda res, i=i: self._encode_segment(i))
- d.addCallback(self._send_segment, i)
- d.addCallback(self._turn_barrier)
- last_segnum = self.num_segments - 1
- d.addCallback(lambda res: self._encode_tail_segment(last_segnum))
- d.addCallback(self._send_segment, last_segnum)
- d.addCallback(self._turn_barrier)
-
- d.addCallback(lambda res: self.finish_hashing())
-
- if self.USE_PLAINTEXT_HASHES:
- d.addCallback(lambda res:
- self.send_plaintext_hash_tree_to_all_shareholders())
- d.addCallback(lambda res:
- self.send_crypttext_hash_tree_to_all_shareholders())
- d.addCallback(lambda res: self.send_all_subshare_hash_trees())
- d.addCallback(lambda res: self.send_all_share_hash_trees())
- d.addCallback(lambda res: self.send_uri_extension_to_all_shareholders())
-
- d.addCallback(lambda res: self.close_all_shareholders())
- d.addCallbacks(self.done, self.err)
- return d
-
- def set_status(self, status):
- if self._status:
- self._status.set_status(status)
-
- def set_encode_and_push_progress(self, sent_segments=None, extra=0.0):
- if self._status:
- # we treat the final hash+close as an extra segment
- if sent_segments is None:
- sent_segments = self.num_segments
- progress = float(sent_segments + extra) / (self.num_segments + 1)
- self._status.set_progress(2, progress)
-
- def abort(self):
- self.log("aborting upload", level=log.UNUSUAL)
- assert self._codec, "don't call abort before start"
- self._aborted = True
- # the next segment read (in _gather_data inside _encode_segment) will
- # raise UploadAborted(), which will bypass the rest of the upload
- # chain. If we've sent the final segment's shares, it's too late to
- # abort. TODO: allow abort any time up to close_all_shareholders.
-
- def _turn_barrier(self, res):
- # putting this method in a Deferred chain imposes a guaranteed
- # reactor turn between the pre- and post- portions of that chain.
- # This can be useful to limit memory consumption: since Deferreds do
- # not do tail recursion, code which uses defer.succeed(result) for
- # consistency will cause objects to live for longer than you might
- # normally expect.
-
- return eventual.fireEventually(res)
-
-
- def start_all_shareholders(self):
- self.log("starting shareholders", level=log.NOISY)
- self.set_status("Starting shareholders")
- dl = []
- for shareid in self.landlords:
- d = self.landlords[shareid].start()
- d.addErrback(self._remove_shareholder, shareid, "start")
- dl.append(d)
- return self._gather_responses(dl)
-
- def _encode_segment(self, segnum):
- codec = self._codec
- start = time.time()
-
- # the ICodecEncoder API wants to receive a total of self.segment_size
- # bytes on each encode() call, broken up into a number of
- # identically-sized pieces. Due to the way the codec algorithm works,
- # these pieces need to be the same size as the share which the codec
- # will generate. Therefore we must feed it with input_piece_size that
- # equals the output share size.
- input_piece_size = codec.get_block_size()
-
- # as a result, the number of input pieces per encode() call will be
- # equal to the number of required shares with which the codec was
- # constructed. You can think of the codec as chopping up a
- # 'segment_size' of data into 'required_shares' shares (not doing any
- # fancy math at all, just doing a split), then creating some number
- # of additional shares which can be substituted if the primary ones
- # are unavailable
-
- crypttext_segment_hasher = hashutil.crypttext_segment_hasher()
-
- # memory footprint: we only hold a tiny piece of the plaintext at any
-        # given time. We build up a segment's worth of crypttext, then hand
- # it to the encoder. Assuming 3-of-10 encoding (3.3x expansion) and
- # 1MiB max_segment_size, we get a peak memory footprint of 4.3*1MiB =
- # 4.3MiB. Lowering max_segment_size to, say, 100KiB would drop the
- # footprint to 430KiB at the expense of more hash-tree overhead.
-
- d = self._gather_data(self.required_shares, input_piece_size,
- crypttext_segment_hasher)
- def _done_gathering(chunks):
- for c in chunks:
- assert len(c) == input_piece_size
- self._crypttext_hashes.append(crypttext_segment_hasher.digest())
- # during this call, we hit 5*segsize memory
- return codec.encode(chunks)
- d.addCallback(_done_gathering)
- def _done(res):
- elapsed = time.time() - start
- self._times["cumulative_encoding"] += elapsed
- return res
- d.addCallback(_done)
- return d
-
- def _encode_tail_segment(self, segnum):
-
- start = time.time()
- codec = self._tail_codec
- input_piece_size = codec.get_block_size()
-
- crypttext_segment_hasher = hashutil.crypttext_segment_hasher()
-
- d = self._gather_data(self.required_shares, input_piece_size,
- crypttext_segment_hasher,
- allow_short=True)
- def _done_gathering(chunks):
- for c in chunks:
- # a short trailing chunk will have been padded by
- # _gather_data
- assert len(c) == input_piece_size
- self._crypttext_hashes.append(crypttext_segment_hasher.digest())
- return codec.encode(chunks)
- d.addCallback(_done_gathering)
- def _done(res):
- elapsed = time.time() - start
- self._times["cumulative_encoding"] += elapsed
- return res
- d.addCallback(_done)
- return d
-
- def _gather_data(self, num_chunks, input_chunk_size,
- crypttext_segment_hasher,
- allow_short=False,
- previous_chunks=[]):
- """Return a Deferred that will fire when the required number of
- chunks have been read (and hashed and encrypted). The Deferred fires
- with the combination of any 'previous_chunks' and the new chunks
- which were gathered."""
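-        # For example (hypothetical numbers): _gather_data(3, 43691, hasher)
-        # chains three read_encrypted() calls and eventually fires with a
-        # list of three 43691-byte strings, each of which has already been
-        # fed to both the per-segment and the whole-file crypttext hashers.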
-
- if self._aborted:
- raise UploadAborted()
-
- if not num_chunks:
- return defer.succeed(previous_chunks)
-
- d = self._uploadable.read_encrypted(input_chunk_size, False)
- def _got(data):
- if self._aborted:
- raise UploadAborted()
- encrypted_pieces = []
- length = 0
- while data:
- encrypted_piece = data.pop(0)
- length += len(encrypted_piece)
- crypttext_segment_hasher.update(encrypted_piece)
- self._crypttext_hasher.update(encrypted_piece)
- encrypted_pieces.append(encrypted_piece)
-
- if allow_short:
- if length < input_chunk_size:
- # padding
- pad_size = input_chunk_size - length
- encrypted_pieces.append('\x00' * pad_size)
- else:
- # non-tail segments should be the full segment size
- if length != input_chunk_size:
- log.msg("non-tail segment should be full segment size: %d!=%d"
- % (length, input_chunk_size), level=log.BAD)
- precondition(length == input_chunk_size,
- "length=%d != input_chunk_size=%d" %
- (length, input_chunk_size))
-
- encrypted_piece = "".join(encrypted_pieces)
- return previous_chunks + [encrypted_piece]
-
- d.addCallback(_got)
- d.addCallback(lambda chunks:
- self._gather_data(num_chunks-1, input_chunk_size,
- crypttext_segment_hasher,
- allow_short, chunks))
- return d
-
- def _send_segment(self, (shares, shareids), segnum):
- # To generate the URI, we must generate the roothash, so we must
- # generate all shares, even if we aren't actually giving them to
- # anybody. This means that the set of shares we create will be equal
- # to or larger than the set of landlords. If we have any landlord who
- # *doesn't* have a share, that's an error.
- _assert(set(self.landlords.keys()).issubset(set(shareids)),
- shareids=shareids, landlords=self.landlords)
- start = time.time()
- dl = []
- self.set_status("Sending segment %d of %d" % (segnum+1,
- self.num_segments))
- self.set_encode_and_push_progress(segnum)
- lognum = self.log("send_segment(%d)" % segnum, level=log.NOISY)
- for i in range(len(shares)):
- subshare = shares[i]
- shareid = shareids[i]
- d = self.send_subshare(shareid, segnum, subshare, lognum)
- dl.append(d)
- subshare_hash = hashutil.block_hash(subshare)
- #from allmydata.util import base32
- #log.msg("creating block (shareid=%d, blocknum=%d) "
- # "len=%d %r .. %r: %s" %
- # (shareid, segnum, len(subshare),
- # subshare[:50], subshare[-50:], base32.b2a(subshare_hash)))
- self.subshare_hashes[shareid].append(subshare_hash)
-
- dl = self._gather_responses(dl)
- def _logit(res):
- self.log("%s uploaded %s / %s bytes (%d%%) of your file." %
- (self,
- self.segment_size*(segnum+1),
- self.segment_size*self.num_segments,
- 100 * (segnum+1) / self.num_segments,
- ),
- level=log.OPERATIONAL)
- elapsed = time.time() - start
- self._times["cumulative_sending"] += elapsed
- return res
- dl.addCallback(_logit)
- return dl
-
- def send_subshare(self, shareid, segment_num, subshare, lognum):
- if shareid not in self.landlords:
- return defer.succeed(None)
- sh = self.landlords[shareid]
- lognum2 = self.log("put_block to %s" % self.landlords[shareid],
- parent=lognum, level=log.NOISY)
- d = sh.put_block(segment_num, subshare)
- def _done(res):
- self.log("put_block done", parent=lognum2, level=log.NOISY)
- return res
- d.addCallback(_done)
- d.addErrback(self._remove_shareholder, shareid,
- "segnum=%d" % segment_num)
- return d
-
- def _remove_shareholder(self, why, shareid, where):
- ln = self.log(format="error while sending %(method)s to shareholder=%(shnum)d",
- method=where, shnum=shareid,
- level=log.UNUSUAL, failure=why)
- if shareid in self.landlords:
- self.landlords[shareid].abort()
- del self.landlords[shareid]
- else:
- # even more UNUSUAL
- self.log("they weren't in our list of landlords", parent=ln,
- level=log.WEIRD)
- if len(self.landlords) < self.shares_of_happiness:
- msg = "lost too many shareholders during upload: %s" % why
- raise NotEnoughSharesError(msg)
- self.log("but we can still continue with %s shares, we'll be happy "
- "with at least %s" % (len(self.landlords),
- self.shares_of_happiness),
- parent=ln)
-
- def _gather_responses(self, dl):
- d = defer.DeferredList(dl, fireOnOneErrback=True)
- def _eatNotEnoughSharesError(f):
- # all exceptions that occur while talking to a peer are handled
- # in _remove_shareholder. That might raise NotEnoughSharesError,
- # which will cause the DeferredList to errback but which should
- # otherwise be consumed. Allow non-NotEnoughSharesError exceptions
- # to pass through as an unhandled errback. We use this in lieu of
- # consumeErrors=True to allow coding errors to be logged.
- f.trap(NotEnoughSharesError)
- return None
- for d0 in dl:
- d0.addErrback(_eatNotEnoughSharesError)
- return d
-
- def finish_hashing(self):
- self._start_hashing_and_close_timestamp = time.time()
- self.set_status("Finishing hashes")
- self.set_encode_and_push_progress(extra=0.0)
- crypttext_hash = self._crypttext_hasher.digest()
- self.uri_extension_data["crypttext_hash"] = crypttext_hash
- d = self._uploadable.get_plaintext_hash()
- def _got(plaintext_hash):
- self.log(format="plaintext_hash=%(plaintext_hash)s, SI=%(SI)s, size=%(size)d",
- plaintext_hash=base32.b2a(plaintext_hash),
- SI=storage.si_b2a(self._storage_index),
- size=self.file_size)
- return plaintext_hash
- d.addCallback(_got)
- if self.USE_PLAINTEXT_HASHES:
- def _use_plaintext_hash(plaintext_hash):
- self.uri_extension_data["plaintext_hash"] = plaintext_hash
- return self._uploadable.get_plaintext_hashtree_leaves(0, self.num_segments, self.num_segments)
- d.addCallback(_use_plaintext_hash)
- def _got_hashtree_leaves(leaves):
- self.log("Encoder: got plaintext_hashtree_leaves: %s" %
- (",".join([base32.b2a(h) for h in leaves]),),
- level=log.NOISY)
- ht = list(HashTree(list(leaves)))
- self.uri_extension_data["plaintext_root_hash"] = ht[0]
- self._plaintext_hashtree_nodes = ht
- d.addCallback(_got_hashtree_leaves)
-
- d.addCallback(lambda res: self._uploadable.close())
- return d
-
- def send_plaintext_hash_tree_to_all_shareholders(self):
- self.log("sending plaintext hash tree", level=log.NOISY)
- self.set_status("Sending Plaintext Hash Tree")
- self.set_encode_and_push_progress(extra=0.2)
- dl = []
- for shareid in self.landlords.keys():
- d = self.send_plaintext_hash_tree(shareid,
- self._plaintext_hashtree_nodes)
- dl.append(d)
- return self._gather_responses(dl)
-
- def send_plaintext_hash_tree(self, shareid, all_hashes):
- if shareid not in self.landlords:
- return defer.succeed(None)
- sh = self.landlords[shareid]
- d = sh.put_plaintext_hashes(all_hashes)
- d.addErrback(self._remove_shareholder, shareid, "put_plaintext_hashes")
- return d
-
- def send_crypttext_hash_tree_to_all_shareholders(self):
- self.log("sending crypttext hash tree", level=log.NOISY)
- self.set_status("Sending Crypttext Hash Tree")
- self.set_encode_and_push_progress(extra=0.3)
- t = HashTree(self._crypttext_hashes)
- all_hashes = list(t)
- self.uri_extension_data["crypttext_root_hash"] = t[0]
- dl = []
- for shareid in self.landlords.keys():
- dl.append(self.send_crypttext_hash_tree(shareid, all_hashes))
- return self._gather_responses(dl)
-
- def send_crypttext_hash_tree(self, shareid, all_hashes):
- if shareid not in self.landlords:
- return defer.succeed(None)
- sh = self.landlords[shareid]
- d = sh.put_crypttext_hashes(all_hashes)
- d.addErrback(self._remove_shareholder, shareid, "put_crypttext_hashes")
- return d
-
- def send_all_subshare_hash_trees(self):
- self.log("sending subshare hash trees", level=log.NOISY)
- self.set_status("Sending Subshare Hash Trees")
- self.set_encode_and_push_progress(extra=0.4)
- dl = []
- for shareid,hashes in enumerate(self.subshare_hashes):
- # hashes is a list of the hashes of all subshares that were sent
- # to shareholder[shareid].
- dl.append(self.send_one_subshare_hash_tree(shareid, hashes))
- return self._gather_responses(dl)
-
- def send_one_subshare_hash_tree(self, shareid, subshare_hashes):
- t = HashTree(subshare_hashes)
- all_hashes = list(t)
- # all_hashes[0] is the root hash, == hash(ah[1]+ah[2])
- # all_hashes[1] is the left child, == hash(ah[3]+ah[4])
- # all_hashes[n] == hash(all_hashes[2*n+1] + all_hashes[2*n+2])
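-        # For example, with four leaves the bottom row sits at indices 3..6,
-        # so all_hashes[1] == hash(ah[3]+ah[4]), all_hashes[2] ==
-        # hash(ah[5]+ah[6]), and all_hashes[0] == hash(ah[1]+ah[2]) is the
-        # root that goes into share_root_hashes.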
- self.share_root_hashes[shareid] = t[0]
- if shareid not in self.landlords:
- return defer.succeed(None)
- sh = self.landlords[shareid]
- d = sh.put_block_hashes(all_hashes)
- d.addErrback(self._remove_shareholder, shareid, "put_block_hashes")
- return d
-
- def send_all_share_hash_trees(self):
- # each bucket gets a set of share hash tree nodes that are needed to
- # validate their share. This includes the share hash itself, but does
- # not include the top-level hash root (which is stored securely in
- # the URI instead).
- self.log("sending all share hash trees", level=log.NOISY)
- self.set_status("Sending Share Hash Trees")
- self.set_encode_and_push_progress(extra=0.6)
- dl = []
- for h in self.share_root_hashes:
- assert h
- # create the share hash tree
- t = HashTree(self.share_root_hashes)
- # the root of this hash tree goes into our URI
- self.uri_extension_data['share_root_hash'] = t[0]
- # now send just the necessary pieces out to each shareholder
- for i in range(self.num_shares):
-            # the HashTree is given a list of n leaves: 0,1,2,..,n-1.
-            # These become nodes A+0, A+1, A+2, .. of the tree, where A=n-1.
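-            # For example (an illustrative sketch): with num_shares=4 the
-            # leaves sit at indices 3..6, so needed_hashes(2,
-            # include_leaf=True) covers roughly {5, 6, 1}: the leaf itself,
-            # its sibling, and its uncle, i.e. everything below the root
-            # that is needed to validate share #2.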
- needed_hash_indices = t.needed_hashes(i, include_leaf=True)
- hashes = [(hi, t[hi]) for hi in needed_hash_indices]
- dl.append(self.send_one_share_hash_tree(i, hashes))
- return self._gather_responses(dl)
-
- def send_one_share_hash_tree(self, shareid, needed_hashes):
- if shareid not in self.landlords:
- return defer.succeed(None)
- sh = self.landlords[shareid]
- d = sh.put_share_hashes(needed_hashes)
- d.addErrback(self._remove_shareholder, shareid, "put_share_hashes")
- return d
-
- def send_uri_extension_to_all_shareholders(self):
- lp = self.log("sending uri_extension", level=log.NOISY)
- self.set_status("Sending URI Extensions")
- self.set_encode_and_push_progress(extra=0.8)
- for k in ('crypttext_root_hash', 'crypttext_hash',
- ):
- assert k in self.uri_extension_data
- if self.USE_PLAINTEXT_HASHES:
- for k in ('plaintext_root_hash', 'plaintext_hash',
- ):
- assert k in self.uri_extension_data
- uri_extension = uri.pack_extension(self.uri_extension_data)
- ed = {}
- for k,v in self.uri_extension_data.items():
- if k.endswith("hash"):
- ed[k] = base32.b2a(v)
- else:
- ed[k] = v
- self.log("uri_extension_data is %s" % (ed,), level=log.NOISY, parent=lp)
- self.uri_extension_hash = hashutil.uri_extension_hash(uri_extension)
- dl = []
- for shareid in self.landlords.keys():
- dl.append(self.send_uri_extension(shareid, uri_extension))
- return self._gather_responses(dl)
-
- def send_uri_extension(self, shareid, uri_extension):
- sh = self.landlords[shareid]
- d = sh.put_uri_extension(uri_extension)
- d.addErrback(self._remove_shareholder, shareid, "put_uri_extension")
- return d
-
- def close_all_shareholders(self):
- self.log("closing shareholders", level=log.NOISY)
- self.set_status("Closing Shareholders")
- self.set_encode_and_push_progress(extra=0.9)
- dl = []
- for shareid in self.landlords:
- d = self.landlords[shareid].close()
- d.addErrback(self._remove_shareholder, shareid, "close")
- dl.append(d)
- return self._gather_responses(dl)
-
- def done(self, res):
- self.log("upload done", level=log.OPERATIONAL)
- self.set_status("Done")
- self.set_encode_and_push_progress(extra=1.0) # done
- now = time.time()
- h_and_c_elapsed = now - self._start_hashing_and_close_timestamp
- self._times["hashes_and_close"] = h_and_c_elapsed
- total_elapsed = now - self._start_total_timestamp
- self._times["total_encode_and_push"] = total_elapsed
-
- # update our sharemap
- self._shares_placed = set(self.landlords.keys())
- return (self.uri_extension_hash, self.required_shares,
- self.num_shares, self.file_size)
-
- def err(self, f):
- self.log("upload failed", failure=f, level=log.UNUSUAL)
- self.set_status("Failed")
- # we need to abort any remaining shareholders, so they'll delete the
- # partial share, allowing someone else to upload it again.
- self.log("aborting shareholders", level=log.UNUSUAL)
- for shareid in list(self.landlords.keys()):
- self.landlords[shareid].abort()
- if f.check(defer.FirstError):
- return f.value.subFailure
- return f
-
- def get_shares_placed(self):
- # return a set of share numbers that were successfully placed.
- return self._shares_placed
-
- def get_times(self):
- # return a dictionary of encode+push timings
- return self._times
-
- def get_uri_extension_data(self):
- return self.uri_extension_data
+++ /dev/null
-
-from zope.interface import implements
-from twisted.internet import defer
-from allmydata.interfaces import IFileNode, IFileURI, IURI, ICheckable
-from allmydata import uri
-from allmydata.checker import SimpleCHKFileChecker, SimpleCHKFileVerifier, \
- Results
-
-class FileNode:
- implements(IFileNode, ICheckable)
-
- def __init__(self, uri, client):
- u = IFileURI(uri)
- self.uri = u.to_string()
- self._client = client
-
- def get_uri(self):
- return self.uri
-
- def is_mutable(self):
- return False
-
- def is_readonly(self):
- return True
-
- def get_readonly_uri(self):
- return self.uri
-
- def get_size(self):
- return IFileURI(self.uri).get_size()
-
- def __hash__(self):
- return hash((self.__class__, self.uri))
- def __cmp__(self, them):
- if cmp(type(self), type(them)):
- return cmp(type(self), type(them))
- if cmp(self.__class__, them.__class__):
- return cmp(self.__class__, them.__class__)
- return cmp(self.uri, them.uri)
-
- def get_verifier(self):
- return IFileURI(self.uri).get_verifier()
-
- def check(self, verify=False, repair=False):
- assert repair is False # not implemented yet
- vcap = self.get_verifier()
- if verify:
- v = SimpleCHKFileVerifier(self._client, vcap)
- return v.start()
- else:
- peer_getter = self._client.get_permuted_peers
- v = SimpleCHKFileChecker(peer_getter, vcap)
- return v.check()
-
- def download(self, target):
- downloader = self._client.getServiceNamed("downloader")
- return downloader.download(self.uri, target)
-
- def download_to_data(self):
- downloader = self._client.getServiceNamed("downloader")
- return downloader.download_to_data(self.uri)
-
-
-
-class LiteralFileNode:
- implements(IFileNode, ICheckable)
-
- def __init__(self, my_uri, client):
- u = IFileURI(my_uri)
- assert isinstance(u, uri.LiteralFileURI)
- self.uri = u.to_string()
- self._client = client
-
- def get_uri(self):
- return self.uri
-
- def is_mutable(self):
- return False
-
- def is_readonly(self):
- return True
-
- def get_readonly_uri(self):
- return self.uri
-
- def get_size(self):
- return len(IURI(self.uri).data)
-
- def __hash__(self):
- return hash((self.__class__, self.uri))
- def __cmp__(self, them):
- if cmp(type(self), type(them)):
- return cmp(type(self), type(them))
- if cmp(self.__class__, them.__class__):
- return cmp(self.__class__, them.__class__)
- return cmp(self.uri, them.uri)
-
- def get_verifier(self):
- return None
-
- def check(self, verify=False, repair=False):
- # neither verify= nor repair= affect LIT files
- r = Results(None)
- r.healthy = True
- r.problems = []
- return defer.succeed(r)
-
- def download(self, target):
- # note that this does not update the stats_provider
- data = IURI(self.uri).data
- target.open(len(data))
- target.write(data)
- target.close()
- return defer.maybeDeferred(target.finish)
-
- def download_to_data(self):
- data = IURI(self.uri).data
- return defer.succeed(data)
--- /dev/null
+
+"""
+Given a StorageIndex, count how many shares we can find.
+
+This does no verification of the shares whatsoever. If the peer claims to
+have the share, we believe them.
+"""
+
+from zope.interface import implements
+from twisted.internet import defer
+from twisted.python import log
+from allmydata import storage
+from allmydata.interfaces import IVerifierURI, ICheckerResults
+from allmydata.immutable import download
+from allmydata.util import hashutil, base32
+
+class Results:
+ implements(ICheckerResults)
+
+ def __init__(self, storage_index):
+ # storage_index might be None for, say, LIT files
+ self.storage_index = storage_index
+ if storage_index is None:
+ self.storage_index_s = "<none>"
+ else:
+ self.storage_index_s = base32.b2a(storage_index)[:6]
+
+ def is_healthy(self):
+ return self.healthy
+
+ def html_summary(self):
+ if self.healthy:
+ return "<span>healthy</span>"
+ return "<span>NOT HEALTHY</span>"
+
+ def html(self):
+ s = "<div>\n"
+ s += "<h1>Checker Results for Immutable SI=%s</h1>\n" % self.storage_index_s
+ if self.healthy:
+ s += "<h2>Healthy!</h2>\n"
+ else:
+ s += "<h2>Not Healthy!</h2>\n"
+ s += "</div>\n"
+ return s
+
+
+class SimpleCHKFileChecker:
+ """Return a list of (needed, total, found, sharemap), where sharemap maps
+ share number to a list of (binary) nodeids of the shareholders."""
+
+ def __init__(self, peer_getter, uri_to_check):
+ self.peer_getter = peer_getter
+ self.found_shares = set()
+ self.uri_to_check = IVerifierURI(uri_to_check)
+ self.sharemap = {}
+
+ '''
+ def check_synchronously(self, si):
+ # this is how we would write this class if we were using synchronous
+ # messages (or if we used promises).
+ found = set()
+ for (pmpeerid, peerid, connection) in self.peer_getter(storage_index):
+ buckets = connection.get_buckets(si)
+ found.update(buckets.keys())
+ return len(found)
+ '''
+
+ def check(self):
+ d = self._get_all_shareholders(self.uri_to_check.storage_index)
+ d.addCallback(self._done)
+ return d
+
+ def _get_all_shareholders(self, storage_index):
+ dl = []
+ for (peerid, ss) in self.peer_getter("storage", storage_index):
+ d = ss.callRemote("get_buckets", storage_index)
+ d.addCallbacks(self._got_response, self._got_error,
+ callbackArgs=(peerid,))
+ dl.append(d)
+ return defer.DeferredList(dl)
+
+ def _got_response(self, buckets, peerid):
+        # buckets is a dict: maps shnum to an rref of the server that holds it
+ self.found_shares.update(buckets.keys())
+ for k in buckets:
+ if k not in self.sharemap:
+ self.sharemap[k] = []
+ self.sharemap[k].append(peerid)
+
+    def _got_error(self, f):
+        if f.check(KeyError):
+            pass # a KeyError just means this server has no buckets for us
+        log.err(f)
+
+ def _done(self, res):
+ u = self.uri_to_check
+ r = Results(self.uri_to_check.storage_index)
+ r.healthy = bool(len(self.found_shares) >= u.needed_shares)
+ r.stuff = (u.needed_shares, u.total_shares, len(self.found_shares),
+ self.sharemap)
+ return r
+
+class VerifyingOutput:
+ def __init__(self, total_length, results):
+ self._crypttext_hasher = hashutil.crypttext_hasher()
+ self.length = 0
+ self.total_length = total_length
+ self._segment_number = 0
+ self._crypttext_hash_tree = None
+ self._opened = False
+ self._results = results
+ results.healthy = False
+
+ def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree):
+ self._crypttext_hash_tree = crypttext_hashtree
+
+ def write_segment(self, crypttext):
+ self.length += len(crypttext)
+
+ self._crypttext_hasher.update(crypttext)
+ if self._crypttext_hash_tree:
+ ch = hashutil.crypttext_segment_hasher()
+ ch.update(crypttext)
+ crypttext_leaves = {self._segment_number: ch.digest()}
+ self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves)
+
+ self._segment_number += 1
+
+ def close(self):
+ self.crypttext_hash = self._crypttext_hasher.digest()
+
+ def finish(self):
+ self._results.healthy = True
+ return self._results
+
+
+class SimpleCHKFileVerifier(download.FileDownloader):
+ # this reconstructs the crypttext, which verifies that at least 'k' of
+ # the shareholders are around and have valid data. It does not check the
+ # remaining shareholders, and it cannot verify the plaintext.
+ check_plaintext_hash = False
+
+ def __init__(self, client, u):
+ self._client = client
+
+ u = IVerifierURI(u)
+ self._storage_index = u.storage_index
+ self._uri_extension_hash = u.uri_extension_hash
+ self._total_shares = u.total_shares
+ self._size = u.size
+ self._num_needed_shares = u.needed_shares
+
+ self._si_s = storage.si_b2a(self._storage_index)
+ self.init_logging()
+
+ r = Results(self._storage_index)
+ self._output = VerifyingOutput(self._size, r)
+ self._paused = False
+ self._stopped = False
+
+ self._results = None
+ self.active_buckets = {} # k: shnum, v: bucket
+ self._share_buckets = [] # list of (sharenum, bucket) tuples
+ self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets
+ self._uri_extension_sources = []
+
+ self._uri_extension_data = None
+
+ self._fetch_failures = {"uri_extension": 0,
+ "plaintext_hashroot": 0,
+ "plaintext_hashtree": 0,
+ "crypttext_hashroot": 0,
+ "crypttext_hashtree": 0,
+ }
+
+ def init_logging(self):
+ self._log_prefix = prefix = storage.si_b2a(self._storage_index)[:5]
+ num = self._client.log("SimpleCHKFileVerifier(%s): starting" % prefix)
+ self._log_number = num
+
+ def log(self, msg, parent=None):
+ if parent is None:
+ parent = self._log_number
+ return self._client.log("SimpleCHKFileVerifier(%s): %s"
+ % (self._log_prefix, msg),
+ parent=parent)
+
+
+ def start(self):
+ log.msg("starting download [%s]" % storage.si_b2a(self._storage_index)[:5])
+
+ # first step: who should we download from?
+ d = defer.maybeDeferred(self._get_all_shareholders)
+ d.addCallback(self._got_all_shareholders)
+ # now get the uri_extension block from somebody and validate it
+ d.addCallback(self._obtain_uri_extension)
+ d.addCallback(self._got_uri_extension)
+ d.addCallback(self._get_hashtrees)
+ d.addCallback(self._create_validated_buckets)
+ # once we know that, we can download blocks from everybody
+ d.addCallback(self._download_all_segments)
+ d.addCallback(self._done)
+ return d
+
--- /dev/null
+
+import os, random, weakref, itertools, time
+from zope.interface import implements
+from twisted.internet import defer
+from twisted.internet.interfaces import IPushProducer, IConsumer
+from twisted.application import service
+from foolscap.eventual import eventually
+
+from allmydata.util import base32, mathutil, hashutil, log
+from allmydata.util.assertutil import _assert
+from allmydata import codec, hashtree, storage, uri
+from allmydata.interfaces import IDownloadTarget, IDownloader, IFileURI, \
+ IDownloadStatus, IDownloadResults
+from allmydata.immutable.encode import NotEnoughSharesError
+from pycryptopp.cipher.aes import AES
+
+class HaveAllPeersError(Exception):
+ # we use this to jump out of the loop
+ pass
+
+class BadURIExtensionHashValue(Exception):
+ pass
+class BadPlaintextHashValue(Exception):
+ pass
+class BadCrypttextHashValue(Exception):
+ pass
+
+class DownloadStopped(Exception):
+ pass
+
+class DownloadResults:
+ implements(IDownloadResults)
+
+ def __init__(self):
+ self.servers_used = set()
+ self.server_problems = {}
+ self.servermap = {}
+ self.timings = {}
+ self.file_size = None
+
+class Output:
+ def __init__(self, downloadable, key, total_length, log_parent,
+ download_status):
+ self.downloadable = downloadable
+ self._decryptor = AES(key)
+ self._crypttext_hasher = hashutil.crypttext_hasher()
+ self._plaintext_hasher = hashutil.plaintext_hasher()
+ self.length = 0
+ self.total_length = total_length
+ self._segment_number = 0
+ self._plaintext_hash_tree = None
+ self._crypttext_hash_tree = None
+ self._opened = False
+ self._log_parent = log_parent
+ self._status = download_status
+ self._status.set_progress(0.0)
+
+ def log(self, *args, **kwargs):
+ if "parent" not in kwargs:
+ kwargs["parent"] = self._log_parent
+ if "facility" not in kwargs:
+ kwargs["facility"] = "download.output"
+ return log.msg(*args, **kwargs)
+
+ def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree):
+ self._plaintext_hash_tree = plaintext_hashtree
+ self._crypttext_hash_tree = crypttext_hashtree
+
+ def write_segment(self, crypttext):
+ self.length += len(crypttext)
+ self._status.set_progress( float(self.length) / self.total_length )
+
+ # memory footprint: 'crypttext' is the only segment_size usage
+ # outstanding. While we decrypt it into 'plaintext', we hit
+ # 2*segment_size.
+ self._crypttext_hasher.update(crypttext)
+ if self._crypttext_hash_tree:
+ ch = hashutil.crypttext_segment_hasher()
+ ch.update(crypttext)
+ crypttext_leaves = {self._segment_number: ch.digest()}
+ self.log(format="crypttext leaf hash (%(bytes)sB) [%(segnum)d] is %(hash)s",
+ bytes=len(crypttext),
+ segnum=self._segment_number, hash=base32.b2a(ch.digest()),
+ level=log.NOISY)
+ self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves)
+
+ plaintext = self._decryptor.process(crypttext)
+ del crypttext
+
+ # now we're back down to 1*segment_size.
+
+ self._plaintext_hasher.update(plaintext)
+ if self._plaintext_hash_tree:
+ ph = hashutil.plaintext_segment_hasher()
+ ph.update(plaintext)
+ plaintext_leaves = {self._segment_number: ph.digest()}
+ self.log(format="plaintext leaf hash (%(bytes)sB) [%(segnum)d] is %(hash)s",
+ bytes=len(plaintext),
+ segnum=self._segment_number, hash=base32.b2a(ph.digest()),
+ level=log.NOISY)
+ self._plaintext_hash_tree.set_hashes(leaves=plaintext_leaves)
+
+ self._segment_number += 1
+ # We're still at 1*segment_size. The Downloadable is responsible for
+ # any memory usage beyond this.
+ if not self._opened:
+ self._opened = True
+ self.downloadable.open(self.total_length)
+ self.downloadable.write(plaintext)
+
+ def fail(self, why):
+ # this is really unusual, and deserves maximum forensics
+ if why.check(DownloadStopped):
+            # a DownloadStopped just means the consumer aborted the
+            # download, which is not so scary
+ self.log("download stopped", level=log.UNUSUAL)
+ else:
+ self.log("download failed!", failure=why, level=log.SCARY)
+ self.downloadable.fail(why)
+
+ def close(self):
+ self.crypttext_hash = self._crypttext_hasher.digest()
+ self.plaintext_hash = self._plaintext_hasher.digest()
+ self.log("download finished, closing IDownloadable", level=log.NOISY)
+ self.downloadable.close()
+
+ def finish(self):
+ return self.downloadable.finish()
+
+class ValidatedBucket:
+ """I am a front-end for a remote storage bucket, responsible for
+ retrieving and validating data from that bucket.
+
+ My get_block() method is used by BlockDownloaders.
+ """
+
+ def __init__(self, sharenum, bucket,
+ share_hash_tree, roothash,
+ num_blocks):
+ self.sharenum = sharenum
+ self.bucket = bucket
+ self._share_hash = None # None means not validated yet
+ self.share_hash_tree = share_hash_tree
+ self._roothash = roothash
+ self.block_hash_tree = hashtree.IncompleteHashTree(num_blocks)
+ self.started = False
+
+ def get_block(self, blocknum):
+ if not self.started:
+ d = self.bucket.start()
+ def _started(res):
+ self.started = True
+ return self.get_block(blocknum)
+ d.addCallback(_started)
+ return d
+
+ # the first time we use this bucket, we need to fetch enough elements
+ # of the share hash tree to validate it from our share hash up to the
+ # hashroot.
+ if not self._share_hash:
+ d1 = self.bucket.get_share_hashes()
+ else:
+ d1 = defer.succeed([])
+
+ # we might need to grab some elements of our block hash tree, to
+ # validate the requested block up to the share hash
+ needed = self.block_hash_tree.needed_hashes(blocknum)
+ if needed:
+ # TODO: get fewer hashes, use get_block_hashes(needed)
+ d2 = self.bucket.get_block_hashes()
+ else:
+ d2 = defer.succeed([])
+
+ d3 = self.bucket.get_block(blocknum)
+
+ d = defer.gatherResults([d1, d2, d3])
+ d.addCallback(self._got_data, blocknum)
+ return d
+
+ def _got_data(self, res, blocknum):
+ sharehashes, blockhashes, blockdata = res
+ blockhash = None # to make logging it safe
+
+ try:
+ if not self._share_hash:
+ sh = dict(sharehashes)
+ sh[0] = self._roothash # always use our own root, from the URI
+ sht = self.share_hash_tree
+ if sht.get_leaf_index(self.sharenum) not in sh:
+ raise hashtree.NotEnoughHashesError
+ sht.set_hashes(sh)
+ self._share_hash = sht.get_leaf(self.sharenum)
+
+ blockhash = hashutil.block_hash(blockdata)
+ #log.msg("checking block_hash(shareid=%d, blocknum=%d) len=%d "
+ # "%r .. %r: %s" %
+ # (self.sharenum, blocknum, len(blockdata),
+ # blockdata[:50], blockdata[-50:], base32.b2a(blockhash)))
+
+ # we always validate the blockhash
+ bh = dict(enumerate(blockhashes))
+ # replace blockhash root with validated value
+ bh[0] = self._share_hash
+ self.block_hash_tree.set_hashes(bh, {blocknum: blockhash})
+
+ except (hashtree.BadHashError, hashtree.NotEnoughHashesError):
+ # log.WEIRD: indicates undetected disk/network error, or more
+ # likely a programming error
+ log.msg("hash failure in block=%d, shnum=%d on %s" %
+ (blocknum, self.sharenum, self.bucket))
+ if self._share_hash:
+ log.msg(""" failure occurred when checking the block_hash_tree.
+ This suggests that either the block data was bad, or that the
+ block hashes we received along with it were bad.""")
+ else:
+ log.msg(""" the failure probably occurred when checking the
+ share_hash_tree, which suggests that the share hashes we
+ received from the remote peer were bad.""")
+ log.msg(" have self._share_hash: %s" % bool(self._share_hash))
+ log.msg(" block length: %d" % len(blockdata))
+ log.msg(" block hash: %s" % base32.b2a_or_none(blockhash))
+ if len(blockdata) < 100:
+ log.msg(" block data: %r" % (blockdata,))
+ else:
+ log.msg(" block data start/end: %r .. %r" %
+ (blockdata[:50], blockdata[-50:]))
+ log.msg(" root hash: %s" % base32.b2a(self._roothash))
+ log.msg(" share hash tree:\n" + self.share_hash_tree.dump())
+ log.msg(" block hash tree:\n" + self.block_hash_tree.dump())
+ lines = []
+ for i,h in sorted(sharehashes):
+ lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
+ log.msg(" sharehashes:\n" + "\n".join(lines) + "\n")
+ lines = []
+ for i,h in enumerate(blockhashes):
+ lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
+ log.msg(" blockhashes:\n" + "\n".join(lines) + "\n")
+ raise
+
+ # If we made it here, the block is good. If the hash trees didn't
+ # like what they saw, they would have raised a BadHashError, causing
+ # our caller to see a Failure and thus ignore this block (as well as
+ # dropping this bucket).
+ return blockdata
+
+
+
+class BlockDownloader:
+ """I am responsible for downloading a single block (from a single bucket)
+ for a single segment.
+
+ I am a child of the SegmentDownloader.
+ """
+
+ def __init__(self, vbucket, blocknum, parent, results):
+ self.vbucket = vbucket
+ self.blocknum = blocknum
+ self.parent = parent
+ self.results = results
+ self._log_number = self.parent.log("starting block %d" % blocknum)
+
+ def log(self, msg, parent=None):
+ if parent is None:
+ parent = self._log_number
+ return self.parent.log(msg, parent=parent)
+
+ def start(self, segnum):
+ lognum = self.log("get_block(segnum=%d)" % segnum)
+ started = time.time()
+ d = self.vbucket.get_block(segnum)
+ d.addCallbacks(self._hold_block, self._got_block_error,
+ callbackArgs=(started, lognum,), errbackArgs=(lognum,))
+ return d
+
+ def _hold_block(self, data, started, lognum):
+ if self.results:
+ elapsed = time.time() - started
+ peerid = self.vbucket.bucket.get_peerid()
+ if peerid not in self.results.timings["fetch_per_server"]:
+ self.results.timings["fetch_per_server"][peerid] = []
+ self.results.timings["fetch_per_server"][peerid].append(elapsed)
+ self.log("got block", parent=lognum)
+ self.parent.hold_block(self.blocknum, data)
+
+ def _got_block_error(self, f, lognum):
+ self.log("BlockDownloader[%d] got error: %s" % (self.blocknum, f),
+ parent=lognum)
+ if self.results:
+ peerid = self.vbucket.bucket.get_peerid()
+ self.results.server_problems[peerid] = str(f)
+ self.parent.bucket_failed(self.vbucket)
+
+class SegmentDownloader:
+ """I am responsible for downloading all the blocks for a single segment
+ of data.
+
+ I am a child of the FileDownloader.
+ """
+
+ def __init__(self, parent, segmentnumber, needed_shares, results):
+ self.parent = parent
+ self.segmentnumber = segmentnumber
+ self.needed_blocks = needed_shares
+ self.blocks = {} # k: blocknum, v: data
+ self.results = results
+ self._log_number = self.parent.log("starting segment %d" %
+ segmentnumber)
+
+ def log(self, msg, parent=None):
+ if parent is None:
+ parent = self._log_number
+ return self.parent.log(msg, parent=parent)
+
+ def start(self):
+ return self._download()
+
+ def _download(self):
+ d = self._try()
+ def _done(res):
+ if len(self.blocks) >= self.needed_blocks:
+ # we only need self.needed_blocks blocks
+ # we want to get the smallest blockids, because they are
+ # more likely to be fast "primary blocks"
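+                # (illustrative note: with a systematic code like zfec, the
+                # first 'k' blocks are verbatim pieces of the crypttext, so
+                # they need no real decoding arithmetic)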
+ blockids = sorted(self.blocks.keys())[:self.needed_blocks]
+ blocks = []
+ for blocknum in blockids:
+ blocks.append(self.blocks[blocknum])
+ return (blocks, blockids)
+ else:
+ return self._download()
+ d.addCallback(_done)
+ return d
+
+ def _try(self):
+ # fill our set of active buckets, maybe raising NotEnoughSharesError
+ active_buckets = self.parent._activate_enough_buckets()
+ # Now we have enough buckets, in self.parent.active_buckets.
+
+ # in test cases, bd.start might mutate active_buckets right away, so
+ # we need to put off calling start() until we've iterated all the way
+ # through it.
+ downloaders = []
+ for blocknum, vbucket in active_buckets.iteritems():
+ bd = BlockDownloader(vbucket, blocknum, self, self.results)
+ downloaders.append(bd)
+ if self.results:
+ self.results.servers_used.add(vbucket.bucket.get_peerid())
+ l = [bd.start(self.segmentnumber) for bd in downloaders]
+ return defer.DeferredList(l, fireOnOneErrback=True)
+
+ def hold_block(self, blocknum, data):
+ self.blocks[blocknum] = data
+
+ def bucket_failed(self, vbucket):
+ self.parent.bucket_failed(vbucket)
+
+class DownloadStatus:
+ implements(IDownloadStatus)
+ statusid_counter = itertools.count(0)
+
+ def __init__(self):
+ self.storage_index = None
+ self.size = None
+ self.helper = False
+ self.status = "Not started"
+ self.progress = 0.0
+ self.paused = False
+ self.stopped = False
+ self.active = True
+ self.results = None
+ self.counter = self.statusid_counter.next()
+ self.started = time.time()
+
+ def get_started(self):
+ return self.started
+ def get_storage_index(self):
+ return self.storage_index
+ def get_size(self):
+ return self.size
+ def using_helper(self):
+ return self.helper
+ def get_status(self):
+ status = self.status
+ if self.paused:
+ status += " (output paused)"
+ if self.stopped:
+ status += " (output stopped)"
+ return status
+ def get_progress(self):
+ return self.progress
+ def get_active(self):
+ return self.active
+ def get_results(self):
+ return self.results
+ def get_counter(self):
+ return self.counter
+
+ def set_storage_index(self, si):
+ self.storage_index = si
+ def set_size(self, size):
+ self.size = size
+ def set_helper(self, helper):
+ self.helper = helper
+ def set_status(self, status):
+ self.status = status
+ def set_paused(self, paused):
+ self.paused = paused
+ def set_stopped(self, stopped):
+ self.stopped = stopped
+ def set_progress(self, value):
+ self.progress = value
+ def set_active(self, value):
+ self.active = value
+ def set_results(self, value):
+ self.results = value
+
+class FileDownloader:
+ implements(IPushProducer)
+ check_crypttext_hash = True
+ check_plaintext_hash = True
+ _status = None
+
+ def __init__(self, client, u, downloadable):
+ self._client = client
+
+ u = IFileURI(u)
+ self._storage_index = u.storage_index
+ self._uri_extension_hash = u.uri_extension_hash
+ self._total_shares = u.total_shares
+ self._size = u.size
+ self._num_needed_shares = u.needed_shares
+
+ self._si_s = storage.si_b2a(self._storage_index)
+ self.init_logging()
+
+ self._started = time.time()
+ self._status = s = DownloadStatus()
+ s.set_status("Starting")
+ s.set_storage_index(self._storage_index)
+ s.set_size(self._size)
+ s.set_helper(False)
+ s.set_active(True)
+
+ self._results = DownloadResults()
+ s.set_results(self._results)
+ self._results.file_size = self._size
+ self._results.timings["servers_peer_selection"] = {}
+ self._results.timings["fetch_per_server"] = {}
+ self._results.timings["cumulative_fetch"] = 0.0
+ self._results.timings["cumulative_decode"] = 0.0
+ self._results.timings["cumulative_decrypt"] = 0.0
+ self._results.timings["paused"] = 0.0
+
+ if IConsumer.providedBy(downloadable):
+ downloadable.registerProducer(self, True)
+ self._downloadable = downloadable
+ self._output = Output(downloadable, u.key, self._size, self._log_number,
+ self._status)
+ self._paused = False
+ self._stopped = False
+
+ self.active_buckets = {} # k: shnum, v: bucket
+ self._share_buckets = [] # list of (sharenum, bucket) tuples
+ self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets
+ self._uri_extension_sources = []
+
+ self._uri_extension_data = None
+
+ self._fetch_failures = {"uri_extension": 0,
+ "plaintext_hashroot": 0,
+ "plaintext_hashtree": 0,
+ "crypttext_hashroot": 0,
+ "crypttext_hashtree": 0,
+ }
+
+ def init_logging(self):
+ self._log_prefix = prefix = storage.si_b2a(self._storage_index)[:5]
+ num = self._client.log(format="FileDownloader(%(si)s): starting",
+ si=storage.si_b2a(self._storage_index))
+ self._log_number = num
+
+ def log(self, *args, **kwargs):
+ if "parent" not in kwargs:
+ kwargs["parent"] = self._log_number
+ if "facility" not in kwargs:
+ kwargs["facility"] = "tahoe.download"
+ return log.msg(*args, **kwargs)
+
+ def pauseProducing(self):
+ if self._paused:
+ return
+ self._paused = defer.Deferred()
+ self._paused_at = time.time()
+ if self._status:
+ self._status.set_paused(True)
+
+ def resumeProducing(self):
+ if self._paused:
+ paused_for = time.time() - self._paused_at
+ self._results.timings['paused'] += paused_for
+ p = self._paused
+ self._paused = None
+ eventually(p.callback, None)
+ if self._status:
+ self._status.set_paused(False)
+
+ def stopProducing(self):
+ self.log("Download.stopProducing")
+ self._stopped = True
+ self.resumeProducing()
+ if self._status:
+ self._status.set_stopped(True)
+ self._status.set_active(False)
+
+ def start(self):
+ self.log("starting download")
+
+ # first step: who should we download from?
+ d = defer.maybeDeferred(self._get_all_shareholders)
+ d.addCallback(self._got_all_shareholders)
+ # now get the uri_extension block from somebody and validate it
+ d.addCallback(self._obtain_uri_extension)
+ d.addCallback(self._got_uri_extension)
+ d.addCallback(self._get_hashtrees)
+ d.addCallback(self._create_validated_buckets)
+ # once we know that, we can download blocks from everybody
+ d.addCallback(self._download_all_segments)
+ def _finished(res):
+ if self._status:
+ self._status.set_status("Finished")
+ self._status.set_active(False)
+ self._status.set_paused(False)
+ if IConsumer.providedBy(self._downloadable):
+ self._downloadable.unregisterProducer()
+ return res
+ d.addBoth(_finished)
+ def _failed(why):
+ if self._status:
+ self._status.set_status("Failed")
+ self._status.set_active(False)
+ self._output.fail(why)
+ return why
+ d.addErrback(_failed)
+ d.addCallback(self._done)
+ return d
+
+ def _get_all_shareholders(self):
+ dl = []
+ for (peerid,ss) in self._client.get_permuted_peers("storage",
+ self._storage_index):
+ d = ss.callRemote("get_buckets", self._storage_index)
+ d.addCallbacks(self._got_response, self._got_error,
+ callbackArgs=(peerid,))
+ dl.append(d)
+ self._responses_received = 0
+ self._queries_sent = len(dl)
+ if self._status:
+ self._status.set_status("Locating Shares (%d/%d)" %
+ (self._responses_received,
+ self._queries_sent))
+ return defer.DeferredList(dl)
+
+ def _got_response(self, buckets, peerid):
+ self._responses_received += 1
+ if self._results:
+ elapsed = time.time() - self._started
+ self._results.timings["servers_peer_selection"][peerid] = elapsed
+ if self._status:
+ self._status.set_status("Locating Shares (%d/%d)" %
+ (self._responses_received,
+ self._queries_sent))
+ for sharenum, bucket in buckets.iteritems():
+ b = storage.ReadBucketProxy(bucket, peerid, self._si_s)
+ self.add_share_bucket(sharenum, b)
+ self._uri_extension_sources.append(b)
+ if self._results:
+ if peerid not in self._results.servermap:
+ self._results.servermap[peerid] = set()
+ self._results.servermap[peerid].add(sharenum)
+
+ def add_share_bucket(self, sharenum, bucket):
+ # this is split out for the benefit of test_encode.py
+ self._share_buckets.append( (sharenum, bucket) )
+
+ def _got_error(self, f):
+ self._client.log("Somebody failed. -- %s" % (f,))
+
+ def bucket_failed(self, vbucket):
+ shnum = vbucket.sharenum
+ del self.active_buckets[shnum]
+ s = self._share_vbuckets[shnum]
+ # s is a set of ValidatedBucket instances
+ s.remove(vbucket)
+ # ... which might now be empty
+ if not s:
+ # there are no more buckets which can provide this share, so
+ # remove the key. This may prompt us to use a different share.
+ del self._share_vbuckets[shnum]
+
+ def _got_all_shareholders(self, res):
+ if self._results:
+ now = time.time()
+ self._results.timings["peer_selection"] = now - self._started
+
+ if len(self._share_buckets) < self._num_needed_shares:
+ raise NotEnoughSharesError
+
+ #for s in self._share_vbuckets.values():
+ # for vb in s:
+ # assert isinstance(vb, ValidatedBucket), \
+ # "vb is %s but should be a ValidatedBucket" % (vb,)
+
+ def _unpack_uri_extension_data(self, data):
+ return uri.unpack_extension(data)
+
+ def _obtain_uri_extension(self, ignored):
+ # all shareholders are supposed to have a copy of uri_extension, and
+ # all are supposed to be identical. We compute the hash of the data
+ # that comes back, and compare it against the version in our URI. If
+ # they don't match, ignore their data and try someone else.
+ if self._status:
+ self._status.set_status("Obtaining URI Extension")
+
+ self._uri_extension_fetch_started = time.time()
+ def _validate(proposal, bucket):
+ h = hashutil.uri_extension_hash(proposal)
+ if h != self._uri_extension_hash:
+ self._fetch_failures["uri_extension"] += 1
+ msg = ("The copy of uri_extension we received from "
+ "%s was bad: wanted %s, got %s" %
+ (bucket,
+ base32.b2a(self._uri_extension_hash),
+ base32.b2a(h)))
+ self.log(msg, level=log.SCARY)
+ raise BadURIExtensionHashValue(msg)
+ return self._unpack_uri_extension_data(proposal)
+ return self._obtain_validated_thing(None,
+ self._uri_extension_sources,
+ "uri_extension",
+ "get_uri_extension", (), _validate)
+
+ def _obtain_validated_thing(self, ignored, sources, name, methname, args,
+ validatorfunc):
+ if not sources:
+ raise NotEnoughSharesError("started with zero peers while fetching "
+ "%s" % name)
+ bucket = sources[0]
+ sources = sources[1:]
+ #d = bucket.callRemote(methname, *args)
+ d = bucket.startIfNecessary()
+ d.addCallback(lambda res: getattr(bucket, methname)(*args))
+ d.addCallback(validatorfunc, bucket)
+ def _bad(f):
+ self.log("%s from vbucket %s failed:" % (name, bucket),
+ failure=f, level=log.WEIRD)
+ if not sources:
+ raise NotEnoughSharesError("ran out of peers, last error was %s"
+ % (f,))
+ # try again with a different one
+ return self._obtain_validated_thing(None, sources, name,
+ methname, args, validatorfunc)
+ d.addErrback(_bad)
+ return d
+
+ def _got_uri_extension(self, uri_extension_data):
+ if self._results:
+ elapsed = time.time() - self._uri_extension_fetch_started
+ self._results.timings["uri_extension"] = elapsed
+
+ d = self._uri_extension_data = uri_extension_data
+
+ self._codec = codec.get_decoder_by_name(d['codec_name'])
+ self._codec.set_serialized_params(d['codec_params'])
+ self._tail_codec = codec.get_decoder_by_name(d['codec_name'])
+ self._tail_codec.set_serialized_params(d['tail_codec_params'])
+
+ crypttext_hash = d.get('crypttext_hash', None) # optional
+ if crypttext_hash:
+ assert isinstance(crypttext_hash, str)
+ assert len(crypttext_hash) == 32
+ self._crypttext_hash = crypttext_hash
+ self._plaintext_hash = d.get('plaintext_hash', None) # optional
+
+ self._roothash = d['share_root_hash']
+
+ self._segment_size = segment_size = d['segment_size']
+ self._total_segments = mathutil.div_ceil(self._size, segment_size)
+ self._current_segnum = 0
+
+ self._share_hashtree = hashtree.IncompleteHashTree(d['total_shares'])
+ self._share_hashtree.set_hashes({0: self._roothash})
+
+ def _get_hashtrees(self, res):
+ self._get_hashtrees_started = time.time()
+ if self._status:
+ self._status.set_status("Retrieving Hash Trees")
+ d = defer.maybeDeferred(self._get_plaintext_hashtrees)
+ d.addCallback(self._get_crypttext_hashtrees)
+ d.addCallback(self._setup_hashtrees)
+ return d
+
+ def _get_plaintext_hashtrees(self):
+ # plaintext hashes are optional. If the root isn't in the UEB, then
+ # the share will be holding an empty list. We don't even bother
+ # fetching it.
+ if "plaintext_root_hash" not in self._uri_extension_data:
+ self._plaintext_hashtree = None
+ return
+ def _validate_plaintext_hashtree(proposal, bucket):
+ if proposal[0] != self._uri_extension_data['plaintext_root_hash']:
+ self._fetch_failures["plaintext_hashroot"] += 1
+ msg = ("The copy of the plaintext_root_hash we received from"
+ " %s was bad" % bucket)
+ raise BadPlaintextHashValue(msg)
+ pt_hashtree = hashtree.IncompleteHashTree(self._total_segments)
+ pt_hashes = dict(list(enumerate(proposal)))
+ try:
+ pt_hashtree.set_hashes(pt_hashes)
+ except hashtree.BadHashError:
+ # the hashes they gave us were not self-consistent, even
+ # though the root matched what we saw in the uri_extension
+ # block
+ self._fetch_failures["plaintext_hashtree"] += 1
+ raise
+ self._plaintext_hashtree = pt_hashtree
+ d = self._obtain_validated_thing(None,
+ self._uri_extension_sources,
+ "plaintext_hashes",
+ "get_plaintext_hashes", (),
+ _validate_plaintext_hashtree)
+ return d
+
+ def _get_crypttext_hashtrees(self, res):
+ # crypttext hashes are optional too
+ if "crypttext_root_hash" not in self._uri_extension_data:
+ self._crypttext_hashtree = None
+ return
+ def _validate_crypttext_hashtree(proposal, bucket):
+ if proposal[0] != self._uri_extension_data['crypttext_root_hash']:
+ self._fetch_failures["crypttext_hashroot"] += 1
+ msg = ("The copy of the crypttext_root_hash we received from"
+ " %s was bad" % bucket)
+ raise BadCrypttextHashValue(msg)
+ ct_hashtree = hashtree.IncompleteHashTree(self._total_segments)
+ ct_hashes = dict(list(enumerate(proposal)))
+ try:
+ ct_hashtree.set_hashes(ct_hashes)
+ except hashtree.BadHashError:
+ self._fetch_failures["crypttext_hashtree"] += 1
+ raise
+ self._crypttext_hashtree = ct_hashtree
+ d = self._obtain_validated_thing(None,
+ self._uri_extension_sources,
+ "crypttext_hashes",
+ "get_crypttext_hashes", (),
+ _validate_crypttext_hashtree)
+ return d
+
+ def _setup_hashtrees(self, res):
+ self._output.setup_hashtrees(self._plaintext_hashtree,
+ self._crypttext_hashtree)
+ if self._results:
+ elapsed = time.time() - self._get_hashtrees_started
+ self._results.timings["hashtrees"] = elapsed
+
+ def _create_validated_buckets(self, ignored=None):
+ self._share_vbuckets = {}
+ for sharenum, bucket in self._share_buckets:
+ vbucket = ValidatedBucket(sharenum, bucket,
+ self._share_hashtree,
+ self._roothash,
+ self._total_segments)
+ s = self._share_vbuckets.setdefault(sharenum, set())
+ s.add(vbucket)
+
+ def _activate_enough_buckets(self):
+ """either return a mapping from shnum to a ValidatedBucket that can
+ provide data for that share, or raise NotEnoughSharesError"""
+
+ while len(self.active_buckets) < self._num_needed_shares:
+ # need some more
+ handled_shnums = set(self.active_buckets.keys())
+ available_shnums = set(self._share_vbuckets.keys())
+ potential_shnums = list(available_shnums - handled_shnums)
+ if not potential_shnums:
+ raise NotEnoughSharesError
+ # choose a random share
+ shnum = random.choice(potential_shnums)
+ # and a random bucket that will provide it
+ validated_bucket = random.choice(list(self._share_vbuckets[shnum]))
+ self.active_buckets[shnum] = validated_bucket
+ return self.active_buckets
+
+
+ def _download_all_segments(self, res):
+ # the promise: upon entry to this function, self._share_vbuckets
+ # contains enough buckets to complete the download, and some extra
+ # ones to tolerate some buckets dropping out or having errors.
+ # self._share_vbuckets is a dictionary that maps from shnum to a set
+ # of ValidatedBuckets, which themselves are wrappers around
+ # RIBucketReader references.
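+        # For example (an illustrative sketch of that dict's shape):
+        #   self._share_vbuckets == {0: set([vb_a]), 1: set([vb_a, vb_b]),
+        #                            4: set([vb_c])}
+        # would let us fetch shares 0, 1, and 4, with share 1 available
+        # from either of two validated buckets.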
+ self.active_buckets = {} # k: shnum, v: ValidatedBucket instance
+
+ self._started_fetching = time.time()
+
+ d = defer.succeed(None)
+ for segnum in range(self._total_segments-1):
+ d.addCallback(self._download_segment, segnum)
+ # this pause, at the end of write, prevents pre-fetch from
+ # happening until the consumer is ready for more data.
+ d.addCallback(self._check_for_pause)
+ d.addCallback(self._download_tail_segment, self._total_segments-1)
+ return d
+
+ def _check_for_pause(self, res):
+ if self._paused:
+ d = defer.Deferred()
+ self._paused.addCallback(lambda ignored: d.callback(res))
+ return d
+ if self._stopped:
+ raise DownloadStopped("our Consumer called stopProducing()")
+ return res
+
+ def _download_segment(self, res, segnum):
+ if self._status:
+ self._status.set_status("Downloading segment %d of %d" %
+ (segnum+1, self._total_segments))
+ self.log("downloading seg#%d of %d (%d%%)"
+ % (segnum, self._total_segments,
+ 100.0 * segnum / self._total_segments))
+ # memory footprint: when the SegmentDownloader finishes pulling down
+ # all shares, we have 1*segment_size of usage.
+ segmentdler = SegmentDownloader(self, segnum, self._num_needed_shares,
+ self._results)
+ started = time.time()
+ d = segmentdler.start()
+ def _finished_fetching(res):
+ elapsed = time.time() - started
+ self._results.timings["cumulative_fetch"] += elapsed
+ return res
+ if self._results:
+ d.addCallback(_finished_fetching)
+ # pause before using more memory
+ d.addCallback(self._check_for_pause)
+ # while the codec does its job, we hit 2*segment_size
+ def _started_decode(res):
+ self._started_decode = time.time()
+ return res
+ if self._results:
+ d.addCallback(_started_decode)
+ d.addCallback(lambda (shares, shareids):
+ self._codec.decode(shares, shareids))
+ # once the codec is done, we drop back to 1*segment_size, because
+ # 'shares' goes out of scope. The memory usage is all in the
+ # plaintext now, spread out into a bunch of tiny buffers.
+ def _finished_decode(res):
+ elapsed = time.time() - self._started_decode
+ self._results.timings["cumulative_decode"] += elapsed
+ return res
+ if self._results:
+ d.addCallback(_finished_decode)
+
+ # pause/check-for-stop just before writing, to honor stopProducing
+ d.addCallback(self._check_for_pause)
+ def _done(buffers):
+ # we start by joining all these buffers together into a single
+            # string. This makes Output.write_segment easier, since it wants
+            # to hash data one segment at a time anyway, and doesn't impact
+            # our memory footprint since we're already peaking at
+            # 2*segment_size inside the codec a moment ago.
+ segment = "".join(buffers)
+ del buffers
+ # we're down to 1*segment_size right now, but write_segment()
+ # will decrypt a copy of the segment internally, which will push
+ # us up to 2*segment_size while it runs.
+ started_decrypt = time.time()
+ self._output.write_segment(segment)
+ if self._results:
+ elapsed = time.time() - started_decrypt
+ self._results.timings["cumulative_decrypt"] += elapsed
+ d.addCallback(_done)
+ return d
+
+ def _download_tail_segment(self, res, segnum):
+ self.log("downloading seg#%d of %d (%d%%)"
+ % (segnum, self._total_segments,
+ 100.0 * segnum / self._total_segments))
+ segmentdler = SegmentDownloader(self, segnum, self._num_needed_shares,
+ self._results)
+ started = time.time()
+ d = segmentdler.start()
+ def _finished_fetching(res):
+ elapsed = time.time() - started
+ self._results.timings["cumulative_fetch"] += elapsed
+ return res
+ if self._results:
+ d.addCallback(_finished_fetching)
+ # pause before using more memory
+ d.addCallback(self._check_for_pause)
+ def _started_decode(res):
+ self._started_decode = time.time()
+ return res
+ if self._results:
+ d.addCallback(_started_decode)
+ d.addCallback(lambda (shares, shareids):
+ self._tail_codec.decode(shares, shareids))
+ def _finished_decode(res):
+ elapsed = time.time() - self._started_decode
+ self._results.timings["cumulative_decode"] += elapsed
+ return res
+ if self._results:
+ d.addCallback(_finished_decode)
+ # pause/check-for-stop just before writing, to honor stopProducing
+ d.addCallback(self._check_for_pause)
+ def _done(buffers):
+ # trim off any padding added by the upload side
+ segment = "".join(buffers)
+ del buffers
+ # we never send empty segments. If the data was an exact multiple
+ # of the segment size, the last segment will be full.
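+            # For example (illustrative numbers): with segment_size=128KiB
+            # and a 300KiB file, the tail carries 300-256 = 44KiB of real
+            # data, so pad_size is 128-44 = 84KiB and we keep only the
+            # first 44KiB of the decoded tail segment.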
+ pad_size = mathutil.pad_size(self._size, self._segment_size)
+ tail_size = self._segment_size - pad_size
+ segment = segment[:tail_size]
+ started_decrypt = time.time()
+ self._output.write_segment(segment)
+ if self._results:
+ elapsed = time.time() - started_decrypt
+ self._results.timings["cumulative_decrypt"] += elapsed
+ d.addCallback(_done)
+ return d
+
+ def _done(self, res):
+ self.log("download done")
+ if self._results:
+ now = time.time()
+ self._results.timings["total"] = now - self._started
+ self._results.timings["segments"] = now - self._started_fetching
+ self._output.close()
+ if self.check_crypttext_hash and self._crypttext_hash:
+ _assert(self._crypttext_hash == self._output.crypttext_hash,
+ "bad crypttext_hash: computed=%s, expected=%s" %
+ (base32.b2a(self._output.crypttext_hash),
+ base32.b2a(self._crypttext_hash)))
+ if self.check_plaintext_hash and self._plaintext_hash:
+ _assert(self._plaintext_hash == self._output.plaintext_hash,
+ "bad plaintext_hash: computed=%s, expected=%s" %
+ (base32.b2a(self._output.plaintext_hash),
+ base32.b2a(self._plaintext_hash)))
+ _assert(self._output.length == self._size,
+ got=self._output.length, expected=self._size)
+ return self._output.finish()
+
+ def get_download_status(self):
+ return self._status
+
+
+class LiteralDownloader:
+ def __init__(self, client, u, downloadable):
+ self._uri = IFileURI(u)
+ assert isinstance(self._uri, uri.LiteralFileURI)
+ self._downloadable = downloadable
+ self._status = s = DownloadStatus()
+ s.set_storage_index(None)
+ s.set_helper(False)
+ s.set_status("Done")
+ s.set_active(False)
+ s.set_progress(1.0)
+
+ def start(self):
+ data = self._uri.data
+ self._status.set_size(len(data))
+ self._downloadable.open(len(data))
+ self._downloadable.write(data)
+ self._downloadable.close()
+ return defer.maybeDeferred(self._downloadable.finish)
+
+ def get_download_status(self):
+ return self._status
+
+class FileName:
+ implements(IDownloadTarget)
+ def __init__(self, filename):
+ self._filename = filename
+ self.f = None
+ def open(self, size):
+ self.f = open(self._filename, "wb")
+ return self.f
+ def write(self, data):
+ self.f.write(data)
+ def close(self):
+ if self.f:
+ self.f.close()
+ def fail(self, why):
+ if self.f:
+ self.f.close()
+ os.unlink(self._filename)
+ def register_canceller(self, cb):
+ pass # we won't use it
+ def finish(self):
+ pass
+
+class Data:
+ implements(IDownloadTarget)
+ def __init__(self):
+ self._data = []
+ def open(self, size):
+ pass
+ def write(self, data):
+ self._data.append(data)
+ def close(self):
+ self.data = "".join(self._data)
+ del self._data
+ def fail(self, why):
+ del self._data
+ def register_canceller(self, cb):
+ pass # we won't use it
+ def finish(self):
+ return self.data
+
+class FileHandle:
+ """Use me to download data to a pre-defined filehandle-like object. I
+ will use the target's write() method. I will *not* close the filehandle:
+ I leave that up to the originator of the filehandle. The download process
+ will return the filehandle when it completes.
+ """
+ implements(IDownloadTarget)
+ def __init__(self, filehandle):
+ self._filehandle = filehandle
+ def open(self, size):
+ pass
+ def write(self, data):
+ self._filehandle.write(data)
+ def close(self):
+ # the originator of the filehandle reserves the right to close it
+ pass
+ def fail(self, why):
+ pass
+ def register_canceller(self, cb):
+ pass
+ def finish(self):
+ return self._filehandle
+
+class Downloader(service.MultiService):
+ """I am a service that allows file downloading.
+ """
+ implements(IDownloader)
+ name = "downloader"
+ MAX_DOWNLOAD_STATUSES = 10
+
+ def __init__(self, stats_provider=None):
+ service.MultiService.__init__(self)
+ self.stats_provider = stats_provider
+ self._all_downloads = weakref.WeakKeyDictionary() # for debugging
+ self._all_download_statuses = weakref.WeakKeyDictionary()
+ self._recent_download_statuses = []
+
+ def download(self, u, t):
+ assert self.parent
+ assert self.running
+ u = IFileURI(u)
+ t = IDownloadTarget(t)
+ assert t.write
+ assert t.close
+
+ if isinstance(u, uri.LiteralFileURI):
+ dl = LiteralDownloader(self.parent, u, t)
+ elif isinstance(u, uri.CHKFileURI):
+ if self.stats_provider:
+ # these counters are meant for network traffic, and don't
+ # include LIT files
+ self.stats_provider.count('downloader.files_downloaded', 1)
+ self.stats_provider.count('downloader.bytes_downloaded', u.get_size())
+ dl = FileDownloader(self.parent, u, t)
+ else:
+ raise RuntimeError("I don't know how to download a %s" % u)
+ self._add_download(dl)
+ d = dl.start()
+ return d
+
+ # utility functions
+ def download_to_data(self, uri):
+ return self.download(uri, Data())
+ def download_to_filename(self, uri, filename):
+ return self.download(uri, FileName(filename))
+ def download_to_filehandle(self, uri, filehandle):
+ return self.download(uri, FileHandle(filehandle))
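+    # An illustrative use of these helpers (the names below are examples
+    # only, not part of this module):
+    #   downloader = client.getServiceNamed("downloader")
+    #   d = downloader.download_to_filename(filecap, "copy.txt")
+    #   # ... d fires once the file has been written to disk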
+
+ def _add_download(self, downloader):
+ self._all_downloads[downloader] = None
+ s = downloader.get_download_status()
+ self._all_download_statuses[s] = None
+ self._recent_download_statuses.append(s)
+ while len(self._recent_download_statuses) > self.MAX_DOWNLOAD_STATUSES:
+ self._recent_download_statuses.pop(0)
+
+ def list_all_download_statuses(self):
+ for ds in self._all_download_statuses:
+ yield ds
--- /dev/null
+# -*- test-case-name: allmydata.test.test_encode -*-
+
+import time
+from zope.interface import implements
+from twisted.internet import defer
+from foolscap import eventual
+from allmydata import storage, uri
+from allmydata.hashtree import HashTree
+from allmydata.util import mathutil, hashutil, base32, log
+from allmydata.util.assertutil import _assert, precondition
+from allmydata.codec import CRSEncoder
+from allmydata.interfaces import IEncoder, IStorageBucketWriter, \
+ IEncryptedUploadable, IUploadStatus
+
+"""
+The goal of the encoder is to turn the original file into a series of
+'shares'. Each share is going to a 'shareholder' (nominally each shareholder
+is a different host, but for small grids there may be overlap). The number
+of shares is chosen to hit our reliability goals (more shares on more
+machines means more reliability), and is limited by overhead (proportional to
+numshares or log(numshares)) and the encoding technology in use (zfec permits
+only 256 shares total). It is also constrained by the amount of data
+we want to send to each host. For estimating purposes, think of 10 shares
+out of which we need 3 to reconstruct the file.
+
+The encoder starts by cutting the original file into segments. All segments
+except the last are of equal size. The segment size is chosen to constrain
+the memory footprint (which will probably vary between 1x and 4x segment
+size) and to constrain the overhead (which will be proportional to
+log(number of segments)).
+
+
+Each segment (A,B,C) is read into memory, encrypted, and encoded into
+blocks. The 'share' (say, share #1) that makes it out to a host is a
+collection of these blocks (block A1, B1, C1), plus some hash-tree
+information necessary to validate the data upon retrieval. Only one segment
+is handled at a time: all blocks for segment A are delivered before any
+work is begun on segment B.
+
+As blocks are created, we retain the hash of each one. The list of block hashes
+for a single share (say, hash(A1), hash(B1), hash(C1)) is used to form the base
+of a Merkle hash tree for that share, called the block hash tree.
+
+This hash tree has one terminal leaf per block. The complete block hash
+tree is sent to the shareholder after all the data has been sent. At
+retrieval time, the decoder will ask for whichever pieces of this tree it
+needs to validate a given block before asking for that block.
+
+(Note: we don't really need to generate this whole block hash tree
+ourselves. It would be sufficient to have the shareholder generate it and
+just tell us the root. This gives us an extra level of validation on the
+transfer, though, and it is relatively cheap to compute.)
+
+Each of these block hash trees has a root hash. The collection of these
+root hashes for all shares are collected into the 'share hash tree', which
+has one terminal leaf per share. After sending the blocks and the complete
+block hash tree to each shareholder, we send them the portion of the share
+hash tree that is necessary to validate their share. The root of the share
+hash tree is put into the URI.
+
+"""
+
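+# A rough sketch of the 3-of-10 example above (illustrative only; the real
+# upload path is driven by Encoder.start() below, and the variable names
+# here are placeholders):
+#
+#   enc = CRSEncoder()
+#   enc.set_params(segment_size, 3, 10)  # k=3 needed out of n=10 shares
+#   d = enc.encode(three_equal_pieces)   # fires with (blocks, shareids)
+#   # block i of each segment is appended to share i; hash(block) becomes
+#   # a leaf of share i's block hash tree, and the ten block-hash-tree
+#   # roots become the leaves of the single share hash tree whose root is
+#   # recorded in the URI extension.
+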
+class NotEnoughSharesError(Exception):
+ servermap = None
+
+class UploadAborted(Exception):
+ pass
+
+KiB=1024
+MiB=1024*KiB
+GiB=1024*MiB
+TiB=1024*GiB
+PiB=1024*TiB
+
+class Encoder(object):
+ implements(IEncoder)
+ USE_PLAINTEXT_HASHES = False
+
+ def __init__(self, log_parent=None, upload_status=None):
+ object.__init__(self)
+ self.uri_extension_data = {}
+ self._codec = None
+ self._status = None
+ if upload_status:
+ self._status = IUploadStatus(upload_status)
+ precondition(log_parent is None or isinstance(log_parent, int),
+ log_parent)
+ self._log_number = log.msg("creating Encoder %s" % self,
+ facility="tahoe.encoder", parent=log_parent)
+ self._aborted = False
+
+ def __repr__(self):
+ if hasattr(self, "_storage_index"):
+ return "<Encoder for %s>" % storage.si_b2a(self._storage_index)[:5]
+ return "<Encoder for unknown storage index>"
+
+ def log(self, *args, **kwargs):
+ if "parent" not in kwargs:
+ kwargs["parent"] = self._log_number
+ if "facility" not in kwargs:
+ kwargs["facility"] = "tahoe.encoder"
+ return log.msg(*args, **kwargs)
+
+ def set_encrypted_uploadable(self, uploadable):
+ eu = self._uploadable = IEncryptedUploadable(uploadable)
+ d = eu.get_size()
+ def _got_size(size):
+ self.log(format="file size: %(size)d", size=size)
+ self.file_size = size
+ d.addCallback(_got_size)
+ d.addCallback(lambda res: eu.get_all_encoding_parameters())
+ d.addCallback(self._got_all_encoding_parameters)
+ d.addCallback(lambda res: eu.get_storage_index())
+ def _done(storage_index):
+ self._storage_index = storage_index
+ return self
+ d.addCallback(_done)
+ return d
+
+ def _got_all_encoding_parameters(self, params):
+ assert not self._codec
+ k, happy, n, segsize = params
+ self.required_shares = k
+ self.shares_of_happiness = happy
+ self.num_shares = n
+ self.segment_size = segsize
+ self.log("got encoding parameters: %d/%d/%d %d" % (k,happy,n, segsize))
+ self.log("now setting up codec")
+
+ assert self.segment_size % self.required_shares == 0
+
+ self.num_segments = mathutil.div_ceil(self.file_size,
+ self.segment_size)
+
+ self._codec = CRSEncoder()
+ self._codec.set_params(self.segment_size,
+ self.required_shares, self.num_shares)
+
+ data = self.uri_extension_data
+ data['codec_name'] = self._codec.get_encoder_type()
+ data['codec_params'] = self._codec.get_serialized_params()
+
+ data['size'] = self.file_size
+ data['segment_size'] = self.segment_size
+ self.share_size = mathutil.div_ceil(self.file_size,
+ self.required_shares)
+ data['num_segments'] = self.num_segments
+ data['needed_shares'] = self.required_shares
+ data['total_shares'] = self.num_shares
+
+ # the "tail" is the last segment. This segment may or may not be
+ # shorter than all other segments. We use the "tail codec" to handle
+ # it. If the tail is short, we use a different codec instance. In
+ # addition, the tail codec must be fed data which has been padded out
+ # to the right size.
+ self.tail_size = self.file_size % self.segment_size
+ if not self.tail_size:
+ self.tail_size = self.segment_size
+
+ # the tail codec is responsible for encoding tail_size bytes
+ padded_tail_size = mathutil.next_multiple(self.tail_size,
+ self.required_shares)
+ self._tail_codec = CRSEncoder()
+ self._tail_codec.set_params(padded_tail_size,
+ self.required_shares, self.num_shares)
+ data['tail_codec_params'] = self._tail_codec.get_serialized_params()
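+        # Worked example (illustrative numbers only): with segment_size=12,
+        # required_shares=3, and file_size=32 we get num_segments=3,
+        # tail_size = 32 % 12 = 8, and padded_tail_size = 9, so the tail
+        # codec encodes 8 real bytes plus one zero pad byte.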
+
+ def _get_share_size(self):
+ share_size = mathutil.div_ceil(self.file_size, self.required_shares)
+ overhead = self._compute_overhead()
+ return share_size + overhead
+
+ def _compute_overhead(self):
+ return 0
+
+ def get_param(self, name):
+ assert self._codec
+
+ if name == "storage_index":
+ return self._storage_index
+ elif name == "share_counts":
+ return (self.required_shares, self.shares_of_happiness,
+ self.num_shares)
+ elif name == "num_segments":
+ return self.num_segments
+ elif name == "segment_size":
+ return self.segment_size
+ elif name == "block_size":
+ return self._codec.get_block_size()
+ elif name == "share_size":
+ return self._get_share_size()
+ elif name == "serialized_params":
+ return self._codec.get_serialized_params()
+ else:
+ raise KeyError("unknown parameter name '%s'" % name)
+
+ def set_shareholders(self, landlords):
+ assert isinstance(landlords, dict)
+ for k in landlords:
+ assert IStorageBucketWriter.providedBy(landlords[k])
+ self.landlords = landlords.copy()
+
+ def start(self):
+ self.log("%s starting" % (self,))
+ #paddedsize = self._size + mathutil.pad_size(self._size, self.needed_shares)
+ assert self._codec
+ self._crypttext_hasher = hashutil.crypttext_hasher()
+ self._crypttext_hashes = []
+ self.segment_num = 0
+ self.subshare_hashes = [[] for x in range(self.num_shares)]
+        # subshare_hashes[i] is a list that will be accumulated and then sent
+ # to landlord[i]. This list contains a hash of each segment_share
+ # that we sent to that landlord.
+ self.share_root_hashes = [None] * self.num_shares
+
+ self._times = {
+ "cumulative_encoding": 0.0,
+ "cumulative_sending": 0.0,
+ "hashes_and_close": 0.0,
+ "total_encode_and_push": 0.0,
+ }
+ self._start_total_timestamp = time.time()
+
+ d = eventual.fireEventually()
+
+ d.addCallback(lambda res: self.start_all_shareholders())
+
+ for i in range(self.num_segments-1):
+ # note to self: this form doesn't work, because lambda only
+ # captures the slot, not the value
+ #d.addCallback(lambda res: self.do_segment(i))
+ # use this form instead:
+ d.addCallback(lambda res, i=i: self._encode_segment(i))
+ d.addCallback(self._send_segment, i)
+ d.addCallback(self._turn_barrier)
+ last_segnum = self.num_segments - 1
+ d.addCallback(lambda res: self._encode_tail_segment(last_segnum))
+ d.addCallback(self._send_segment, last_segnum)
+ d.addCallback(self._turn_barrier)
+
+ d.addCallback(lambda res: self.finish_hashing())
+
+ if self.USE_PLAINTEXT_HASHES:
+ d.addCallback(lambda res:
+ self.send_plaintext_hash_tree_to_all_shareholders())
+ d.addCallback(lambda res:
+ self.send_crypttext_hash_tree_to_all_shareholders())
+ d.addCallback(lambda res: self.send_all_subshare_hash_trees())
+ d.addCallback(lambda res: self.send_all_share_hash_trees())
+ d.addCallback(lambda res: self.send_uri_extension_to_all_shareholders())
+
+ d.addCallback(lambda res: self.close_all_shareholders())
+ d.addCallbacks(self.done, self.err)
+ return d
+
+ def set_status(self, status):
+ if self._status:
+ self._status.set_status(status)
+
+ def set_encode_and_push_progress(self, sent_segments=None, extra=0.0):
+ if self._status:
+ # we treat the final hash+close as an extra segment
+ if sent_segments is None:
+ sent_segments = self.num_segments
+ progress = float(sent_segments + extra) / (self.num_segments + 1)
+ self._status.set_progress(2, progress)
+
+ def abort(self):
+ self.log("aborting upload", level=log.UNUSUAL)
+ assert self._codec, "don't call abort before start"
+ self._aborted = True
+ # the next segment read (in _gather_data inside _encode_segment) will
+ # raise UploadAborted(), which will bypass the rest of the upload
+ # chain. If we've sent the final segment's shares, it's too late to
+ # abort. TODO: allow abort any time up to close_all_shareholders.
+
+ def _turn_barrier(self, res):
+ # putting this method in a Deferred chain imposes a guaranteed
+ # reactor turn between the pre- and post- portions of that chain.
+ # This can be useful to limit memory consumption: since Deferreds do
+ # not do tail recursion, code which uses defer.succeed(result) for
+ # consistency will cause objects to live for longer than you might
+ # normally expect.
+
+ return eventual.fireEventually(res)
+
+
+ def start_all_shareholders(self):
+ self.log("starting shareholders", level=log.NOISY)
+ self.set_status("Starting shareholders")
+ dl = []
+ for shareid in self.landlords:
+ d = self.landlords[shareid].start()
+ d.addErrback(self._remove_shareholder, shareid, "start")
+ dl.append(d)
+ return self._gather_responses(dl)
+
+ def _encode_segment(self, segnum):
+ codec = self._codec
+ start = time.time()
+
+ # the ICodecEncoder API wants to receive a total of self.segment_size
+ # bytes on each encode() call, broken up into a number of
+ # identically-sized pieces. Due to the way the codec algorithm works,
+ # these pieces need to be the same size as the share which the codec
+ # will generate. Therefore we must feed it with input_piece_size that
+ # equals the output share size.
+ input_piece_size = codec.get_block_size()
+
+ # as a result, the number of input pieces per encode() call will be
+ # equal to the number of required shares with which the codec was
+ # constructed. You can think of the codec as chopping up a
+ # 'segment_size' of data into 'required_shares' shares (not doing any
+ # fancy math at all, just doing a split), then creating some number
+ # of additional shares which can be substituted if the primary ones
+ # are unavailable
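+        #
+        # For instance (illustrative numbers): with required_shares=3 and
+        # segment_size=300, get_block_size() returns 100, so encode() is fed
+        # three 100-byte pieces and emits num_shares blocks of 100 bytes each.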
+
+ crypttext_segment_hasher = hashutil.crypttext_segment_hasher()
+
+ # memory footprint: we only hold a tiny piece of the plaintext at any
+        # given time. We build up a segment's worth of crypttext, then hand
+ # it to the encoder. Assuming 3-of-10 encoding (3.3x expansion) and
+ # 1MiB max_segment_size, we get a peak memory footprint of 4.3*1MiB =
+ # 4.3MiB. Lowering max_segment_size to, say, 100KiB would drop the
+ # footprint to 430KiB at the expense of more hash-tree overhead.
+
+ d = self._gather_data(self.required_shares, input_piece_size,
+ crypttext_segment_hasher)
+ def _done_gathering(chunks):
+ for c in chunks:
+ assert len(c) == input_piece_size
+ self._crypttext_hashes.append(crypttext_segment_hasher.digest())
+ # during this call, we hit 5*segsize memory
+ return codec.encode(chunks)
+ d.addCallback(_done_gathering)
+ def _done(res):
+ elapsed = time.time() - start
+ self._times["cumulative_encoding"] += elapsed
+ return res
+ d.addCallback(_done)
+ return d
+
+ def _encode_tail_segment(self, segnum):
+
+ start = time.time()
+ codec = self._tail_codec
+ input_piece_size = codec.get_block_size()
+
+ crypttext_segment_hasher = hashutil.crypttext_segment_hasher()
+
+ d = self._gather_data(self.required_shares, input_piece_size,
+ crypttext_segment_hasher,
+ allow_short=True)
+ def _done_gathering(chunks):
+ for c in chunks:
+ # a short trailing chunk will have been padded by
+ # _gather_data
+ assert len(c) == input_piece_size
+ self._crypttext_hashes.append(crypttext_segment_hasher.digest())
+ return codec.encode(chunks)
+ d.addCallback(_done_gathering)
+ def _done(res):
+ elapsed = time.time() - start
+ self._times["cumulative_encoding"] += elapsed
+ return res
+ d.addCallback(_done)
+ return d
+
+ def _gather_data(self, num_chunks, input_chunk_size,
+ crypttext_segment_hasher,
+ allow_short=False,
+ previous_chunks=[]):
+ """Return a Deferred that will fire when the required number of
+ chunks have been read (and hashed and encrypted). The Deferred fires
+ with the combination of any 'previous_chunks' and the new chunks
+ which were gathered."""
+
+ if self._aborted:
+ raise UploadAborted()
+
+ if not num_chunks:
+ return defer.succeed(previous_chunks)
+
+ d = self._uploadable.read_encrypted(input_chunk_size, False)
+ def _got(data):
+ if self._aborted:
+ raise UploadAborted()
+ encrypted_pieces = []
+ length = 0
+ while data:
+ encrypted_piece = data.pop(0)
+ length += len(encrypted_piece)
+ crypttext_segment_hasher.update(encrypted_piece)
+ self._crypttext_hasher.update(encrypted_piece)
+ encrypted_pieces.append(encrypted_piece)
+
+ if allow_short:
+ if length < input_chunk_size:
+ # padding
+ pad_size = input_chunk_size - length
+ encrypted_pieces.append('\x00' * pad_size)
+ else:
+ # non-tail segments should be the full segment size
+ if length != input_chunk_size:
+ log.msg("non-tail segment should be full segment size: %d!=%d"
+ % (length, input_chunk_size), level=log.BAD)
+ precondition(length == input_chunk_size,
+ "length=%d != input_chunk_size=%d" %
+ (length, input_chunk_size))
+
+ encrypted_piece = "".join(encrypted_pieces)
+ return previous_chunks + [encrypted_piece]
+
+ d.addCallback(_got)
+ d.addCallback(lambda chunks:
+ self._gather_data(num_chunks-1, input_chunk_size,
+ crypttext_segment_hasher,
+ allow_short, chunks))
+ return d
+
+ def _send_segment(self, (shares, shareids), segnum):
+ # To generate the URI, we must generate the roothash, so we must
+ # generate all shares, even if we aren't actually giving them to
+ # anybody. This means that the set of shares we create will be equal
+ # to or larger than the set of landlords. If we have any landlord who
+ # *doesn't* have a share, that's an error.
+ _assert(set(self.landlords.keys()).issubset(set(shareids)),
+ shareids=shareids, landlords=self.landlords)
+ start = time.time()
+ dl = []
+ self.set_status("Sending segment %d of %d" % (segnum+1,
+ self.num_segments))
+ self.set_encode_and_push_progress(segnum)
+ lognum = self.log("send_segment(%d)" % segnum, level=log.NOISY)
+ for i in range(len(shares)):
+ subshare = shares[i]
+ shareid = shareids[i]
+ d = self.send_subshare(shareid, segnum, subshare, lognum)
+ dl.append(d)
+ subshare_hash = hashutil.block_hash(subshare)
+ #from allmydata.util import base32
+ #log.msg("creating block (shareid=%d, blocknum=%d) "
+ # "len=%d %r .. %r: %s" %
+ # (shareid, segnum, len(subshare),
+ # subshare[:50], subshare[-50:], base32.b2a(subshare_hash)))
+ self.subshare_hashes[shareid].append(subshare_hash)
+
+ dl = self._gather_responses(dl)
+ def _logit(res):
+ self.log("%s uploaded %s / %s bytes (%d%%) of your file." %
+ (self,
+ self.segment_size*(segnum+1),
+ self.segment_size*self.num_segments,
+ 100 * (segnum+1) / self.num_segments,
+ ),
+ level=log.OPERATIONAL)
+ elapsed = time.time() - start
+ self._times["cumulative_sending"] += elapsed
+ return res
+ dl.addCallback(_logit)
+ return dl
+
+ def send_subshare(self, shareid, segment_num, subshare, lognum):
+ if shareid not in self.landlords:
+ return defer.succeed(None)
+ sh = self.landlords[shareid]
+ lognum2 = self.log("put_block to %s" % self.landlords[shareid],
+ parent=lognum, level=log.NOISY)
+ d = sh.put_block(segment_num, subshare)
+ def _done(res):
+ self.log("put_block done", parent=lognum2, level=log.NOISY)
+ return res
+ d.addCallback(_done)
+ d.addErrback(self._remove_shareholder, shareid,
+ "segnum=%d" % segment_num)
+ return d
+
+ def _remove_shareholder(self, why, shareid, where):
+ ln = self.log(format="error while sending %(method)s to shareholder=%(shnum)d",
+ method=where, shnum=shareid,
+ level=log.UNUSUAL, failure=why)
+ if shareid in self.landlords:
+ self.landlords[shareid].abort()
+ del self.landlords[shareid]
+ else:
+ # even more UNUSUAL
+ self.log("they weren't in our list of landlords", parent=ln,
+ level=log.WEIRD)
+ if len(self.landlords) < self.shares_of_happiness:
+ msg = "lost too many shareholders during upload: %s" % why
+ raise NotEnoughSharesError(msg)
+ self.log("but we can still continue with %s shares, we'll be happy "
+ "with at least %s" % (len(self.landlords),
+ self.shares_of_happiness),
+ parent=ln)
+
+ def _gather_responses(self, dl):
+ d = defer.DeferredList(dl, fireOnOneErrback=True)
+ def _eatNotEnoughSharesError(f):
+ # all exceptions that occur while talking to a peer are handled
+ # in _remove_shareholder. That might raise NotEnoughSharesError,
+ # which will cause the DeferredList to errback but which should
+ # otherwise be consumed. Allow non-NotEnoughSharesError exceptions
+ # to pass through as an unhandled errback. We use this in lieu of
+ # consumeErrors=True to allow coding errors to be logged.
+ f.trap(NotEnoughSharesError)
+ return None
+ for d0 in dl:
+ d0.addErrback(_eatNotEnoughSharesError)
+ return d
+
+ def finish_hashing(self):
+ self._start_hashing_and_close_timestamp = time.time()
+ self.set_status("Finishing hashes")
+ self.set_encode_and_push_progress(extra=0.0)
+ crypttext_hash = self._crypttext_hasher.digest()
+ self.uri_extension_data["crypttext_hash"] = crypttext_hash
+ d = self._uploadable.get_plaintext_hash()
+ def _got(plaintext_hash):
+ self.log(format="plaintext_hash=%(plaintext_hash)s, SI=%(SI)s, size=%(size)d",
+ plaintext_hash=base32.b2a(plaintext_hash),
+ SI=storage.si_b2a(self._storage_index),
+ size=self.file_size)
+ return plaintext_hash
+ d.addCallback(_got)
+ if self.USE_PLAINTEXT_HASHES:
+ def _use_plaintext_hash(plaintext_hash):
+ self.uri_extension_data["plaintext_hash"] = plaintext_hash
+ return self._uploadable.get_plaintext_hashtree_leaves(0, self.num_segments, self.num_segments)
+ d.addCallback(_use_plaintext_hash)
+ def _got_hashtree_leaves(leaves):
+ self.log("Encoder: got plaintext_hashtree_leaves: %s" %
+ (",".join([base32.b2a(h) for h in leaves]),),
+ level=log.NOISY)
+ ht = list(HashTree(list(leaves)))
+ self.uri_extension_data["plaintext_root_hash"] = ht[0]
+ self._plaintext_hashtree_nodes = ht
+ d.addCallback(_got_hashtree_leaves)
+
+ d.addCallback(lambda res: self._uploadable.close())
+ return d
+
+ def send_plaintext_hash_tree_to_all_shareholders(self):
+ self.log("sending plaintext hash tree", level=log.NOISY)
+ self.set_status("Sending Plaintext Hash Tree")
+ self.set_encode_and_push_progress(extra=0.2)
+ dl = []
+ for shareid in self.landlords.keys():
+ d = self.send_plaintext_hash_tree(shareid,
+ self._plaintext_hashtree_nodes)
+ dl.append(d)
+ return self._gather_responses(dl)
+
+ def send_plaintext_hash_tree(self, shareid, all_hashes):
+ if shareid not in self.landlords:
+ return defer.succeed(None)
+ sh = self.landlords[shareid]
+ d = sh.put_plaintext_hashes(all_hashes)
+ d.addErrback(self._remove_shareholder, shareid, "put_plaintext_hashes")
+ return d
+
+ def send_crypttext_hash_tree_to_all_shareholders(self):
+ self.log("sending crypttext hash tree", level=log.NOISY)
+ self.set_status("Sending Crypttext Hash Tree")
+ self.set_encode_and_push_progress(extra=0.3)
+ t = HashTree(self._crypttext_hashes)
+ all_hashes = list(t)
+ self.uri_extension_data["crypttext_root_hash"] = t[0]
+ dl = []
+ for shareid in self.landlords.keys():
+ dl.append(self.send_crypttext_hash_tree(shareid, all_hashes))
+ return self._gather_responses(dl)
+
+ def send_crypttext_hash_tree(self, shareid, all_hashes):
+ if shareid not in self.landlords:
+ return defer.succeed(None)
+ sh = self.landlords[shareid]
+ d = sh.put_crypttext_hashes(all_hashes)
+ d.addErrback(self._remove_shareholder, shareid, "put_crypttext_hashes")
+ return d
+
+ def send_all_subshare_hash_trees(self):
+ self.log("sending subshare hash trees", level=log.NOISY)
+ self.set_status("Sending Subshare Hash Trees")
+ self.set_encode_and_push_progress(extra=0.4)
+ dl = []
+ for shareid,hashes in enumerate(self.subshare_hashes):
+ # hashes is a list of the hashes of all subshares that were sent
+ # to shareholder[shareid].
+ dl.append(self.send_one_subshare_hash_tree(shareid, hashes))
+ return self._gather_responses(dl)
+
+ def send_one_subshare_hash_tree(self, shareid, subshare_hashes):
+ t = HashTree(subshare_hashes)
+ all_hashes = list(t)
+ # all_hashes[0] is the root hash, == hash(ah[1]+ah[2])
+ # all_hashes[1] is the left child, == hash(ah[3]+ah[4])
+ # all_hashes[n] == hash(all_hashes[2*n+1] + all_hashes[2*n+2])
+ self.share_root_hashes[shareid] = t[0]
+ if shareid not in self.landlords:
+ return defer.succeed(None)
+ sh = self.landlords[shareid]
+ d = sh.put_block_hashes(all_hashes)
+ d.addErrback(self._remove_shareholder, shareid, "put_block_hashes")
+ return d
+
+ def send_all_share_hash_trees(self):
+ # each bucket gets a set of share hash tree nodes that are needed to
+ # validate their share. This includes the share hash itself, but does
+ # not include the top-level hash root (which is stored securely in
+ # the URI instead).
+ self.log("sending all share hash trees", level=log.NOISY)
+ self.set_status("Sending Share Hash Trees")
+ self.set_encode_and_push_progress(extra=0.6)
+ dl = []
+ for h in self.share_root_hashes:
+ assert h
+ # create the share hash tree
+ t = HashTree(self.share_root_hashes)
+ # the root of this hash tree goes into our URI
+ self.uri_extension_data['share_root_hash'] = t[0]
+ # now send just the necessary pieces out to each shareholder
+ for i in range(self.num_shares):
+            # the HashTree is given a list of leaves: 0,1,2,..n-1 . These
+            # become nodes A+0,A+1,A+2.. of the tree, where A=N-1 and N is
+            # the number of leaves after padding up to a power of two
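+            # (for example, with four shares the padded tree has seven
+            # nodes and the leaves sit at indices 3..6, so A = 3)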
+ needed_hash_indices = t.needed_hashes(i, include_leaf=True)
+ hashes = [(hi, t[hi]) for hi in needed_hash_indices]
+ dl.append(self.send_one_share_hash_tree(i, hashes))
+ return self._gather_responses(dl)
+
+ def send_one_share_hash_tree(self, shareid, needed_hashes):
+ if shareid not in self.landlords:
+ return defer.succeed(None)
+ sh = self.landlords[shareid]
+ d = sh.put_share_hashes(needed_hashes)
+ d.addErrback(self._remove_shareholder, shareid, "put_share_hashes")
+ return d
+
+ def send_uri_extension_to_all_shareholders(self):
+ lp = self.log("sending uri_extension", level=log.NOISY)
+ self.set_status("Sending URI Extensions")
+ self.set_encode_and_push_progress(extra=0.8)
+ for k in ('crypttext_root_hash', 'crypttext_hash',
+ ):
+ assert k in self.uri_extension_data
+ if self.USE_PLAINTEXT_HASHES:
+ for k in ('plaintext_root_hash', 'plaintext_hash',
+ ):
+ assert k in self.uri_extension_data
+ uri_extension = uri.pack_extension(self.uri_extension_data)
+ ed = {}
+ for k,v in self.uri_extension_data.items():
+ if k.endswith("hash"):
+ ed[k] = base32.b2a(v)
+ else:
+ ed[k] = v
+ self.log("uri_extension_data is %s" % (ed,), level=log.NOISY, parent=lp)
+ self.uri_extension_hash = hashutil.uri_extension_hash(uri_extension)
+ dl = []
+ for shareid in self.landlords.keys():
+ dl.append(self.send_uri_extension(shareid, uri_extension))
+ return self._gather_responses(dl)
+
+ def send_uri_extension(self, shareid, uri_extension):
+ sh = self.landlords[shareid]
+ d = sh.put_uri_extension(uri_extension)
+ d.addErrback(self._remove_shareholder, shareid, "put_uri_extension")
+ return d
+
+ def close_all_shareholders(self):
+ self.log("closing shareholders", level=log.NOISY)
+ self.set_status("Closing Shareholders")
+ self.set_encode_and_push_progress(extra=0.9)
+ dl = []
+ for shareid in self.landlords:
+ d = self.landlords[shareid].close()
+ d.addErrback(self._remove_shareholder, shareid, "close")
+ dl.append(d)
+ return self._gather_responses(dl)
+
+ def done(self, res):
+ self.log("upload done", level=log.OPERATIONAL)
+ self.set_status("Done")
+ self.set_encode_and_push_progress(extra=1.0) # done
+ now = time.time()
+ h_and_c_elapsed = now - self._start_hashing_and_close_timestamp
+ self._times["hashes_and_close"] = h_and_c_elapsed
+ total_elapsed = now - self._start_total_timestamp
+ self._times["total_encode_and_push"] = total_elapsed
+
+ # update our sharemap
+ self._shares_placed = set(self.landlords.keys())
+ return (self.uri_extension_hash, self.required_shares,
+ self.num_shares, self.file_size)
+
+ def err(self, f):
+ self.log("upload failed", failure=f, level=log.UNUSUAL)
+ self.set_status("Failed")
+ # we need to abort any remaining shareholders, so they'll delete the
+ # partial share, allowing someone else to upload it again.
+ self.log("aborting shareholders", level=log.UNUSUAL)
+ for shareid in list(self.landlords.keys()):
+ self.landlords[shareid].abort()
+ if f.check(defer.FirstError):
+ return f.value.subFailure
+ return f
+
+ def get_shares_placed(self):
+ # return a set of share numbers that were successfully placed.
+ return self._shares_placed
+
+ def get_times(self):
+ # return a dictionary of encode+push timings
+ return self._times
+
+ def get_uri_extension_data(self):
+ return self.uri_extension_data
--- /dev/null
+
+from zope.interface import implements
+from twisted.internet import defer
+from allmydata.interfaces import IFileNode, IFileURI, IURI, ICheckable
+from allmydata import uri
+from allmydata.immutable.checker import Results, \
+ SimpleCHKFileChecker, SimpleCHKFileVerifier
+
+class FileNode:
+ implements(IFileNode, ICheckable)
+
+ def __init__(self, uri, client):
+ u = IFileURI(uri)
+ self.uri = u.to_string()
+ self._client = client
+
+ def get_uri(self):
+ return self.uri
+
+ def is_mutable(self):
+ return False
+
+ def is_readonly(self):
+ return True
+
+ def get_readonly_uri(self):
+ return self.uri
+
+ def get_size(self):
+ return IFileURI(self.uri).get_size()
+
+ def __hash__(self):
+ return hash((self.__class__, self.uri))
+ def __cmp__(self, them):
+ if cmp(type(self), type(them)):
+ return cmp(type(self), type(them))
+ if cmp(self.__class__, them.__class__):
+ return cmp(self.__class__, them.__class__)
+ return cmp(self.uri, them.uri)
+
+ def get_verifier(self):
+ return IFileURI(self.uri).get_verifier()
+
+ def check(self, verify=False, repair=False):
+ assert repair is False # not implemented yet
+ vcap = self.get_verifier()
+ if verify:
+ v = SimpleCHKFileVerifier(self._client, vcap)
+ return v.start()
+ else:
+ peer_getter = self._client.get_permuted_peers
+ v = SimpleCHKFileChecker(peer_getter, vcap)
+ return v.check()
+
+ def download(self, target):
+ downloader = self._client.getServiceNamed("downloader")
+ return downloader.download(self.uri, target)
+
+ def download_to_data(self):
+ downloader = self._client.getServiceNamed("downloader")
+ return downloader.download_to_data(self.uri)
+
+
+
+class LiteralFileNode:
+ implements(IFileNode, ICheckable)
+
+ def __init__(self, my_uri, client):
+ u = IFileURI(my_uri)
+ assert isinstance(u, uri.LiteralFileURI)
+ self.uri = u.to_string()
+ self._client = client
+
+ def get_uri(self):
+ return self.uri
+
+ def is_mutable(self):
+ return False
+
+ def is_readonly(self):
+ return True
+
+ def get_readonly_uri(self):
+ return self.uri
+
+ def get_size(self):
+ return len(IURI(self.uri).data)
+
+ def __hash__(self):
+ return hash((self.__class__, self.uri))
+ def __cmp__(self, them):
+ if cmp(type(self), type(them)):
+ return cmp(type(self), type(them))
+ if cmp(self.__class__, them.__class__):
+ return cmp(self.__class__, them.__class__)
+ return cmp(self.uri, them.uri)
+
+ def get_verifier(self):
+ return None
+
+ def check(self, verify=False, repair=False):
+ # neither verify= nor repair= affect LIT files
+ r = Results(None)
+ r.healthy = True
+ r.problems = []
+ return defer.succeed(r)
+
+ def download(self, target):
+ # note that this does not update the stats_provider
+ data = IURI(self.uri).data
+ target.open(len(data))
+ target.write(data)
+ target.close()
+ return defer.maybeDeferred(target.finish)
+
+ def download_to_data(self):
+ data = IURI(self.uri).data
+ return defer.succeed(data)
--- /dev/null
+
+import os, time, weakref, itertools
+from zope.interface import implements
+from twisted.python import failure
+from twisted.internet import defer
+from twisted.application import service
+from foolscap import Referenceable, Copyable, RemoteCopy
+from foolscap import eventual
+from foolscap.logging import log
+
+from allmydata.util.hashutil import file_renewal_secret_hash, \
+ file_cancel_secret_hash, bucket_renewal_secret_hash, \
+ bucket_cancel_secret_hash, plaintext_hasher, \
+ storage_index_hash, plaintext_segment_hasher, convergence_hasher
+from allmydata import storage, hashtree, uri
+from allmydata.immutable import encode
+from allmydata.util import base32, idlib, mathutil
+from allmydata.util.assertutil import precondition
+from allmydata.interfaces import IUploadable, IUploader, IUploadResults, \
+ IEncryptedUploadable, RIEncryptedUploadable, IUploadStatus
+from pycryptopp.cipher.aes import AES
+
+from cStringIO import StringIO
+
+
+KiB=1024
+MiB=1024*KiB
+GiB=1024*MiB
+TiB=1024*GiB
+PiB=1024*TiB
+
+class HaveAllPeersError(Exception):
+ # we use this to jump out of the loop
+ pass
+
+# this wants to live in storage, not here
+class TooFullError(Exception):
+ pass
+
+class UploadResults(Copyable, RemoteCopy):
+ implements(IUploadResults)
+ # note: don't change this string, it needs to match the value used on the
+ # helper, and it does *not* need to match the fully-qualified
+ # package/module/class name
+ typeToCopy = "allmydata.upload.UploadResults.tahoe.allmydata.com"
+ copytype = typeToCopy
+
+ def __init__(self):
+ self.timings = {} # dict of name to number of seconds
+ self.sharemap = {} # dict of shnum to placement string
+ self.servermap = {} # dict of peerid to set(shnums)
+ self.file_size = None
+ self.ciphertext_fetched = None # how much the helper fetched
+ self.uri = None
+ self.preexisting_shares = None # count of shares already present
+ self.pushed_shares = None # count of shares we pushed
+
+
+# our current uri_extension is 846 bytes for small files, a few bytes
+# more for larger ones (since the filesize is encoded in decimal in a
+# few places). Ask for a little bit more just in case we need it. If
+# the extension changes size, we can change EXTENSION_SIZE to
+# allocate a more accurate amount of space.
+EXTENSION_SIZE = 1000
+# TODO: actual extensions are closer to 419 bytes, so we can probably lower
+# this.
+
+class PeerTracker:
+ def __init__(self, peerid, storage_server,
+ sharesize, blocksize, num_segments, num_share_hashes,
+ storage_index,
+ bucket_renewal_secret, bucket_cancel_secret):
+ precondition(isinstance(peerid, str), peerid)
+ precondition(len(peerid) == 20, peerid)
+ self.peerid = peerid
+ self._storageserver = storage_server # to an RIStorageServer
+ self.buckets = {} # k: shareid, v: IRemoteBucketWriter
+ self.sharesize = sharesize
+        self.allocated_size = storage.allocated_size(sharesize,
+                                                     num_segments,
+                                                     num_share_hashes,
+                                                     EXTENSION_SIZE)
+
+ self.blocksize = blocksize
+ self.num_segments = num_segments
+ self.num_share_hashes = num_share_hashes
+ self.storage_index = storage_index
+
+ self.renew_secret = bucket_renewal_secret
+ self.cancel_secret = bucket_cancel_secret
+
+ def __repr__(self):
+ return ("<PeerTracker for peer %s and SI %s>"
+ % (idlib.shortnodeid_b2a(self.peerid),
+ storage.si_b2a(self.storage_index)[:5]))
+
+ def query(self, sharenums):
+ d = self._storageserver.callRemote("allocate_buckets",
+ self.storage_index,
+ self.renew_secret,
+ self.cancel_secret,
+ sharenums,
+ self.allocated_size,
+ canary=Referenceable())
+ d.addCallback(self._got_reply)
+ return d
+
+ def _got_reply(self, (alreadygot, buckets)):
+ #log.msg("%s._got_reply(%s)" % (self, (alreadygot, buckets)))
+ b = {}
+ for sharenum, rref in buckets.iteritems():
+ bp = storage.WriteBucketProxy(rref, self.sharesize,
+ self.blocksize,
+ self.num_segments,
+ self.num_share_hashes,
+ EXTENSION_SIZE,
+ self.peerid)
+ b[sharenum] = bp
+ self.buckets.update(b)
+ return (alreadygot, set(b.keys()))
+
+class Tahoe2PeerSelector:
+
+ def __init__(self, upload_id, logparent=None, upload_status=None):
+ self.upload_id = upload_id
+ self.query_count, self.good_query_count, self.bad_query_count = 0,0,0
+ self.error_count = 0
+ self.num_peers_contacted = 0
+ self.last_failure_msg = None
+ self._status = IUploadStatus(upload_status)
+ self._log_parent = log.msg("%s starting" % self, parent=logparent)
+
+ def __repr__(self):
+ return "<Tahoe2PeerSelector for upload %s>" % self.upload_id
+
+ def get_shareholders(self, client,
+ storage_index, share_size, block_size,
+ num_segments, total_shares, shares_of_happiness):
+ """
+ @return: (used_peers, already_peers), where used_peers is a set of
+ PeerTracker instances that have agreed to hold some shares
+ for us (the shnum is stashed inside the PeerTracker),
+ and already_peers is a dict mapping shnum to a peer
+ which claims to already have the share.
+ """
+
+ if self._status:
+ self._status.set_status("Contacting Peers..")
+
+ self.total_shares = total_shares
+ self.shares_of_happiness = shares_of_happiness
+
+ self.homeless_shares = range(total_shares)
+ # self.uncontacted_peers = list() # peers we haven't asked yet
+ self.contacted_peers = [] # peers worth asking again
+ self.contacted_peers2 = [] # peers that we have asked again
+ self._started_second_pass = False
+ self.use_peers = set() # PeerTrackers that have shares assigned to them
+ self.preexisting_shares = {} # sharenum -> peerid holding the share
+
+ peers = client.get_permuted_peers("storage", storage_index)
+ if not peers:
+ raise encode.NotEnoughSharesError("client gave us zero peers")
+
+ # figure out how much space to ask for
+
+ # this needed_hashes computation should mirror
+ # Encoder.send_all_share_hash_trees. We use an IncompleteHashTree
+ # (instead of a HashTree) because we don't require actual hashing
+ # just to count the levels.
+ ht = hashtree.IncompleteHashTree(total_shares)
+ num_share_hashes = len(ht.needed_hashes(0, include_leaf=True))
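+        # (for example, ten total shares pad out to a sixteen-leaf tree, so
+        # each share needs its own leaf hash plus one sibling hash per level
+        # on the path up to the root)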
+
+ # decide upon the renewal/cancel secrets, to include them in the
+        # allocate_buckets query.
+ client_renewal_secret = client.get_renewal_secret()
+ client_cancel_secret = client.get_cancel_secret()
+
+ file_renewal_secret = file_renewal_secret_hash(client_renewal_secret,
+ storage_index)
+ file_cancel_secret = file_cancel_secret_hash(client_cancel_secret,
+ storage_index)
+
+ trackers = [ PeerTracker(peerid, conn,
+ share_size, block_size,
+ num_segments, num_share_hashes,
+ storage_index,
+ bucket_renewal_secret_hash(file_renewal_secret,
+ peerid),
+ bucket_cancel_secret_hash(file_cancel_secret,
+ peerid),
+ )
+ for (peerid, conn) in peers ]
+ self.uncontacted_peers = trackers
+
+ d = defer.maybeDeferred(self._loop)
+ return d
+
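+    # _loop() drives peer selection in passes. On the first pass each
+    # uncontacted peer is asked to hold a single share; a peer that accepts
+    # (or already has) everything we asked of it is kept in contacted_peers
+    # and asked again on later passes, where the remaining homeless shares
+    # are spread evenly across those peers. Peers asked on a later pass move
+    # to contacted_peers2 and migrate back when the pass is exhausted. We
+    # finish when every share has a home, or when we run out of peers: in
+    # the latter case we succeed only if at least shares_of_happiness shares
+    # were placed, and raise NotEnoughSharesError otherwise.
+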
+ def _loop(self):
+ if not self.homeless_shares:
+ # all done
+ msg = ("placed all %d shares, "
+ "sent %d queries to %d peers, "
+ "%d queries placed some shares, %d placed none, "
+ "got %d errors" %
+ (self.total_shares,
+ self.query_count, self.num_peers_contacted,
+ self.good_query_count, self.bad_query_count,
+ self.error_count))
+ log.msg("peer selection successful for %s: %s" % (self, msg),
+ parent=self._log_parent)
+ return (self.use_peers, self.preexisting_shares)
+
+ if self.uncontacted_peers:
+ peer = self.uncontacted_peers.pop(0)
+ # TODO: don't pre-convert all peerids to PeerTrackers
+ assert isinstance(peer, PeerTracker)
+
+ shares_to_ask = set([self.homeless_shares.pop(0)])
+ self.query_count += 1
+ self.num_peers_contacted += 1
+ if self._status:
+ self._status.set_status("Contacting Peers [%s] (first query),"
+ " %d shares left.."
+ % (idlib.shortnodeid_b2a(peer.peerid),
+ len(self.homeless_shares)))
+ d = peer.query(shares_to_ask)
+ d.addBoth(self._got_response, peer, shares_to_ask,
+ self.contacted_peers)
+ return d
+ elif self.contacted_peers:
+ # ask a peer that we've already asked.
+ if not self._started_second_pass:
+ log.msg("starting second pass", parent=self._log_parent,
+ level=log.NOISY)
+ self._started_second_pass = True
+ num_shares = mathutil.div_ceil(len(self.homeless_shares),
+ len(self.contacted_peers))
+ peer = self.contacted_peers.pop(0)
+ shares_to_ask = set(self.homeless_shares[:num_shares])
+ self.homeless_shares[:num_shares] = []
+ self.query_count += 1
+ if self._status:
+ self._status.set_status("Contacting Peers [%s] (second query),"
+ " %d shares left.."
+ % (idlib.shortnodeid_b2a(peer.peerid),
+ len(self.homeless_shares)))
+ d = peer.query(shares_to_ask)
+ d.addBoth(self._got_response, peer, shares_to_ask,
+ self.contacted_peers2)
+ return d
+ elif self.contacted_peers2:
+ # we've finished the second-or-later pass. Move all the remaining
+ # peers back into self.contacted_peers for the next pass.
+ self.contacted_peers.extend(self.contacted_peers2)
+            self.contacted_peers2[:] = []
+ return self._loop()
+ else:
+ # no more peers. If we haven't placed enough shares, we fail.
+ placed_shares = self.total_shares - len(self.homeless_shares)
+ if placed_shares < self.shares_of_happiness:
+ msg = ("placed %d shares out of %d total (%d homeless), "
+ "sent %d queries to %d peers, "
+ "%d queries placed some shares, %d placed none, "
+ "got %d errors" %
+ (self.total_shares - len(self.homeless_shares),
+ self.total_shares, len(self.homeless_shares),
+ self.query_count, self.num_peers_contacted,
+ self.good_query_count, self.bad_query_count,
+ self.error_count))
+ msg = "peer selection failed for %s: %s" % (self, msg)
+ if self.last_failure_msg:
+ msg += " (%s)" % (self.last_failure_msg,)
+ log.msg(msg, level=log.UNUSUAL, parent=self._log_parent)
+ raise encode.NotEnoughSharesError(msg)
+ else:
+ # we placed enough to be happy, so we're done
+ if self._status:
+ self._status.set_status("Placed all shares")
+                return (self.use_peers, self.preexisting_shares)
+
+ def _got_response(self, res, peer, shares_to_ask, put_peer_here):
+ if isinstance(res, failure.Failure):
+ # This is unusual, and probably indicates a bug or a network
+ # problem.
+ log.msg("%s got error during peer selection: %s" % (peer, res),
+ level=log.UNUSUAL, parent=self._log_parent)
+ self.error_count += 1
+ self.homeless_shares = list(shares_to_ask) + self.homeless_shares
+ if (self.uncontacted_peers
+ or self.contacted_peers
+ or self.contacted_peers2):
+ # there is still hope, so just loop
+ pass
+ else:
+ # No more peers, so this upload might fail (it depends upon
+ # whether we've hit shares_of_happiness or not). Log the last
+ # failure we got: if a coding error causes all peers to fail
+ # in the same way, this allows the common failure to be seen
+ # by the uploader and should help with debugging
+ msg = ("last failure (from %s) was: %s" % (peer, res))
+ self.last_failure_msg = msg
+ else:
+ (alreadygot, allocated) = res
+ log.msg("response from peer %s: alreadygot=%s, allocated=%s"
+ % (idlib.shortnodeid_b2a(peer.peerid),
+ tuple(sorted(alreadygot)), tuple(sorted(allocated))),
+ level=log.NOISY, parent=self._log_parent)
+ progress = False
+ for s in alreadygot:
+ self.preexisting_shares[s] = peer.peerid
+ if s in self.homeless_shares:
+ self.homeless_shares.remove(s)
+ progress = True
+
+ # the PeerTracker will remember which shares were allocated on
+ # that peer. We just have to remember to use them.
+ if allocated:
+ self.use_peers.add(peer)
+ progress = True
+
+ not_yet_present = set(shares_to_ask) - set(alreadygot)
+ still_homeless = not_yet_present - set(allocated)
+
+ if progress:
+ # they accepted or already had at least one share, so
+ # progress has been made
+ self.good_query_count += 1
+ else:
+ self.bad_query_count += 1
+
+ if still_homeless:
+ # In networks with lots of space, this is very unusual and
+ # probably indicates an error. In networks with peers that
+ # are full, it is merely unusual. In networks that are very
+ # full, it is common, and many uploads will fail. In most
+ # cases, this is obviously not fatal, and we'll just use some
+ # other peers.
+
+ # some shares are still homeless, keep trying to find them a
+ # home. The ones that were rejected get first priority.
+ self.homeless_shares = (list(still_homeless)
+ + self.homeless_shares)
+                # Since they were unable to accept all of our requests, it
+                # is safe to assume that asking them again won't help.
+ else:
+ # if they *were* able to accept everything, they might be
+ # willing to accept even more.
+ put_peer_here.append(peer)
+
+ # now loop
+ return self._loop()
+
+
+class EncryptAnUploadable:
+ """This is a wrapper that takes an IUploadable and provides
+ IEncryptedUploadable."""
+ implements(IEncryptedUploadable)
+ CHUNKSIZE = 50*1024
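+    # A sketch of typical use (see CHKUploader.start/start_encrypted below;
+    # the local names here are placeholders):
+    #   eu = EncryptAnUploadable(uploadable, log_parent)
+    #   eu.set_upload_status(upload_status)
+    #   d = encoder.set_encrypted_uploadable(eu)  # encoder is an encode.Encoder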
+
+ def __init__(self, original, log_parent=None):
+ self.original = IUploadable(original)
+ self._log_number = log_parent
+ self._encryptor = None
+ self._plaintext_hasher = plaintext_hasher()
+ self._plaintext_segment_hasher = None
+ self._plaintext_segment_hashes = []
+ self._encoding_parameters = None
+ self._file_size = None
+ self._ciphertext_bytes_read = 0
+ self._status = None
+
+ def set_upload_status(self, upload_status):
+ self._status = IUploadStatus(upload_status)
+ self.original.set_upload_status(upload_status)
+
+ def log(self, *args, **kwargs):
+ if "facility" not in kwargs:
+ kwargs["facility"] = "upload.encryption"
+ if "parent" not in kwargs:
+ kwargs["parent"] = self._log_number
+ return log.msg(*args, **kwargs)
+
+ def get_size(self):
+ if self._file_size is not None:
+ return defer.succeed(self._file_size)
+ d = self.original.get_size()
+ def _got_size(size):
+ self._file_size = size
+ if self._status:
+ self._status.set_size(size)
+ return size
+ d.addCallback(_got_size)
+ return d
+
+ def get_all_encoding_parameters(self):
+ if self._encoding_parameters is not None:
+ return defer.succeed(self._encoding_parameters)
+ d = self.original.get_all_encoding_parameters()
+ def _got(encoding_parameters):
+ (k, happy, n, segsize) = encoding_parameters
+ self._segment_size = segsize # used by segment hashers
+ self._encoding_parameters = encoding_parameters
+ self.log("my encoding parameters: %s" % (encoding_parameters,),
+ level=log.NOISY)
+ return encoding_parameters
+ d.addCallback(_got)
+ return d
+
+ def _get_encryptor(self):
+ if self._encryptor:
+ return defer.succeed(self._encryptor)
+
+ d = self.original.get_encryption_key()
+ def _got(key):
+ e = AES(key)
+ self._encryptor = e
+
+ storage_index = storage_index_hash(key)
+ assert isinstance(storage_index, str)
+ # There's no point to having the SI be longer than the key, so we
+ # specify that it is truncated to the same 128 bits as the AES key.
+ assert len(storage_index) == 16 # SHA-256 truncated to 128b
+ self._storage_index = storage_index
+ if self._status:
+ self._status.set_storage_index(storage_index)
+ return e
+ d.addCallback(_got)
+ return d
+
+ def get_storage_index(self):
+ d = self._get_encryptor()
+ d.addCallback(lambda res: self._storage_index)
+ return d
+
+ def _get_segment_hasher(self):
+ p = self._plaintext_segment_hasher
+ if p:
+ left = self._segment_size - self._plaintext_segment_hashed_bytes
+ return p, left
+ p = plaintext_segment_hasher()
+ self._plaintext_segment_hasher = p
+ self._plaintext_segment_hashed_bytes = 0
+ return p, self._segment_size
+
+ def _update_segment_hash(self, chunk):
+ offset = 0
+ while offset < len(chunk):
+ p, segment_left = self._get_segment_hasher()
+ chunk_left = len(chunk) - offset
+ this_segment = min(chunk_left, segment_left)
+ p.update(chunk[offset:offset+this_segment])
+ self._plaintext_segment_hashed_bytes += this_segment
+
+ if self._plaintext_segment_hashed_bytes == self._segment_size:
+ # we've filled this segment
+ self._plaintext_segment_hashes.append(p.digest())
+ self._plaintext_segment_hasher = None
+ self.log("closed hash [%d]: %dB" %
+ (len(self._plaintext_segment_hashes)-1,
+ self._plaintext_segment_hashed_bytes),
+ level=log.NOISY)
+ self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s",
+ segnum=len(self._plaintext_segment_hashes)-1,
+ hash=base32.b2a(p.digest()),
+ level=log.NOISY)
+
+ offset += this_segment
+
+
+ def read_encrypted(self, length, hash_only):
+ # make sure our parameters have been set up first
+ d = self.get_all_encoding_parameters()
+ # and size
+ d.addCallback(lambda ignored: self.get_size())
+ d.addCallback(lambda ignored: self._get_encryptor())
+ # then fetch and encrypt the plaintext. The unusual structure here
+ # (passing a Deferred *into* a function) is needed to avoid
+ # overflowing the stack: Deferreds don't optimize out tail recursion.
+ # We also pass in a list, to which _read_encrypted will append
+ # ciphertext.
+ ciphertext = []
+ d2 = defer.Deferred()
+ d.addCallback(lambda ignored:
+ self._read_encrypted(length, ciphertext, hash_only, d2))
+ d.addCallback(lambda ignored: d2)
+ return d
+
+ def _read_encrypted(self, remaining, ciphertext, hash_only, fire_when_done):
+ if not remaining:
+ fire_when_done.callback(ciphertext)
+ return None
+ # tolerate large length= values without consuming a lot of RAM by
+ # reading just a chunk (say 50kB) at a time. This only really matters
+ # when hash_only==True (i.e. resuming an interrupted upload), since
+ # that's the case where we will be skipping over a lot of data.
+ size = min(remaining, self.CHUNKSIZE)
+ remaining = remaining - size
+ # read a chunk of plaintext..
+ d = defer.maybeDeferred(self.original.read, size)
+ # N.B.: if read() is synchronous, then since everything else is
+ # actually synchronous too, we'd blow the stack unless we stall for a
+ # tick. Once you accept a Deferred from IUploadable.read(), you must
+ # be prepared to have it fire immediately too.
+ d.addCallback(eventual.fireEventually)
+ def _good(plaintext):
+ # and encrypt it..
+ # o/' over the fields we go, hashing all the way, sHA! sHA! sHA! o/'
+ ct = self._hash_and_encrypt_plaintext(plaintext, hash_only)
+ ciphertext.extend(ct)
+ self._read_encrypted(remaining, ciphertext, hash_only,
+ fire_when_done)
+ def _err(why):
+ fire_when_done.errback(why)
+ d.addCallback(_good)
+ d.addErrback(_err)
+ return None
+
+ def _hash_and_encrypt_plaintext(self, data, hash_only):
+ assert isinstance(data, (tuple, list)), type(data)
+ data = list(data)
+ cryptdata = []
+ # we use data.pop(0) instead of 'for chunk in data' to save
+ # memory: each chunk is destroyed as soon as we're done with it.
+ bytes_processed = 0
+ while data:
+ chunk = data.pop(0)
+ self.log(" read_encrypted handling %dB-sized chunk" % len(chunk),
+ level=log.NOISY)
+ bytes_processed += len(chunk)
+ self._plaintext_hasher.update(chunk)
+ self._update_segment_hash(chunk)
+ # TODO: we have to encrypt the data (even if hash_only==True)
+ # because pycryptopp's AES-CTR implementation doesn't offer a
+ # way to change the counter value. Once pycryptopp acquires
+ # this ability, change this to simply update the counter
+ # before each call to (hash_only==False) _encryptor.process()
+ ciphertext = self._encryptor.process(chunk)
+ if hash_only:
+ self.log(" skipping encryption", level=log.NOISY)
+ else:
+ cryptdata.append(ciphertext)
+ del ciphertext
+ del chunk
+ self._ciphertext_bytes_read += bytes_processed
+ if self._status:
+ progress = float(self._ciphertext_bytes_read) / self._file_size
+ self._status.set_progress(1, progress)
+ return cryptdata
+
+
+ def get_plaintext_hashtree_leaves(self, first, last, num_segments):
+ if len(self._plaintext_segment_hashes) < num_segments:
+ # close out the last one
+ assert len(self._plaintext_segment_hashes) == num_segments-1
+ p, segment_left = self._get_segment_hasher()
+ self._plaintext_segment_hashes.append(p.digest())
+ del self._plaintext_segment_hasher
+ self.log("closing plaintext leaf hasher, hashed %d bytes" %
+ self._plaintext_segment_hashed_bytes,
+ level=log.NOISY)
+ self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s",
+ segnum=len(self._plaintext_segment_hashes)-1,
+ hash=base32.b2a(p.digest()),
+ level=log.NOISY)
+ assert len(self._plaintext_segment_hashes) == num_segments
+ return defer.succeed(tuple(self._plaintext_segment_hashes[first:last]))
+
+ def get_plaintext_hash(self):
+ h = self._plaintext_hasher.digest()
+ return defer.succeed(h)
+
+ def close(self):
+ return self.original.close()
+
+class UploadStatus:
+ implements(IUploadStatus)
+ statusid_counter = itertools.count(0)
+
+ def __init__(self):
+ self.storage_index = None
+ self.size = None
+ self.helper = False
+ self.status = "Not started"
+ self.progress = [0.0, 0.0, 0.0]
+ self.active = True
+ self.results = None
+ self.counter = self.statusid_counter.next()
+ self.started = time.time()
+
+ def get_started(self):
+ return self.started
+ def get_storage_index(self):
+ return self.storage_index
+ def get_size(self):
+ return self.size
+ def using_helper(self):
+ return self.helper
+ def get_status(self):
+ return self.status
+ def get_progress(self):
+ return tuple(self.progress)
+ def get_active(self):
+ return self.active
+ def get_results(self):
+ return self.results
+ def get_counter(self):
+ return self.counter
+
+ def set_storage_index(self, si):
+ self.storage_index = si
+ def set_size(self, size):
+ self.size = size
+ def set_helper(self, helper):
+ self.helper = helper
+ def set_status(self, status):
+ self.status = status
+ def set_progress(self, which, value):
+ # [0]: chk, [1]: ciphertext, [2]: encode+push
+ self.progress[which] = value
+ def set_active(self, value):
+ self.active = value
+ def set_results(self, value):
+ self.results = value
+
+class CHKUploader:
+ peer_selector_class = Tahoe2PeerSelector
+
+ def __init__(self, client):
+ self._client = client
+ self._log_number = self._client.log("CHKUploader starting")
+ self._encoder = None
+ self._results = UploadResults()
+ self._storage_index = None
+ self._upload_status = UploadStatus()
+ self._upload_status.set_helper(False)
+ self._upload_status.set_active(True)
+ self._upload_status.set_results(self._results)
+
+ def log(self, *args, **kwargs):
+ if "parent" not in kwargs:
+ kwargs["parent"] = self._log_number
+ if "facility" not in kwargs:
+ kwargs["facility"] = "tahoe.upload"
+ return self._client.log(*args, **kwargs)
+
+ def start(self, uploadable):
+ """Start uploading the file.
+
+ This method returns a Deferred that will fire with the URI (a
+ string)."""
+
+ self._started = time.time()
+ uploadable = IUploadable(uploadable)
+ self.log("starting upload of %s" % uploadable)
+
+ eu = EncryptAnUploadable(uploadable, self._log_number)
+ eu.set_upload_status(self._upload_status)
+ d = self.start_encrypted(eu)
+ def _uploaded(res):
+ d1 = uploadable.get_encryption_key()
+ d1.addCallback(lambda key: self._compute_uri(res, key))
+ return d1
+ d.addCallback(_uploaded)
+ def _done(res):
+ self._upload_status.set_active(False)
+ return res
+ d.addBoth(_done)
+ return d
+
+ def abort(self):
+ """Call this is the upload must be abandoned before it completes.
+ This will tell the shareholders to delete their partial shares. I
+ return a Deferred that fires when these messages have been acked."""
+ if not self._encoder:
+ # how did you call abort() before calling start() ?
+ return defer.succeed(None)
+ return self._encoder.abort()
+
+ def start_encrypted(self, encrypted):
+ eu = IEncryptedUploadable(encrypted)
+
+ started = time.time()
+ self._encoder = e = encode.Encoder(self._log_number,
+ self._upload_status)
+ d = e.set_encrypted_uploadable(eu)
+ d.addCallback(self.locate_all_shareholders, started)
+ d.addCallback(self.set_shareholders, e)
+ d.addCallback(lambda res: e.start())
+ d.addCallback(self._encrypted_done)
+ # this fires with the uri_extension_hash and other data
+ return d
+
+ def locate_all_shareholders(self, encoder, started):
+ peer_selection_started = now = time.time()
+ self._storage_index_elapsed = now - started
+ storage_index = encoder.get_param("storage_index")
+ self._storage_index = storage_index
+ upload_id = storage.si_b2a(storage_index)[:5]
+ self.log("using storage index %s" % upload_id)
+ peer_selector = self.peer_selector_class(upload_id, self._log_number,
+ self._upload_status)
+
+ share_size = encoder.get_param("share_size")
+ block_size = encoder.get_param("block_size")
+ num_segments = encoder.get_param("num_segments")
+ k,desired,n = encoder.get_param("share_counts")
+
+ self._peer_selection_started = time.time()
+ d = peer_selector.get_shareholders(self._client, storage_index,
+ share_size, block_size,
+ num_segments, n, desired)
+ def _done(res):
+ self._peer_selection_elapsed = time.time() - peer_selection_started
+ return res
+ d.addCallback(_done)
+ return d
+
+ def set_shareholders(self, (used_peers, already_peers), encoder):
+ """
+ @param used_peers: a sequence of PeerTracker objects
+        @param already_peers: a dict mapping sharenum to a peerid that
+ claims to already have this share
+ """
+ self.log("_send_shares, used_peers is %s" % (used_peers,))
+ # record already-present shares in self._results
+ for (shnum, peerid) in already_peers.items():
+ peerid_s = idlib.shortnodeid_b2a(peerid)
+ self._results.sharemap[shnum] = "Found on [%s]" % peerid_s
+ if peerid not in self._results.servermap:
+ self._results.servermap[peerid] = set()
+ self._results.servermap[peerid].add(shnum)
+ self._results.preexisting_shares = len(already_peers)
+
+ self._sharemap = {}
+ for peer in used_peers:
+ assert isinstance(peer, PeerTracker)
+        buckets = {}
+ for peer in used_peers:
+ buckets.update(peer.buckets)
+ for shnum in peer.buckets:
+ self._sharemap[shnum] = peer
+ assert len(buckets) == sum([len(peer.buckets) for peer in used_peers])
+ encoder.set_shareholders(buckets)
+
+ def _encrypted_done(self, res):
+ r = self._results
+ for shnum in self._encoder.get_shares_placed():
+ peer_tracker = self._sharemap[shnum]
+ peerid = peer_tracker.peerid
+ peerid_s = idlib.shortnodeid_b2a(peerid)
+ r.sharemap[shnum] = "Placed on [%s]" % peerid_s
+ if peerid not in r.servermap:
+ r.servermap[peerid] = set()
+ r.servermap[peerid].add(shnum)
+ r.pushed_shares = len(self._encoder.get_shares_placed())
+ now = time.time()
+ r.file_size = self._encoder.file_size
+ r.timings["total"] = now - self._started
+ r.timings["storage_index"] = self._storage_index_elapsed
+ r.timings["peer_selection"] = self._peer_selection_elapsed
+ r.timings.update(self._encoder.get_times())
+ r.uri_extension_data = self._encoder.get_uri_extension_data()
+ return res
+
+ def _compute_uri(self, (uri_extension_hash,
+ needed_shares, total_shares, size),
+ key):
+ u = uri.CHKFileURI(key=key,
+ uri_extension_hash=uri_extension_hash,
+ needed_shares=needed_shares,
+ total_shares=total_shares,
+ size=size,
+ )
+ r = self._results
+ r.uri = u.to_string()
+ return r
+
+ def get_upload_status(self):
+ return self._upload_status
+
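+# read_this_many_bytes: keep calling uploadable.read() until 'size' bytes
+# have been accumulated, returning a Deferred that fires with the list of
+# data pieces. LiteralUploader uses this to read small files entirely into
+# memory.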
+def read_this_many_bytes(uploadable, size, prepend_data=[]):
+ if size == 0:
+ return defer.succeed([])
+ d = uploadable.read(size)
+ def _got(data):
+ assert isinstance(data, list)
+ bytes = sum([len(piece) for piece in data])
+ assert bytes > 0
+ assert bytes <= size
+ remaining = size - bytes
+ if remaining:
+ return read_this_many_bytes(uploadable, remaining,
+ prepend_data + data)
+ return prepend_data + data
+ d.addCallback(_got)
+ return d
+
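+# LiteralUploader handles files small enough to be stored as a LIT URI (see
+# Uploader.URI_LIT_SIZE_THRESHOLD below): the plaintext is embedded directly
+# in the URI, so nothing is pushed to storage servers.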
+class LiteralUploader:
+
+ def __init__(self, client):
+ self._client = client
+ self._results = UploadResults()
+ self._status = s = UploadStatus()
+ s.set_storage_index(None)
+ s.set_helper(False)
+ s.set_progress(0, 1.0)
+ s.set_active(False)
+ s.set_results(self._results)
+
+ def start(self, uploadable):
+ uploadable = IUploadable(uploadable)
+ d = uploadable.get_size()
+ def _got_size(size):
+ self._size = size
+ self._status.set_size(size)
+ self._results.file_size = size
+ return read_this_many_bytes(uploadable, size)
+ d.addCallback(_got_size)
+ d.addCallback(lambda data: uri.LiteralFileURI("".join(data)))
+ d.addCallback(lambda u: u.to_string())
+ d.addCallback(self._build_results)
+ return d
+
+ def _build_results(self, uri):
+ self._results.uri = uri
+ self._status.set_status("Done")
+ self._status.set_progress(1, 1.0)
+ self._status.set_progress(2, 1.0)
+ return self._results
+
+ def close(self):
+ pass
+
+ def get_upload_status(self):
+ return self._status
+
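+# RemoteEncryptedUploadable wraps a local IEncryptedUploadable in a foolscap
+# Referenceable so a remote Helper can pull ciphertext from us. It tracks the
+# current read offset and allows skipping forwards (reading in hash_only mode
+# so the plaintext hash trees are still built), but never seeking backwards.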
+class RemoteEncryptedUploadable(Referenceable):
+ implements(RIEncryptedUploadable)
+
+ def __init__(self, encrypted_uploadable, upload_status):
+ self._eu = IEncryptedUploadable(encrypted_uploadable)
+ self._offset = 0
+ self._bytes_sent = 0
+ self._status = IUploadStatus(upload_status)
+ # we are responsible for updating the status string while we run, and
+ # for setting the ciphertext-fetch progress.
+ self._size = None
+
+ def get_size(self):
+ if self._size is not None:
+ return defer.succeed(self._size)
+ d = self._eu.get_size()
+ def _got_size(size):
+ self._size = size
+ return size
+ d.addCallback(_got_size)
+ return d
+
+ def remote_get_size(self):
+ return self.get_size()
+ def remote_get_all_encoding_parameters(self):
+ return self._eu.get_all_encoding_parameters()
+
+ def _read_encrypted(self, length, hash_only):
+ d = self._eu.read_encrypted(length, hash_only)
+ def _read(strings):
+ if hash_only:
+ self._offset += length
+ else:
+ size = sum([len(data) for data in strings])
+ self._offset += size
+ return strings
+ d.addCallback(_read)
+ return d
+
+ def remote_read_encrypted(self, offset, length):
+ # we don't support seek backwards, but we allow skipping forwards
+ precondition(offset >= 0, offset)
+ precondition(length >= 0, length)
+ lp = log.msg("remote_read_encrypted(%d-%d)" % (offset, offset+length),
+ level=log.NOISY)
+ precondition(offset >= self._offset, offset, self._offset)
+ if offset > self._offset:
+ # read the data from disk anyway, to build up the hash tree
+ skip = offset - self._offset
+ log.msg("remote_read_encrypted skipping ahead from %d to %d, skip=%d" %
+ (self._offset, offset, skip), level=log.UNUSUAL, parent=lp)
+ d = self._read_encrypted(skip, hash_only=True)
+ else:
+ d = defer.succeed(None)
+
+ def _at_correct_offset(res):
+ assert offset == self._offset, "%d != %d" % (offset, self._offset)
+ return self._read_encrypted(length, hash_only=False)
+ d.addCallback(_at_correct_offset)
+
+ def _read(strings):
+ size = sum([len(data) for data in strings])
+ self._bytes_sent += size
+ return strings
+ d.addCallback(_read)
+ return d
+
+ def remote_get_plaintext_hashtree_leaves(self, first, last, num_segments):
+ log.msg("remote_get_plaintext_hashtree_leaves: %d-%d of %d" %
+ (first, last-1, num_segments),
+ level=log.NOISY)
+ d = self._eu.get_plaintext_hashtree_leaves(first, last, num_segments)
+ d.addCallback(list)
+ return d
+ def remote_get_plaintext_hash(self):
+ return self._eu.get_plaintext_hash()
+ def remote_close(self):
+ return self._eu.close()
+
+
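+# AssistedUploader delegates encoding and share placement to a remote Helper.
+# We compute the encryption key and storage index locally, ask the Helper
+# (via upload_chk) whether it already has the ciphertext, stream it through a
+# RemoteEncryptedUploadable if not, and then build the readcap locally from
+# the returned uri_extension data, so the Helper never needs the key.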
+class AssistedUploader:
+
+ def __init__(self, helper):
+ self._helper = helper
+ self._log_number = log.msg("AssistedUploader starting")
+ self._storage_index = None
+ self._upload_status = s = UploadStatus()
+ s.set_helper(True)
+ s.set_active(True)
+
+ def log(self, *args, **kwargs):
+ if "parent" not in kwargs:
+ kwargs["parent"] = self._log_number
+ return log.msg(*args, **kwargs)
+
+ def start(self, uploadable):
+ self._started = time.time()
+ u = IUploadable(uploadable)
+ eu = EncryptAnUploadable(u, self._log_number)
+ eu.set_upload_status(self._upload_status)
+ self._encuploadable = eu
+ d = eu.get_size()
+ d.addCallback(self._got_size)
+ d.addCallback(lambda res: eu.get_all_encoding_parameters())
+ d.addCallback(self._got_all_encoding_parameters)
+ # when we get the encryption key, that will also compute the storage
+ # index, so this only takes one pass.
+ # TODO: I'm not sure it's cool to switch back and forth between
+ # the Uploadable and the IEncryptedUploadable that wraps it.
+ d.addCallback(lambda res: u.get_encryption_key())
+ d.addCallback(self._got_encryption_key)
+ d.addCallback(lambda res: eu.get_storage_index())
+ d.addCallback(self._got_storage_index)
+ d.addCallback(self._contact_helper)
+ d.addCallback(self._build_readcap)
+ def _done(res):
+ self._upload_status.set_active(False)
+ return res
+ d.addBoth(_done)
+ return d
+
+ def _got_size(self, size):
+ self._size = size
+ self._upload_status.set_size(size)
+
+ def _got_all_encoding_parameters(self, params):
+ k, happy, n, segment_size = params
+ # stash these for URI generation later
+ self._needed_shares = k
+ self._total_shares = n
+ self._segment_size = segment_size
+
+ def _got_encryption_key(self, key):
+ self._key = key
+
+ def _got_storage_index(self, storage_index):
+ self._storage_index = storage_index
+
+
+ def _contact_helper(self, res):
+ now = self._time_contacting_helper_start = time.time()
+ self._storage_index_elapsed = now - self._started
+ self.log(format="contacting helper for SI %(si)s..",
+ si=storage.si_b2a(self._storage_index))
+ self._upload_status.set_status("Contacting Helper")
+ d = self._helper.callRemote("upload_chk", self._storage_index)
+ d.addCallback(self._contacted_helper)
+ return d
+
+ def _contacted_helper(self, (upload_results, upload_helper)):
+ now = time.time()
+ elapsed = now - self._time_contacting_helper_start
+ self._elapsed_time_contacting_helper = elapsed
+ if upload_helper:
+ self.log("helper says we need to upload")
+ self._upload_status.set_status("Uploading Ciphertext")
+ # we need to upload the file
+ reu = RemoteEncryptedUploadable(self._encuploadable,
+ self._upload_status)
+ # let it pre-compute the size for progress purposes
+ d = reu.get_size()
+ d.addCallback(lambda ignored:
+ upload_helper.callRemote("upload", reu))
+ # this Deferred will fire with the upload results
+ return d
+ self.log("helper says file is already uploaded")
+ self._upload_status.set_progress(1, 1.0)
+ self._upload_status.set_results(upload_results)
+ return upload_results
+
+ def _build_readcap(self, upload_results):
+ self.log("upload finished, building readcap")
+ self._upload_status.set_status("Building Readcap")
+ r = upload_results
+ assert r.uri_extension_data["needed_shares"] == self._needed_shares
+ assert r.uri_extension_data["total_shares"] == self._total_shares
+ assert r.uri_extension_data["segment_size"] == self._segment_size
+ assert r.uri_extension_data["size"] == self._size
+ u = uri.CHKFileURI(key=self._key,
+ uri_extension_hash=r.uri_extension_hash,
+ needed_shares=self._needed_shares,
+ total_shares=self._total_shares,
+ size=self._size,
+ )
+ r.uri = u.to_string()
+ now = time.time()
+ r.file_size = self._size
+ r.timings["storage_index"] = self._storage_index_elapsed
+ r.timings["contacting_helper"] = self._elapsed_time_contacting_helper
+ if "total" in r.timings:
+ r.timings["helper_total"] = r.timings["total"]
+ r.timings["total"] = now - self._started
+ self._upload_status.set_status("Done")
+ self._upload_status.set_results(r)
+ return r
+
+ def get_upload_status(self):
+ return self._upload_status
+
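+# BaseUploadable carries the default encoding parameters (k, happy, n, and
+# the maximum segment size) and computes the per-file (k, happy, n, segsize)
+# tuple: for small files the segment size shrinks to the file size, rounded
+# up to a multiple of k.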
+class BaseUploadable:
+ default_max_segment_size = 128*KiB # overridden by max_segment_size
+ default_encoding_param_k = 3 # overridden by encoding_parameters
+ default_encoding_param_happy = 7
+ default_encoding_param_n = 10
+
+ max_segment_size = None
+ encoding_param_k = None
+ encoding_param_happy = None
+ encoding_param_n = None
+
+ _all_encoding_parameters = None
+ _status = None
+
+ def set_upload_status(self, upload_status):
+ self._status = IUploadStatus(upload_status)
+
+ def set_default_encoding_parameters(self, default_params):
+ assert isinstance(default_params, dict)
+ for k,v in default_params.items():
+ precondition(isinstance(k, str), k, v)
+ precondition(isinstance(v, int), k, v)
+ if "k" in default_params:
+ self.default_encoding_param_k = default_params["k"]
+ if "happy" in default_params:
+ self.default_encoding_param_happy = default_params["happy"]
+ if "n" in default_params:
+ self.default_encoding_param_n = default_params["n"]
+ if "max_segment_size" in default_params:
+ self.default_max_segment_size = default_params["max_segment_size"]
+
+ def get_all_encoding_parameters(self):
+ if self._all_encoding_parameters:
+ return defer.succeed(self._all_encoding_parameters)
+
+ max_segsize = self.max_segment_size or self.default_max_segment_size
+ k = self.encoding_param_k or self.default_encoding_param_k
+ happy = self.encoding_param_happy or self.default_encoding_param_happy
+ n = self.encoding_param_n or self.default_encoding_param_n
+
+ d = self.get_size()
+ def _got_size(file_size):
+ # for small files, shrink the segment size to avoid wasting space
+ segsize = min(max_segsize, file_size)
+ # this must be a multiple of 'required_shares'==k
+ segsize = mathutil.next_multiple(segsize, k)
+ encoding_parameters = (k, happy, n, segsize)
+ self._all_encoding_parameters = encoding_parameters
+ return encoding_parameters
+ d.addCallback(_got_size)
+ return d
+
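+# FileHandle is the basic IUploadable: it wraps an open file-like object.
+# With a convergence string, the encryption key is derived by hashing the
+# whole plaintext together with that string and the encoding parameters (so
+# identical files encrypt identically); otherwise a random 16-byte AES key
+# is used.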
+class FileHandle(BaseUploadable):
+ implements(IUploadable)
+
+ def __init__(self, filehandle, convergence):
+ """
+ Upload the data from the filehandle. If convergence is None then a
+ random encryption key will be used, else the plaintext will be hashed,
+ then the hash will be hashed together with the string in the
+ "convergence" argument to form the encryption key.
+ """
+ assert convergence is None or isinstance(convergence, str), (convergence, type(convergence))
+ self._filehandle = filehandle
+ self._key = None
+ self.convergence = convergence
+ self._size = None
+
+ def _get_encryption_key_convergent(self):
+ if self._key is not None:
+ return defer.succeed(self._key)
+
+ d = self.get_size()
+ # that sets self._size as a side-effect
+ d.addCallback(lambda size: self.get_all_encoding_parameters())
+ def _got(params):
+ k, happy, n, segsize = params
+ f = self._filehandle
+ enckey_hasher = convergence_hasher(k, n, segsize, self.convergence)
+ f.seek(0)
+ BLOCKSIZE = 64*1024
+ bytes_read = 0
+ while True:
+ data = f.read(BLOCKSIZE)
+ if not data:
+ break
+ enckey_hasher.update(data)
+ # TODO: setting progress in a non-yielding loop is kind of
+ # pointless, but I'm anticipating (perhaps prematurely) the
+ # day when we use a slowjob or twisted's CooperatorService to
+ # make this yield time to other jobs.
+ bytes_read += len(data)
+ if self._status:
+ self._status.set_progress(0, float(bytes_read)/self._size)
+ f.seek(0)
+ self._key = enckey_hasher.digest()
+ if self._status:
+ self._status.set_progress(0, 1.0)
+ assert len(self._key) == 16
+ return self._key
+ d.addCallback(_got)
+ return d
+
+ def _get_encryption_key_random(self):
+ if self._key is None:
+ self._key = os.urandom(16)
+ return defer.succeed(self._key)
+
+ def get_encryption_key(self):
+ if self.convergence is not None:
+ return self._get_encryption_key_convergent()
+ else:
+ return self._get_encryption_key_random()
+
+ def get_size(self):
+ if self._size is not None:
+ return defer.succeed(self._size)
+ self._filehandle.seek(0,2)
+ size = self._filehandle.tell()
+ self._size = size
+ self._filehandle.seek(0)
+ return defer.succeed(size)
+
+ def read(self, length):
+ return defer.succeed([self._filehandle.read(length)])
+
+ def close(self):
+ # the originator of the filehandle reserves the right to close it
+ pass
+
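+# FileName and Data (below) are thin conveniences over FileHandle: one opens
+# a named file, the other wraps an in-memory string via StringIO.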
+class FileName(FileHandle):
+ def __init__(self, filename, convergence):
+ """
+ Upload the data from the filename. If convergence is None then a
+ random encryption key will be used, else the plaintext will be hashed,
+ then the hash will be hashed together with the string in the
+ "convergence" argument to form the encryption key.
+ """
+ assert convergence is None or isinstance(convergence, str), (convergence, type(convergence))
+ FileHandle.__init__(self, open(filename, "rb"), convergence=convergence)
+ def close(self):
+ FileHandle.close(self)
+ self._filehandle.close()
+
+class Data(FileHandle):
+ def __init__(self, data, convergence):
+ """
+ Upload the data from the data argument. If convergence is None then a
+ random encryption key will be used, else the plaintext will be hashed,
+ then the hash will be hashed together with the string in the
+ "convergence" argument to form the encryption key.
+ """
+ assert convergence is None or isinstance(convergence, str), (convergence, type(convergence))
+ FileHandle.__init__(self, StringIO(data), convergence=convergence)
+
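+# Uploader is the client-facing service. upload() picks a strategy by size
+# and helper availability: LiteralUploader for files at or below
+# URI_LIT_SIZE_THRESHOLD bytes, AssistedUploader when a helper connection is
+# available, otherwise a local CHKUploader. Illustrative sketch of a caller
+# (the 'client' instance is assumed here, not part of this patch):
+#   uploader = client.getServiceNamed("uploader")
+#   d = uploader.upload(Data("example contents", convergence=None))
+#   d.addCallback(lambda results: results.uri)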
+class Uploader(service.MultiService):
+ """I am a service that allows file uploading. I am a service-child of the
+ Client.
+ """
+ implements(IUploader)
+ name = "uploader"
+ uploader_class = CHKUploader
+ URI_LIT_SIZE_THRESHOLD = 55
+ MAX_UPLOAD_STATUSES = 10
+
+ def __init__(self, helper_furl=None, stats_provider=None):
+ self._helper_furl = helper_furl
+ self.stats_provider = stats_provider
+ self._helper = None
+ self._all_uploads = weakref.WeakKeyDictionary() # for debugging
+ self._all_upload_statuses = weakref.WeakKeyDictionary()
+ self._recent_upload_statuses = []
+ service.MultiService.__init__(self)
+
+ def startService(self):
+ service.MultiService.startService(self)
+ if self._helper_furl:
+ self.parent.tub.connectTo(self._helper_furl,
+ self._got_helper)
+
+ def _got_helper(self, helper):
+ self._helper = helper
+ helper.notifyOnDisconnect(self._lost_helper)
+ def _lost_helper(self):
+ self._helper = None
+
+ def get_helper_info(self):
+ # return a tuple of (helper_furl_or_None, connected_bool)
+ return (self._helper_furl, bool(self._helper))
+
+ def upload(self, uploadable):
+ # this returns a Deferred that fires with an UploadResults instance,
+ # whose .uri attribute holds the file's URI
+ assert self.parent
+ assert self.running
+
+ uploadable = IUploadable(uploadable)
+ d = uploadable.get_size()
+ def _got_size(size):
+ default_params = self.parent.get_encoding_parameters()
+ precondition(isinstance(default_params, dict), default_params)
+ precondition("max_segment_size" in default_params, default_params)
+ uploadable.set_default_encoding_parameters(default_params)
+
+ if self.stats_provider:
+ self.stats_provider.count('uploader.files_uploaded', 1)
+ self.stats_provider.count('uploader.bytes_uploaded', size)
+
+ if size <= self.URI_LIT_SIZE_THRESHOLD:
+ uploader = LiteralUploader(self.parent)
+ elif self._helper:
+ uploader = AssistedUploader(self._helper)
+ else:
+ uploader = self.uploader_class(self.parent)
+ self._add_upload(uploader)
+ return uploader.start(uploadable)
+ d.addCallback(_got_size)
+ def _done(res):
+ uploadable.close()
+ return res
+ d.addBoth(_done)
+ return d
+
+ def _add_upload(self, uploader):
+ s = uploader.get_upload_status()
+ self._all_uploads[uploader] = None
+ self._all_upload_statuses[s] = None
+ self._recent_upload_statuses.append(s)
+ while len(self._recent_upload_statuses) > self.MAX_UPLOAD_STATUSES:
+ self._recent_upload_statuses.pop(0)
+
+ def list_all_upload_statuses(self):
+ for us in self._all_upload_statuses:
+ yield us
from allmydata.util import hashutil
from allmydata.util.assertutil import precondition
from allmydata.uri import WriteableSSKFileURI
-from allmydata.encode import NotEnoughSharesError
+from allmydata.immutable.encode import NotEnoughSharesError
from pycryptopp.publickey import rsa
from pycryptopp.cipher.aes import AES
from allmydata.interfaces import IRetrieveStatus
from allmydata.util import hashutil, idlib, log
from allmydata import hashtree, codec, storage
-from allmydata.encode import NotEnoughSharesError
+from allmydata.immutable.encode import NotEnoughSharesError
from pycryptopp.cipher.aes import AES
from common import DictOfSets, CorruptShareError, UncoordinatedWriteError
from twisted.internet import defer
from foolscap import Referenceable
from foolscap.eventual import eventually
-from allmydata import upload, interfaces, storage, uri
+from allmydata import interfaces, storage, uri
+from allmydata.immutable import upload
from allmydata.util import idlib, log, observer, fileutil, hashutil
from twisted.internet import defer, reactor, protocol, error
from twisted.application import service, internet
from twisted.web import client as tw_client
-from allmydata import client, introducer, upload
+from allmydata import client, introducer
+from allmydata.immutable import upload
from allmydata.scripts import create_node
from allmydata.util import testutil, fileutil
import foolscap
from twisted.internet import defer
from twisted.python import failure
from twisted.application import service
-from allmydata import uri, dirnode, checker
+from allmydata import uri, dirnode
from allmydata.interfaces import IURI, IMutableFileNode, IFileNode, \
FileTooLargeError
-from allmydata.encode import NotEnoughSharesError
+from allmydata.immutable import checker
+from allmydata.immutable.encode import NotEnoughSharesError
from allmydata.util import log
class FakeCHKFileNode:
import time
from zope.interface import implements
from twisted.trial import unittest
-from allmydata import uri, dirnode, upload
+from allmydata import uri, dirnode
+from allmydata.immutable import upload
from allmydata.interfaces import IURI, IClient, IMutableFileNode, \
INewDirectoryURI, IReadonlyNewDirectoryURI, IFileNode, ExistingChildError
from allmydata.util import hashutil, testutil
from twisted.internet.interfaces import IConsumer
from twisted.python.failure import Failure
from foolscap import eventual
-from allmydata import encode, upload, download, hashtree, uri
+from allmydata import hashtree, uri
+from allmydata.immutable import encode, upload, download
from allmydata.util import hashutil, testutil
from allmydata.util.assertutil import _assert
from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader
from twisted.trial import unittest
-from allmydata import filenode, uri, download
+from allmydata import uri
+from allmydata.immutable import filenode, download
from allmydata.mutable.node import MutableFileNode
from allmydata.util import hashutil
from foolscap import Tub, eventual
from foolscap.logging import log
-from allmydata import offloaded, storage, upload
+from allmydata import offloaded, storage
+from allmydata.immutable import upload
from allmydata.util import hashutil, fileutil, mathutil
from pycryptopp.cipher.aes import AES
from twisted.trial import unittest
from twisted.internet import defer, reactor
from twisted.python import failure
-from allmydata import uri, download, storage
+from allmydata import uri, storage
+from allmydata.immutable import download
+from allmydata.immutable.encode import NotEnoughSharesError
from allmydata.util import base32, testutil, idlib
from allmydata.util.idlib import shortnodeid_b2a
from allmydata.util.hashutil import tagged_hash
from allmydata.util.fileutil import make_dirs
-from allmydata.encode import NotEnoughSharesError
from allmydata.interfaces import IURI, IMutableFileURI, IUploadable, \
FileTooLargeError
from foolscap.eventual import eventually, fireEventually
from twisted.internet.error import ConnectionDone, ConnectionLost
from twisted.application import service
import allmydata
-from allmydata import client, uri, download, upload, storage, offloaded, \
- filenode
+from allmydata import client, uri, storage, offloaded
+from allmydata.immutable import download, upload, filenode
from allmydata.introducer.server import IntroducerNode
from allmydata.util import fileutil, idlib, mathutil, testutil
from allmydata.util import log, base32
from twisted.internet import defer
from cStringIO import StringIO
-from allmydata import upload, encode, uri
+from allmydata import uri
+from allmydata.immutable import upload, encode
from allmydata.interfaces import IFileURI, FileTooLargeError
from allmydata.util.assertutil import precondition
from allmydata.util.deferredutil import DeferredListShouldSucceed
from twisted.internet import defer, reactor
from twisted.web import client, error, http
from twisted.python import failure, log
-from allmydata import interfaces, provisioning, uri, webish, upload, download
+from allmydata import interfaces, provisioning, uri, webish
+from allmydata.immutable import upload, download
from allmydata.web import status, common
from allmydata.util import fileutil
from allmydata.test.common import FakeDirectoryNode, FakeCHKFileNode, \
+++ /dev/null
-
-import os, time, weakref, itertools
-from zope.interface import implements
-from twisted.python import failure
-from twisted.internet import defer
-from twisted.application import service
-from foolscap import Referenceable, Copyable, RemoteCopy
-from foolscap import eventual
-from foolscap.logging import log
-
-from allmydata.util.hashutil import file_renewal_secret_hash, \
- file_cancel_secret_hash, bucket_renewal_secret_hash, \
- bucket_cancel_secret_hash, plaintext_hasher, \
- storage_index_hash, plaintext_segment_hasher, convergence_hasher
-from allmydata import encode, storage, hashtree, uri
-from allmydata.util import base32, idlib, mathutil
-from allmydata.util.assertutil import precondition
-from allmydata.interfaces import IUploadable, IUploader, IUploadResults, \
- IEncryptedUploadable, RIEncryptedUploadable, IUploadStatus
-from pycryptopp.cipher.aes import AES
-
-from cStringIO import StringIO
-
-
-KiB=1024
-MiB=1024*KiB
-GiB=1024*MiB
-TiB=1024*GiB
-PiB=1024*TiB
-
-class HaveAllPeersError(Exception):
- # we use this to jump out of the loop
- pass
-
-# this wants to live in storage, not here
-class TooFullError(Exception):
- pass
-
-class UploadResults(Copyable, RemoteCopy):
- implements(IUploadResults)
- typeToCopy = "allmydata.upload.UploadResults.tahoe.allmydata.com"
- copytype = typeToCopy
-
- def __init__(self):
- self.timings = {} # dict of name to number of seconds
- self.sharemap = {} # dict of shnum to placement string
- self.servermap = {} # dict of peerid to set(shnums)
- self.file_size = None
- self.ciphertext_fetched = None # how much the helper fetched
- self.uri = None
- self.preexisting_shares = None # count of shares already present
- self.pushed_shares = None # count of shares we pushed
-
-
-# our current uri_extension is 846 bytes for small files, a few bytes
-# more for larger ones (since the filesize is encoded in decimal in a
-# few places). Ask for a little bit more just in case we need it. If
-# the extension changes size, we can change EXTENSION_SIZE to
-# allocate a more accurate amount of space.
-EXTENSION_SIZE = 1000
-# TODO: actual extensions are closer to 419 bytes, so we can probably lower
-# this.
-
-class PeerTracker:
- def __init__(self, peerid, storage_server,
- sharesize, blocksize, num_segments, num_share_hashes,
- storage_index,
- bucket_renewal_secret, bucket_cancel_secret):
- precondition(isinstance(peerid, str), peerid)
- precondition(len(peerid) == 20, peerid)
- self.peerid = peerid
- self._storageserver = storage_server # to an RIStorageServer
- self.buckets = {} # k: shareid, v: IRemoteBucketWriter
- self.sharesize = sharesize
- as = storage.allocated_size(sharesize,
- num_segments,
- num_share_hashes,
- EXTENSION_SIZE)
- self.allocated_size = as
-
- self.blocksize = blocksize
- self.num_segments = num_segments
- self.num_share_hashes = num_share_hashes
- self.storage_index = storage_index
-
- self.renew_secret = bucket_renewal_secret
- self.cancel_secret = bucket_cancel_secret
-
- def __repr__(self):
- return ("<PeerTracker for peer %s and SI %s>"
- % (idlib.shortnodeid_b2a(self.peerid),
- storage.si_b2a(self.storage_index)[:5]))
-
- def query(self, sharenums):
- d = self._storageserver.callRemote("allocate_buckets",
- self.storage_index,
- self.renew_secret,
- self.cancel_secret,
- sharenums,
- self.allocated_size,
- canary=Referenceable())
- d.addCallback(self._got_reply)
- return d
-
- def _got_reply(self, (alreadygot, buckets)):
- #log.msg("%s._got_reply(%s)" % (self, (alreadygot, buckets)))
- b = {}
- for sharenum, rref in buckets.iteritems():
- bp = storage.WriteBucketProxy(rref, self.sharesize,
- self.blocksize,
- self.num_segments,
- self.num_share_hashes,
- EXTENSION_SIZE,
- self.peerid)
- b[sharenum] = bp
- self.buckets.update(b)
- return (alreadygot, set(b.keys()))
-
-class Tahoe2PeerSelector:
-
- def __init__(self, upload_id, logparent=None, upload_status=None):
- self.upload_id = upload_id
- self.query_count, self.good_query_count, self.bad_query_count = 0,0,0
- self.error_count = 0
- self.num_peers_contacted = 0
- self.last_failure_msg = None
- self._status = IUploadStatus(upload_status)
- self._log_parent = log.msg("%s starting" % self, parent=logparent)
-
- def __repr__(self):
- return "<Tahoe2PeerSelector for upload %s>" % self.upload_id
-
- def get_shareholders(self, client,
- storage_index, share_size, block_size,
- num_segments, total_shares, shares_of_happiness):
- """
- @return: (used_peers, already_peers), where used_peers is a set of
- PeerTracker instances that have agreed to hold some shares
- for us (the shnum is stashed inside the PeerTracker),
- and already_peers is a dict mapping shnum to a peer
- which claims to already have the share.
- """
-
- if self._status:
- self._status.set_status("Contacting Peers..")
-
- self.total_shares = total_shares
- self.shares_of_happiness = shares_of_happiness
-
- self.homeless_shares = range(total_shares)
- # self.uncontacted_peers = list() # peers we haven't asked yet
- self.contacted_peers = [] # peers worth asking again
- self.contacted_peers2 = [] # peers that we have asked again
- self._started_second_pass = False
- self.use_peers = set() # PeerTrackers that have shares assigned to them
- self.preexisting_shares = {} # sharenum -> peerid holding the share
-
- peers = client.get_permuted_peers("storage", storage_index)
- if not peers:
- raise encode.NotEnoughSharesError("client gave us zero peers")
-
- # figure out how much space to ask for
-
- # this needed_hashes computation should mirror
- # Encoder.send_all_share_hash_trees. We use an IncompleteHashTree
- # (instead of a HashTree) because we don't require actual hashing
- # just to count the levels.
- ht = hashtree.IncompleteHashTree(total_shares)
- num_share_hashes = len(ht.needed_hashes(0, include_leaf=True))
-
- # decide upon the renewal/cancel secrets, to include them in the
- # allocat_buckets query.
- client_renewal_secret = client.get_renewal_secret()
- client_cancel_secret = client.get_cancel_secret()
-
- file_renewal_secret = file_renewal_secret_hash(client_renewal_secret,
- storage_index)
- file_cancel_secret = file_cancel_secret_hash(client_cancel_secret,
- storage_index)
-
- trackers = [ PeerTracker(peerid, conn,
- share_size, block_size,
- num_segments, num_share_hashes,
- storage_index,
- bucket_renewal_secret_hash(file_renewal_secret,
- peerid),
- bucket_cancel_secret_hash(file_cancel_secret,
- peerid),
- )
- for (peerid, conn) in peers ]
- self.uncontacted_peers = trackers
-
- d = defer.maybeDeferred(self._loop)
- return d
-
- def _loop(self):
- if not self.homeless_shares:
- # all done
- msg = ("placed all %d shares, "
- "sent %d queries to %d peers, "
- "%d queries placed some shares, %d placed none, "
- "got %d errors" %
- (self.total_shares,
- self.query_count, self.num_peers_contacted,
- self.good_query_count, self.bad_query_count,
- self.error_count))
- log.msg("peer selection successful for %s: %s" % (self, msg),
- parent=self._log_parent)
- return (self.use_peers, self.preexisting_shares)
-
- if self.uncontacted_peers:
- peer = self.uncontacted_peers.pop(0)
- # TODO: don't pre-convert all peerids to PeerTrackers
- assert isinstance(peer, PeerTracker)
-
- shares_to_ask = set([self.homeless_shares.pop(0)])
- self.query_count += 1
- self.num_peers_contacted += 1
- if self._status:
- self._status.set_status("Contacting Peers [%s] (first query),"
- " %d shares left.."
- % (idlib.shortnodeid_b2a(peer.peerid),
- len(self.homeless_shares)))
- d = peer.query(shares_to_ask)
- d.addBoth(self._got_response, peer, shares_to_ask,
- self.contacted_peers)
- return d
- elif self.contacted_peers:
- # ask a peer that we've already asked.
- if not self._started_second_pass:
- log.msg("starting second pass", parent=self._log_parent,
- level=log.NOISY)
- self._started_second_pass = True
- num_shares = mathutil.div_ceil(len(self.homeless_shares),
- len(self.contacted_peers))
- peer = self.contacted_peers.pop(0)
- shares_to_ask = set(self.homeless_shares[:num_shares])
- self.homeless_shares[:num_shares] = []
- self.query_count += 1
- if self._status:
- self._status.set_status("Contacting Peers [%s] (second query),"
- " %d shares left.."
- % (idlib.shortnodeid_b2a(peer.peerid),
- len(self.homeless_shares)))
- d = peer.query(shares_to_ask)
- d.addBoth(self._got_response, peer, shares_to_ask,
- self.contacted_peers2)
- return d
- elif self.contacted_peers2:
- # we've finished the second-or-later pass. Move all the remaining
- # peers back into self.contacted_peers for the next pass.
- self.contacted_peers.extend(self.contacted_peers2)
- self.contacted_peers[:] = []
- return self._loop()
- else:
- # no more peers. If we haven't placed enough shares, we fail.
- placed_shares = self.total_shares - len(self.homeless_shares)
- if placed_shares < self.shares_of_happiness:
- msg = ("placed %d shares out of %d total (%d homeless), "
- "sent %d queries to %d peers, "
- "%d queries placed some shares, %d placed none, "
- "got %d errors" %
- (self.total_shares - len(self.homeless_shares),
- self.total_shares, len(self.homeless_shares),
- self.query_count, self.num_peers_contacted,
- self.good_query_count, self.bad_query_count,
- self.error_count))
- msg = "peer selection failed for %s: %s" % (self, msg)
- if self.last_failure_msg:
- msg += " (%s)" % (self.last_failure_msg,)
- log.msg(msg, level=log.UNUSUAL, parent=self._log_parent)
- raise encode.NotEnoughSharesError(msg)
- else:
- # we placed enough to be happy, so we're done
- if self._status:
- self._status.set_status("Placed all shares")
- return self.use_peers
-
- def _got_response(self, res, peer, shares_to_ask, put_peer_here):
- if isinstance(res, failure.Failure):
- # This is unusual, and probably indicates a bug or a network
- # problem.
- log.msg("%s got error during peer selection: %s" % (peer, res),
- level=log.UNUSUAL, parent=self._log_parent)
- self.error_count += 1
- self.homeless_shares = list(shares_to_ask) + self.homeless_shares
- if (self.uncontacted_peers
- or self.contacted_peers
- or self.contacted_peers2):
- # there is still hope, so just loop
- pass
- else:
- # No more peers, so this upload might fail (it depends upon
- # whether we've hit shares_of_happiness or not). Log the last
- # failure we got: if a coding error causes all peers to fail
- # in the same way, this allows the common failure to be seen
- # by the uploader and should help with debugging
- msg = ("last failure (from %s) was: %s" % (peer, res))
- self.last_failure_msg = msg
- else:
- (alreadygot, allocated) = res
- log.msg("response from peer %s: alreadygot=%s, allocated=%s"
- % (idlib.shortnodeid_b2a(peer.peerid),
- tuple(sorted(alreadygot)), tuple(sorted(allocated))),
- level=log.NOISY, parent=self._log_parent)
- progress = False
- for s in alreadygot:
- self.preexisting_shares[s] = peer.peerid
- if s in self.homeless_shares:
- self.homeless_shares.remove(s)
- progress = True
-
- # the PeerTracker will remember which shares were allocated on
- # that peer. We just have to remember to use them.
- if allocated:
- self.use_peers.add(peer)
- progress = True
-
- not_yet_present = set(shares_to_ask) - set(alreadygot)
- still_homeless = not_yet_present - set(allocated)
-
- if progress:
- # they accepted or already had at least one share, so
- # progress has been made
- self.good_query_count += 1
- else:
- self.bad_query_count += 1
-
- if still_homeless:
- # In networks with lots of space, this is very unusual and
- # probably indicates an error. In networks with peers that
- # are full, it is merely unusual. In networks that are very
- # full, it is common, and many uploads will fail. In most
- # cases, this is obviously not fatal, and we'll just use some
- # other peers.
-
- # some shares are still homeless, keep trying to find them a
- # home. The ones that were rejected get first priority.
- self.homeless_shares = (list(still_homeless)
- + self.homeless_shares)
- # Since they were unable to accept all of our requests, so it
- # is safe to assume that asking them again won't help.
- else:
- # if they *were* able to accept everything, they might be
- # willing to accept even more.
- put_peer_here.append(peer)
-
- # now loop
- return self._loop()
-
-
-class EncryptAnUploadable:
- """This is a wrapper that takes an IUploadable and provides
- IEncryptedUploadable."""
- implements(IEncryptedUploadable)
- CHUNKSIZE = 50*1024
-
- def __init__(self, original, log_parent=None):
- self.original = IUploadable(original)
- self._log_number = log_parent
- self._encryptor = None
- self._plaintext_hasher = plaintext_hasher()
- self._plaintext_segment_hasher = None
- self._plaintext_segment_hashes = []
- self._encoding_parameters = None
- self._file_size = None
- self._ciphertext_bytes_read = 0
- self._status = None
-
- def set_upload_status(self, upload_status):
- self._status = IUploadStatus(upload_status)
- self.original.set_upload_status(upload_status)
-
- def log(self, *args, **kwargs):
- if "facility" not in kwargs:
- kwargs["facility"] = "upload.encryption"
- if "parent" not in kwargs:
- kwargs["parent"] = self._log_number
- return log.msg(*args, **kwargs)
-
- def get_size(self):
- if self._file_size is not None:
- return defer.succeed(self._file_size)
- d = self.original.get_size()
- def _got_size(size):
- self._file_size = size
- if self._status:
- self._status.set_size(size)
- return size
- d.addCallback(_got_size)
- return d
-
- def get_all_encoding_parameters(self):
- if self._encoding_parameters is not None:
- return defer.succeed(self._encoding_parameters)
- d = self.original.get_all_encoding_parameters()
- def _got(encoding_parameters):
- (k, happy, n, segsize) = encoding_parameters
- self._segment_size = segsize # used by segment hashers
- self._encoding_parameters = encoding_parameters
- self.log("my encoding parameters: %s" % (encoding_parameters,),
- level=log.NOISY)
- return encoding_parameters
- d.addCallback(_got)
- return d
-
- def _get_encryptor(self):
- if self._encryptor:
- return defer.succeed(self._encryptor)
-
- d = self.original.get_encryption_key()
- def _got(key):
- e = AES(key)
- self._encryptor = e
-
- storage_index = storage_index_hash(key)
- assert isinstance(storage_index, str)
- # There's no point to having the SI be longer than the key, so we
- # specify that it is truncated to the same 128 bits as the AES key.
- assert len(storage_index) == 16 # SHA-256 truncated to 128b
- self._storage_index = storage_index
- if self._status:
- self._status.set_storage_index(storage_index)
- return e
- d.addCallback(_got)
- return d
-
- def get_storage_index(self):
- d = self._get_encryptor()
- d.addCallback(lambda res: self._storage_index)
- return d
-
- def _get_segment_hasher(self):
- p = self._plaintext_segment_hasher
- if p:
- left = self._segment_size - self._plaintext_segment_hashed_bytes
- return p, left
- p = plaintext_segment_hasher()
- self._plaintext_segment_hasher = p
- self._plaintext_segment_hashed_bytes = 0
- return p, self._segment_size
-
- def _update_segment_hash(self, chunk):
- offset = 0
- while offset < len(chunk):
- p, segment_left = self._get_segment_hasher()
- chunk_left = len(chunk) - offset
- this_segment = min(chunk_left, segment_left)
- p.update(chunk[offset:offset+this_segment])
- self._plaintext_segment_hashed_bytes += this_segment
-
- if self._plaintext_segment_hashed_bytes == self._segment_size:
- # we've filled this segment
- self._plaintext_segment_hashes.append(p.digest())
- self._plaintext_segment_hasher = None
- self.log("closed hash [%d]: %dB" %
- (len(self._plaintext_segment_hashes)-1,
- self._plaintext_segment_hashed_bytes),
- level=log.NOISY)
- self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s",
- segnum=len(self._plaintext_segment_hashes)-1,
- hash=base32.b2a(p.digest()),
- level=log.NOISY)
-
- offset += this_segment
-
-
- def read_encrypted(self, length, hash_only):
- # make sure our parameters have been set up first
- d = self.get_all_encoding_parameters()
- # and size
- d.addCallback(lambda ignored: self.get_size())
- d.addCallback(lambda ignored: self._get_encryptor())
- # then fetch and encrypt the plaintext. The unusual structure here
- # (passing a Deferred *into* a function) is needed to avoid
- # overflowing the stack: Deferreds don't optimize out tail recursion.
- # We also pass in a list, to which _read_encrypted will append
- # ciphertext.
- ciphertext = []
- d2 = defer.Deferred()
- d.addCallback(lambda ignored:
- self._read_encrypted(length, ciphertext, hash_only, d2))
- d.addCallback(lambda ignored: d2)
- return d
-
- def _read_encrypted(self, remaining, ciphertext, hash_only, fire_when_done):
- if not remaining:
- fire_when_done.callback(ciphertext)
- return None
- # tolerate large length= values without consuming a lot of RAM by
- # reading just a chunk (say 50kB) at a time. This only really matters
- # when hash_only==True (i.e. resuming an interrupted upload), since
- # that's the case where we will be skipping over a lot of data.
- size = min(remaining, self.CHUNKSIZE)
- remaining = remaining - size
- # read a chunk of plaintext..
- d = defer.maybeDeferred(self.original.read, size)
- # N.B.: if read() is synchronous, then since everything else is
- # actually synchronous too, we'd blow the stack unless we stall for a
- # tick. Once you accept a Deferred from IUploadable.read(), you must
- # be prepared to have it fire immediately too.
- d.addCallback(eventual.fireEventually)
- def _good(plaintext):
- # and encrypt it..
- # o/' over the fields we go, hashing all the way, sHA! sHA! sHA! o/'
- ct = self._hash_and_encrypt_plaintext(plaintext, hash_only)
- ciphertext.extend(ct)
- self._read_encrypted(remaining, ciphertext, hash_only,
- fire_when_done)
- def _err(why):
- fire_when_done.errback(why)
- d.addCallback(_good)
- d.addErrback(_err)
- return None
-
- def _hash_and_encrypt_plaintext(self, data, hash_only):
- assert isinstance(data, (tuple, list)), type(data)
- data = list(data)
- cryptdata = []
- # we use data.pop(0) instead of 'for chunk in data' to save
- # memory: each chunk is destroyed as soon as we're done with it.
- bytes_processed = 0
- while data:
- chunk = data.pop(0)
- self.log(" read_encrypted handling %dB-sized chunk" % len(chunk),
- level=log.NOISY)
- bytes_processed += len(chunk)
- self._plaintext_hasher.update(chunk)
- self._update_segment_hash(chunk)
- # TODO: we have to encrypt the data (even if hash_only==True)
- # because pycryptopp's AES-CTR implementation doesn't offer a
- # way to change the counter value. Once pycryptopp acquires
- # this ability, change this to simply update the counter
- # before each call to (hash_only==False) _encryptor.process()
- ciphertext = self._encryptor.process(chunk)
- if hash_only:
- self.log(" skipping encryption", level=log.NOISY)
- else:
- cryptdata.append(ciphertext)
- del ciphertext
- del chunk
- self._ciphertext_bytes_read += bytes_processed
- if self._status:
- progress = float(self._ciphertext_bytes_read) / self._file_size
- self._status.set_progress(1, progress)
- return cryptdata
-
-
- def get_plaintext_hashtree_leaves(self, first, last, num_segments):
- if len(self._plaintext_segment_hashes) < num_segments:
- # close out the last one
- assert len(self._plaintext_segment_hashes) == num_segments-1
- p, segment_left = self._get_segment_hasher()
- self._plaintext_segment_hashes.append(p.digest())
- del self._plaintext_segment_hasher
- self.log("closing plaintext leaf hasher, hashed %d bytes" %
- self._plaintext_segment_hashed_bytes,
- level=log.NOISY)
- self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s",
- segnum=len(self._plaintext_segment_hashes)-1,
- hash=base32.b2a(p.digest()),
- level=log.NOISY)
- assert len(self._plaintext_segment_hashes) == num_segments
- return defer.succeed(tuple(self._plaintext_segment_hashes[first:last]))
-
- def get_plaintext_hash(self):
- h = self._plaintext_hasher.digest()
- return defer.succeed(h)
-
- def close(self):
- return self.original.close()
-
-class UploadStatus:
- implements(IUploadStatus)
- statusid_counter = itertools.count(0)
-
- def __init__(self):
- self.storage_index = None
- self.size = None
- self.helper = False
- self.status = "Not started"
- self.progress = [0.0, 0.0, 0.0]
- self.active = True
- self.results = None
- self.counter = self.statusid_counter.next()
- self.started = time.time()
-
- def get_started(self):
- return self.started
- def get_storage_index(self):
- return self.storage_index
- def get_size(self):
- return self.size
- def using_helper(self):
- return self.helper
- def get_status(self):
- return self.status
- def get_progress(self):
- return tuple(self.progress)
- def get_active(self):
- return self.active
- def get_results(self):
- return self.results
- def get_counter(self):
- return self.counter
-
- def set_storage_index(self, si):
- self.storage_index = si
- def set_size(self, size):
- self.size = size
- def set_helper(self, helper):
- self.helper = helper
- def set_status(self, status):
- self.status = status
- def set_progress(self, which, value):
- # [0]: chk, [1]: ciphertext, [2]: encode+push
- self.progress[which] = value
- def set_active(self, value):
- self.active = value
- def set_results(self, value):
- self.results = value
-
-class CHKUploader:
- peer_selector_class = Tahoe2PeerSelector
-
- def __init__(self, client):
- self._client = client
- self._log_number = self._client.log("CHKUploader starting")
- self._encoder = None
- self._results = UploadResults()
- self._storage_index = None
- self._upload_status = UploadStatus()
- self._upload_status.set_helper(False)
- self._upload_status.set_active(True)
- self._upload_status.set_results(self._results)
-
- def log(self, *args, **kwargs):
- if "parent" not in kwargs:
- kwargs["parent"] = self._log_number
- if "facility" not in kwargs:
- kwargs["facility"] = "tahoe.upload"
- return self._client.log(*args, **kwargs)
-
- def start(self, uploadable):
- """Start uploading the file.
-
- This method returns a Deferred that will fire with the URI (a
- string)."""
-
- self._started = time.time()
- uploadable = IUploadable(uploadable)
- self.log("starting upload of %s" % uploadable)
-
- eu = EncryptAnUploadable(uploadable, self._log_number)
- eu.set_upload_status(self._upload_status)
- d = self.start_encrypted(eu)
- def _uploaded(res):
- d1 = uploadable.get_encryption_key()
- d1.addCallback(lambda key: self._compute_uri(res, key))
- return d1
- d.addCallback(_uploaded)
- def _done(res):
- self._upload_status.set_active(False)
- return res
- d.addBoth(_done)
- return d
-
- def abort(self):
- """Call this is the upload must be abandoned before it completes.
- This will tell the shareholders to delete their partial shares. I
- return a Deferred that fires when these messages have been acked."""
- if not self._encoder:
- # how did you call abort() before calling start() ?
- return defer.succeed(None)
- return self._encoder.abort()
-
- def start_encrypted(self, encrypted):
- eu = IEncryptedUploadable(encrypted)
-
- started = time.time()
- self._encoder = e = encode.Encoder(self._log_number,
- self._upload_status)
- d = e.set_encrypted_uploadable(eu)
- d.addCallback(self.locate_all_shareholders, started)
- d.addCallback(self.set_shareholders, e)
- d.addCallback(lambda res: e.start())
- d.addCallback(self._encrypted_done)
- # this fires with the uri_extension_hash and other data
- return d
-
- def locate_all_shareholders(self, encoder, started):
- peer_selection_started = now = time.time()
- self._storage_index_elapsed = now - started
- storage_index = encoder.get_param("storage_index")
- self._storage_index = storage_index
- upload_id = storage.si_b2a(storage_index)[:5]
- self.log("using storage index %s" % upload_id)
- peer_selector = self.peer_selector_class(upload_id, self._log_number,
- self._upload_status)
-
- share_size = encoder.get_param("share_size")
- block_size = encoder.get_param("block_size")
- num_segments = encoder.get_param("num_segments")
- k,desired,n = encoder.get_param("share_counts")
-
- self._peer_selection_started = time.time()
- d = peer_selector.get_shareholders(self._client, storage_index,
- share_size, block_size,
- num_segments, n, desired)
- def _done(res):
- self._peer_selection_elapsed = time.time() - peer_selection_started
- return res
- d.addCallback(_done)
- return d
-
- def set_shareholders(self, (used_peers, already_peers), encoder):
- """
- @param used_peers: a sequence of PeerTracker objects
- @paran already_peers: a dict mapping sharenum to a peerid that
- claims to already have this share
- """
- self.log("_send_shares, used_peers is %s" % (used_peers,))
- # record already-present shares in self._results
- for (shnum, peerid) in already_peers.items():
- peerid_s = idlib.shortnodeid_b2a(peerid)
- self._results.sharemap[shnum] = "Found on [%s]" % peerid_s
- if peerid not in self._results.servermap:
- self._results.servermap[peerid] = set()
- self._results.servermap[peerid].add(shnum)
- self._results.preexisting_shares = len(already_peers)
-
- self._sharemap = {}
- for peer in used_peers:
- assert isinstance(peer, PeerTracker)
- buckets = {}
- for peer in used_peers:
- buckets.update(peer.buckets)
- for shnum in peer.buckets:
- self._sharemap[shnum] = peer
- assert len(buckets) == sum([len(peer.buckets) for peer in used_peers])
- encoder.set_shareholders(buckets)
-
- def _encrypted_done(self, res):
- r = self._results
- for shnum in self._encoder.get_shares_placed():
- peer_tracker = self._sharemap[shnum]
- peerid = peer_tracker.peerid
- peerid_s = idlib.shortnodeid_b2a(peerid)
- r.sharemap[shnum] = "Placed on [%s]" % peerid_s
- if peerid not in r.servermap:
- r.servermap[peerid] = set()
- r.servermap[peerid].add(shnum)
- r.pushed_shares = len(self._encoder.get_shares_placed())
- now = time.time()
- r.file_size = self._encoder.file_size
- r.timings["total"] = now - self._started
- r.timings["storage_index"] = self._storage_index_elapsed
- r.timings["peer_selection"] = self._peer_selection_elapsed
- r.timings.update(self._encoder.get_times())
- r.uri_extension_data = self._encoder.get_uri_extension_data()
- return res
-
- def _compute_uri(self, (uri_extension_hash,
- needed_shares, total_shares, size),
- key):
- u = uri.CHKFileURI(key=key,
- uri_extension_hash=uri_extension_hash,
- needed_shares=needed_shares,
- total_shares=total_shares,
- size=size,
- )
- r = self._results
- r.uri = u.to_string()
- return r
-
- def get_upload_status(self):
- return self._upload_status
-
-def read_this_many_bytes(uploadable, size, prepend_data=[]):
- if size == 0:
- return defer.succeed([])
- d = uploadable.read(size)
- def _got(data):
- assert isinstance(data, list)
- bytes = sum([len(piece) for piece in data])
- assert bytes > 0
- assert bytes <= size
- remaining = size - bytes
- if remaining:
- return read_this_many_bytes(uploadable, remaining,
- prepend_data + data)
- return prepend_data + data
- d.addCallback(_got)
- return d
-
-class LiteralUploader:
-
- def __init__(self, client):
- self._client = client
- self._results = UploadResults()
- self._status = s = UploadStatus()
- s.set_storage_index(None)
- s.set_helper(False)
- s.set_progress(0, 1.0)
- s.set_active(False)
- s.set_results(self._results)
-
- def start(self, uploadable):
- uploadable = IUploadable(uploadable)
- d = uploadable.get_size()
- def _got_size(size):
- self._size = size
- self._status.set_size(size)
- self._results.file_size = size
- return read_this_many_bytes(uploadable, size)
- d.addCallback(_got_size)
- d.addCallback(lambda data: uri.LiteralFileURI("".join(data)))
- d.addCallback(lambda u: u.to_string())
- d.addCallback(self._build_results)
- return d
-
- def _build_results(self, uri):
- self._results.uri = uri
- self._status.set_status("Done")
- self._status.set_progress(1, 1.0)
- self._status.set_progress(2, 1.0)
- return self._results
-
- def close(self):
- pass
-
- def get_upload_status(self):
- return self._status
-
-class RemoteEncryptedUploadable(Referenceable):
- implements(RIEncryptedUploadable)
-
- def __init__(self, encrypted_uploadable, upload_status):
- self._eu = IEncryptedUploadable(encrypted_uploadable)
- self._offset = 0
- self._bytes_sent = 0
- self._status = IUploadStatus(upload_status)
- # we are responsible for updating the status string while we run, and
- # for setting the ciphertext-fetch progress.
- self._size = None
-
- def get_size(self):
- if self._size is not None:
- return defer.succeed(self._size)
- d = self._eu.get_size()
- def _got_size(size):
- self._size = size
- return size
- d.addCallback(_got_size)
- return d
-
- def remote_get_size(self):
- return self.get_size()
- def remote_get_all_encoding_parameters(self):
- return self._eu.get_all_encoding_parameters()
-
- def _read_encrypted(self, length, hash_only):
- d = self._eu.read_encrypted(length, hash_only)
- def _read(strings):
- if hash_only:
- self._offset += length
- else:
- size = sum([len(data) for data in strings])
- self._offset += size
- return strings
- d.addCallback(_read)
- return d
-
- def remote_read_encrypted(self, offset, length):
- # we don't support seek backwards, but we allow skipping forwards
- precondition(offset >= 0, offset)
- precondition(length >= 0, length)
- lp = log.msg("remote_read_encrypted(%d-%d)" % (offset, offset+length),
- level=log.NOISY)
- precondition(offset >= self._offset, offset, self._offset)
- if offset > self._offset:
- # read the data from disk anyways, to build up the hash tree
- skip = offset - self._offset
- log.msg("remote_read_encrypted skipping ahead from %d to %d, skip=%d" %
- (self._offset, offset, skip), level=log.UNUSUAL, parent=lp)
- d = self._read_encrypted(skip, hash_only=True)
- else:
- d = defer.succeed(None)
-
- def _at_correct_offset(res):
- assert offset == self._offset, "%d != %d" % (offset, self._offset)
- return self._read_encrypted(length, hash_only=False)
- d.addCallback(_at_correct_offset)
-
- def _read(strings):
- size = sum([len(data) for data in strings])
- self._bytes_sent += size
- return strings
- d.addCallback(_read)
- return d
-
- def remote_get_plaintext_hashtree_leaves(self, first, last, num_segments):
- log.msg("remote_get_plaintext_hashtree_leaves: %d-%d of %d" %
- (first, last-1, num_segments),
- level=log.NOISY)
- d = self._eu.get_plaintext_hashtree_leaves(first, last, num_segments)
- d.addCallback(list)
- return d
- def remote_get_plaintext_hash(self):
- return self._eu.get_plaintext_hash()
- def remote_close(self):
- return self._eu.close()
-
-
-class AssistedUploader:
-
- def __init__(self, helper):
- self._helper = helper
- self._log_number = log.msg("AssistedUploader starting")
- self._storage_index = None
- self._upload_status = s = UploadStatus()
- s.set_helper(True)
- s.set_active(True)
-
- def log(self, *args, **kwargs):
- if "parent" not in kwargs:
- kwargs["parent"] = self._log_number
- return log.msg(*args, **kwargs)
-
- def start(self, uploadable):
- self._started = time.time()
- u = IUploadable(uploadable)
- eu = EncryptAnUploadable(u, self._log_number)
- eu.set_upload_status(self._upload_status)
- self._encuploadable = eu
- d = eu.get_size()
- d.addCallback(self._got_size)
- d.addCallback(lambda res: eu.get_all_encoding_parameters())
- d.addCallback(self._got_all_encoding_parameters)
- # when we get the encryption key, that will also compute the storage
- # index, so this only takes one pass.
- # TODO: I'm not sure it's cool to switch back and forth between
- # the Uploadable and the IEncryptedUploadable that wraps it.
- d.addCallback(lambda res: u.get_encryption_key())
- d.addCallback(self._got_encryption_key)
- d.addCallback(lambda res: eu.get_storage_index())
- d.addCallback(self._got_storage_index)
- d.addCallback(self._contact_helper)
- d.addCallback(self._build_readcap)
- def _done(res):
- self._upload_status.set_active(False)
- return res
- d.addBoth(_done)
- return d
-
- def _got_size(self, size):
- self._size = size
- self._upload_status.set_size(size)
-
- def _got_all_encoding_parameters(self, params):
- k, happy, n, segment_size = params
- # stash these for URI generation later
- self._needed_shares = k
- self._total_shares = n
- self._segment_size = segment_size
-
- def _got_encryption_key(self, key):
- self._key = key
-
- def _got_storage_index(self, storage_index):
- self._storage_index = storage_index
-
-
- def _contact_helper(self, res):
- now = self._time_contacting_helper_start = time.time()
- self._storage_index_elapsed = now - self._started
- self.log(format="contacting helper for SI %(si)s..",
- si=storage.si_b2a(self._storage_index))
- self._upload_status.set_status("Contacting Helper")
- d = self._helper.callRemote("upload_chk", self._storage_index)
- d.addCallback(self._contacted_helper)
- return d
-
- def _contacted_helper(self, (upload_results, upload_helper)):
- now = time.time()
- elapsed = now - self._time_contacting_helper_start
- self._elapsed_time_contacting_helper = elapsed
- if upload_helper:
- self.log("helper says we need to upload")
- self._upload_status.set_status("Uploading Ciphertext")
- # we need to upload the file
- reu = RemoteEncryptedUploadable(self._encuploadable,
- self._upload_status)
- # let it pre-compute the size for progress purposes
- d = reu.get_size()
- d.addCallback(lambda ignored:
- upload_helper.callRemote("upload", reu))
- # this Deferred will fire with the upload results
- return d
- self.log("helper says file is already uploaded")
- self._upload_status.set_progress(1, 1.0)
- self._upload_status.set_results(upload_results)
- return upload_results
-
- def _build_readcap(self, upload_results):
- self.log("upload finished, building readcap")
- self._upload_status.set_status("Building Readcap")
- r = upload_results
- assert r.uri_extension_data["needed_shares"] == self._needed_shares
- assert r.uri_extension_data["total_shares"] == self._total_shares
- assert r.uri_extension_data["segment_size"] == self._segment_size
- assert r.uri_extension_data["size"] == self._size
- u = uri.CHKFileURI(key=self._key,
- uri_extension_hash=r.uri_extension_hash,
- needed_shares=self._needed_shares,
- total_shares=self._total_shares,
- size=self._size,
- )
- r.uri = u.to_string()
- now = time.time()
- r.file_size = self._size
- r.timings["storage_index"] = self._storage_index_elapsed
- r.timings["contacting_helper"] = self._elapsed_time_contacting_helper
- if "total" in r.timings:
- r.timings["helper_total"] = r.timings["total"]
- r.timings["total"] = now - self._started
- self._upload_status.set_status("Done")
- self._upload_status.set_results(r)
- return r
-
- def get_upload_status(self):
- return self._upload_status
-
-class BaseUploadable:
- default_max_segment_size = 128*KiB # overridden by max_segment_size
- default_encoding_param_k = 3 # overridden by encoding_parameters
- default_encoding_param_happy = 7
- default_encoding_param_n = 10
-
- max_segment_size = None
- encoding_param_k = None
- encoding_param_happy = None
- encoding_param_n = None
-
- _all_encoding_parameters = None
- _status = None
-
- def set_upload_status(self, upload_status):
- self._status = IUploadStatus(upload_status)
-
- def set_default_encoding_parameters(self, default_params):
- assert isinstance(default_params, dict)
- for k,v in default_params.items():
- precondition(isinstance(k, str), k, v)
- precondition(isinstance(v, int), k, v)
- if "k" in default_params:
- self.default_encoding_param_k = default_params["k"]
- if "happy" in default_params:
- self.default_encoding_param_happy = default_params["happy"]
- if "n" in default_params:
- self.default_encoding_param_n = default_params["n"]
- if "max_segment_size" in default_params:
- self.default_max_segment_size = default_params["max_segment_size"]
-
- def get_all_encoding_parameters(self):
- if self._all_encoding_parameters:
- return defer.succeed(self._all_encoding_parameters)
-
- max_segsize = self.max_segment_size or self.default_max_segment_size
- k = self.encoding_param_k or self.default_encoding_param_k
- happy = self.encoding_param_happy or self.default_encoding_param_happy
- n = self.encoding_param_n or self.default_encoding_param_n
-
- d = self.get_size()
- def _got_size(file_size):
- # for small files, shrink the segment size to avoid wasting space
- segsize = min(max_segsize, file_size)
- # this must be a multiple of 'required_shares'==k
- segsize = mathutil.next_multiple(segsize, k)
- encoding_parameters = (k, happy, n, segsize)
- self._all_encoding_parameters = encoding_parameters
- return encoding_parameters
- d.addCallback(_got_size)
- return d
-
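
For reference, get_all_encoding_parameters above shrinks the segment size down to the file size and then rounds up to a multiple of k, so each segment splits evenly across the required shares; a small worked example (next_multiple is re-stated locally for illustration, the real one lives in allmydata.util.mathutil):

    def next_multiple(n, k):
        # smallest multiple of k that is >= n
        div, mod = divmod(n, k)
        return n if mod == 0 else (div + 1) * k

    max_segsize = 128 * 1024                 # default_max_segment_size
    k = 3                                    # default_encoding_param_k
    file_size = 10000
    segsize = min(max_segsize, file_size)    # 10000: no point in 128 KiB segments
    segsize = next_multiple(segsize, k)      # 10002, evenly divisible by k=3
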
-class FileHandle(BaseUploadable):
- implements(IUploadable)
-
- def __init__(self, filehandle, convergence):
- """
- Upload the data from the filehandle. If convergence is None then a
- random encryption key will be used, else the plaintext will be hashed,
- then the hash will be hashed together with the string in the
- "convergence" argument to form the encryption key.
- """
- assert convergence is None or isinstance(convergence, str), (convergence, type(convergence))
- self._filehandle = filehandle
- self._key = None
- self.convergence = convergence
- self._size = None
-
- def _get_encryption_key_convergent(self):
- if self._key is not None:
- return defer.succeed(self._key)
-
- d = self.get_size()
- # that sets self._size as a side-effect
- d.addCallback(lambda size: self.get_all_encoding_parameters())
- def _got(params):
- k, happy, n, segsize = params
- f = self._filehandle
- enckey_hasher = convergence_hasher(k, n, segsize, self.convergence)
- f.seek(0)
- BLOCKSIZE = 64*1024
- bytes_read = 0
- while True:
- data = f.read(BLOCKSIZE)
- if not data:
- break
- enckey_hasher.update(data)
- # TODO: setting progress in a non-yielding loop is kind of
- # pointless, but I'm anticipating (perhaps prematurely) the
- # day when we use a slowjob or twisted's CooperatorService to
- # make this yield time to other jobs.
- bytes_read += len(data)
- if self._status:
- self._status.set_progress(0, float(bytes_read)/self._size)
- f.seek(0)
- self._key = enckey_hasher.digest()
- if self._status:
- self._status.set_progress(0, 1.0)
- assert len(self._key) == 16
- return self._key
- d.addCallback(_got)
- return d
-
- def _get_encryption_key_random(self):
- if self._key is None:
- self._key = os.urandom(16)
- return defer.succeed(self._key)
-
- def get_encryption_key(self):
- if self.convergence is not None:
- return self._get_encryption_key_convergent()
- else:
- return self._get_encryption_key_random()
-
- def get_size(self):
- if self._size is not None:
- return defer.succeed(self._size)
- self._filehandle.seek(0,2)
- size = self._filehandle.tell()
- self._size = size
- self._filehandle.seek(0)
- return defer.succeed(size)
-
- def read(self, length):
- return defer.succeed([self._filehandle.read(length)])
-
- def close(self):
- # the originator of the filehandle reserves the right to close it
- pass
-
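
Because FileHandle derives the convergent key only from the plaintext, the encoding parameters, and the convergence string, two uploads of identical data with the same convergence secret yield the same key (and therefore the same storage index). A usage sketch, assuming these classes end up importable from allmydata.immutable.upload as the import changes at the end of this patch suggest:

    from StringIO import StringIO
    from allmydata.immutable.upload import FileHandle

    secret = "example convergence secret"   # placeholder value
    d1 = FileHandle(StringIO("hello world"), secret).get_encryption_key()
    d2 = FileHandle(StringIO("hello world"), secret).get_encryption_key()
    # both Deferreds fire with the same 16-byte AES key, so the uploads converge

    d3 = FileHandle(StringIO("hello world"), None).get_encryption_key()
    # convergence=None: an independent os.urandom(16) key, no deduplication
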
-class FileName(FileHandle):
- def __init__(self, filename, convergence):
- """
- Upload the data from the filename. If convergence is None then a
- random encryption key will be used, else the plaintext will be hashed,
- then the hash will be hashed together with the string in the
- "convergence" argument to form the encryption key.
- """
- assert convergence is None or isinstance(convergence, str), (convergence, type(convergence))
- FileHandle.__init__(self, open(filename, "rb"), convergence=convergence)
- def close(self):
- FileHandle.close(self)
- self._filehandle.close()
-
-class Data(FileHandle):
- def __init__(self, data, convergence):
- """
- Upload the data from the data argument. If convergence is None then a
- random encryption key will be used, else the plaintext will be hashed,
- then the hash will be hashed together with the string in the
- "convergence" argument to form the encryption key.
- """
- assert convergence is None or isinstance(convergence, str), (convergence, type(convergence))
- FileHandle.__init__(self, StringIO(data), convergence=convergence)
-
-class Uploader(service.MultiService):
- """I am a service that allows file uploading. I am a service-child of the
- Client.
- """
- implements(IUploader)
- name = "uploader"
- uploader_class = CHKUploader
- URI_LIT_SIZE_THRESHOLD = 55
- MAX_UPLOAD_STATUSES = 10
-
- def __init__(self, helper_furl=None, stats_provider=None):
- self._helper_furl = helper_furl
- self.stats_provider = stats_provider
- self._helper = None
- self._all_uploads = weakref.WeakKeyDictionary() # for debugging
- self._all_upload_statuses = weakref.WeakKeyDictionary()
- self._recent_upload_statuses = []
- service.MultiService.__init__(self)
-
- def startService(self):
- service.MultiService.startService(self)
- if self._helper_furl:
- self.parent.tub.connectTo(self._helper_furl,
- self._got_helper)
-
- def _got_helper(self, helper):
- self._helper = helper
- helper.notifyOnDisconnect(self._lost_helper)
- def _lost_helper(self):
- self._helper = None
-
- def get_helper_info(self):
- # return a tuple of (helper_furl_or_None, connected_bool)
- return (self._helper_furl, bool(self._helper))
-
- def upload(self, uploadable):
- # this returns the URI
- assert self.parent
- assert self.running
-
- uploadable = IUploadable(uploadable)
- d = uploadable.get_size()
- def _got_size(size):
- default_params = self.parent.get_encoding_parameters()
- precondition(isinstance(default_params, dict), default_params)
- precondition("max_segment_size" in default_params, default_params)
- uploadable.set_default_encoding_parameters(default_params)
-
- if self.stats_provider:
- self.stats_provider.count('uploader.files_uploaded', 1)
- self.stats_provider.count('uploader.bytes_uploaded', size)
-
- if size <= self.URI_LIT_SIZE_THRESHOLD:
- uploader = LiteralUploader(self.parent)
- elif self._helper:
- uploader = AssistedUploader(self._helper)
- else:
- uploader = self.uploader_class(self.parent)
- self._add_upload(uploader)
- return uploader.start(uploadable)
- d.addCallback(_got_size)
- def _done(res):
- uploadable.close()
- return res
- d.addBoth(_done)
- return d
-
- def _add_upload(self, uploader):
- s = uploader.get_upload_status()
- self._all_uploads[uploader] = None
- self._all_upload_statuses[s] = None
- self._recent_upload_statuses.append(s)
- while len(self._recent_upload_statuses) > self.MAX_UPLOAD_STATUSES:
- self._recent_upload_statuses.pop(0)
-
- def list_all_upload_statuses(self):
- for us in self._all_upload_statuses:
- yield us
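
Uploader.upload picks one of three front-ends purely from the file size and helper availability, using the class constants above; a condensed restatement of that decision (the string return values are stand-ins for the real uploader classes):

    URI_LIT_SIZE_THRESHOLD = 55   # bytes small enough to inline in a LIT URI

    def choose_uploader(size, helper_connected):
        if size <= URI_LIT_SIZE_THRESHOLD:
            return "LiteralUploader"    # data is embedded in the URI itself
        if helper_connected:
            return "AssistedUploader"   # ship ciphertext to the helper
        return "CHKUploader"            # encode and push shares ourselves

    assert choose_uploader(40, False) == "LiteralUploader"
    assert choose_uploader(10**6, True) == "AssistedUploader"
    assert choose_uploader(10**6, False) == "CHKUploader"
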
from nevow import url, rend
from nevow.inevow import IRequest
-from allmydata.upload import FileHandle
from allmydata.interfaces import IDownloadTarget, ExistingChildError
from allmydata.mutable.common import MODE_READ
+from allmydata.immutable.upload import FileHandle
from allmydata.util import log
from allmydata.web.common import text_plain, WebError, IClient, RenderMixin, \
from twisted.internet import defer
from nevow import rend, url, tags as T
from nevow.inevow import IRequest
-from allmydata.upload import FileHandle
+from allmydata.immutable.upload import FileHandle
from allmydata.web.common import IClient, getxmlfile, get_arg, boolean_of_arg
from allmydata.web import status