+++ /dev/null
-
-"""
-Given a StorageIndex, count how many shares we can find.
-
-This does no verification of the shares whatsoever. If a peer claims to
-have a share, we believe it.
-"""
-
-from zope.interface import implements
-from twisted.internet import defer
-from twisted.python import log
-from allmydata.interfaces import IVerifierURI, ICheckerResults
-from allmydata import download, storage
-from allmydata.util import hashutil, base32
-
-class Results:
- implements(ICheckerResults)
-
- def __init__(self, storage_index):
- # storage_index might be None for, say, LIT files
- self.storage_index = storage_index
- if storage_index is None:
- self.storage_index_s = "<none>"
- else:
- self.storage_index_s = base32.b2a(storage_index)[:6]
-
- def is_healthy(self):
- return self.healthy
-
- def html_summary(self):
- if self.healthy:
- return "<span>healthy</span>"
- return "<span>NOT HEALTHY</span>"
-
- def html(self):
- s = "<div>\n"
- s += "<h1>Checker Results for Immutable SI=%s</h1>\n" % self.storage_index_s
- if self.healthy:
- s += "<h2>Healthy!</h2>\n"
- else:
- s += "<h2>Not Healthy!</h2>\n"
- s += "</div>\n"
- return s
-
-
-class SimpleCHKFileChecker:
- """Return a list of (needed, total, found, sharemap), where sharemap maps
- share number to a list of (binary) nodeids of the shareholders."""
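-    # Illustrative (hypothetical) values: checking a healthy 3-of-10 file
-    # could yield stuff = (3, 10, 10, {0: [peerid_a], 1: [peerid_b], ...}),
-    # where each peerid identifies a server claiming to hold that share.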
-
- def __init__(self, peer_getter, uri_to_check):
- self.peer_getter = peer_getter
- self.found_shares = set()
- self.uri_to_check = IVerifierURI(uri_to_check)
- self.sharemap = {}
-
- '''
- def check_synchronously(self, si):
- # this is how we would write this class if we were using synchronous
- # messages (or if we used promises).
- found = set()
-        for (pmpeerid, peerid, connection) in self.peer_getter(si):
- buckets = connection.get_buckets(si)
- found.update(buckets.keys())
- return len(found)
- '''
-
- def check(self):
- d = self._get_all_shareholders(self.uri_to_check.storage_index)
- d.addCallback(self._done)
- return d
-
- def _get_all_shareholders(self, storage_index):
- dl = []
- for (peerid, ss) in self.peer_getter("storage", storage_index):
- d = ss.callRemote("get_buckets", storage_index)
- d.addCallbacks(self._got_response, self._got_error,
- callbackArgs=(peerid,))
- dl.append(d)
- return defer.DeferredList(dl)
-
- def _got_response(self, buckets, peerid):
-        # buckets is a dict: maps shnum to an rref of the server that holds it
- self.found_shares.update(buckets.keys())
- for k in buckets:
- if k not in self.sharemap:
- self.sharemap[k] = []
- self.sharemap[k].append(peerid)
-
-    def _got_error(self, f):
-        # errors (including KeyError) are logged but do not abort the check;
-        # other servers may still report their shares
-        log.err(f)
-
- def _done(self, res):
- u = self.uri_to_check
- r = Results(self.uri_to_check.storage_index)
- r.healthy = bool(len(self.found_shares) >= u.needed_shares)
- r.stuff = (u.needed_shares, u.total_shares, len(self.found_shares),
- self.sharemap)
- return r
-
-class VerifyingOutput:
- def __init__(self, total_length, results):
- self._crypttext_hasher = hashutil.crypttext_hasher()
- self.length = 0
- self.total_length = total_length
- self._segment_number = 0
- self._crypttext_hash_tree = None
- self._opened = False
- self._results = results
- results.healthy = False
-
- def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree):
- self._crypttext_hash_tree = crypttext_hashtree
-
- def write_segment(self, crypttext):
- self.length += len(crypttext)
-
- self._crypttext_hasher.update(crypttext)
- if self._crypttext_hash_tree:
- ch = hashutil.crypttext_segment_hasher()
- ch.update(crypttext)
- crypttext_leaves = {self._segment_number: ch.digest()}
- self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves)
-
- self._segment_number += 1
-
- def close(self):
- self.crypttext_hash = self._crypttext_hasher.digest()
-
- def finish(self):
- self._results.healthy = True
- return self._results
-
-
-class SimpleCHKFileVerifier(download.FileDownloader):
- # this reconstructs the crypttext, which verifies that at least 'k' of
- # the shareholders are around and have valid data. It does not check the
- # remaining shareholders, and it cannot verify the plaintext.
- check_plaintext_hash = False
-
- def __init__(self, client, u):
- self._client = client
-
- u = IVerifierURI(u)
- self._storage_index = u.storage_index
- self._uri_extension_hash = u.uri_extension_hash
- self._total_shares = u.total_shares
- self._size = u.size
- self._num_needed_shares = u.needed_shares
-
- self._si_s = storage.si_b2a(self._storage_index)
- self.init_logging()
-
- r = Results(self._storage_index)
- self._output = VerifyingOutput(self._size, r)
- self._paused = False
- self._stopped = False
-
- self._results = None
- self.active_buckets = {} # k: shnum, v: bucket
- self._share_buckets = [] # list of (sharenum, bucket) tuples
- self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets
- self._uri_extension_sources = []
-
- self._uri_extension_data = None
-
- self._fetch_failures = {"uri_extension": 0,
- "plaintext_hashroot": 0,
- "plaintext_hashtree": 0,
- "crypttext_hashroot": 0,
- "crypttext_hashtree": 0,
- }
-
- def init_logging(self):
- self._log_prefix = prefix = storage.si_b2a(self._storage_index)[:5]
- num = self._client.log("SimpleCHKFileVerifier(%s): starting" % prefix)
- self._log_number = num
-
- def log(self, msg, parent=None):
- if parent is None:
- parent = self._log_number
- return self._client.log("SimpleCHKFileVerifier(%s): %s"
- % (self._log_prefix, msg),
- parent=parent)
-
-
- def start(self):
- log.msg("starting download [%s]" % storage.si_b2a(self._storage_index)[:5])
-
- # first step: who should we download from?
- d = defer.maybeDeferred(self._get_all_shareholders)
- d.addCallback(self._got_all_shareholders)
- # now get the uri_extension block from somebody and validate it
- d.addCallback(self._obtain_uri_extension)
- d.addCallback(self._got_uri_extension)
- d.addCallback(self._get_hashtrees)
- d.addCallback(self._create_validated_buckets)
- # once we know that, we can download blocks from everybody
- d.addCallback(self._download_all_segments)
- d.addCallback(self._done)
- return d
-
import allmydata
from allmydata.storage import StorageServer
-from allmydata.upload import Uploader
-from allmydata.download import Downloader
+from allmydata.immutable.upload import Uploader
+from allmydata.immutable.download import Downloader
+from allmydata.immutable.filenode import FileNode, LiteralFileNode
from allmydata.offloaded import Helper
from allmydata.control import ControlServer
from allmydata.introducer.client import IntroducerClient
from allmydata.util import hashutil, base32, testutil
-from allmydata.filenode import FileNode, LiteralFileNode
from allmydata.uri import LiteralFileURI
from allmydata.dirnode import NewDirectoryNode
from allmydata.mutable.node import MutableFileNode, MutableWatcher
from foolscap import Referenceable
from allmydata.interfaces import RIControlClient
from allmydata.util import testutil, fileutil, mathutil
-from allmydata import upload, download
+from allmydata.immutable import upload, download
from twisted.python import log
def get_memory_usage():
+++ /dev/null
-
-import os, random, weakref, itertools, time
-from zope.interface import implements
-from twisted.internet import defer
-from twisted.internet.interfaces import IPushProducer, IConsumer
-from twisted.application import service
-from foolscap.eventual import eventually
-
-from allmydata.util import base32, mathutil, hashutil, log
-from allmydata.util.assertutil import _assert
-from allmydata import codec, hashtree, storage, uri
-from allmydata.interfaces import IDownloadTarget, IDownloader, IFileURI, \
- IDownloadStatus, IDownloadResults
-from allmydata.encode import NotEnoughSharesError
-from pycryptopp.cipher.aes import AES
-
-class HaveAllPeersError(Exception):
- # we use this to jump out of the loop
- pass
-
-class BadURIExtensionHashValue(Exception):
- pass
-class BadPlaintextHashValue(Exception):
- pass
-class BadCrypttextHashValue(Exception):
- pass
-
-class DownloadStopped(Exception):
- pass
-
-class DownloadResults:
- implements(IDownloadResults)
-
- def __init__(self):
- self.servers_used = set()
- self.server_problems = {}
- self.servermap = {}
- self.timings = {}
- self.file_size = None
-
-class Output:
- def __init__(self, downloadable, key, total_length, log_parent,
- download_status):
- self.downloadable = downloadable
- self._decryptor = AES(key)
- self._crypttext_hasher = hashutil.crypttext_hasher()
- self._plaintext_hasher = hashutil.plaintext_hasher()
- self.length = 0
- self.total_length = total_length
- self._segment_number = 0
- self._plaintext_hash_tree = None
- self._crypttext_hash_tree = None
- self._opened = False
- self._log_parent = log_parent
- self._status = download_status
- self._status.set_progress(0.0)
-
- def log(self, *args, **kwargs):
- if "parent" not in kwargs:
- kwargs["parent"] = self._log_parent
- if "facility" not in kwargs:
- kwargs["facility"] = "download.output"
- return log.msg(*args, **kwargs)
-
- def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree):
- self._plaintext_hash_tree = plaintext_hashtree
- self._crypttext_hash_tree = crypttext_hashtree
-
- def write_segment(self, crypttext):
- self.length += len(crypttext)
- self._status.set_progress( float(self.length) / self.total_length )
-
- # memory footprint: 'crypttext' is the only segment_size usage
- # outstanding. While we decrypt it into 'plaintext', we hit
- # 2*segment_size.
- self._crypttext_hasher.update(crypttext)
- if self._crypttext_hash_tree:
- ch = hashutil.crypttext_segment_hasher()
- ch.update(crypttext)
- crypttext_leaves = {self._segment_number: ch.digest()}
- self.log(format="crypttext leaf hash (%(bytes)sB) [%(segnum)d] is %(hash)s",
- bytes=len(crypttext),
- segnum=self._segment_number, hash=base32.b2a(ch.digest()),
- level=log.NOISY)
- self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves)
-
- plaintext = self._decryptor.process(crypttext)
- del crypttext
-
- # now we're back down to 1*segment_size.
-
- self._plaintext_hasher.update(plaintext)
- if self._plaintext_hash_tree:
- ph = hashutil.plaintext_segment_hasher()
- ph.update(plaintext)
- plaintext_leaves = {self._segment_number: ph.digest()}
- self.log(format="plaintext leaf hash (%(bytes)sB) [%(segnum)d] is %(hash)s",
- bytes=len(plaintext),
- segnum=self._segment_number, hash=base32.b2a(ph.digest()),
- level=log.NOISY)
- self._plaintext_hash_tree.set_hashes(leaves=plaintext_leaves)
-
- self._segment_number += 1
- # We're still at 1*segment_size. The Downloadable is responsible for
- # any memory usage beyond this.
- if not self._opened:
- self._opened = True
- self.downloadable.open(self.total_length)
- self.downloadable.write(plaintext)
-
- def fail(self, why):
- # this is really unusual, and deserves maximum forensics
- if why.check(DownloadStopped):
-            # DownloadStopped just means the consumer aborted the download,
-            # which is not so scary
- self.log("download stopped", level=log.UNUSUAL)
- else:
- self.log("download failed!", failure=why, level=log.SCARY)
- self.downloadable.fail(why)
-
- def close(self):
- self.crypttext_hash = self._crypttext_hasher.digest()
- self.plaintext_hash = self._plaintext_hasher.digest()
- self.log("download finished, closing IDownloadable", level=log.NOISY)
- self.downloadable.close()
-
- def finish(self):
- return self.downloadable.finish()
-
-class ValidatedBucket:
- """I am a front-end for a remote storage bucket, responsible for
- retrieving and validating data from that bucket.
-
- My get_block() method is used by BlockDownloaders.
- """
-
- def __init__(self, sharenum, bucket,
- share_hash_tree, roothash,
- num_blocks):
- self.sharenum = sharenum
- self.bucket = bucket
- self._share_hash = None # None means not validated yet
- self.share_hash_tree = share_hash_tree
- self._roothash = roothash
- self.block_hash_tree = hashtree.IncompleteHashTree(num_blocks)
- self.started = False
-
- def get_block(self, blocknum):
- if not self.started:
- d = self.bucket.start()
- def _started(res):
- self.started = True
- return self.get_block(blocknum)
- d.addCallback(_started)
- return d
-
- # the first time we use this bucket, we need to fetch enough elements
- # of the share hash tree to validate it from our share hash up to the
- # hashroot.
- if not self._share_hash:
- d1 = self.bucket.get_share_hashes()
- else:
- d1 = defer.succeed([])
-
- # we might need to grab some elements of our block hash tree, to
- # validate the requested block up to the share hash
- needed = self.block_hash_tree.needed_hashes(blocknum)
- if needed:
- # TODO: get fewer hashes, use get_block_hashes(needed)
- d2 = self.bucket.get_block_hashes()
- else:
- d2 = defer.succeed([])
-
- d3 = self.bucket.get_block(blocknum)
-
- d = defer.gatherResults([d1, d2, d3])
- d.addCallback(self._got_data, blocknum)
- return d
-
- def _got_data(self, res, blocknum):
- sharehashes, blockhashes, blockdata = res
- blockhash = None # to make logging it safe
-
- try:
- if not self._share_hash:
- sh = dict(sharehashes)
- sh[0] = self._roothash # always use our own root, from the URI
- sht = self.share_hash_tree
- if sht.get_leaf_index(self.sharenum) not in sh:
- raise hashtree.NotEnoughHashesError
- sht.set_hashes(sh)
- self._share_hash = sht.get_leaf(self.sharenum)
-
- blockhash = hashutil.block_hash(blockdata)
- #log.msg("checking block_hash(shareid=%d, blocknum=%d) len=%d "
- # "%r .. %r: %s" %
- # (self.sharenum, blocknum, len(blockdata),
- # blockdata[:50], blockdata[-50:], base32.b2a(blockhash)))
-
- # we always validate the blockhash
- bh = dict(enumerate(blockhashes))
- # replace blockhash root with validated value
- bh[0] = self._share_hash
- self.block_hash_tree.set_hashes(bh, {blocknum: blockhash})
-
- except (hashtree.BadHashError, hashtree.NotEnoughHashesError):
- # log.WEIRD: indicates undetected disk/network error, or more
- # likely a programming error
- log.msg("hash failure in block=%d, shnum=%d on %s" %
- (blocknum, self.sharenum, self.bucket))
- if self._share_hash:
- log.msg(""" failure occurred when checking the block_hash_tree.
- This suggests that either the block data was bad, or that the
- block hashes we received along with it were bad.""")
- else:
- log.msg(""" the failure probably occurred when checking the
- share_hash_tree, which suggests that the share hashes we
- received from the remote peer were bad.""")
- log.msg(" have self._share_hash: %s" % bool(self._share_hash))
- log.msg(" block length: %d" % len(blockdata))
- log.msg(" block hash: %s" % base32.b2a_or_none(blockhash))
- if len(blockdata) < 100:
- log.msg(" block data: %r" % (blockdata,))
- else:
- log.msg(" block data start/end: %r .. %r" %
- (blockdata[:50], blockdata[-50:]))
- log.msg(" root hash: %s" % base32.b2a(self._roothash))
- log.msg(" share hash tree:\n" + self.share_hash_tree.dump())
- log.msg(" block hash tree:\n" + self.block_hash_tree.dump())
- lines = []
- for i,h in sorted(sharehashes):
- lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
- log.msg(" sharehashes:\n" + "\n".join(lines) + "\n")
- lines = []
- for i,h in enumerate(blockhashes):
- lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
- log.msg(" blockhashes:\n" + "\n".join(lines) + "\n")
- raise
-
- # If we made it here, the block is good. If the hash trees didn't
- # like what they saw, they would have raised a BadHashError, causing
- # our caller to see a Failure and thus ignore this block (as well as
- # dropping this bucket).
- return blockdata
-
-
-
-class BlockDownloader:
- """I am responsible for downloading a single block (from a single bucket)
- for a single segment.
-
- I am a child of the SegmentDownloader.
- """
-
- def __init__(self, vbucket, blocknum, parent, results):
- self.vbucket = vbucket
- self.blocknum = blocknum
- self.parent = parent
- self.results = results
- self._log_number = self.parent.log("starting block %d" % blocknum)
-
- def log(self, msg, parent=None):
- if parent is None:
- parent = self._log_number
- return self.parent.log(msg, parent=parent)
-
- def start(self, segnum):
- lognum = self.log("get_block(segnum=%d)" % segnum)
- started = time.time()
- d = self.vbucket.get_block(segnum)
- d.addCallbacks(self._hold_block, self._got_block_error,
- callbackArgs=(started, lognum,), errbackArgs=(lognum,))
- return d
-
- def _hold_block(self, data, started, lognum):
- if self.results:
- elapsed = time.time() - started
- peerid = self.vbucket.bucket.get_peerid()
- if peerid not in self.results.timings["fetch_per_server"]:
- self.results.timings["fetch_per_server"][peerid] = []
- self.results.timings["fetch_per_server"][peerid].append(elapsed)
- self.log("got block", parent=lognum)
- self.parent.hold_block(self.blocknum, data)
-
- def _got_block_error(self, f, lognum):
- self.log("BlockDownloader[%d] got error: %s" % (self.blocknum, f),
- parent=lognum)
- if self.results:
- peerid = self.vbucket.bucket.get_peerid()
- self.results.server_problems[peerid] = str(f)
- self.parent.bucket_failed(self.vbucket)
-
-class SegmentDownloader:
- """I am responsible for downloading all the blocks for a single segment
- of data.
-
- I am a child of the FileDownloader.
- """
-
- def __init__(self, parent, segmentnumber, needed_shares, results):
- self.parent = parent
- self.segmentnumber = segmentnumber
- self.needed_blocks = needed_shares
- self.blocks = {} # k: blocknum, v: data
- self.results = results
- self._log_number = self.parent.log("starting segment %d" %
- segmentnumber)
-
- def log(self, msg, parent=None):
- if parent is None:
- parent = self._log_number
- return self.parent.log(msg, parent=parent)
-
- def start(self):
- return self._download()
-
- def _download(self):
- d = self._try()
- def _done(res):
- if len(self.blocks) >= self.needed_blocks:
- # we only need self.needed_blocks blocks
- # we want to get the smallest blockids, because they are
- # more likely to be fast "primary blocks"
- blockids = sorted(self.blocks.keys())[:self.needed_blocks]
- blocks = []
- for blocknum in blockids:
- blocks.append(self.blocks[blocknum])
- return (blocks, blockids)
- else:
- return self._download()
- d.addCallback(_done)
- return d
-
- def _try(self):
- # fill our set of active buckets, maybe raising NotEnoughSharesError
- active_buckets = self.parent._activate_enough_buckets()
- # Now we have enough buckets, in self.parent.active_buckets.
-
- # in test cases, bd.start might mutate active_buckets right away, so
- # we need to put off calling start() until we've iterated all the way
- # through it.
- downloaders = []
- for blocknum, vbucket in active_buckets.iteritems():
- bd = BlockDownloader(vbucket, blocknum, self, self.results)
- downloaders.append(bd)
- if self.results:
- self.results.servers_used.add(vbucket.bucket.get_peerid())
- l = [bd.start(self.segmentnumber) for bd in downloaders]
- return defer.DeferredList(l, fireOnOneErrback=True)
-
- def hold_block(self, blocknum, data):
- self.blocks[blocknum] = data
-
- def bucket_failed(self, vbucket):
- self.parent.bucket_failed(vbucket)
-
-class DownloadStatus:
- implements(IDownloadStatus)
- statusid_counter = itertools.count(0)
-
- def __init__(self):
- self.storage_index = None
- self.size = None
- self.helper = False
- self.status = "Not started"
- self.progress = 0.0
- self.paused = False
- self.stopped = False
- self.active = True
- self.results = None
- self.counter = self.statusid_counter.next()
- self.started = time.time()
-
- def get_started(self):
- return self.started
- def get_storage_index(self):
- return self.storage_index
- def get_size(self):
- return self.size
- def using_helper(self):
- return self.helper
- def get_status(self):
- status = self.status
- if self.paused:
- status += " (output paused)"
- if self.stopped:
- status += " (output stopped)"
- return status
- def get_progress(self):
- return self.progress
- def get_active(self):
- return self.active
- def get_results(self):
- return self.results
- def get_counter(self):
- return self.counter
-
- def set_storage_index(self, si):
- self.storage_index = si
- def set_size(self, size):
- self.size = size
- def set_helper(self, helper):
- self.helper = helper
- def set_status(self, status):
- self.status = status
- def set_paused(self, paused):
- self.paused = paused
- def set_stopped(self, stopped):
- self.stopped = stopped
- def set_progress(self, value):
- self.progress = value
- def set_active(self, value):
- self.active = value
- def set_results(self, value):
- self.results = value
-
-class FileDownloader:
- implements(IPushProducer)
- check_crypttext_hash = True
- check_plaintext_hash = True
- _status = None
-
- def __init__(self, client, u, downloadable):
- self._client = client
-
- u = IFileURI(u)
- self._storage_index = u.storage_index
- self._uri_extension_hash = u.uri_extension_hash
- self._total_shares = u.total_shares
- self._size = u.size
- self._num_needed_shares = u.needed_shares
-
- self._si_s = storage.si_b2a(self._storage_index)
- self.init_logging()
-
- self._started = time.time()
- self._status = s = DownloadStatus()
- s.set_status("Starting")
- s.set_storage_index(self._storage_index)
- s.set_size(self._size)
- s.set_helper(False)
- s.set_active(True)
-
- self._results = DownloadResults()
- s.set_results(self._results)
- self._results.file_size = self._size
- self._results.timings["servers_peer_selection"] = {}
- self._results.timings["fetch_per_server"] = {}
- self._results.timings["cumulative_fetch"] = 0.0
- self._results.timings["cumulative_decode"] = 0.0
- self._results.timings["cumulative_decrypt"] = 0.0
- self._results.timings["paused"] = 0.0
-
- if IConsumer.providedBy(downloadable):
- downloadable.registerProducer(self, True)
- self._downloadable = downloadable
- self._output = Output(downloadable, u.key, self._size, self._log_number,
- self._status)
- self._paused = False
- self._stopped = False
-
- self.active_buckets = {} # k: shnum, v: bucket
- self._share_buckets = [] # list of (sharenum, bucket) tuples
- self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets
- self._uri_extension_sources = []
-
- self._uri_extension_data = None
-
- self._fetch_failures = {"uri_extension": 0,
- "plaintext_hashroot": 0,
- "plaintext_hashtree": 0,
- "crypttext_hashroot": 0,
- "crypttext_hashtree": 0,
- }
-
- def init_logging(self):
- self._log_prefix = prefix = storage.si_b2a(self._storage_index)[:5]
- num = self._client.log(format="FileDownloader(%(si)s): starting",
- si=storage.si_b2a(self._storage_index))
- self._log_number = num
-
- def log(self, *args, **kwargs):
- if "parent" not in kwargs:
- kwargs["parent"] = self._log_number
- if "facility" not in kwargs:
- kwargs["facility"] = "tahoe.download"
- return log.msg(*args, **kwargs)
-
- def pauseProducing(self):
- if self._paused:
- return
- self._paused = defer.Deferred()
- self._paused_at = time.time()
- if self._status:
- self._status.set_paused(True)
-
- def resumeProducing(self):
- if self._paused:
- paused_for = time.time() - self._paused_at
- self._results.timings['paused'] += paused_for
- p = self._paused
- self._paused = None
- eventually(p.callback, None)
- if self._status:
- self._status.set_paused(False)
-
- def stopProducing(self):
- self.log("Download.stopProducing")
- self._stopped = True
- self.resumeProducing()
- if self._status:
- self._status.set_stopped(True)
- self._status.set_active(False)
-
- def start(self):
- self.log("starting download")
-
- # first step: who should we download from?
- d = defer.maybeDeferred(self._get_all_shareholders)
- d.addCallback(self._got_all_shareholders)
- # now get the uri_extension block from somebody and validate it
- d.addCallback(self._obtain_uri_extension)
- d.addCallback(self._got_uri_extension)
- d.addCallback(self._get_hashtrees)
- d.addCallback(self._create_validated_buckets)
- # once we know that, we can download blocks from everybody
- d.addCallback(self._download_all_segments)
- def _finished(res):
- if self._status:
- self._status.set_status("Finished")
- self._status.set_active(False)
- self._status.set_paused(False)
- if IConsumer.providedBy(self._downloadable):
- self._downloadable.unregisterProducer()
- return res
- d.addBoth(_finished)
- def _failed(why):
- if self._status:
- self._status.set_status("Failed")
- self._status.set_active(False)
- self._output.fail(why)
- return why
- d.addErrback(_failed)
- d.addCallback(self._done)
- return d
-
- def _get_all_shareholders(self):
- dl = []
- for (peerid,ss) in self._client.get_permuted_peers("storage",
- self._storage_index):
- d = ss.callRemote("get_buckets", self._storage_index)
- d.addCallbacks(self._got_response, self._got_error,
- callbackArgs=(peerid,))
- dl.append(d)
- self._responses_received = 0
- self._queries_sent = len(dl)
- if self._status:
- self._status.set_status("Locating Shares (%d/%d)" %
- (self._responses_received,
- self._queries_sent))
- return defer.DeferredList(dl)
-
- def _got_response(self, buckets, peerid):
- self._responses_received += 1
- if self._results:
- elapsed = time.time() - self._started
- self._results.timings["servers_peer_selection"][peerid] = elapsed
- if self._status:
- self._status.set_status("Locating Shares (%d/%d)" %
- (self._responses_received,
- self._queries_sent))
- for sharenum, bucket in buckets.iteritems():
- b = storage.ReadBucketProxy(bucket, peerid, self._si_s)
- self.add_share_bucket(sharenum, b)
- self._uri_extension_sources.append(b)
- if self._results:
- if peerid not in self._results.servermap:
- self._results.servermap[peerid] = set()
- self._results.servermap[peerid].add(sharenum)
-
- def add_share_bucket(self, sharenum, bucket):
- # this is split out for the benefit of test_encode.py
- self._share_buckets.append( (sharenum, bucket) )
-
- def _got_error(self, f):
- self._client.log("Somebody failed. -- %s" % (f,))
-
- def bucket_failed(self, vbucket):
- shnum = vbucket.sharenum
- del self.active_buckets[shnum]
- s = self._share_vbuckets[shnum]
- # s is a set of ValidatedBucket instances
- s.remove(vbucket)
- # ... which might now be empty
- if not s:
- # there are no more buckets which can provide this share, so
- # remove the key. This may prompt us to use a different share.
- del self._share_vbuckets[shnum]
-
- def _got_all_shareholders(self, res):
- if self._results:
- now = time.time()
- self._results.timings["peer_selection"] = now - self._started
-
- if len(self._share_buckets) < self._num_needed_shares:
- raise NotEnoughSharesError
-
- #for s in self._share_vbuckets.values():
- # for vb in s:
- # assert isinstance(vb, ValidatedBucket), \
- # "vb is %s but should be a ValidatedBucket" % (vb,)
-
- def _unpack_uri_extension_data(self, data):
- return uri.unpack_extension(data)
-
- def _obtain_uri_extension(self, ignored):
- # all shareholders are supposed to have a copy of uri_extension, and
- # all are supposed to be identical. We compute the hash of the data
- # that comes back, and compare it against the version in our URI. If
- # they don't match, ignore their data and try someone else.
- if self._status:
- self._status.set_status("Obtaining URI Extension")
-
- self._uri_extension_fetch_started = time.time()
- def _validate(proposal, bucket):
- h = hashutil.uri_extension_hash(proposal)
- if h != self._uri_extension_hash:
- self._fetch_failures["uri_extension"] += 1
- msg = ("The copy of uri_extension we received from "
- "%s was bad: wanted %s, got %s" %
- (bucket,
- base32.b2a(self._uri_extension_hash),
- base32.b2a(h)))
- self.log(msg, level=log.SCARY)
- raise BadURIExtensionHashValue(msg)
- return self._unpack_uri_extension_data(proposal)
- return self._obtain_validated_thing(None,
- self._uri_extension_sources,
- "uri_extension",
- "get_uri_extension", (), _validate)
-
- def _obtain_validated_thing(self, ignored, sources, name, methname, args,
- validatorfunc):
- if not sources:
- raise NotEnoughSharesError("started with zero peers while fetching "
- "%s" % name)
- bucket = sources[0]
- sources = sources[1:]
- #d = bucket.callRemote(methname, *args)
- d = bucket.startIfNecessary()
- d.addCallback(lambda res: getattr(bucket, methname)(*args))
- d.addCallback(validatorfunc, bucket)
- def _bad(f):
- self.log("%s from vbucket %s failed:" % (name, bucket),
- failure=f, level=log.WEIRD)
- if not sources:
- raise NotEnoughSharesError("ran out of peers, last error was %s"
- % (f,))
- # try again with a different one
- return self._obtain_validated_thing(None, sources, name,
- methname, args, validatorfunc)
- d.addErrback(_bad)
- return d
-
- def _got_uri_extension(self, uri_extension_data):
- if self._results:
- elapsed = time.time() - self._uri_extension_fetch_started
- self._results.timings["uri_extension"] = elapsed
-
- d = self._uri_extension_data = uri_extension_data
-
- self._codec = codec.get_decoder_by_name(d['codec_name'])
- self._codec.set_serialized_params(d['codec_params'])
- self._tail_codec = codec.get_decoder_by_name(d['codec_name'])
- self._tail_codec.set_serialized_params(d['tail_codec_params'])
-
- crypttext_hash = d.get('crypttext_hash', None) # optional
- if crypttext_hash:
- assert isinstance(crypttext_hash, str)
- assert len(crypttext_hash) == 32
- self._crypttext_hash = crypttext_hash
- self._plaintext_hash = d.get('plaintext_hash', None) # optional
-
- self._roothash = d['share_root_hash']
-
- self._segment_size = segment_size = d['segment_size']
- self._total_segments = mathutil.div_ceil(self._size, segment_size)
- self._current_segnum = 0
-
- self._share_hashtree = hashtree.IncompleteHashTree(d['total_shares'])
- self._share_hashtree.set_hashes({0: self._roothash})
-
- def _get_hashtrees(self, res):
- self._get_hashtrees_started = time.time()
- if self._status:
- self._status.set_status("Retrieving Hash Trees")
- d = defer.maybeDeferred(self._get_plaintext_hashtrees)
- d.addCallback(self._get_crypttext_hashtrees)
- d.addCallback(self._setup_hashtrees)
- return d
-
- def _get_plaintext_hashtrees(self):
- # plaintext hashes are optional. If the root isn't in the UEB, then
- # the share will be holding an empty list. We don't even bother
- # fetching it.
- if "plaintext_root_hash" not in self._uri_extension_data:
- self._plaintext_hashtree = None
- return
- def _validate_plaintext_hashtree(proposal, bucket):
- if proposal[0] != self._uri_extension_data['plaintext_root_hash']:
- self._fetch_failures["plaintext_hashroot"] += 1
- msg = ("The copy of the plaintext_root_hash we received from"
- " %s was bad" % bucket)
- raise BadPlaintextHashValue(msg)
- pt_hashtree = hashtree.IncompleteHashTree(self._total_segments)
- pt_hashes = dict(list(enumerate(proposal)))
- try:
- pt_hashtree.set_hashes(pt_hashes)
- except hashtree.BadHashError:
- # the hashes they gave us were not self-consistent, even
- # though the root matched what we saw in the uri_extension
- # block
- self._fetch_failures["plaintext_hashtree"] += 1
- raise
- self._plaintext_hashtree = pt_hashtree
- d = self._obtain_validated_thing(None,
- self._uri_extension_sources,
- "plaintext_hashes",
- "get_plaintext_hashes", (),
- _validate_plaintext_hashtree)
- return d
-
- def _get_crypttext_hashtrees(self, res):
- # crypttext hashes are optional too
- if "crypttext_root_hash" not in self._uri_extension_data:
- self._crypttext_hashtree = None
- return
- def _validate_crypttext_hashtree(proposal, bucket):
- if proposal[0] != self._uri_extension_data['crypttext_root_hash']:
- self._fetch_failures["crypttext_hashroot"] += 1
- msg = ("The copy of the crypttext_root_hash we received from"
- " %s was bad" % bucket)
- raise BadCrypttextHashValue(msg)
- ct_hashtree = hashtree.IncompleteHashTree(self._total_segments)
- ct_hashes = dict(list(enumerate(proposal)))
- try:
- ct_hashtree.set_hashes(ct_hashes)
- except hashtree.BadHashError:
- self._fetch_failures["crypttext_hashtree"] += 1
- raise
- self._crypttext_hashtree = ct_hashtree
- d = self._obtain_validated_thing(None,
- self._uri_extension_sources,
- "crypttext_hashes",
- "get_crypttext_hashes", (),
- _validate_crypttext_hashtree)
- return d
-
- def _setup_hashtrees(self, res):
- self._output.setup_hashtrees(self._plaintext_hashtree,
- self._crypttext_hashtree)
- if self._results:
- elapsed = time.time() - self._get_hashtrees_started
- self._results.timings["hashtrees"] = elapsed
-
- def _create_validated_buckets(self, ignored=None):
- self._share_vbuckets = {}
- for sharenum, bucket in self._share_buckets:
- vbucket = ValidatedBucket(sharenum, bucket,
- self._share_hashtree,
- self._roothash,
- self._total_segments)
- s = self._share_vbuckets.setdefault(sharenum, set())
- s.add(vbucket)
-
- def _activate_enough_buckets(self):
- """either return a mapping from shnum to a ValidatedBucket that can
- provide data for that share, or raise NotEnoughSharesError"""
-
- while len(self.active_buckets) < self._num_needed_shares:
- # need some more
- handled_shnums = set(self.active_buckets.keys())
- available_shnums = set(self._share_vbuckets.keys())
- potential_shnums = list(available_shnums - handled_shnums)
- if not potential_shnums:
- raise NotEnoughSharesError
- # choose a random share
- shnum = random.choice(potential_shnums)
- # and a random bucket that will provide it
- validated_bucket = random.choice(list(self._share_vbuckets[shnum]))
- self.active_buckets[shnum] = validated_bucket
- return self.active_buckets
-
-
- def _download_all_segments(self, res):
- # the promise: upon entry to this function, self._share_vbuckets
- # contains enough buckets to complete the download, and some extra
- # ones to tolerate some buckets dropping out or having errors.
- # self._share_vbuckets is a dictionary that maps from shnum to a set
- # of ValidatedBuckets, which themselves are wrappers around
- # RIBucketReader references.
- self.active_buckets = {} # k: shnum, v: ValidatedBucket instance
-
- self._started_fetching = time.time()
-
- d = defer.succeed(None)
- for segnum in range(self._total_segments-1):
- d.addCallback(self._download_segment, segnum)
- # this pause, at the end of write, prevents pre-fetch from
- # happening until the consumer is ready for more data.
- d.addCallback(self._check_for_pause)
- d.addCallback(self._download_tail_segment, self._total_segments-1)
- return d
-
- def _check_for_pause(self, res):
- if self._paused:
- d = defer.Deferred()
- self._paused.addCallback(lambda ignored: d.callback(res))
- return d
- if self._stopped:
- raise DownloadStopped("our Consumer called stopProducing()")
- return res
-
- def _download_segment(self, res, segnum):
- if self._status:
- self._status.set_status("Downloading segment %d of %d" %
- (segnum+1, self._total_segments))
- self.log("downloading seg#%d of %d (%d%%)"
- % (segnum, self._total_segments,
- 100.0 * segnum / self._total_segments))
- # memory footprint: when the SegmentDownloader finishes pulling down
- # all shares, we have 1*segment_size of usage.
- segmentdler = SegmentDownloader(self, segnum, self._num_needed_shares,
- self._results)
- started = time.time()
- d = segmentdler.start()
- def _finished_fetching(res):
- elapsed = time.time() - started
- self._results.timings["cumulative_fetch"] += elapsed
- return res
- if self._results:
- d.addCallback(_finished_fetching)
- # pause before using more memory
- d.addCallback(self._check_for_pause)
- # while the codec does its job, we hit 2*segment_size
- def _started_decode(res):
- self._started_decode = time.time()
- return res
- if self._results:
- d.addCallback(_started_decode)
- d.addCallback(lambda (shares, shareids):
- self._codec.decode(shares, shareids))
- # once the codec is done, we drop back to 1*segment_size, because
- # 'shares' goes out of scope. The memory usage is all in the
- # plaintext now, spread out into a bunch of tiny buffers.
- def _finished_decode(res):
- elapsed = time.time() - self._started_decode
- self._results.timings["cumulative_decode"] += elapsed
- return res
- if self._results:
- d.addCallback(_finished_decode)
-
- # pause/check-for-stop just before writing, to honor stopProducing
- d.addCallback(self._check_for_pause)
- def _done(buffers):
- # we start by joining all these buffers together into a single
- # string. This makes Output.write easier, since it wants to hash
- # data one segment at a time anyways, and doesn't impact our
- # memory footprint since we're already peaking at 2*segment_size
- # inside the codec a moment ago.
- segment = "".join(buffers)
- del buffers
- # we're down to 1*segment_size right now, but write_segment()
- # will decrypt a copy of the segment internally, which will push
- # us up to 2*segment_size while it runs.
- started_decrypt = time.time()
- self._output.write_segment(segment)
- if self._results:
- elapsed = time.time() - started_decrypt
- self._results.timings["cumulative_decrypt"] += elapsed
- d.addCallback(_done)
- return d
-
- def _download_tail_segment(self, res, segnum):
- self.log("downloading seg#%d of %d (%d%%)"
- % (segnum, self._total_segments,
- 100.0 * segnum / self._total_segments))
- segmentdler = SegmentDownloader(self, segnum, self._num_needed_shares,
- self._results)
- started = time.time()
- d = segmentdler.start()
- def _finished_fetching(res):
- elapsed = time.time() - started
- self._results.timings["cumulative_fetch"] += elapsed
- return res
- if self._results:
- d.addCallback(_finished_fetching)
- # pause before using more memory
- d.addCallback(self._check_for_pause)
- def _started_decode(res):
- self._started_decode = time.time()
- return res
- if self._results:
- d.addCallback(_started_decode)
- d.addCallback(lambda (shares, shareids):
- self._tail_codec.decode(shares, shareids))
- def _finished_decode(res):
- elapsed = time.time() - self._started_decode
- self._results.timings["cumulative_decode"] += elapsed
- return res
- if self._results:
- d.addCallback(_finished_decode)
- # pause/check-for-stop just before writing, to honor stopProducing
- d.addCallback(self._check_for_pause)
- def _done(buffers):
- # trim off any padding added by the upload side
- segment = "".join(buffers)
- del buffers
- # we never send empty segments. If the data was an exact multiple
- # of the segment size, the last segment will be full.
- pad_size = mathutil.pad_size(self._size, self._segment_size)
- tail_size = self._segment_size - pad_size
- segment = segment[:tail_size]
- started_decrypt = time.time()
- self._output.write_segment(segment)
- if self._results:
- elapsed = time.time() - started_decrypt
- self._results.timings["cumulative_decrypt"] += elapsed
- d.addCallback(_done)
- return d
-
- def _done(self, res):
- self.log("download done")
- if self._results:
- now = time.time()
- self._results.timings["total"] = now - self._started
- self._results.timings["segments"] = now - self._started_fetching
- self._output.close()
- if self.check_crypttext_hash and self._crypttext_hash:
- _assert(self._crypttext_hash == self._output.crypttext_hash,
- "bad crypttext_hash: computed=%s, expected=%s" %
- (base32.b2a(self._output.crypttext_hash),
- base32.b2a(self._crypttext_hash)))
- if self.check_plaintext_hash and self._plaintext_hash:
- _assert(self._plaintext_hash == self._output.plaintext_hash,
- "bad plaintext_hash: computed=%s, expected=%s" %
- (base32.b2a(self._output.plaintext_hash),
- base32.b2a(self._plaintext_hash)))
- _assert(self._output.length == self._size,
- got=self._output.length, expected=self._size)
- return self._output.finish()
-
- def get_download_status(self):
- return self._status
-
-
-class LiteralDownloader:
- def __init__(self, client, u, downloadable):
- self._uri = IFileURI(u)
- assert isinstance(self._uri, uri.LiteralFileURI)
- self._downloadable = downloadable
- self._status = s = DownloadStatus()
- s.set_storage_index(None)
- s.set_helper(False)
- s.set_status("Done")
- s.set_active(False)
- s.set_progress(1.0)
-
- def start(self):
- data = self._uri.data
- self._status.set_size(len(data))
- self._downloadable.open(len(data))
- self._downloadable.write(data)
- self._downloadable.close()
- return defer.maybeDeferred(self._downloadable.finish)
-
- def get_download_status(self):
- return self._status
-
-class FileName:
- implements(IDownloadTarget)
- def __init__(self, filename):
- self._filename = filename
- self.f = None
- def open(self, size):
- self.f = open(self._filename, "wb")
- return self.f
- def write(self, data):
- self.f.write(data)
- def close(self):
- if self.f:
- self.f.close()
- def fail(self, why):
- if self.f:
- self.f.close()
- os.unlink(self._filename)
- def register_canceller(self, cb):
- pass # we won't use it
- def finish(self):
- pass
-
-class Data:
- implements(IDownloadTarget)
- def __init__(self):
- self._data = []
- def open(self, size):
- pass
- def write(self, data):
- self._data.append(data)
- def close(self):
- self.data = "".join(self._data)
- del self._data
- def fail(self, why):
- del self._data
- def register_canceller(self, cb):
- pass # we won't use it
- def finish(self):
- return self.data
-
-class FileHandle:
- """Use me to download data to a pre-defined filehandle-like object. I
- will use the target's write() method. I will *not* close the filehandle:
- I leave that up to the originator of the filehandle. The download process
- will return the filehandle when it completes.
- """
- implements(IDownloadTarget)
- def __init__(self, filehandle):
- self._filehandle = filehandle
- def open(self, size):
- pass
- def write(self, data):
- self._filehandle.write(data)
- def close(self):
- # the originator of the filehandle reserves the right to close it
- pass
- def fail(self, why):
- pass
- def register_canceller(self, cb):
- pass
- def finish(self):
- return self._filehandle
-
-class Downloader(service.MultiService):
- """I am a service that allows file downloading.
- """
- implements(IDownloader)
- name = "downloader"
- MAX_DOWNLOAD_STATUSES = 10
-
- def __init__(self, stats_provider=None):
- service.MultiService.__init__(self)
- self.stats_provider = stats_provider
- self._all_downloads = weakref.WeakKeyDictionary() # for debugging
- self._all_download_statuses = weakref.WeakKeyDictionary()
- self._recent_download_statuses = []
-
- def download(self, u, t):
- assert self.parent
- assert self.running
- u = IFileURI(u)
- t = IDownloadTarget(t)
- assert t.write
- assert t.close
-
-
- if isinstance(u, uri.LiteralFileURI):
- dl = LiteralDownloader(self.parent, u, t)
- elif isinstance(u, uri.CHKFileURI):
- if self.stats_provider:
- # these counters are meant for network traffic, and don't
- # include LIT files
- self.stats_provider.count('downloader.files_downloaded', 1)
- self.stats_provider.count('downloader.bytes_downloaded', u.get_size())
- dl = FileDownloader(self.parent, u, t)
- else:
- raise RuntimeError("I don't know how to download a %s" % u)
- self._add_download(dl)
- d = dl.start()
- return d
-
- # utility functions
- def download_to_data(self, uri):
- return self.download(uri, Data())
- def download_to_filename(self, uri, filename):
- return self.download(uri, FileName(filename))
- def download_to_filehandle(self, uri, filehandle):
- return self.download(uri, FileHandle(filehandle))
-
- def _add_download(self, downloader):
- self._all_downloads[downloader] = None
- s = downloader.get_download_status()
- self._all_download_statuses[s] = None
- self._recent_download_statuses.append(s)
- while len(self._recent_download_statuses) > self.MAX_DOWNLOAD_STATUSES:
- self._recent_download_statuses.pop(0)
-
- def list_all_download_statuses(self):
- for ds in self._all_download_statuses:
- yield ds
+++ /dev/null
-# -*- test-case-name: allmydata.test.test_encode -*-
-
-import time
-from zope.interface import implements
-from twisted.internet import defer
-from foolscap import eventual
-from allmydata import storage, uri
-from allmydata.hashtree import HashTree
-from allmydata.util import mathutil, hashutil, base32, log
-from allmydata.util.assertutil import _assert, precondition
-from allmydata.codec import CRSEncoder
-from allmydata.interfaces import IEncoder, IStorageBucketWriter, \
- IEncryptedUploadable, IUploadStatus
-
-"""
-The goal of the encoder is to turn the original file into a series of
-'shares'. Each share is going to a 'shareholder' (nominally each shareholder
-is a different host, but for small grids there may be overlap). The number
-of shares is chosen to hit our reliability goals (more shares on more
-machines means more reliability), and is limited by overhead (proportional to
-numshares or log(numshares)) and the encoding technology in use (zfec permits
-only 256 shares total). It is also constrained by the amount of data
-we want to send to each host. For estimating purposes, think of 10 shares
-out of which we need 3 to reconstruct the file.
-
-The encoder starts by cutting the original file into segments. All segments
-except the last are of equal size. The segment size is chosen to constrain
-the memory footprint (which will probably vary between 1x and 4x segment
-size) and to constrain the overhead (which will be proportional to
-log(number of segments)).
-
-
-Each segment (A,B,C) is read into memory, encrypted, and encoded into
-blocks. The 'share' (say, share #1) that makes it out to a host is a
-collection of these blocks (block A1, B1, C1), plus some hash-tree
-information necessary to validate the data upon retrieval. Only one segment
-is handled at a time: all blocks for segment A are delivered before any
-work is begun on segment B.
-
-As blocks are created, we retain the hash of each one. The list of block hashes
-for a single share (say, hash(A1), hash(B1), hash(C1)) is used to form the base
-of a Merkle hash tree for that share, called the block hash tree.
-
-This hash tree has one terminal leaf per block. The complete block hash
-tree is sent to the shareholder after all the data has been sent. At
-retrieval time, the decoder will ask for whichever pieces of this tree it
-needs to validate those blocks before asking for the blocks themselves.
-
-(Note: we don't really need to generate this whole block hash tree
-ourselves. It would be sufficient to have the shareholder generate it and
-just tell us the root. This gives us an extra level of validation on the
-transfer, though, and it is relatively cheap to compute.)
-
-Each of these block hash trees has a root hash. The root hashes of all
-shares are collected into the 'share hash tree', which has one terminal
-leaf per share. After sending the blocks and the complete
-block hash tree to each shareholder, we send them the portion of the share
-hash tree that is necessary to validate their share. The root of the share
-hash tree is put into the URI.
-
-"""
-
-class NotEnoughSharesError(Exception):
- servermap = None
- pass
-
-class UploadAborted(Exception):
- pass
-
-KiB=1024
-MiB=1024*KiB
-GiB=1024*MiB
-TiB=1024*GiB
-PiB=1024*TiB
-
-class Encoder(object):
- implements(IEncoder)
- USE_PLAINTEXT_HASHES = False
-
- def __init__(self, log_parent=None, upload_status=None):
- object.__init__(self)
- self.uri_extension_data = {}
- self._codec = None
- self._status = None
- if upload_status:
- self._status = IUploadStatus(upload_status)
- precondition(log_parent is None or isinstance(log_parent, int),
- log_parent)
- self._log_number = log.msg("creating Encoder %s" % self,
- facility="tahoe.encoder", parent=log_parent)
- self._aborted = False
-
- def __repr__(self):
- if hasattr(self, "_storage_index"):
- return "<Encoder for %s>" % storage.si_b2a(self._storage_index)[:5]
- return "<Encoder for unknown storage index>"
-
- def log(self, *args, **kwargs):
- if "parent" not in kwargs:
- kwargs["parent"] = self._log_number
- if "facility" not in kwargs:
- kwargs["facility"] = "tahoe.encoder"
- return log.msg(*args, **kwargs)
-
- def set_encrypted_uploadable(self, uploadable):
- eu = self._uploadable = IEncryptedUploadable(uploadable)
- d = eu.get_size()
- def _got_size(size):
- self.log(format="file size: %(size)d", size=size)
- self.file_size = size
- d.addCallback(_got_size)
- d.addCallback(lambda res: eu.get_all_encoding_parameters())
- d.addCallback(self._got_all_encoding_parameters)
- d.addCallback(lambda res: eu.get_storage_index())
- def _done(storage_index):
- self._storage_index = storage_index
- return self
- d.addCallback(_done)
- return d
-
- def _got_all_encoding_parameters(self, params):
- assert not self._codec
- k, happy, n, segsize = params
- self.required_shares = k
- self.shares_of_happiness = happy
- self.num_shares = n
- self.segment_size = segsize
- self.log("got encoding parameters: %d/%d/%d %d" % (k,happy,n, segsize))
- self.log("now setting up codec")
-
- assert self.segment_size % self.required_shares == 0
-
- self.num_segments = mathutil.div_ceil(self.file_size,
- self.segment_size)
-
- self._codec = CRSEncoder()
- self._codec.set_params(self.segment_size,
- self.required_shares, self.num_shares)
-
- data = self.uri_extension_data
- data['codec_name'] = self._codec.get_encoder_type()
- data['codec_params'] = self._codec.get_serialized_params()
-
- data['size'] = self.file_size
- data['segment_size'] = self.segment_size
- self.share_size = mathutil.div_ceil(self.file_size,
- self.required_shares)
- data['num_segments'] = self.num_segments
- data['needed_shares'] = self.required_shares
- data['total_shares'] = self.num_shares
-
- # the "tail" is the last segment. This segment may or may not be
- # shorter than all other segments. We use the "tail codec" to handle
- # it. If the tail is short, we use a different codec instance. In
- # addition, the tail codec must be fed data which has been padded out
- # to the right size.
- self.tail_size = self.file_size % self.segment_size
- if not self.tail_size:
- self.tail_size = self.segment_size
-
- # the tail codec is responsible for encoding tail_size bytes
- padded_tail_size = mathutil.next_multiple(self.tail_size,
- self.required_shares)
- self._tail_codec = CRSEncoder()
- self._tail_codec.set_params(padded_tail_size,
- self.required_shares, self.num_shares)
- data['tail_codec_params'] = self._tail_codec.get_serialized_params()
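-        # (illustrative, hypothetical numbers: with required_shares=3,
-        #  segment_size=900, and file_size=2500, the tail holds the last
-        #  2500 % 900 = 700 bytes, padded out to 702 so the tail codec can
-        #  split them into three equal 234-byte pieces)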
-
- def _get_share_size(self):
- share_size = mathutil.div_ceil(self.file_size, self.required_shares)
- overhead = self._compute_overhead()
- return share_size + overhead
-
- def _compute_overhead(self):
- return 0
-
- def get_param(self, name):
- assert self._codec
-
- if name == "storage_index":
- return self._storage_index
- elif name == "share_counts":
- return (self.required_shares, self.shares_of_happiness,
- self.num_shares)
- elif name == "num_segments":
- return self.num_segments
- elif name == "segment_size":
- return self.segment_size
- elif name == "block_size":
- return self._codec.get_block_size()
- elif name == "share_size":
- return self._get_share_size()
- elif name == "serialized_params":
- return self._codec.get_serialized_params()
- else:
- raise KeyError("unknown parameter name '%s'" % name)
-
- def set_shareholders(self, landlords):
- assert isinstance(landlords, dict)
- for k in landlords:
- assert IStorageBucketWriter.providedBy(landlords[k])
- self.landlords = landlords.copy()
-
- def start(self):
- self.log("%s starting" % (self,))
- #paddedsize = self._size + mathutil.pad_size(self._size, self.needed_shares)
- assert self._codec
- self._crypttext_hasher = hashutil.crypttext_hasher()
- self._crypttext_hashes = []
- self.segment_num = 0
- self.subshare_hashes = [[] for x in range(self.num_shares)]
-        # subshare_hashes[i] is a list that will be accumulated and then sent
- # to landlord[i]. This list contains a hash of each segment_share
- # that we sent to that landlord.
- self.share_root_hashes = [None] * self.num_shares
-
- self._times = {
- "cumulative_encoding": 0.0,
- "cumulative_sending": 0.0,
- "hashes_and_close": 0.0,
- "total_encode_and_push": 0.0,
- }
- self._start_total_timestamp = time.time()
-
- d = eventual.fireEventually()
-
- d.addCallback(lambda res: self.start_all_shareholders())
-
- for i in range(self.num_segments-1):
- # note to self: this form doesn't work, because lambda only
- # captures the slot, not the value
- #d.addCallback(lambda res: self.do_segment(i))
- # use this form instead:
- d.addCallback(lambda res, i=i: self._encode_segment(i))
- d.addCallback(self._send_segment, i)
- d.addCallback(self._turn_barrier)
- last_segnum = self.num_segments - 1
- d.addCallback(lambda res: self._encode_tail_segment(last_segnum))
- d.addCallback(self._send_segment, last_segnum)
- d.addCallback(self._turn_barrier)
-
- d.addCallback(lambda res: self.finish_hashing())
-
- if self.USE_PLAINTEXT_HASHES:
- d.addCallback(lambda res:
- self.send_plaintext_hash_tree_to_all_shareholders())
- d.addCallback(lambda res:
- self.send_crypttext_hash_tree_to_all_shareholders())
- d.addCallback(lambda res: self.send_all_subshare_hash_trees())
- d.addCallback(lambda res: self.send_all_share_hash_trees())
- d.addCallback(lambda res: self.send_uri_extension_to_all_shareholders())
-
- d.addCallback(lambda res: self.close_all_shareholders())
- d.addCallbacks(self.done, self.err)
- return d
-
- def set_status(self, status):
- if self._status:
- self._status.set_status(status)
-
- def set_encode_and_push_progress(self, sent_segments=None, extra=0.0):
- if self._status:
- # we treat the final hash+close as an extra segment
- if sent_segments is None:
- sent_segments = self.num_segments
- progress = float(sent_segments + extra) / (self.num_segments + 1)
- self._status.set_progress(2, progress)
-
- def abort(self):
- self.log("aborting upload", level=log.UNUSUAL)
- assert self._codec, "don't call abort before start"
- self._aborted = True
- # the next segment read (in _gather_data inside _encode_segment) will
- # raise UploadAborted(), which will bypass the rest of the upload
- # chain. If we've sent the final segment's shares, it's too late to
- # abort. TODO: allow abort any time up to close_all_shareholders.
-
- def _turn_barrier(self, res):
- # putting this method in a Deferred chain imposes a guaranteed
- # reactor turn between the pre- and post- portions of that chain.
- # This can be useful to limit memory consumption: since Deferreds do
- # not do tail recursion, code which uses defer.succeed(result) for
- # consistency will cause objects to live for longer than you might
- # normally expect.
-
- return eventual.fireEventually(res)
-
-
- def start_all_shareholders(self):
- self.log("starting shareholders", level=log.NOISY)
- self.set_status("Starting shareholders")
- dl = []
- for shareid in self.landlords:
- d = self.landlords[shareid].start()
- d.addErrback(self._remove_shareholder, shareid, "start")
- dl.append(d)
- return self._gather_responses(dl)
-
- def _encode_segment(self, segnum):
- codec = self._codec
- start = time.time()
-
- # the ICodecEncoder API wants to receive a total of self.segment_size
- # bytes on each encode() call, broken up into a number of
- # identically-sized pieces. Due to the way the codec algorithm works,
- # these pieces need to be the same size as the share which the codec
- # will generate. Therefore we must feed it with input_piece_size that
- # equals the output share size.
- input_piece_size = codec.get_block_size()
-
- # as a result, the number of input pieces per encode() call will be
- # equal to the number of required shares with which the codec was
- # constructed. You can think of the codec as chopping up a
- # 'segment_size' of data into 'required_shares' shares (not doing any
- # fancy math at all, just doing a split), then creating some number
- # of additional shares which can be substituted if the primary ones
- # are unavailable
-
- crypttext_segment_hasher = hashutil.crypttext_segment_hasher()
-
- # memory footprint: we only hold a tiny piece of the plaintext at any
-        # given time. We build up a segment's worth of crypttext, then hand
- # it to the encoder. Assuming 3-of-10 encoding (3.3x expansion) and
- # 1MiB max_segment_size, we get a peak memory footprint of 4.3*1MiB =
- # 4.3MiB. Lowering max_segment_size to, say, 100KiB would drop the
- # footprint to 430KiB at the expense of more hash-tree overhead.
-
- d = self._gather_data(self.required_shares, input_piece_size,
- crypttext_segment_hasher)
- def _done_gathering(chunks):
- for c in chunks:
- assert len(c) == input_piece_size
- self._crypttext_hashes.append(crypttext_segment_hasher.digest())
- # during this call, we hit 5*segsize memory
- return codec.encode(chunks)
- d.addCallback(_done_gathering)
- def _done(res):
- elapsed = time.time() - start
- self._times["cumulative_encoding"] += elapsed
- return res
- d.addCallback(_done)
- return d
-
- def _encode_tail_segment(self, segnum):
-
- start = time.time()
- codec = self._tail_codec
- input_piece_size = codec.get_block_size()
-
- crypttext_segment_hasher = hashutil.crypttext_segment_hasher()
-
- d = self._gather_data(self.required_shares, input_piece_size,
- crypttext_segment_hasher,
- allow_short=True)
- def _done_gathering(chunks):
- for c in chunks:
- # a short trailing chunk will have been padded by
- # _gather_data
- assert len(c) == input_piece_size
- self._crypttext_hashes.append(crypttext_segment_hasher.digest())
- return codec.encode(chunks)
- d.addCallback(_done_gathering)
- def _done(res):
- elapsed = time.time() - start
- self._times["cumulative_encoding"] += elapsed
- return res
- d.addCallback(_done)
- return d
-
- def _gather_data(self, num_chunks, input_chunk_size,
- crypttext_segment_hasher,
- allow_short=False,
- previous_chunks=[]):
- """Return a Deferred that will fire when the required number of
- chunks have been read (and hashed and encrypted). The Deferred fires
- with the combination of any 'previous_chunks' and the new chunks
- which were gathered."""
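-        # For example (hypothetical numbers): _gather_data(3, 43691, hasher)
-        # chains three read_encrypted() calls and eventually fires with a
-        # list of three 43691-byte strings, each of which has already been
-        # fed to both the per-segment and the whole-file crypttext hashers.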
-
- if self._aborted:
- raise UploadAborted()
-
- if not num_chunks:
- return defer.succeed(previous_chunks)
-
- d = self._uploadable.read_encrypted(input_chunk_size, False)
- def _got(data):
- if self._aborted:
- raise UploadAborted()
- encrypted_pieces = []
- length = 0
- while data:
- encrypted_piece = data.pop(0)
- length += len(encrypted_piece)
- crypttext_segment_hasher.update(encrypted_piece)
- self._crypttext_hasher.update(encrypted_piece)
- encrypted_pieces.append(encrypted_piece)
-
- if allow_short:
- if length < input_chunk_size:
- # padding
- pad_size = input_chunk_size - length
- encrypted_pieces.append('\x00' * pad_size)
- else:
- # non-tail segments should be the full segment size
- if length != input_chunk_size:
- log.msg("non-tail segment should be full segment size: %d!=%d"
- % (length, input_chunk_size), level=log.BAD)
- precondition(length == input_chunk_size,
- "length=%d != input_chunk_size=%d" %
- (length, input_chunk_size))
-
- encrypted_piece = "".join(encrypted_pieces)
- return previous_chunks + [encrypted_piece]
-
- d.addCallback(_got)
- d.addCallback(lambda chunks:
- self._gather_data(num_chunks-1, input_chunk_size,
- crypttext_segment_hasher,
- allow_short, chunks))
- return d
-
- def _send_segment(self, (shares, shareids), segnum):
- # To generate the URI, we must generate the roothash, so we must
- # generate all shares, even if we aren't actually giving them to
- # anybody. This means that the set of shares we create will be equal
- # to or larger than the set of landlords. If we have any landlord who
- # *doesn't* have a share, that's an error.
- _assert(set(self.landlords.keys()).issubset(set(shareids)),
- shareids=shareids, landlords=self.landlords)
- start = time.time()
- dl = []
- self.set_status("Sending segment %d of %d" % (segnum+1,
- self.num_segments))
- self.set_encode_and_push_progress(segnum)
- lognum = self.log("send_segment(%d)" % segnum, level=log.NOISY)
- for i in range(len(shares)):
- subshare = shares[i]
- shareid = shareids[i]
- d = self.send_subshare(shareid, segnum, subshare, lognum)
- dl.append(d)
- subshare_hash = hashutil.block_hash(subshare)
- #from allmydata.util import base32
- #log.msg("creating block (shareid=%d, blocknum=%d) "
- # "len=%d %r .. %r: %s" %
- # (shareid, segnum, len(subshare),
- # subshare[:50], subshare[-50:], base32.b2a(subshare_hash)))
- self.subshare_hashes[shareid].append(subshare_hash)
-
- dl = self._gather_responses(dl)
- def _logit(res):
- self.log("%s uploaded %s / %s bytes (%d%%) of your file." %
- (self,
- self.segment_size*(segnum+1),
- self.segment_size*self.num_segments,
- 100 * (segnum+1) / self.num_segments,
- ),
- level=log.OPERATIONAL)
- elapsed = time.time() - start
- self._times["cumulative_sending"] += elapsed
- return res
- dl.addCallback(_logit)
- return dl
-
- def send_subshare(self, shareid, segment_num, subshare, lognum):
- if shareid not in self.landlords:
- return defer.succeed(None)
- sh = self.landlords[shareid]
- lognum2 = self.log("put_block to %s" % self.landlords[shareid],
- parent=lognum, level=log.NOISY)
- d = sh.put_block(segment_num, subshare)
- def _done(res):
- self.log("put_block done", parent=lognum2, level=log.NOISY)
- return res
- d.addCallback(_done)
- d.addErrback(self._remove_shareholder, shareid,
- "segnum=%d" % segment_num)
- return d
-
- def _remove_shareholder(self, why, shareid, where):
- ln = self.log(format="error while sending %(method)s to shareholder=%(shnum)d",
- method=where, shnum=shareid,
- level=log.UNUSUAL, failure=why)
- if shareid in self.landlords:
- self.landlords[shareid].abort()
- del self.landlords[shareid]
- else:
- # even more UNUSUAL
- self.log("they weren't in our list of landlords", parent=ln,
- level=log.WEIRD)
- if len(self.landlords) < self.shares_of_happiness:
- msg = "lost too many shareholders during upload: %s" % why
- raise NotEnoughSharesError(msg)
- self.log("but we can still continue with %s shares, we'll be happy "
- "with at least %s" % (len(self.landlords),
- self.shares_of_happiness),
- parent=ln)
-
- def _gather_responses(self, dl):
- d = defer.DeferredList(dl, fireOnOneErrback=True)
- def _eatNotEnoughSharesError(f):
- # all exceptions that occur while talking to a peer are handled
- # in _remove_shareholder. That might raise NotEnoughSharesError,
- # which will cause the DeferredList to errback but which should
- # otherwise be consumed. Allow non-NotEnoughSharesError exceptions
- # to pass through as an unhandled errback. We use this in lieu of
- # consumeErrors=True to allow coding errors to be logged.
- f.trap(NotEnoughSharesError)
- return None
- for d0 in dl:
- d0.addErrback(_eatNotEnoughSharesError)
- return d
-
- def finish_hashing(self):
- self._start_hashing_and_close_timestamp = time.time()
- self.set_status("Finishing hashes")
- self.set_encode_and_push_progress(extra=0.0)
- crypttext_hash = self._crypttext_hasher.digest()
- self.uri_extension_data["crypttext_hash"] = crypttext_hash
- d = self._uploadable.get_plaintext_hash()
- def _got(plaintext_hash):
- self.log(format="plaintext_hash=%(plaintext_hash)s, SI=%(SI)s, size=%(size)d",
- plaintext_hash=base32.b2a(plaintext_hash),
- SI=storage.si_b2a(self._storage_index),
- size=self.file_size)
- return plaintext_hash
- d.addCallback(_got)
- if self.USE_PLAINTEXT_HASHES:
- def _use_plaintext_hash(plaintext_hash):
- self.uri_extension_data["plaintext_hash"] = plaintext_hash
- return self._uploadable.get_plaintext_hashtree_leaves(0, self.num_segments, self.num_segments)
- d.addCallback(_use_plaintext_hash)
- def _got_hashtree_leaves(leaves):
- self.log("Encoder: got plaintext_hashtree_leaves: %s" %
- (",".join([base32.b2a(h) for h in leaves]),),
- level=log.NOISY)
- ht = list(HashTree(list(leaves)))
- self.uri_extension_data["plaintext_root_hash"] = ht[0]
- self._plaintext_hashtree_nodes = ht
- d.addCallback(_got_hashtree_leaves)
-
- d.addCallback(lambda res: self._uploadable.close())
- return d
-
- def send_plaintext_hash_tree_to_all_shareholders(self):
- self.log("sending plaintext hash tree", level=log.NOISY)
- self.set_status("Sending Plaintext Hash Tree")
- self.set_encode_and_push_progress(extra=0.2)
- dl = []
- for shareid in self.landlords.keys():
- d = self.send_plaintext_hash_tree(shareid,
- self._plaintext_hashtree_nodes)
- dl.append(d)
- return self._gather_responses(dl)
-
- def send_plaintext_hash_tree(self, shareid, all_hashes):
- if shareid not in self.landlords:
- return defer.succeed(None)
- sh = self.landlords[shareid]
- d = sh.put_plaintext_hashes(all_hashes)
- d.addErrback(self._remove_shareholder, shareid, "put_plaintext_hashes")
- return d
-
- def send_crypttext_hash_tree_to_all_shareholders(self):
- self.log("sending crypttext hash tree", level=log.NOISY)
- self.set_status("Sending Crypttext Hash Tree")
- self.set_encode_and_push_progress(extra=0.3)
- t = HashTree(self._crypttext_hashes)
- all_hashes = list(t)
- self.uri_extension_data["crypttext_root_hash"] = t[0]
- dl = []
- for shareid in self.landlords.keys():
- dl.append(self.send_crypttext_hash_tree(shareid, all_hashes))
- return self._gather_responses(dl)
-
- def send_crypttext_hash_tree(self, shareid, all_hashes):
- if shareid not in self.landlords:
- return defer.succeed(None)
- sh = self.landlords[shareid]
- d = sh.put_crypttext_hashes(all_hashes)
- d.addErrback(self._remove_shareholder, shareid, "put_crypttext_hashes")
- return d
-
- def send_all_subshare_hash_trees(self):
- self.log("sending subshare hash trees", level=log.NOISY)
- self.set_status("Sending Subshare Hash Trees")
- self.set_encode_and_push_progress(extra=0.4)
- dl = []
- for shareid,hashes in enumerate(self.subshare_hashes):
- # hashes is a list of the hashes of all subshares that were sent
- # to shareholder[shareid].
- dl.append(self.send_one_subshare_hash_tree(shareid, hashes))
- return self._gather_responses(dl)
-
- def send_one_subshare_hash_tree(self, shareid, subshare_hashes):
- t = HashTree(subshare_hashes)
- all_hashes = list(t)
- # all_hashes[0] is the root hash, == hash(ah[1]+ah[2])
- # all_hashes[1] is the left child, == hash(ah[3]+ah[4])
- # all_hashes[n] == hash(all_hashes[2*n+1] + all_hashes[2*n+2])
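-        # For example, with four leaves the bottom row sits at indices 3..6,
-        # so all_hashes[1] == hash(ah[3]+ah[4]), all_hashes[2] ==
-        # hash(ah[5]+ah[6]), and all_hashes[0] == hash(ah[1]+ah[2]) is the
-        # root that goes into share_root_hashes.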
- self.share_root_hashes[shareid] = t[0]
- if shareid not in self.landlords:
- return defer.succeed(None)
- sh = self.landlords[shareid]
- d = sh.put_block_hashes(all_hashes)
- d.addErrback(self._remove_shareholder, shareid, "put_block_hashes")
- return d
-
- def send_all_share_hash_trees(self):
- # each bucket gets a set of share hash tree nodes that are needed to
- # validate their share. This includes the share hash itself, but does
- # not include the top-level hash root (which is stored securely in
- # the URI instead).
- self.log("sending all share hash trees", level=log.NOISY)
- self.set_status("Sending Share Hash Trees")
- self.set_encode_and_push_progress(extra=0.6)
- dl = []
- for h in self.share_root_hashes:
- assert h
- # create the share hash tree
- t = HashTree(self.share_root_hashes)
- # the root of this hash tree goes into our URI
- self.uri_extension_data['share_root_hash'] = t[0]
- # now send just the necessary pieces out to each shareholder
- for i in range(self.num_shares):
-            # the HashTree is given a list of n leaves: 0,1,2,..,n-1.
-            # These become nodes A+0, A+1, A+2, .. of the tree, where A=n-1.
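-            # For example (an illustrative sketch): with num_shares=4 the
-            # leaves sit at indices 3..6, so needed_hashes(2,
-            # include_leaf=True) covers roughly {5, 6, 1}: the leaf itself,
-            # its sibling, and its uncle, i.e. everything below the root
-            # that is needed to validate share #2.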
- needed_hash_indices = t.needed_hashes(i, include_leaf=True)
- hashes = [(hi, t[hi]) for hi in needed_hash_indices]
- dl.append(self.send_one_share_hash_tree(i, hashes))
- return self._gather_responses(dl)
-
- def send_one_share_hash_tree(self, shareid, needed_hashes):
- if shareid not in self.landlords:
- return defer.succeed(None)
- sh = self.landlords[shareid]
- d = sh.put_share_hashes(needed_hashes)
- d.addErrback(self._remove_shareholder, shareid, "put_share_hashes")
- return d
-
- def send_uri_extension_to_all_shareholders(self):
- lp = self.log("sending uri_extension", level=log.NOISY)
- self.set_status("Sending URI Extensions")
- self.set_encode_and_push_progress(extra=0.8)
- for k in ('crypttext_root_hash', 'crypttext_hash',
- ):
- assert k in self.uri_extension_data
- if self.USE_PLAINTEXT_HASHES:
- for k in ('plaintext_root_hash', 'plaintext_hash',
- ):
- assert k in self.uri_extension_data
- uri_extension = uri.pack_extension(self.uri_extension_data)
- ed = {}
- for k,v in self.uri_extension_data.items():
- if k.endswith("hash"):
- ed[k] = base32.b2a(v)
- else:
- ed[k] = v
- self.log("uri_extension_data is %s" % (ed,), level=log.NOISY, parent=lp)
- self.uri_extension_hash = hashutil.uri_extension_hash(uri_extension)
- dl = []
- for shareid in self.landlords.keys():
- dl.append(self.send_uri_extension(shareid, uri_extension))
- return self._gather_responses(dl)
-
- def send_uri_extension(self, shareid, uri_extension):
- sh = self.landlords[shareid]
- d = sh.put_uri_extension(uri_extension)
- d.addErrback(self._remove_shareholder, shareid, "put_uri_extension")
- return d
-
- def close_all_shareholders(self):
- self.log("closing shareholders", level=log.NOISY)
- self.set_status("Closing Shareholders")
- self.set_encode_and_push_progress(extra=0.9)
- dl = []
- for shareid in self.landlords:
- d = self.landlords[shareid].close()
- d.addErrback(self._remove_shareholder, shareid, "close")
- dl.append(d)
- return self._gather_responses(dl)
-
- def done(self, res):
- self.log("upload done", level=log.OPERATIONAL)
- self.set_status("Done")
- self.set_encode_and_push_progress(extra=1.0) # done
- now = time.time()
- h_and_c_elapsed = now - self._start_hashing_and_close_timestamp
- self._times["hashes_and_close"] = h_and_c_elapsed
- total_elapsed = now - self._start_total_timestamp
- self._times["total_encode_and_push"] = total_elapsed
-
- # update our sharemap
- self._shares_placed = set(self.landlords.keys())
- return (self.uri_extension_hash, self.required_shares,
- self.num_shares, self.file_size)
-
- def err(self, f):
- self.log("upload failed", failure=f, level=log.UNUSUAL)
- self.set_status("Failed")
- # we need to abort any remaining shareholders, so they'll delete the
- # partial share, allowing someone else to upload it again.
- self.log("aborting shareholders", level=log.UNUSUAL)
- for shareid in list(self.landlords.keys()):
- self.landlords[shareid].abort()
- if f.check(defer.FirstError):
- return f.value.subFailure
- return f
-
- def get_shares_placed(self):
- # return a set of share numbers that were successfully placed.
- return self._shares_placed
-
- def get_times(self):
- # return a dictionary of encode+push timings
- return self._times
-
- def get_uri_extension_data(self):
- return self.uri_extension_data
+++ /dev/null
-
-from zope.interface import implements
-from twisted.internet import defer
-from allmydata.interfaces import IFileNode, IFileURI, IURI, ICheckable
-from allmydata import uri
-from allmydata.checker import SimpleCHKFileChecker, SimpleCHKFileVerifier, \
- Results
-
-class FileNode:
- implements(IFileNode, ICheckable)
-
- def __init__(self, uri, client):
- u = IFileURI(uri)
- self.uri = u.to_string()
- self._client = client
-
- def get_uri(self):
- return self.uri
-
- def is_mutable(self):
- return False
-
- def is_readonly(self):
- return True
-
- def get_readonly_uri(self):
- return self.uri
-
- def get_size(self):
- return IFileURI(self.uri).get_size()
-
- def __hash__(self):
- return hash((self.__class__, self.uri))
- def __cmp__(self, them):
- if cmp(type(self), type(them)):
- return cmp(type(self), type(them))
- if cmp(self.__class__, them.__class__):
- return cmp(self.__class__, them.__class__)
- return cmp(self.uri, them.uri)
-
- def get_verifier(self):
- return IFileURI(self.uri).get_verifier()
-
- def check(self, verify=False, repair=False):
- assert repair is False # not implemented yet
- vcap = self.get_verifier()
- if verify:
- v = SimpleCHKFileVerifier(self._client, vcap)
- return v.start()
- else:
- peer_getter = self._client.get_permuted_peers
- v = SimpleCHKFileChecker(peer_getter, vcap)
- return v.check()
-
- def download(self, target):
- downloader = self._client.getServiceNamed("downloader")
- return downloader.download(self.uri, target)
-
- def download_to_data(self):
- downloader = self._client.getServiceNamed("downloader")
- return downloader.download_to_data(self.uri)
-
-
-
-class LiteralFileNode:
- implements(IFileNode, ICheckable)
-
- def __init__(self, my_uri, client):
- u = IFileURI(my_uri)
- assert isinstance(u, uri.LiteralFileURI)
- self.uri = u.to_string()
- self._client = client
-
- def get_uri(self):
- return self.uri
-
- def is_mutable(self):
- return False
-
- def is_readonly(self):
- return True
-
- def get_readonly_uri(self):
- return self.uri
-
- def get_size(self):
- return len(IURI(self.uri).data)
-
- def __hash__(self):
- return hash((self.__class__, self.uri))
- def __cmp__(self, them):
- if cmp(type(self), type(them)):
- return cmp(type(self), type(them))
- if cmp(self.__class__, them.__class__):
- return cmp(self.__class__, them.__class__)
- return cmp(self.uri, them.uri)
-
- def get_verifier(self):
- return None
-
- def check(self, verify=False, repair=False):
- # neither verify= nor repair= affect LIT files
- r = Results(None)
- r.healthy = True
- r.problems = []
- return defer.succeed(r)
-
- def download(self, target):
- # note that this does not update the stats_provider
- data = IURI(self.uri).data
- target.open(len(data))
- target.write(data)
- target.close()
- return defer.maybeDeferred(target.finish)
-
- def download_to_data(self):
- data = IURI(self.uri).data
- return defer.succeed(data)
--- /dev/null
+
+"""
+Given a StorageIndex, count how many shares we can find.
+
+This does no verification of the shares whatsoever. If the peer claims to
+have the share, we believe them.
+"""
+
+from zope.interface import implements
+from twisted.internet import defer
+from twisted.python import log
+from allmydata import storage
+from allmydata.interfaces import IVerifierURI, ICheckerResults
+from allmydata.immutable import download
+from allmydata.util import hashutil, base32
+
+class Results:
+ implements(ICheckerResults)
+
+ def __init__(self, storage_index):
+ # storage_index might be None for, say, LIT files
+ self.storage_index = storage_index
+ if storage_index is None:
+ self.storage_index_s = "<none>"
+ else:
+ self.storage_index_s = base32.b2a(storage_index)[:6]
+
+ def is_healthy(self):
+ return self.healthy
+
+ def html_summary(self):
+ if self.healthy:
+ return "<span>healthy</span>"
+ return "<span>NOT HEALTHY</span>"
+
+ def html(self):
+ s = "<div>\n"
+ s += "<h1>Checker Results for Immutable SI=%s</h1>\n" % self.storage_index_s
+ if self.healthy:
+ s += "<h2>Healthy!</h2>\n"
+ else:
+ s += "<h2>Not Healthy!</h2>\n"
+ s += "</div>\n"
+ return s
+
+
+class SimpleCHKFileChecker:
+ """Return a list of (needed, total, found, sharemap), where sharemap maps
+ share number to a list of (binary) nodeids of the shareholders."""
+
+ def __init__(self, peer_getter, uri_to_check):
+ self.peer_getter = peer_getter
+ self.found_shares = set()
+ self.uri_to_check = IVerifierURI(uri_to_check)
+ self.sharemap = {}
+
+ '''
+ def check_synchronously(self, si):
+ # this is how we would write this class if we were using synchronous
+ # messages (or if we used promises).
+ found = set()
+ for (pmpeerid, peerid, connection) in self.peer_getter(storage_index):
+ buckets = connection.get_buckets(si)
+ found.update(buckets.keys())
+ return len(found)
+ '''
+
+ def check(self):
+ d = self._get_all_shareholders(self.uri_to_check.storage_index)
+ d.addCallback(self._done)
+ return d
+
+ def _get_all_shareholders(self, storage_index):
+ dl = []
+ for (peerid, ss) in self.peer_getter("storage", storage_index):
+ d = ss.callRemote("get_buckets", storage_index)
+ d.addCallbacks(self._got_response, self._got_error,
+ callbackArgs=(peerid,))
+ dl.append(d)
+ return defer.DeferredList(dl)
+
+ def _got_response(self, buckets, peerid):
+        # buckets is a dict: maps shnum to an rref of the server that holds it
+ self.found_shares.update(buckets.keys())
+ for k in buckets:
+ if k not in self.sharemap:
+ self.sharemap[k] = []
+ self.sharemap[k].append(peerid)
+
+    def _got_error(self, f):
+        if f.check(KeyError):
+            pass # a KeyError just means this server has no buckets for us
+        log.err(f)
+
+ def _done(self, res):
+ u = self.uri_to_check
+ r = Results(self.uri_to_check.storage_index)
+ r.healthy = bool(len(self.found_shares) >= u.needed_shares)
+ r.stuff = (u.needed_shares, u.total_shares, len(self.found_shares),
+ self.sharemap)
+ return r
+
+class VerifyingOutput:
+ def __init__(self, total_length, results):
+ self._crypttext_hasher = hashutil.crypttext_hasher()
+ self.length = 0
+ self.total_length = total_length
+ self._segment_number = 0
+ self._crypttext_hash_tree = None
+ self._opened = False
+ self._results = results
+ results.healthy = False
+
+ def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree):
+ self._crypttext_hash_tree = crypttext_hashtree
+
+ def write_segment(self, crypttext):
+ self.length += len(crypttext)
+
+ self._crypttext_hasher.update(crypttext)
+ if self._crypttext_hash_tree:
+ ch = hashutil.crypttext_segment_hasher()
+ ch.update(crypttext)
+ crypttext_leaves = {self._segment_number: ch.digest()}
+ self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves)
+
+ self._segment_number += 1
+
+ def close(self):
+ self.crypttext_hash = self._crypttext_hasher.digest()
+
+ def finish(self):
+ self._results.healthy = True
+ return self._results
+
+
+class SimpleCHKFileVerifier(download.FileDownloader):
+ # this reconstructs the crypttext, which verifies that at least 'k' of
+ # the shareholders are around and have valid data. It does not check the
+ # remaining shareholders, and it cannot verify the plaintext.
+ check_plaintext_hash = False
+
+ def __init__(self, client, u):
+ self._client = client
+
+ u = IVerifierURI(u)
+ self._storage_index = u.storage_index
+ self._uri_extension_hash = u.uri_extension_hash
+ self._total_shares = u.total_shares
+ self._size = u.size
+ self._num_needed_shares = u.needed_shares
+
+ self._si_s = storage.si_b2a(self._storage_index)
+ self.init_logging()
+
+ r = Results(self._storage_index)
+ self._output = VerifyingOutput(self._size, r)
+ self._paused = False
+ self._stopped = False
+
+ self._results = None
+ self.active_buckets = {} # k: shnum, v: bucket
+ self._share_buckets = [] # list of (sharenum, bucket) tuples
+ self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets
+ self._uri_extension_sources = []
+
+ self._uri_extension_data = None
+
+ self._fetch_failures = {"uri_extension": 0,
+ "plaintext_hashroot": 0,
+ "plaintext_hashtree": 0,
+ "crypttext_hashroot": 0,
+ "crypttext_hashtree": 0,
+ }
+
+ def init_logging(self):
+ self._log_prefix = prefix = storage.si_b2a(self._storage_index)[:5]
+ num = self._client.log("SimpleCHKFileVerifier(%s): starting" % prefix)
+ self._log_number = num
+
+ def log(self, msg, parent=None):
+ if parent is None:
+ parent = self._log_number
+ return self._client.log("SimpleCHKFileVerifier(%s): %s"
+ % (self._log_prefix, msg),
+ parent=parent)
+
+
+ def start(self):
+ log.msg("starting download [%s]" % storage.si_b2a(self._storage_index)[:5])
+
+ # first step: who should we download from?
+ d = defer.maybeDeferred(self._get_all_shareholders)
+ d.addCallback(self._got_all_shareholders)
+ # now get the uri_extension block from somebody and validate it
+ d.addCallback(self._obtain_uri_extension)
+ d.addCallback(self._got_uri_extension)
+ d.addCallback(self._get_hashtrees)
+ d.addCallback(self._create_validated_buckets)
+ # once we know that, we can download blocks from everybody
+ d.addCallback(self._download_all_segments)
+ d.addCallback(self._done)
+ return d
+
--- /dev/null
+
+import os, random, weakref, itertools, time
+from zope.interface import implements
+from twisted.internet import defer
+from twisted.internet.interfaces import IPushProducer, IConsumer
+from twisted.application import service
+from foolscap.eventual import eventually
+
+from allmydata.util import base32, mathutil, hashutil, log
+from allmydata.util.assertutil import _assert
+from allmydata import codec, hashtree, storage, uri
+from allmydata.interfaces import IDownloadTarget, IDownloader, IFileURI, \
+ IDownloadStatus, IDownloadResults
+from allmydata.immutable.encode import NotEnoughSharesError
+from pycryptopp.cipher.aes import AES
+
+class HaveAllPeersError(Exception):
+ # we use this to jump out of the loop
+ pass
+
+class BadURIExtensionHashValue(Exception):
+ pass
+class BadPlaintextHashValue(Exception):
+ pass
+class BadCrypttextHashValue(Exception):
+ pass
+
+class DownloadStopped(Exception):
+ pass
+
+class DownloadResults:
+ implements(IDownloadResults)
+
+ def __init__(self):
+ self.servers_used = set()
+ self.server_problems = {}
+ self.servermap = {}
+ self.timings = {}
+ self.file_size = None
+
+class Output:
+ def __init__(self, downloadable, key, total_length, log_parent,
+ download_status):
+ self.downloadable = downloadable
+ self._decryptor = AES(key)
+ self._crypttext_hasher = hashutil.crypttext_hasher()
+ self._plaintext_hasher = hashutil.plaintext_hasher()
+ self.length = 0
+ self.total_length = total_length
+ self._segment_number = 0
+ self._plaintext_hash_tree = None
+ self._crypttext_hash_tree = None
+ self._opened = False
+ self._log_parent = log_parent
+ self._status = download_status
+ self._status.set_progress(0.0)
+
+ def log(self, *args, **kwargs):
+ if "parent" not in kwargs:
+ kwargs["parent"] = self._log_parent
+ if "facility" not in kwargs:
+ kwargs["facility"] = "download.output"
+ return log.msg(*args, **kwargs)
+
+ def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree):
+ self._plaintext_hash_tree = plaintext_hashtree
+ self._crypttext_hash_tree = crypttext_hashtree
+
+ def write_segment(self, crypttext):
+ self.length += len(crypttext)
+ self._status.set_progress( float(self.length) / self.total_length )
+
+ # memory footprint: 'crypttext' is the only segment_size usage
+ # outstanding. While we decrypt it into 'plaintext', we hit
+ # 2*segment_size.
+ self._crypttext_hasher.update(crypttext)
+ if self._crypttext_hash_tree:
+ ch = hashutil.crypttext_segment_hasher()
+ ch.update(crypttext)
+ crypttext_leaves = {self._segment_number: ch.digest()}
+ self.log(format="crypttext leaf hash (%(bytes)sB) [%(segnum)d] is %(hash)s",
+ bytes=len(crypttext),
+ segnum=self._segment_number, hash=base32.b2a(ch.digest()),
+ level=log.NOISY)
+ self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves)
+
+ plaintext = self._decryptor.process(crypttext)
+ del crypttext
+
+ # now we're back down to 1*segment_size.
+
+ self._plaintext_hasher.update(plaintext)
+ if self._plaintext_hash_tree:
+ ph = hashutil.plaintext_segment_hasher()
+ ph.update(plaintext)
+ plaintext_leaves = {self._segment_number: ph.digest()}
+ self.log(format="plaintext leaf hash (%(bytes)sB) [%(segnum)d] is %(hash)s",
+ bytes=len(plaintext),
+ segnum=self._segment_number, hash=base32.b2a(ph.digest()),
+ level=log.NOISY)
+ self._plaintext_hash_tree.set_hashes(leaves=plaintext_leaves)
+
+ self._segment_number += 1
+ # We're still at 1*segment_size. The Downloadable is responsible for
+ # any memory usage beyond this.
+ if not self._opened:
+ self._opened = True
+ self.downloadable.open(self.total_length)
+ self.downloadable.write(plaintext)
+
+ def fail(self, why):
+ # this is really unusual, and deserves maximum forensics
+ if why.check(DownloadStopped):
+            # a DownloadStopped just means the consumer aborted the
+            # download, which is not so scary
+ self.log("download stopped", level=log.UNUSUAL)
+ else:
+ self.log("download failed!", failure=why, level=log.SCARY)
+ self.downloadable.fail(why)
+
+ def close(self):
+ self.crypttext_hash = self._crypttext_hasher.digest()
+ self.plaintext_hash = self._plaintext_hasher.digest()
+ self.log("download finished, closing IDownloadable", level=log.NOISY)
+ self.downloadable.close()
+
+ def finish(self):
+ return self.downloadable.finish()
+
+class ValidatedBucket:
+ """I am a front-end for a remote storage bucket, responsible for
+ retrieving and validating data from that bucket.
+
+ My get_block() method is used by BlockDownloaders.
+ """
+
+ def __init__(self, sharenum, bucket,
+ share_hash_tree, roothash,
+ num_blocks):
+ self.sharenum = sharenum
+ self.bucket = bucket
+ self._share_hash = None # None means not validated yet
+ self.share_hash_tree = share_hash_tree
+ self._roothash = roothash
+ self.block_hash_tree = hashtree.IncompleteHashTree(num_blocks)
+ self.started = False
+
+ def get_block(self, blocknum):
+ if not self.started:
+ d = self.bucket.start()
+ def _started(res):
+ self.started = True
+ return self.get_block(blocknum)
+ d.addCallback(_started)
+ return d
+
+ # the first time we use this bucket, we need to fetch enough elements
+ # of the share hash tree to validate it from our share hash up to the
+ # hashroot.
+ if not self._share_hash:
+ d1 = self.bucket.get_share_hashes()
+ else:
+ d1 = defer.succeed([])
+
+ # we might need to grab some elements of our block hash tree, to
+ # validate the requested block up to the share hash
+ needed = self.block_hash_tree.needed_hashes(blocknum)
+ if needed:
+ # TODO: get fewer hashes, use get_block_hashes(needed)
+ d2 = self.bucket.get_block_hashes()
+ else:
+ d2 = defer.succeed([])
+
+ d3 = self.bucket.get_block(blocknum)
+
+ d = defer.gatherResults([d1, d2, d3])
+ d.addCallback(self._got_data, blocknum)
+ return d
+
+ def _got_data(self, res, blocknum):
+ sharehashes, blockhashes, blockdata = res
+ blockhash = None # to make logging it safe
+
+ try:
+ if not self._share_hash:
+ sh = dict(sharehashes)
+ sh[0] = self._roothash # always use our own root, from the URI
+ sht = self.share_hash_tree
+ if sht.get_leaf_index(self.sharenum) not in sh:
+ raise hashtree.NotEnoughHashesError
+ sht.set_hashes(sh)
+ self._share_hash = sht.get_leaf(self.sharenum)
+
+ blockhash = hashutil.block_hash(blockdata)
+ #log.msg("checking block_hash(shareid=%d, blocknum=%d) len=%d "
+ # "%r .. %r: %s" %
+ # (self.sharenum, blocknum, len(blockdata),
+ # blockdata[:50], blockdata[-50:], base32.b2a(blockhash)))
+
+ # we always validate the blockhash
+ bh = dict(enumerate(blockhashes))
+ # replace blockhash root with validated value
+ bh[0] = self._share_hash
+ self.block_hash_tree.set_hashes(bh, {blocknum: blockhash})
+
+ except (hashtree.BadHashError, hashtree.NotEnoughHashesError):
+ # log.WEIRD: indicates undetected disk/network error, or more
+ # likely a programming error
+ log.msg("hash failure in block=%d, shnum=%d on %s" %
+ (blocknum, self.sharenum, self.bucket))
+ if self._share_hash:
+ log.msg(""" failure occurred when checking the block_hash_tree.
+ This suggests that either the block data was bad, or that the
+ block hashes we received along with it were bad.""")
+ else:
+ log.msg(""" the failure probably occurred when checking the
+ share_hash_tree, which suggests that the share hashes we
+ received from the remote peer were bad.""")
+ log.msg(" have self._share_hash: %s" % bool(self._share_hash))
+ log.msg(" block length: %d" % len(blockdata))
+ log.msg(" block hash: %s" % base32.b2a_or_none(blockhash))
+ if len(blockdata) < 100:
+ log.msg(" block data: %r" % (blockdata,))
+ else:
+ log.msg(" block data start/end: %r .. %r" %
+ (blockdata[:50], blockdata[-50:]))
+ log.msg(" root hash: %s" % base32.b2a(self._roothash))
+ log.msg(" share hash tree:\n" + self.share_hash_tree.dump())
+ log.msg(" block hash tree:\n" + self.block_hash_tree.dump())
+ lines = []
+ for i,h in sorted(sharehashes):
+ lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
+ log.msg(" sharehashes:\n" + "\n".join(lines) + "\n")
+ lines = []
+ for i,h in enumerate(blockhashes):
+ lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
+ log.msg(" blockhashes:\n" + "\n".join(lines) + "\n")
+ raise
+
+ # If we made it here, the block is good. If the hash trees didn't
+ # like what they saw, they would have raised a BadHashError, causing
+ # our caller to see a Failure and thus ignore this block (as well as
+ # dropping this bucket).
+ return blockdata
+
+
+
+class BlockDownloader:
+ """I am responsible for downloading a single block (from a single bucket)
+ for a single segment.
+
+ I am a child of the SegmentDownloader.
+ """
+
+ def __init__(self, vbucket, blocknum, parent, results):
+ self.vbucket = vbucket
+ self.blocknum = blocknum
+ self.parent = parent
+ self.results = results
+ self._log_number = self.parent.log("starting block %d" % blocknum)
+
+ def log(self, msg, parent=None):
+ if parent is None:
+ parent = self._log_number
+ return self.parent.log(msg, parent=parent)
+
+ def start(self, segnum):
+ lognum = self.log("get_block(segnum=%d)" % segnum)
+ started = time.time()
+ d = self.vbucket.get_block(segnum)
+ d.addCallbacks(self._hold_block, self._got_block_error,
+ callbackArgs=(started, lognum,), errbackArgs=(lognum,))
+ return d
+
+ def _hold_block(self, data, started, lognum):
+ if self.results:
+ elapsed = time.time() - started
+ peerid = self.vbucket.bucket.get_peerid()
+ if peerid not in self.results.timings["fetch_per_server"]:
+ self.results.timings["fetch_per_server"][peerid] = []
+ self.results.timings["fetch_per_server"][peerid].append(elapsed)
+ self.log("got block", parent=lognum)
+ self.parent.hold_block(self.blocknum, data)
+
+ def _got_block_error(self, f, lognum):
+ self.log("BlockDownloader[%d] got error: %s" % (self.blocknum, f),
+ parent=lognum)
+ if self.results:
+ peerid = self.vbucket.bucket.get_peerid()
+ self.results.server_problems[peerid] = str(f)
+ self.parent.bucket_failed(self.vbucket)
+
+class SegmentDownloader:
+ """I am responsible for downloading all the blocks for a single segment
+ of data.
+
+ I am a child of the FileDownloader.
+ """
+
+ def __init__(self, parent, segmentnumber, needed_shares, results):
+ self.parent = parent
+ self.segmentnumber = segmentnumber
+ self.needed_blocks = needed_shares
+ self.blocks = {} # k: blocknum, v: data
+ self.results = results
+ self._log_number = self.parent.log("starting segment %d" %
+ segmentnumber)
+
+ def log(self, msg, parent=None):
+ if parent is None:
+ parent = self._log_number
+ return self.parent.log(msg, parent=parent)
+
+ def start(self):
+ return self._download()
+
+ def _download(self):
+ d = self._try()
+ def _done(res):
+ if len(self.blocks) >= self.needed_blocks:
+ # we only need self.needed_blocks blocks
+ # we want to get the smallest blockids, because they are
+ # more likely to be fast "primary blocks"
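+                # (illustrative note: with a systematic code like zfec, the
+                # first 'k' blocks are verbatim pieces of the crypttext, so
+                # they need no real decoding arithmetic)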
+ blockids = sorted(self.blocks.keys())[:self.needed_blocks]
+ blocks = []
+ for blocknum in blockids:
+ blocks.append(self.blocks[blocknum])
+ return (blocks, blockids)
+ else:
+ return self._download()
+ d.addCallback(_done)
+ return d
+
+ def _try(self):
+ # fill our set of active buckets, maybe raising NotEnoughSharesError
+ active_buckets = self.parent._activate_enough_buckets()
+ # Now we have enough buckets, in self.parent.active_buckets.
+
+ # in test cases, bd.start might mutate active_buckets right away, so
+ # we need to put off calling start() until we've iterated all the way
+ # through it.
+ downloaders = []
+ for blocknum, vbucket in active_buckets.iteritems():
+ bd = BlockDownloader(vbucket, blocknum, self, self.results)
+ downloaders.append(bd)
+ if self.results:
+ self.results.servers_used.add(vbucket.bucket.get_peerid())
+ l = [bd.start(self.segmentnumber) for bd in downloaders]
+ return defer.DeferredList(l, fireOnOneErrback=True)
+
+ def hold_block(self, blocknum, data):
+ self.blocks[blocknum] = data
+
+ def bucket_failed(self, vbucket):
+ self.parent.bucket_failed(vbucket)
+
+class DownloadStatus:
+ implements(IDownloadStatus)
+ statusid_counter = itertools.count(0)
+
+ def __init__(self):
+ self.storage_index = None
+ self.size = None
+ self.helper = False
+ self.status = "Not started"
+ self.progress = 0.0
+ self.paused = False
+ self.stopped = False
+ self.active = True
+ self.results = None
+ self.counter = self.statusid_counter.next()
+ self.started = time.time()
+
+ def get_started(self):
+ return self.started
+ def get_storage_index(self):
+ return self.storage_index
+ def get_size(self):
+ return self.size
+ def using_helper(self):
+ return self.helper
+ def get_status(self):
+ status = self.status
+ if self.paused:
+ status += " (output paused)"
+ if self.stopped:
+ status += " (output stopped)"
+ return status
+ def get_progress(self):
+ return self.progress
+ def get_active(self):
+ return self.active
+ def get_results(self):
+ return self.results
+ def get_counter(self):
+ return self.counter
+
+ def set_storage_index(self, si):
+ self.storage_index = si
+ def set_size(self, size):
+ self.size = size
+ def set_helper(self, helper):
+ self.helper = helper
+ def set_status(self, status):
+ self.status = status
+ def set_paused(self, paused):
+ self.paused = paused
+ def set_stopped(self, stopped):
+ self.stopped = stopped
+ def set_progress(self, value):
+ self.progress = value
+ def set_active(self, value):
+ self.active = value
+ def set_results(self, value):
+ self.results = value
+
+class FileDownloader:
+ implements(IPushProducer)
+ check_crypttext_hash = True
+ check_plaintext_hash = True
+ _status = None
+
+ def __init__(self, client, u, downloadable):
+ self._client = client
+
+ u = IFileURI(u)
+ self._storage_index = u.storage_index
+ self._uri_extension_hash = u.uri_extension_hash
+ self._total_shares = u.total_shares
+ self._size = u.size
+ self._num_needed_shares = u.needed_shares
+
+ self._si_s = storage.si_b2a(self._storage_index)
+ self.init_logging()
+
+ self._started = time.time()
+ self._status = s = DownloadStatus()
+ s.set_status("Starting")
+ s.set_storage_index(self._storage_index)
+ s.set_size(self._size)
+ s.set_helper(False)
+ s.set_active(True)
+
+ self._results = DownloadResults()
+ s.set_results(self._results)
+ self._results.file_size = self._size
+ self._results.timings["servers_peer_selection"] = {}
+ self._results.timings["fetch_per_server"] = {}
+ self._results.timings["cumulative_fetch"] = 0.0
+ self._results.timings["cumulative_decode"] = 0.0
+ self._results.timings["cumulative_decrypt"] = 0.0
+ self._results.timings["paused"] = 0.0
+
+ if IConsumer.providedBy(downloadable):
+ downloadable.registerProducer(self, True)
+ self._downloadable = downloadable
+ self._output = Output(downloadable, u.key, self._size, self._log_number,
+ self._status)
+ self._paused = False
+ self._stopped = False
+
+ self.active_buckets = {} # k: shnum, v: bucket
+ self._share_buckets = [] # list of (sharenum, bucket) tuples
+ self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets
+ self._uri_extension_sources = []
+
+ self._uri_extension_data = None
+
+ self._fetch_failures = {"uri_extension": 0,
+ "plaintext_hashroot": 0,
+ "plaintext_hashtree": 0,
+ "crypttext_hashroot": 0,
+ "crypttext_hashtree": 0,
+ }
+
+ def init_logging(self):
+ self._log_prefix = prefix = storage.si_b2a(self._storage_index)[:5]
+ num = self._client.log(format="FileDownloader(%(si)s): starting",
+ si=storage.si_b2a(self._storage_index))
+ self._log_number = num
+
+ def log(self, *args, **kwargs):
+ if "parent" not in kwargs:
+ kwargs["parent"] = self._log_number
+ if "facility" not in kwargs:
+ kwargs["facility"] = "tahoe.download"
+ return log.msg(*args, **kwargs)
+
+ def pauseProducing(self):
+ if self._paused:
+ return
+ self._paused = defer.Deferred()
+ self._paused_at = time.time()
+ if self._status:
+ self._status.set_paused(True)
+
+ def resumeProducing(self):
+ if self._paused:
+ paused_for = time.time() - self._paused_at
+ self._results.timings['paused'] += paused_for
+ p = self._paused
+ self._paused = None
+ eventually(p.callback, None)
+ if self._status:
+ self._status.set_paused(False)
+
+ def stopProducing(self):
+ self.log("Download.stopProducing")
+ self._stopped = True
+ self.resumeProducing()
+ if self._status:
+ self._status.set_stopped(True)
+ self._status.set_active(False)
+
+ def start(self):
+ self.log("starting download")
+
+ # first step: who should we download from?
+ d = defer.maybeDeferred(self._get_all_shareholders)
+ d.addCallback(self._got_all_shareholders)
+ # now get the uri_extension block from somebody and validate it
+ d.addCallback(self._obtain_uri_extension)
+ d.addCallback(self._got_uri_extension)
+ d.addCallback(self._get_hashtrees)
+ d.addCallback(self._create_validated_buckets)
+ # once we know that, we can download blocks from everybody
+ d.addCallback(self._download_all_segments)
+ def _finished(res):
+ if self._status:
+ self._status.set_status("Finished")
+ self._status.set_active(False)
+ self._status.set_paused(False)
+ if IConsumer.providedBy(self._downloadable):
+ self._downloadable.unregisterProducer()
+ return res
+ d.addBoth(_finished)
+ def _failed(why):
+ if self._status:
+ self._status.set_status("Failed")
+ self._status.set_active(False)
+ self._output.fail(why)
+ return why
+ d.addErrback(_failed)
+ d.addCallback(self._done)
+ return d
+
+ def _get_all_shareholders(self):
+ dl = []
+ for (peerid,ss) in self._client.get_permuted_peers("storage",
+ self._storage_index):
+ d = ss.callRemote("get_buckets", self._storage_index)
+ d.addCallbacks(self._got_response, self._got_error,
+ callbackArgs=(peerid,))
+ dl.append(d)
+ self._responses_received = 0
+ self._queries_sent = len(dl)
+ if self._status:
+ self._status.set_status("Locating Shares (%d/%d)" %
+ (self._responses_received,
+ self._queries_sent))
+ return defer.DeferredList(dl)
+
+ def _got_response(self, buckets, peerid):
+ self._responses_received += 1
+ if self._results:
+ elapsed = time.time() - self._started
+ self._results.timings["servers_peer_selection"][peerid] = elapsed
+ if self._status:
+ self._status.set_status("Locating Shares (%d/%d)" %
+ (self._responses_received,
+ self._queries_sent))
+ for sharenum, bucket in buckets.iteritems():
+ b = storage.ReadBucketProxy(bucket, peerid, self._si_s)
+ self.add_share_bucket(sharenum, b)
+ self._uri_extension_sources.append(b)
+ if self._results:
+ if peerid not in self._results.servermap:
+ self._results.servermap[peerid] = set()
+ self._results.servermap[peerid].add(sharenum)
+
+ def add_share_bucket(self, sharenum, bucket):
+ # this is split out for the benefit of test_encode.py
+ self._share_buckets.append( (sharenum, bucket) )
+
+ def _got_error(self, f):
+ self._client.log("Somebody failed. -- %s" % (f,))
+
+ def bucket_failed(self, vbucket):
+ shnum = vbucket.sharenum
+ del self.active_buckets[shnum]
+ s = self._share_vbuckets[shnum]
+ # s is a set of ValidatedBucket instances
+ s.remove(vbucket)
+ # ... which might now be empty
+ if not s:
+ # there are no more buckets which can provide this share, so
+ # remove the key. This may prompt us to use a different share.
+ del self._share_vbuckets[shnum]
+
+ def _got_all_shareholders(self, res):
+ if self._results:
+ now = time.time()
+ self._results.timings["peer_selection"] = now - self._started
+
+ if len(self._share_buckets) < self._num_needed_shares:
+ raise NotEnoughSharesError
+
+ #for s in self._share_vbuckets.values():
+ # for vb in s:
+ # assert isinstance(vb, ValidatedBucket), \
+ # "vb is %s but should be a ValidatedBucket" % (vb,)
+
+ def _unpack_uri_extension_data(self, data):
+ return uri.unpack_extension(data)
+
+ def _obtain_uri_extension(self, ignored):
+ # all shareholders are supposed to have a copy of uri_extension, and
+ # all are supposed to be identical. We compute the hash of the data
+ # that comes back, and compare it against the version in our URI. If
+ # they don't match, ignore their data and try someone else.
+ if self._status:
+ self._status.set_status("Obtaining URI Extension")
+
+ self._uri_extension_fetch_started = time.time()
+ def _validate(proposal, bucket):
+ h = hashutil.uri_extension_hash(proposal)
+ if h != self._uri_extension_hash:
+ self._fetch_failures["uri_extension"] += 1
+ msg = ("The copy of uri_extension we received from "
+ "%s was bad: wanted %s, got %s" %
+ (bucket,
+ base32.b2a(self._uri_extension_hash),
+ base32.b2a(h)))
+ self.log(msg, level=log.SCARY)
+ raise BadURIExtensionHashValue(msg)
+ return self._unpack_uri_extension_data(proposal)
+ return self._obtain_validated_thing(None,
+ self._uri_extension_sources,
+ "uri_extension",
+ "get_uri_extension", (), _validate)
+
+ def _obtain_validated_thing(self, ignored, sources, name, methname, args,
+ validatorfunc):
+ if not sources:
+ raise NotEnoughSharesError("started with zero peers while fetching "
+ "%s" % name)
+ bucket = sources[0]
+ sources = sources[1:]
+ #d = bucket.callRemote(methname, *args)
+ d = bucket.startIfNecessary()
+ d.addCallback(lambda res: getattr(bucket, methname)(*args))
+ d.addCallback(validatorfunc, bucket)
+ def _bad(f):
+ self.log("%s from vbucket %s failed:" % (name, bucket),
+ failure=f, level=log.WEIRD)
+ if not sources:
+ raise NotEnoughSharesError("ran out of peers, last error was %s"
+ % (f,))
+ # try again with a different one
+ return self._obtain_validated_thing(None, sources, name,
+ methname, args, validatorfunc)
+ d.addErrback(_bad)
+ return d
+
+ def _got_uri_extension(self, uri_extension_data):
+ if self._results:
+ elapsed = time.time() - self._uri_extension_fetch_started
+ self._results.timings["uri_extension"] = elapsed
+
+ d = self._uri_extension_data = uri_extension_data
+
+ self._codec = codec.get_decoder_by_name(d['codec_name'])
+ self._codec.set_serialized_params(d['codec_params'])
+ self._tail_codec = codec.get_decoder_by_name(d['codec_name'])
+ self._tail_codec.set_serialized_params(d['tail_codec_params'])
+
+ crypttext_hash = d.get('crypttext_hash', None) # optional
+ if crypttext_hash:
+ assert isinstance(crypttext_hash, str)
+ assert len(crypttext_hash) == 32
+ self._crypttext_hash = crypttext_hash
+ self._plaintext_hash = d.get('plaintext_hash', None) # optional
+
+ self._roothash = d['share_root_hash']
+
+ self._segment_size = segment_size = d['segment_size']
+ self._total_segments = mathutil.div_ceil(self._size, segment_size)
+ self._current_segnum = 0
+
+ self._share_hashtree = hashtree.IncompleteHashTree(d['total_shares'])
+ self._share_hashtree.set_hashes({0: self._roothash})
+
+ def _get_hashtrees(self, res):
+ self._get_hashtrees_started = time.time()
+ if self._status:
+ self._status.set_status("Retrieving Hash Trees")
+ d = defer.maybeDeferred(self._get_plaintext_hashtrees)
+ d.addCallback(self._get_crypttext_hashtrees)
+ d.addCallback(self._setup_hashtrees)
+ return d
+
+ def _get_plaintext_hashtrees(self):
+ # plaintext hashes are optional. If the root isn't in the UEB, then
+ # the share will be holding an empty list. We don't even bother
+ # fetching it.
+ if "plaintext_root_hash" not in self._uri_extension_data:
+ self._plaintext_hashtree = None
+ return
+ def _validate_plaintext_hashtree(proposal, bucket):
+ if proposal[0] != self._uri_extension_data['plaintext_root_hash']:
+ self._fetch_failures["plaintext_hashroot"] += 1
+ msg = ("The copy of the plaintext_root_hash we received from"
+ " %s was bad" % bucket)
+ raise BadPlaintextHashValue(msg)
+ pt_hashtree = hashtree.IncompleteHashTree(self._total_segments)
+ pt_hashes = dict(list(enumerate(proposal)))
+ try:
+ pt_hashtree.set_hashes(pt_hashes)
+ except hashtree.BadHashError:
+ # the hashes they gave us were not self-consistent, even
+ # though the root matched what we saw in the uri_extension
+ # block
+ self._fetch_failures["plaintext_hashtree"] += 1
+ raise
+ self._plaintext_hashtree = pt_hashtree
+ d = self._obtain_validated_thing(None,
+ self._uri_extension_sources,
+ "plaintext_hashes",
+ "get_plaintext_hashes", (),
+ _validate_plaintext_hashtree)
+ return d
+
+ def _get_crypttext_hashtrees(self, res):
+ # crypttext hashes are optional too
+ if "crypttext_root_hash" not in self._uri_extension_data:
+ self._crypttext_hashtree = None
+ return
+ def _validate_crypttext_hashtree(proposal, bucket):
+ if proposal[0] != self._uri_extension_data['crypttext_root_hash']:
+ self._fetch_failures["crypttext_hashroot"] += 1
+ msg = ("The copy of the crypttext_root_hash we received from"
+ " %s was bad" % bucket)
+ raise BadCrypttextHashValue(msg)
+ ct_hashtree = hashtree.IncompleteHashTree(self._total_segments)
+ ct_hashes = dict(list(enumerate(proposal)))
+ try:
+ ct_hashtree.set_hashes(ct_hashes)
+ except hashtree.BadHashError:
+ self._fetch_failures["crypttext_hashtree"] += 1
+ raise
+ self._crypttext_hashtree = ct_hashtree
+ d = self._obtain_validated_thing(None,
+ self._uri_extension_sources,
+ "crypttext_hashes",
+ "get_crypttext_hashes", (),
+ _validate_crypttext_hashtree)
+ return d
+
+ def _setup_hashtrees(self, res):
+ self._output.setup_hashtrees(self._plaintext_hashtree,
+ self._crypttext_hashtree)
+ if self._results:
+ elapsed = time.time() - self._get_hashtrees_started
+ self._results.timings["hashtrees"] = elapsed
+
+ def _create_validated_buckets(self, ignored=None):
+ self._share_vbuckets = {}
+ for sharenum, bucket in self._share_buckets:
+ vbucket = ValidatedBucket(sharenum, bucket,
+ self._share_hashtree,
+ self._roothash,
+ self._total_segments)
+ s = self._share_vbuckets.setdefault(sharenum, set())
+ s.add(vbucket)
+
+ def _activate_enough_buckets(self):
+ """either return a mapping from shnum to a ValidatedBucket that can
+ provide data for that share, or raise NotEnoughSharesError"""
+
+ while len(self.active_buckets) < self._num_needed_shares:
+ # need some more
+ handled_shnums = set(self.active_buckets.keys())
+ available_shnums = set(self._share_vbuckets.keys())
+ potential_shnums = list(available_shnums - handled_shnums)
+ if not potential_shnums:
+ raise NotEnoughSharesError
+ # choose a random share
+ shnum = random.choice(potential_shnums)
+ # and a random bucket that will provide it
+ validated_bucket = random.choice(list(self._share_vbuckets[shnum]))
+ self.active_buckets[shnum] = validated_bucket
+ return self.active_buckets
+
+
+ def _download_all_segments(self, res):
+ # the promise: upon entry to this function, self._share_vbuckets
+ # contains enough buckets to complete the download, and some extra
+ # ones to tolerate some buckets dropping out or having errors.
+ # self._share_vbuckets is a dictionary that maps from shnum to a set
+ # of ValidatedBuckets, which themselves are wrappers around
+ # RIBucketReader references.
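+        # For example (an illustrative sketch of that dict's shape):
+        #   self._share_vbuckets == {0: set([vb_a]), 1: set([vb_a, vb_b]),
+        #                            4: set([vb_c])}
+        # would let us fetch shares 0, 1, and 4, with share 1 available
+        # from either of two validated buckets.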
+ self.active_buckets = {} # k: shnum, v: ValidatedBucket instance
+
+ self._started_fetching = time.time()
+
+ d = defer.succeed(None)
+ for segnum in range(self._total_segments-1):
+ d.addCallback(self._download_segment, segnum)
+ # this pause, at the end of write, prevents pre-fetch from
+ # happening until the consumer is ready for more data.
+ d.addCallback(self._check_for_pause)
+ d.addCallback(self._download_tail_segment, self._total_segments-1)
+ return d
+
+ def _check_for_pause(self, res):
+ if self._paused:
+ d = defer.Deferred()
+ self._paused.addCallback(lambda ignored: d.callback(res))
+ return d
+ if self._stopped:
+ raise DownloadStopped("our Consumer called stopProducing()")
+ return res
+
+ def _download_segment(self, res, segnum):
+ if self._status:
+ self._status.set_status("Downloading segment %d of %d" %
+ (segnum+1, self._total_segments))
+ self.log("downloading seg#%d of %d (%d%%)"
+ % (segnum, self._total_segments,
+ 100.0 * segnum / self._total_segments))
+ # memory footprint: when the SegmentDownloader finishes pulling down
+ # all shares, we have 1*segment_size of usage.
+ segmentdler = SegmentDownloader(self, segnum, self._num_needed_shares,
+ self._results)
+ started = time.time()
+ d = segmentdler.start()
+ def _finished_fetching(res):
+ elapsed = time.time() - started
+ self._results.timings["cumulative_fetch"] += elapsed
+ return res
+ if self._results:
+ d.addCallback(_finished_fetching)
+ # pause before using more memory
+ d.addCallback(self._check_for_pause)
+ # while the codec does its job, we hit 2*segment_size
+ def _started_decode(res):
+ self._started_decode = time.time()
+ return res
+ if self._results:
+ d.addCallback(_started_decode)
+ d.addCallback(lambda (shares, shareids):
+ self._codec.decode(shares, shareids))
+ # once the codec is done, we drop back to 1*segment_size, because
+ # 'shares' goes out of scope. The memory usage is all in the
+ # plaintext now, spread out into a bunch of tiny buffers.
+ def _finished_decode(res):
+ elapsed = time.time() - self._started_decode
+ self._results.timings["cumulative_decode"] += elapsed
+ return res
+ if self._results:
+ d.addCallback(_finished_decode)
+
+ # pause/check-for-stop just before writing, to honor stopProducing
+ d.addCallback(self._check_for_pause)
+ def _done(buffers):
+ # we start by joining all these buffers together into a single
+            # string. This makes Output.write_segment easier, since it wants
+            # to hash data one segment at a time anyway, and doesn't impact
+            # our memory footprint since we're already peaking at
+            # 2*segment_size inside the codec a moment ago.
+ segment = "".join(buffers)
+ del buffers
+ # we're down to 1*segment_size right now, but write_segment()
+ # will decrypt a copy of the segment internally, which will push
+ # us up to 2*segment_size while it runs.
+ started_decrypt = time.time()
+ self._output.write_segment(segment)
+ if self._results:
+ elapsed = time.time() - started_decrypt
+ self._results.timings["cumulative_decrypt"] += elapsed
+ d.addCallback(_done)
+ return d
+
+ def _download_tail_segment(self, res, segnum):
+ self.log("downloading seg#%d of %d (%d%%)"
+ % (segnum, self._total_segments,
+ 100.0 * segnum / self._total_segments))
+ segmentdler = SegmentDownloader(self, segnum, self._num_needed_shares,
+ self._results)
+ started = time.time()
+ d = segmentdler.start()
+ def _finished_fetching(res):
+ elapsed = time.time() - started
+ self._results.timings["cumulative_fetch"] += elapsed
+ return res
+ if self._results:
+ d.addCallback(_finished_fetching)
+ # pause before using more memory
+ d.addCallback(self._check_for_pause)
+ def _started_decode(res):
+ self._started_decode = time.time()
+ return res
+ if self._results:
+ d.addCallback(_started_decode)
+ d.addCallback(lambda (shares, shareids):
+ self._tail_codec.decode(shares, shareids))
+ def _finished_decode(res):
+ elapsed = time.time() - self._started_decode
+ self._results.timings["cumulative_decode"] += elapsed
+ return res
+ if self._results:
+ d.addCallback(_finished_decode)
+ # pause/check-for-stop just before writing, to honor stopProducing
+ d.addCallback(self._check_for_pause)
+ def _done(buffers):
+ # trim off any padding added by the upload side
+ segment = "".join(buffers)
+ del buffers
+ # we never send empty segments. If the data was an exact multiple
+ # of the segment size, the last segment will be full.
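+            # For example (illustrative numbers): with segment_size=128KiB
+            # and a 300KiB file, the tail carries 300-256 = 44KiB of real
+            # data, so pad_size is 128-44 = 84KiB and we keep only the
+            # first 44KiB of the decoded tail segment.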
+ pad_size = mathutil.pad_size(self._size, self._segment_size)
+ tail_size = self._segment_size - pad_size
+ segment = segment[:tail_size]
+ started_decrypt = time.time()
+ self._output.write_segment(segment)
+ if self._results:
+ elapsed = time.time() - started_decrypt
+ self._results.timings["cumulative_decrypt"] += elapsed
+ d.addCallback(_done)
+ return d
+
+ def _done(self, res):
+ self.log("download done")
+ if self._results:
+ now = time.time()
+ self._results.timings["total"] = now - self._started
+ self._results.timings["segments"] = now - self._started_fetching
+ self._output.close()
+ if self.check_crypttext_hash and self._crypttext_hash:
+ _assert(self._crypttext_hash == self._output.crypttext_hash,
+ "bad crypttext_hash: computed=%s, expected=%s" %
+ (base32.b2a(self._output.crypttext_hash),
+ base32.b2a(self._crypttext_hash)))
+ if self.check_plaintext_hash and self._plaintext_hash:
+ _assert(self._plaintext_hash == self._output.plaintext_hash,
+ "bad plaintext_hash: computed=%s, expected=%s" %
+ (base32.b2a(self._output.plaintext_hash),
+ base32.b2a(self._plaintext_hash)))
+ _assert(self._output.length == self._size,
+ got=self._output.length, expected=self._size)
+ return self._output.finish()
+
+ def get_download_status(self):
+ return self._status
+
+
+class LiteralDownloader:
+ def __init__(self, client, u, downloadable):
+ self._uri = IFileURI(u)
+ assert isinstance(self._uri, uri.LiteralFileURI)
+ self._downloadable = downloadable
+ self._status = s = DownloadStatus()
+ s.set_storage_index(None)
+ s.set_helper(False)
+ s.set_status("Done")
+ s.set_active(False)
+ s.set_progress(1.0)
+
+ def start(self):
+ data = self._uri.data
+ self._status.set_size(len(data))
+ self._downloadable.open(len(data))
+ self._downloadable.write(data)
+ self._downloadable.close()
+ return defer.maybeDeferred(self._downloadable.finish)
+
+ def get_download_status(self):
+ return self._status
+
+class FileName:
+ implements(IDownloadTarget)
+ def __init__(self, filename):
+ self._filename = filename
+ self.f = None
+ def open(self, size):
+ self.f = open(self._filename, "wb")
+ return self.f
+ def write(self, data):
+ self.f.write(data)
+ def close(self):
+ if self.f:
+ self.f.close()
+ def fail(self, why):
+ if self.f:
+ self.f.close()
+ os.unlink(self._filename)
+ def register_canceller(self, cb):
+ pass # we won't use it
+ def finish(self):
+ pass
+
+class Data:
+ implements(IDownloadTarget)
+ def __init__(self):
+ self._data = []
+ def open(self, size):
+ pass
+ def write(self, data):
+ self._data.append(data)
+ def close(self):
+ self.data = "".join(self._data)
+ del self._data
+ def fail(self, why):
+ del self._data
+ def register_canceller(self, cb):
+ pass # we won't use it
+ def finish(self):
+ return self.data
+
+class FileHandle:
+ """Use me to download data to a pre-defined filehandle-like object. I
+ will use the target's write() method. I will *not* close the filehandle:
+ I leave that up to the originator of the filehandle. The download process
+ will return the filehandle when it completes.
+ """
+ implements(IDownloadTarget)
+ def __init__(self, filehandle):
+ self._filehandle = filehandle
+ def open(self, size):
+ pass
+ def write(self, data):
+ self._filehandle.write(data)
+ def close(self):
+ # the originator of the filehandle reserves the right to close it
+ pass
+ def fail(self, why):
+ pass
+ def register_canceller(self, cb):
+ pass
+ def finish(self):
+ return self._filehandle
+
+class Downloader(service.MultiService):
+ """I am a service that allows file downloading.
+ """
+ implements(IDownloader)
+ name = "downloader"
+ MAX_DOWNLOAD_STATUSES = 10
+
+ def __init__(self, stats_provider=None):
+ service.MultiService.__init__(self)
+ self.stats_provider = stats_provider
+ self._all_downloads = weakref.WeakKeyDictionary() # for debugging
+ self._all_download_statuses = weakref.WeakKeyDictionary()
+ self._recent_download_statuses = []
+
+ def download(self, u, t):
+ assert self.parent
+ assert self.running
+ u = IFileURI(u)
+ t = IDownloadTarget(t)
+ assert t.write
+ assert t.close
+
+ if isinstance(u, uri.LiteralFileURI):
+ dl = LiteralDownloader(self.parent, u, t)
+ elif isinstance(u, uri.CHKFileURI):
+ if self.stats_provider:
+ # these counters are meant for network traffic, and don't
+ # include LIT files
+ self.stats_provider.count('downloader.files_downloaded', 1)
+ self.stats_provider.count('downloader.bytes_downloaded', u.get_size())
+ dl = FileDownloader(self.parent, u, t)
+ else:
+ raise RuntimeError("I don't know how to download a %s" % u)
+ self._add_download(dl)
+ d = dl.start()
+ return d
+
+ # utility functions
+ def download_to_data(self, uri):
+ return self.download(uri, Data())
+ def download_to_filename(self, uri, filename):
+ return self.download(uri, FileName(filename))
+ def download_to_filehandle(self, uri, filehandle):
+ return self.download(uri, FileHandle(filehandle))
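+    # An illustrative use of these helpers (the names below are examples
+    # only, not part of this module):
+    #   downloader = client.getServiceNamed("downloader")
+    #   d = downloader.download_to_filename(filecap, "copy.txt")
+    #   # ... d fires once the file has been written to disk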
+
+ def _add_download(self, downloader):
+ self._all_downloads[downloader] = None
+ s = downloader.get_download_status()
+ self._all_download_statuses[s] = None
+ self._recent_download_statuses.append(s)
+ while len(self._recent_download_statuses) > self.MAX_DOWNLOAD_STATUSES:
+ self._recent_download_statuses.pop(0)
+
+ def list_all_download_statuses(self):
+ for ds in self._all_download_statuses:
+ yield ds
--- /dev/null
+# -*- test-case-name: allmydata.test.test_encode -*-
+
+import time
+from zope.interface import implements
+from twisted.internet import defer
+from foolscap import eventual
+from allmydata import storage, uri
+from allmydata.hashtree import HashTree
+from allmydata.util import mathutil, hashutil, base32, log
+from allmydata.util.assertutil import _assert, precondition
+from allmydata.codec import CRSEncoder
+from allmydata.interfaces import IEncoder, IStorageBucketWriter, \
+ IEncryptedUploadable, IUploadStatus
+
+"""
+The goal of the encoder is to turn the original file into a series of
+'shares'. Each share is going to a 'shareholder' (nominally each shareholder
+is a different host, but for small grids there may be overlap). The number
+of shares is chosen to hit our reliability goals (more shares on more
+machines means more reliability), and is limited by overhead (proportional to
+numshares or log(numshares)) and the encoding technology in use (zfec permits
+only 256 shares total). It is also constrained by the amount of data
+we want to send to each host. For estimating purposes, think of 10 shares
+out of which we need 3 to reconstruct the file.
+
+The encoder starts by cutting the original file into segments. All segments
+except the last are of equal size. The segment size is chosen to constrain
+the memory footprint (which will probably vary between 1x and 4x segment
+size) and to constrain the overhead (which will be proportional to
+log(number of segments)).
+
+
+Each segment (A,B,C) is read into memory, encrypted, and encoded into
+blocks. The 'share' (say, share #1) that makes it out to a host is a
+collection of these blocks (block A1, B1, C1), plus some hash-tree
+information necessary to validate the data upon retrieval. Only one segment
+is handled at a time: all blocks for segment A are delivered before any
+work is begun on segment B.
+
+As blocks are created, we retain the hash of each one. The list of block hashes
+for a single share (say, hash(A1), hash(B1), hash(C1)) is used to form the base
+of a Merkle hash tree for that share, called the block hash tree.
+
+This hash tree has one terminal leaf per block. The complete block hash
+tree is sent to the shareholder after all the data has been sent. At
+retrieval time, the decoder will ask for whichever pieces of this tree it
+needs to validate a given block before asking for that block.
+
+(Note: we don't really need to generate this whole block hash tree
+ourselves. It would be sufficient to have the shareholder generate it and
+just tell us the root. This gives us an extra level of validation on the
+transfer, though, and it is relatively cheap to compute.)
+
+Each of these block hash trees has a root hash. The collection of these
+root hashes for all shares are collected into the 'share hash tree', which
+has one terminal leaf per share. After sending the blocks and the complete
+block hash tree to each shareholder, we send them the portion of the share
+hash tree that is necessary to validate their share. The root of the share
+hash tree is put into the URI.
+
+"""
+
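+# A rough sketch of the 3-of-10 example above (illustrative only; the real
+# upload path is driven by Encoder.start() below, and the variable names
+# here are placeholders):
+#
+#   enc = CRSEncoder()
+#   enc.set_params(segment_size, 3, 10)  # k=3 needed out of n=10 shares
+#   d = enc.encode(three_equal_pieces)   # fires with (blocks, shareids)
+#   # block i of each segment is appended to share i; hash(block) becomes
+#   # a leaf of share i's block hash tree, and the ten block-hash-tree
+#   # roots become the leaves of the single share hash tree whose root is
+#   # recorded in the URI extension.
+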
+class NotEnoughSharesError(Exception):
+ servermap = None
+
+class UploadAborted(Exception):
+ pass
+
+KiB=1024
+MiB=1024*KiB
+GiB=1024*MiB
+TiB=1024*GiB
+PiB=1024*TiB
+
+class Encoder(object):
+ implements(IEncoder)
+ USE_PLAINTEXT_HASHES = False
+
+ def __init__(self, log_parent=None, upload_status=None):
+ object.__init__(self)
+ self.uri_extension_data = {}
+ self._codec = None
+ self._status = None
+ if upload_status:
+ self._status = IUploadStatus(upload_status)
+ precondition(log_parent is None or isinstance(log_parent, int),
+ log_parent)
+ self._log_number = log.msg("creating Encoder %s" % self,
+ facility="tahoe.encoder", parent=log_parent)
+ self._aborted = False
+
+ def __repr__(self):
+ if hasattr(self, "_storage_index"):
+ return "<Encoder for %s>" % storage.si_b2a(self._storage_index)[:5]
+ return "<Encoder for unknown storage index>"
+
+ def log(self, *args, **kwargs):
+ if "parent" not in kwargs:
+ kwargs["parent"] = self._log_number
+ if "facility" not in kwargs:
+ kwargs["facility"] = "tahoe.encoder"
+ return log.msg(*args, **kwargs)
+
+ def set_encrypted_uploadable(self, uploadable):
+ eu = self._uploadable = IEncryptedUploadable(uploadable)
+ d = eu.get_size()
+ def _got_size(size):
+ self.log(format="file size: %(size)d", size=size)
+ self.file_size = size
+ d.addCallback(_got_size)
+ d.addCallback(lambda res: eu.get_all_encoding_parameters())
+ d.addCallback(self._got_all_encoding_parameters)
+ d.addCallback(lambda res: eu.get_storage_index())
+ def _done(storage_index):
+ self._storage_index = storage_index
+ return self
+ d.addCallback(_done)
+ return d
+
+ def _got_all_encoding_parameters(self, params):
+ assert not self._codec
+ k, happy, n, segsize = params
+ self.required_shares = k
+ self.shares_of_happiness = happy
+ self.num_shares = n
+ self.segment_size = segsize
+ self.log("got encoding parameters: %d/%d/%d %d" % (k,happy,n, segsize))
+ self.log("now setting up codec")
+
+ assert self.segment_size % self.required_shares == 0
+
+ self.num_segments = mathutil.div_ceil(self.file_size,
+ self.segment_size)
+
+ self._codec = CRSEncoder()
+ self._codec.set_params(self.segment_size,
+ self.required_shares, self.num_shares)
+
+ data = self.uri_extension_data
+ data['codec_name'] = self._codec.get_encoder_type()
+ data['codec_params'] = self._codec.get_serialized_params()
+
+ data['size'] = self.file_size
+ data['segment_size'] = self.segment_size
+ self.share_size = mathutil.div_ceil(self.file_size,
+ self.required_shares)
+ data['num_segments'] = self.num_segments
+ data['needed_shares'] = self.required_shares
+ data['total_shares'] = self.num_shares
+
+ # the "tail" is the last segment. This segment may or may not be
+ # shorter than all other segments. We use the "tail codec" to handle
+ # it. If the tail is short, we use a different codec instance. In
+ # addition, the tail codec must be fed data which has been padded out
+ # to the right size.
+ self.tail_size = self.file_size % self.segment_size
+ if not self.tail_size:
+ self.tail_size = self.segment_size
+
+ # the tail codec is responsible for encoding tail_size bytes
+ padded_tail_size = mathutil.next_multiple(self.tail_size,
+ self.required_shares)
+ self._tail_codec = CRSEncoder()
+ self._tail_codec.set_params(padded_tail_size,
+ self.required_shares, self.num_shares)
+ data['tail_codec_params'] = self._tail_codec.get_serialized_params()
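+        # Worked example (illustrative numbers only): with segment_size=12,
+        # required_shares=3, and file_size=32 we get num_segments=3,
+        # tail_size = 32 % 12 = 8, and padded_tail_size = 9, so the tail
+        # codec encodes 8 real bytes plus one zero pad byte.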
+
+ def _get_share_size(self):
+ share_size = mathutil.div_ceil(self.file_size, self.required_shares)
+ overhead = self._compute_overhead()
+ return share_size + overhead
+
+ def _compute_overhead(self):
+ return 0
+
+ def get_param(self, name):
+ assert self._codec
+
+ if name == "storage_index":
+ return self._storage_index
+ elif name == "share_counts":
+ return (self.required_shares, self.shares_of_happiness,
+ self.num_shares)
+ elif name == "num_segments":
+ return self.num_segments
+ elif name == "segment_size":
+ return self.segment_size
+ elif name == "block_size":
+ return self._codec.get_block_size()
+ elif name == "share_size":
+ return self._get_share_size()
+ elif name == "serialized_params":
+ return self._codec.get_serialized_params()
+ else:
+ raise KeyError("unknown parameter name '%s'" % name)
+
+ def set_shareholders(self, landlords):
+ assert isinstance(landlords, dict)
+ for k in landlords:
+ assert IStorageBucketWriter.providedBy(landlords[k])
+ self.landlords = landlords.copy()
+
+ def start(self):
+ self.log("%s starting" % (self,))
+ #paddedsize = self._size + mathutil.pad_size(self._size, self.needed_shares)
+ assert self._codec
+ self._crypttext_hasher = hashutil.crypttext_hasher()
+ self._crypttext_hashes = []
+ self.segment_num = 0
+ self.subshare_hashes = [[] for x in range(self.num_shares)]
+        # subshare_hashes[i] is a list that will be accumulated and then sent
+ # to landlord[i]. This list contains a hash of each segment_share
+ # that we sent to that landlord.
+ self.share_root_hashes = [None] * self.num_shares
+
+ self._times = {
+ "cumulative_encoding": 0.0,
+ "cumulative_sending": 0.0,
+ "hashes_and_close": 0.0,
+ "total_encode_and_push": 0.0,
+ }
+ self._start_total_timestamp = time.time()
+
+ d = eventual.fireEventually()
+
+ d.addCallback(lambda res: self.start_all_shareholders())
+
+ for i in range(self.num_segments-1):
+ # note to self: this form doesn't work, because lambda only
+ # captures the slot, not the value
+ #d.addCallback(lambda res: self.do_segment(i))
+ # use this form instead:
+ d.addCallback(lambda res, i=i: self._encode_segment(i))
+ d.addCallback(self._send_segment, i)
+ d.addCallback(self._turn_barrier)
+ last_segnum = self.num_segments - 1
+ d.addCallback(lambda res: self._encode_tail_segment(last_segnum))
+ d.addCallback(self._send_segment, last_segnum)
+ d.addCallback(self._turn_barrier)
+
+ d.addCallback(lambda res: self.finish_hashing())
+
+ if self.USE_PLAINTEXT_HASHES:
+ d.addCallback(lambda res:
+ self.send_plaintext_hash_tree_to_all_shareholders())
+ d.addCallback(lambda res:
+ self.send_crypttext_hash_tree_to_all_shareholders())
+ d.addCallback(lambda res: self.send_all_subshare_hash_trees())
+ d.addCallback(lambda res: self.send_all_share_hash_trees())
+ d.addCallback(lambda res: self.send_uri_extension_to_all_shareholders())
+
+ d.addCallback(lambda res: self.close_all_shareholders())
+ d.addCallbacks(self.done, self.err)
+ return d
+
+ def set_status(self, status):
+ if self._status:
+ self._status.set_status(status)
+
+ def set_encode_and_push_progress(self, sent_segments=None, extra=0.0):
+ if self._status:
+ # we treat the final hash+close as an extra segment
+ if sent_segments is None:
+ sent_segments = self.num_segments
+ progress = float(sent_segments + extra) / (self.num_segments + 1)
+ self._status.set_progress(2, progress)
+
+ def abort(self):
+ self.log("aborting upload", level=log.UNUSUAL)
+ assert self._codec, "don't call abort before start"
+ self._aborted = True
+ # the next segment read (in _gather_data inside _encode_segment) will
+ # raise UploadAborted(), which will bypass the rest of the upload
+ # chain. If we've sent the final segment's shares, it's too late to
+ # abort. TODO: allow abort any time up to close_all_shareholders.
+
+ def _turn_barrier(self, res):
+ # putting this method in a Deferred chain imposes a guaranteed
+ # reactor turn between the pre- and post- portions of that chain.
+ # This can be useful to limit memory consumption: since Deferreds do
+ # not do tail recursion, code which uses defer.succeed(result) for
+ # consistency will cause objects to live for longer than you might
+ # normally expect.
+
+ return eventual.fireEventually(res)
+
+
+ def start_all_shareholders(self):
+ self.log("starting shareholders", level=log.NOISY)
+ self.set_status("Starting shareholders")
+ dl = []
+ for shareid in self.landlords:
+ d = self.landlords[shareid].start()
+ d.addErrback(self._remove_shareholder, shareid, "start")
+ dl.append(d)
+ return self._gather_responses(dl)
+
+ def _encode_segment(self, segnum):
+ codec = self._codec
+ start = time.time()
+
+ # the ICodecEncoder API wants to receive a total of self.segment_size
+ # bytes on each encode() call, broken up into a number of
+ # identically-sized pieces. Due to the way the codec algorithm works,
+ # these pieces need to be the same size as the share which the codec
+ # will generate. Therefore we must feed it with input_piece_size that
+ # equals the output share size.
+ input_piece_size = codec.get_block_size()
+
+ # as a result, the number of input pieces per encode() call will be
+ # equal to the number of required shares with which the codec was
+ # constructed. You can think of the codec as chopping up a
+ # 'segment_size' of data into 'required_shares' shares (not doing any
+ # fancy math at all, just doing a split), then creating some number
+ # of additional shares which can be substituted if the primary ones
+ # are unavailable
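+        #
+        # For instance (illustrative numbers): with required_shares=3 and
+        # segment_size=300, get_block_size() returns 100, so encode() is fed
+        # three 100-byte pieces and emits num_shares blocks of 100 bytes each.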
+
+ crypttext_segment_hasher = hashutil.crypttext_segment_hasher()
+
+ # memory footprint: we only hold a tiny piece of the plaintext at any
+        # given time. We build up a segment's worth of crypttext, then hand
+ # it to the encoder. Assuming 3-of-10 encoding (3.3x expansion) and
+ # 1MiB max_segment_size, we get a peak memory footprint of 4.3*1MiB =
+ # 4.3MiB. Lowering max_segment_size to, say, 100KiB would drop the
+ # footprint to 430KiB at the expense of more hash-tree overhead.
+
+ d = self._gather_data(self.required_shares, input_piece_size,
+ crypttext_segment_hasher)
+ def _done_gathering(chunks):
+ for c in chunks:
+ assert len(c) == input_piece_size
+ self._crypttext_hashes.append(crypttext_segment_hasher.digest())
+ # during this call, we hit 5*segsize memory
+ return codec.encode(chunks)
+ d.addCallback(_done_gathering)
+ def _done(res):
+ elapsed = time.time() - start
+ self._times["cumulative_encoding"] += elapsed
+ return res
+ d.addCallback(_done)
+ return d
+
+ def _encode_tail_segment(self, segnum):
+
+ start = time.time()
+ codec = self._tail_codec
+ input_piece_size = codec.get_block_size()
+
+ crypttext_segment_hasher = hashutil.crypttext_segment_hasher()
+
+ d = self._gather_data(self.required_shares, input_piece_size,
+ crypttext_segment_hasher,
+ allow_short=True)
+ def _done_gathering(chunks):
+ for c in chunks:
+ # a short trailing chunk will have been padded by
+ # _gather_data
+ assert len(c) == input_piece_size
+ self._crypttext_hashes.append(crypttext_segment_hasher.digest())
+ return codec.encode(chunks)
+ d.addCallback(_done_gathering)
+ def _done(res):
+ elapsed = time.time() - start
+ self._times["cumulative_encoding"] += elapsed
+ return res
+ d.addCallback(_done)
+ return d
+
+ def _gather_data(self, num_chunks, input_chunk_size,
+ crypttext_segment_hasher,
+ allow_short=False,
+ previous_chunks=[]):
+ """Return a Deferred that will fire when the required number of
+ chunks have been read (and hashed and encrypted). The Deferred fires
+ with the combination of any 'previous_chunks' and the new chunks
+ which were gathered."""
+
+ if self._aborted:
+ raise UploadAborted()
+
+ if not num_chunks:
+ return defer.succeed(previous_chunks)
+
+ d = self._uploadable.read_encrypted(input_chunk_size, False)
+ def _got(data):
+ if self._aborted:
+ raise UploadAborted()
+ encrypted_pieces = []
+ length = 0
+ while data:
+ encrypted_piece = data.pop(0)
+ length += len(encrypted_piece)
+ crypttext_segment_hasher.update(encrypted_piece)
+ self._crypttext_hasher.update(encrypted_piece)
+ encrypted_pieces.append(encrypted_piece)
+
+ if allow_short:
+ if length < input_chunk_size:
+ # padding
+ pad_size = input_chunk_size - length
+ encrypted_pieces.append('\x00' * pad_size)
+ else:
+ # non-tail segments should be the full segment size
+ if length != input_chunk_size:
+ log.msg("non-tail segment should be full segment size: %d!=%d"
+ % (length, input_chunk_size), level=log.BAD)
+ precondition(length == input_chunk_size,
+ "length=%d != input_chunk_size=%d" %
+ (length, input_chunk_size))
+
+ encrypted_piece = "".join(encrypted_pieces)
+ return previous_chunks + [encrypted_piece]
+
+ d.addCallback(_got)
+ d.addCallback(lambda chunks:
+ self._gather_data(num_chunks-1, input_chunk_size,
+ crypttext_segment_hasher,
+ allow_short, chunks))
+ return d
+
+ def _send_segment(self, (shares, shareids), segnum):
+ # To generate the URI, we must generate the roothash, so we must
+ # generate all shares, even if we aren't actually giving them to
+ # anybody. This means that the set of shares we create will be equal
+ # to or larger than the set of landlords. If we have any landlord who
+ # *doesn't* have a share, that's an error.
+ _assert(set(self.landlords.keys()).issubset(set(shareids)),
+ shareids=shareids, landlords=self.landlords)
+ start = time.time()
+ dl = []
+ self.set_status("Sending segment %d of %d" % (segnum+1,
+ self.num_segments))
+ self.set_encode_and_push_progress(segnum)
+ lognum = self.log("send_segment(%d)" % segnum, level=log.NOISY)
+ for i in range(len(shares)):
+ subshare = shares[i]
+ shareid = shareids[i]
+ d = self.send_subshare(shareid, segnum, subshare, lognum)
+ dl.append(d)
+ subshare_hash = hashutil.block_hash(subshare)
+ #from allmydata.util import base32
+ #log.msg("creating block (shareid=%d, blocknum=%d) "
+ # "len=%d %r .. %r: %s" %
+ # (shareid, segnum, len(subshare),
+ # subshare[:50], subshare[-50:], base32.b2a(subshare_hash)))
+ self.subshare_hashes[shareid].append(subshare_hash)
+
+ dl = self._gather_responses(dl)
+ def _logit(res):
+ self.log("%s uploaded %s / %s bytes (%d%%) of your file." %
+ (self,
+ self.segment_size*(segnum+1),
+ self.segment_size*self.num_segments,
+ 100 * (segnum+1) / self.num_segments,
+ ),
+ level=log.OPERATIONAL)
+ elapsed = time.time() - start
+ self._times["cumulative_sending"] += elapsed
+ return res
+ dl.addCallback(_logit)
+ return dl
+
+ def send_subshare(self, shareid, segment_num, subshare, lognum):
+ if shareid not in self.landlords:
+ return defer.succeed(None)
+ sh = self.landlords[shareid]
+ lognum2 = self.log("put_block to %s" % self.landlords[shareid],
+ parent=lognum, level=log.NOISY)
+ d = sh.put_block(segment_num, subshare)
+ def _done(res):
+ self.log("put_block done", parent=lognum2, level=log.NOISY)
+ return res
+ d.addCallback(_done)
+ d.addErrback(self._remove_shareholder, shareid,
+ "segnum=%d" % segment_num)
+ return d
+
+ def _remove_shareholder(self, why, shareid, where):
+ ln = self.log(format="error while sending %(method)s to shareholder=%(shnum)d",
+ method=where, shnum=shareid,
+ level=log.UNUSUAL, failure=why)
+ if shareid in self.landlords:
+ self.landlords[shareid].abort()
+ del self.landlords[shareid]
+ else:
+ # even more UNUSUAL
+ self.log("they weren't in our list of landlords", parent=ln,
+ level=log.WEIRD)
+ if len(self.landlords) < self.shares_of_happiness:
+ msg = "lost too many shareholders during upload: %s" % why
+ raise NotEnoughSharesError(msg)
+ self.log("but we can still continue with %s shares, we'll be happy "
+ "with at least %s" % (len(self.landlords),
+ self.shares_of_happiness),
+ parent=ln)
+
+ def _gather_responses(self, dl):
+ d = defer.DeferredList(dl, fireOnOneErrback=True)
+ def _eatNotEnoughSharesError(f):
+ # all exceptions that occur while talking to a peer are handled
+ # in _remove_shareholder. That might raise NotEnoughSharesError,
+ # which will cause the DeferredList to errback but which should
+ # otherwise be consumed. Allow non-NotEnoughSharesError exceptions
+ # to pass through as an unhandled errback. We use this in lieu of
+ # consumeErrors=True to allow coding errors to be logged.
+ f.trap(NotEnoughSharesError)
+ return None
+ for d0 in dl:
+ d0.addErrback(_eatNotEnoughSharesError)
+ return d
+
+ def finish_hashing(self):
+ self._start_hashing_and_close_timestamp = time.time()
+ self.set_status("Finishing hashes")
+ self.set_encode_and_push_progress(extra=0.0)
+ crypttext_hash = self._crypttext_hasher.digest()
+ self.uri_extension_data["crypttext_hash"] = crypttext_hash
+ d = self._uploadable.get_plaintext_hash()
+ def _got(plaintext_hash):
+ self.log(format="plaintext_hash=%(plaintext_hash)s, SI=%(SI)s, size=%(size)d",
+ plaintext_hash=base32.b2a(plaintext_hash),
+ SI=storage.si_b2a(self._storage_index),
+ size=self.file_size)
+ return plaintext_hash
+ d.addCallback(_got)
+ if self.USE_PLAINTEXT_HASHES:
+ def _use_plaintext_hash(plaintext_hash):
+ self.uri_extension_data["plaintext_hash"] = plaintext_hash
+ return self._uploadable.get_plaintext_hashtree_leaves(0, self.num_segments, self.num_segments)
+ d.addCallback(_use_plaintext_hash)
+ def _got_hashtree_leaves(leaves):
+ self.log("Encoder: got plaintext_hashtree_leaves: %s" %
+ (",".join([base32.b2a(h) for h in leaves]),),
+ level=log.NOISY)
+ ht = list(HashTree(list(leaves)))
+ self.uri_extension_data["plaintext_root_hash"] = ht[0]
+ self._plaintext_hashtree_nodes = ht
+ d.addCallback(_got_hashtree_leaves)
+
+ d.addCallback(lambda res: self._uploadable.close())
+ return d
+
+ def send_plaintext_hash_tree_to_all_shareholders(self):
+ self.log("sending plaintext hash tree", level=log.NOISY)
+ self.set_status("Sending Plaintext Hash Tree")
+ self.set_encode_and_push_progress(extra=0.2)
+ dl = []
+ for shareid in self.landlords.keys():
+ d = self.send_plaintext_hash_tree(shareid,
+ self._plaintext_hashtree_nodes)
+ dl.append(d)
+ return self._gather_responses(dl)
+
+ def send_plaintext_hash_tree(self, shareid, all_hashes):
+ if shareid not in self.landlords:
+ return defer.succeed(None)
+ sh = self.landlords[shareid]
+ d = sh.put_plaintext_hashes(all_hashes)
+ d.addErrback(self._remove_shareholder, shareid, "put_plaintext_hashes")
+ return d
+
+ def send_crypttext_hash_tree_to_all_shareholders(self):
+ self.log("sending crypttext hash tree", level=log.NOISY)
+ self.set_status("Sending Crypttext Hash Tree")
+ self.set_encode_and_push_progress(extra=0.3)
+ t = HashTree(self._crypttext_hashes)
+ all_hashes = list(t)
+ self.uri_extension_data["crypttext_root_hash"] = t[0]
+ dl = []
+ for shareid in self.landlords.keys():
+ dl.append(self.send_crypttext_hash_tree(shareid, all_hashes))
+ return self._gather_responses(dl)
+
+ def send_crypttext_hash_tree(self, shareid, all_hashes):
+ if shareid not in self.landlords:
+ return defer.succeed(None)
+ sh = self.landlords[shareid]
+ d = sh.put_crypttext_hashes(all_hashes)
+ d.addErrback(self._remove_shareholder, shareid, "put_crypttext_hashes")
+ return d
+
+ def send_all_subshare_hash_trees(self):
+ self.log("sending subshare hash trees", level=log.NOISY)
+ self.set_status("Sending Subshare Hash Trees")
+ self.set_encode_and_push_progress(extra=0.4)
+ dl = []
+ for shareid,hashes in enumerate(self.subshare_hashes):
+ # hashes is a list of the hashes of all subshares that were sent
+ # to shareholder[shareid].
+ dl.append(self.send_one_subshare_hash_tree(shareid, hashes))
+ return self._gather_responses(dl)
+
+ def send_one_subshare_hash_tree(self, shareid, subshare_hashes):
+ t = HashTree(subshare_hashes)
+ all_hashes = list(t)
+ # all_hashes[0] is the root hash, == hash(ah[1]+ah[2])
+ # all_hashes[1] is the left child, == hash(ah[3]+ah[4])
+ # all_hashes[n] == hash(all_hashes[2*n+1] + all_hashes[2*n+2])
+ self.share_root_hashes[shareid] = t[0]
+ if shareid not in self.landlords:
+ return defer.succeed(None)
+ sh = self.landlords[shareid]
+ d = sh.put_block_hashes(all_hashes)
+ d.addErrback(self._remove_shareholder, shareid, "put_block_hashes")
+ return d
+
+ def send_all_share_hash_trees(self):
+ # each bucket gets a set of share hash tree nodes that are needed to
+ # validate their share. This includes the share hash itself, but does
+ # not include the top-level hash root (which is stored securely in
+ # the URI instead).
+ self.log("sending all share hash trees", level=log.NOISY)
+ self.set_status("Sending Share Hash Trees")
+ self.set_encode_and_push_progress(extra=0.6)
+ dl = []
+ for h in self.share_root_hashes:
+ assert h
+ # create the share hash tree
+ t = HashTree(self.share_root_hashes)
+ # the root of this hash tree goes into our URI
+ self.uri_extension_data['share_root_hash'] = t[0]
+ # now send just the necessary pieces out to each shareholder
+ for i in range(self.num_shares):
+            # the HashTree is given a list of leaves: 0,1,2,..n-1 . These
+            # become nodes A+0,A+1,A+2.. of the tree, where A=N-1 and N is
+            # the number of leaves after padding up to a power of two
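+            # (for example, with four shares the padded tree has seven
+            # nodes and the leaves sit at indices 3..6, so A = 3)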
+ needed_hash_indices = t.needed_hashes(i, include_leaf=True)
+ hashes = [(hi, t[hi]) for hi in needed_hash_indices]
+ dl.append(self.send_one_share_hash_tree(i, hashes))
+ return self._gather_responses(dl)
+
+ def send_one_share_hash_tree(self, shareid, needed_hashes):
+ if shareid not in self.landlords:
+ return defer.succeed(None)
+ sh = self.landlords[shareid]
+ d = sh.put_share_hashes(needed_hashes)
+ d.addErrback(self._remove_shareholder, shareid, "put_share_hashes")
+ return d
+
+ def send_uri_extension_to_all_shareholders(self):
+ lp = self.log("sending uri_extension", level=log.NOISY)
+ self.set_status("Sending URI Extensions")
+ self.set_encode_and_push_progress(extra=0.8)
+ for k in ('crypttext_root_hash', 'crypttext_hash',
+ ):
+ assert k in self.uri_extension_data
+ if self.USE_PLAINTEXT_HASHES:
+ for k in ('plaintext_root_hash', 'plaintext_hash',
+ ):
+ assert k in self.uri_extension_data
+ uri_extension = uri.pack_extension(self.uri_extension_data)
+ ed = {}
+ for k,v in self.uri_extension_data.items():
+ if k.endswith("hash"):
+ ed[k] = base32.b2a(v)
+ else:
+ ed[k] = v
+ self.log("uri_extension_data is %s" % (ed,), level=log.NOISY, parent=lp)
+ self.uri_extension_hash = hashutil.uri_extension_hash(uri_extension)
+ dl = []
+ for shareid in self.landlords.keys():
+ dl.append(self.send_uri_extension(shareid, uri_extension))
+ return self._gather_responses(dl)
+
+ def send_uri_extension(self, shareid, uri_extension):
+ sh = self.landlords[shareid]
+ d = sh.put_uri_extension(uri_extension)
+ d.addErrback(self._remove_shareholder, shareid, "put_uri_extension")
+ return d
+
+ def close_all_shareholders(self):
+ self.log("closing shareholders", level=log.NOISY)
+ self.set_status("Closing Shareholders")
+ self.set_encode_and_push_progress(extra=0.9)
+ dl = []
+ for shareid in self.landlords:
+ d = self.landlords[shareid].close()
+ d.addErrback(self._remove_shareholder, shareid, "close")
+ dl.append(d)
+ return self._gather_responses(dl)
+
+ def done(self, res):
+ self.log("upload done", level=log.OPERATIONAL)
+ self.set_status("Done")
+ self.set_encode_and_push_progress(extra=1.0) # done
+ now = time.time()
+ h_and_c_elapsed = now - self._start_hashing_and_close_timestamp
+ self._times["hashes_and_close"] = h_and_c_elapsed
+ total_elapsed = now - self._start_total_timestamp
+ self._times["total_encode_and_push"] = total_elapsed
+
+ # update our sharemap
+ self._shares_placed = set(self.landlords.keys())
+ return (self.uri_extension_hash, self.required_shares,
+ self.num_shares, self.file_size)
+
+ def err(self, f):
+ self.log("upload failed", failure=f, level=log.UNUSUAL)
+ self.set_status("Failed")
+ # we need to abort any remaining shareholders, so they'll delete the
+ # partial share, allowing someone else to upload it again.
+ self.log("aborting shareholders", level=log.UNUSUAL)
+ for shareid in list(self.landlords.keys()):
+ self.landlords[shareid].abort()
+ if f.check(defer.FirstError):
+ return f.value.subFailure
+ return f
+
+ def get_shares_placed(self):
+ # return a set of share numbers that were successfully placed.
+ return self._shares_placed
+
+ def get_times(self):
+ # return a dictionary of encode+push timings
+ return self._times
+
+ def get_uri_extension_data(self):
+ return self.uri_extension_data
--- /dev/null
+
+from zope.interface import implements
+from twisted.internet import defer
+from allmydata.interfaces import IFileNode, IFileURI, IURI, ICheckable
+from allmydata import uri
+from allmydata.immutable.checker import Results, \
+ SimpleCHKFileChecker, SimpleCHKFileVerifier
+
+class FileNode:
+ implements(IFileNode, ICheckable)
+
+ def __init__(self, uri, client):
+ u = IFileURI(uri)
+ self.uri = u.to_string()
+ self._client = client
+
+ def get_uri(self):
+ return self.uri
+
+ def is_mutable(self):
+ return False
+
+ def is_readonly(self):
+ return True
+
+ def get_readonly_uri(self):
+ return self.uri
+
+ def get_size(self):
+ return IFileURI(self.uri).get_size()
+
+ def __hash__(self):
+ return hash((self.__class__, self.uri))
+ def __cmp__(self, them):
+ if cmp(type(self), type(them)):
+ return cmp(type(self), type(them))
+ if cmp(self.__class__, them.__class__):
+ return cmp(self.__class__, them.__class__)
+ return cmp(self.uri, them.uri)
+
+ def get_verifier(self):
+ return IFileURI(self.uri).get_verifier()
+
+ def check(self, verify=False, repair=False):
+ assert repair is False # not implemented yet
+ vcap = self.get_verifier()
+ if verify:
+ v = SimpleCHKFileVerifier(self._client, vcap)
+ return v.start()
+ else:
+ peer_getter = self._client.get_permuted_peers
+ v = SimpleCHKFileChecker(peer_getter, vcap)
+ return v.check()
+
+ def download(self, target):
+ downloader = self._client.getServiceNamed("downloader")
+ return downloader.download(self.uri, target)
+
+ def download_to_data(self):
+ downloader = self._client.getServiceNamed("downloader")
+ return downloader.download_to_data(self.uri)
+
+
+
+class LiteralFileNode:
+ implements(IFileNode, ICheckable)
+
+ def __init__(self, my_uri, client):
+ u = IFileURI(my_uri)
+ assert isinstance(u, uri.LiteralFileURI)
+ self.uri = u.to_string()
+ self._client = client
+
+ def get_uri(self):
+ return self.uri
+
+ def is_mutable(self):
+ return False
+
+ def is_readonly(self):
+ return True
+
+ def get_readonly_uri(self):
+ return self.uri
+
+ def get_size(self):
+ return len(IURI(self.uri).data)
+
+ def __hash__(self):
+ return hash((self.__class__, self.uri))
+ def __cmp__(self, them):
+ if cmp(type(self), type(them)):
+ return cmp(type(self), type(them))
+ if cmp(self.__class__, them.__class__):
+ return cmp(self.__class__, them.__class__)
+ return cmp(self.uri, them.uri)
+
+ def get_verifier(self):
+ return None
+
+ def check(self, verify=False, repair=False):
+ # neither verify= nor repair= affect LIT files
+ r = Results(None)
+ r.healthy = True
+ r.problems = []
+ return defer.succeed(r)
+
+ def download(self, target):
+ # note that this does not update the stats_provider
+ data = IURI(self.uri).data
+ target.open(len(data))
+ target.write(data)
+ target.close()
+ return defer.maybeDeferred(target.finish)
+
+ def download_to_data(self):
+ data = IURI(self.uri).data
+ return defer.succeed(data)
--- /dev/null
+
+import os, time, weakref, itertools
+from zope.interface import implements
+from twisted.python import failure
+from twisted.internet import defer
+from twisted.application import service
+from foolscap import Referenceable, Copyable, RemoteCopy
+from foolscap import eventual
+from foolscap.logging import log
+
+from allmydata.util.hashutil import file_renewal_secret_hash, \
+ file_cancel_secret_hash, bucket_renewal_secret_hash, \
+ bucket_cancel_secret_hash, plaintext_hasher, \
+ storage_index_hash, plaintext_segment_hasher, convergence_hasher
+from allmydata import storage, hashtree, uri
+from allmydata.immutable import encode
+from allmydata.util import base32, idlib, mathutil
+from allmydata.util.assertutil import precondition
+from allmydata.interfaces import IUploadable, IUploader, IUploadResults, \
+ IEncryptedUploadable, RIEncryptedUploadable, IUploadStatus
+from pycryptopp.cipher.aes import AES
+
+from cStringIO import StringIO
+
+
+KiB=1024
+MiB=1024*KiB
+GiB=1024*MiB
+TiB=1024*GiB
+PiB=1024*TiB
+
+class HaveAllPeersError(Exception):
+ # we use this to jump out of the loop
+ pass
+
+# this wants to live in storage, not here
+class TooFullError(Exception):
+ pass
+
+class UploadResults(Copyable, RemoteCopy):
+ implements(IUploadResults)
+ # note: don't change this string, it needs to match the value used on the
+ # helper, and it does *not* need to match the fully-qualified
+ # package/module/class name
+ typeToCopy = "allmydata.upload.UploadResults.tahoe.allmydata.com"
+ copytype = typeToCopy
+
+ def __init__(self):
+ self.timings = {} # dict of name to number of seconds
+ self.sharemap = {} # dict of shnum to placement string
+ self.servermap = {} # dict of peerid to set(shnums)
+ self.file_size = None
+ self.ciphertext_fetched = None # how much the helper fetched
+ self.uri = None
+ self.preexisting_shares = None # count of shares already present
+ self.pushed_shares = None # count of shares we pushed
+
+
+# our current uri_extension is 846 bytes for small files, a few bytes
+# more for larger ones (since the filesize is encoded in decimal in a
+# few places). Ask for a little bit more just in case we need it. If
+# the extension changes size, we can change EXTENSION_SIZE to
+# allocate a more accurate amount of space.
+EXTENSION_SIZE = 1000
+# TODO: actual extensions are closer to 419 bytes, so we can probably lower
+# this.
+
+class PeerTracker:
+ def __init__(self, peerid, storage_server,
+ sharesize, blocksize, num_segments, num_share_hashes,
+ storage_index,
+ bucket_renewal_secret, bucket_cancel_secret):
+ precondition(isinstance(peerid, str), peerid)
+ precondition(len(peerid) == 20, peerid)
+ self.peerid = peerid
+ self._storageserver = storage_server # to an RIStorageServer
+ self.buckets = {} # k: shareid, v: IRemoteBucketWriter
+ self.sharesize = sharesize
+        self.allocated_size = storage.allocated_size(sharesize,
+                                                     num_segments,
+                                                     num_share_hashes,
+                                                     EXTENSION_SIZE)
+
+ self.blocksize = blocksize
+ self.num_segments = num_segments
+ self.num_share_hashes = num_share_hashes
+ self.storage_index = storage_index
+
+ self.renew_secret = bucket_renewal_secret
+ self.cancel_secret = bucket_cancel_secret
+
+ def __repr__(self):
+ return ("<PeerTracker for peer %s and SI %s>"
+ % (idlib.shortnodeid_b2a(self.peerid),
+ storage.si_b2a(self.storage_index)[:5]))
+
+ def query(self, sharenums):
+ d = self._storageserver.callRemote("allocate_buckets",
+ self.storage_index,
+ self.renew_secret,
+ self.cancel_secret,
+ sharenums,
+ self.allocated_size,
+ canary=Referenceable())
+ d.addCallback(self._got_reply)
+ return d
+
+ def _got_reply(self, (alreadygot, buckets)):
+ #log.msg("%s._got_reply(%s)" % (self, (alreadygot, buckets)))
+ b = {}
+ for sharenum, rref in buckets.iteritems():
+ bp = storage.WriteBucketProxy(rref, self.sharesize,
+ self.blocksize,
+ self.num_segments,
+ self.num_share_hashes,
+ EXTENSION_SIZE,
+ self.peerid)
+ b[sharenum] = bp
+ self.buckets.update(b)
+ return (alreadygot, set(b.keys()))
+
+class Tahoe2PeerSelector:
+
+ def __init__(self, upload_id, logparent=None, upload_status=None):
+ self.upload_id = upload_id
+ self.query_count, self.good_query_count, self.bad_query_count = 0,0,0
+ self.error_count = 0
+ self.num_peers_contacted = 0
+ self.last_failure_msg = None
+ self._status = IUploadStatus(upload_status)
+ self._log_parent = log.msg("%s starting" % self, parent=logparent)
+
+ def __repr__(self):
+ return "<Tahoe2PeerSelector for upload %s>" % self.upload_id
+
+ def get_shareholders(self, client,
+ storage_index, share_size, block_size,
+ num_segments, total_shares, shares_of_happiness):
+ """
+ @return: (used_peers, already_peers), where used_peers is a set of
+ PeerTracker instances that have agreed to hold some shares
+ for us (the shnum is stashed inside the PeerTracker),
+ and already_peers is a dict mapping shnum to a peer
+ which claims to already have the share.
+ """
+
+ if self._status:
+ self._status.set_status("Contacting Peers..")
+
+ self.total_shares = total_shares
+ self.shares_of_happiness = shares_of_happiness
+
+ self.homeless_shares = range(total_shares)
+ # self.uncontacted_peers = list() # peers we haven't asked yet
+ self.contacted_peers = [] # peers worth asking again
+ self.contacted_peers2 = [] # peers that we have asked again
+ self._started_second_pass = False
+ self.use_peers = set() # PeerTrackers that have shares assigned to them
+ self.preexisting_shares = {} # sharenum -> peerid holding the share
+
+ peers = client.get_permuted_peers("storage", storage_index)
+ if not peers:
+ raise encode.NotEnoughSharesError("client gave us zero peers")
+
+ # figure out how much space to ask for
+
+ # this needed_hashes computation should mirror
+ # Encoder.send_all_share_hash_trees. We use an IncompleteHashTree
+ # (instead of a HashTree) because we don't require actual hashing
+ # just to count the levels.
+ ht = hashtree.IncompleteHashTree(total_shares)
+ num_share_hashes = len(ht.needed_hashes(0, include_leaf=True))
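+        # (for example, ten total shares pad out to a sixteen-leaf tree, so
+        # each share needs its own leaf hash plus one sibling hash per level
+        # on the path up to the root)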
+
+ # decide upon the renewal/cancel secrets, to include them in the
+        # allocate_buckets query.
+ client_renewal_secret = client.get_renewal_secret()
+ client_cancel_secret = client.get_cancel_secret()
+
+ file_renewal_secret = file_renewal_secret_hash(client_renewal_secret,
+ storage_index)
+ file_cancel_secret = file_cancel_secret_hash(client_cancel_secret,
+ storage_index)
+
+ trackers = [ PeerTracker(peerid, conn,
+ share_size, block_size,
+ num_segments, num_share_hashes,
+ storage_index,
+ bucket_renewal_secret_hash(file_renewal_secret,
+ peerid),
+ bucket_cancel_secret_hash(file_cancel_secret,
+ peerid),
+ )
+ for (peerid, conn) in peers ]
+ self.uncontacted_peers = trackers
+
+ d = defer.maybeDeferred(self._loop)
+ return d
+
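+    # _loop() drives peer selection in passes. On the first pass each
+    # uncontacted peer is asked to hold a single share; a peer that accepts
+    # (or already has) everything we asked of it is kept in contacted_peers
+    # and asked again on later passes, where the remaining homeless shares
+    # are spread evenly across those peers. Peers asked on a later pass move
+    # to contacted_peers2 and migrate back when the pass is exhausted. We
+    # finish when every share has a home, or when we run out of peers: in
+    # the latter case we succeed only if at least shares_of_happiness shares
+    # were placed, and raise NotEnoughSharesError otherwise.
+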
+ def _loop(self):
+ if not self.homeless_shares:
+ # all done
+ msg = ("placed all %d shares, "
+ "sent %d queries to %d peers, "
+ "%d queries placed some shares, %d placed none, "
+ "got %d errors" %
+ (self.total_shares,
+ self.query_count, self.num_peers_contacted,
+ self.good_query_count, self.bad_query_count,
+ self.error_count))
+ log.msg("peer selection successful for %s: %s" % (self, msg),
+ parent=self._log_parent)
+ return (self.use_peers, self.preexisting_shares)
+
+ if self.uncontacted_peers:
+ peer = self.uncontacted_peers.pop(0)
+ # TODO: don't pre-convert all peerids to PeerTrackers
+ assert isinstance(peer, PeerTracker)
+
+ shares_to_ask = set([self.homeless_shares.pop(0)])
+ self.query_count += 1
+ self.num_peers_contacted += 1
+ if self._status:
+ self._status.set_status("Contacting Peers [%s] (first query),"
+ " %d shares left.."
+ % (idlib.shortnodeid_b2a(peer.peerid),
+ len(self.homeless_shares)))
+ d = peer.query(shares_to_ask)
+ d.addBoth(self._got_response, peer, shares_to_ask,
+ self.contacted_peers)
+ return d
+ elif self.contacted_peers:
+ # ask a peer that we've already asked.
+ if not self._started_second_pass:
+ log.msg("starting second pass", parent=self._log_parent,
+ level=log.NOISY)
+ self._started_second_pass = True
+ num_shares = mathutil.div_ceil(len(self.homeless_shares),
+ len(self.contacted_peers))
+ peer = self.contacted_peers.pop(0)
+ shares_to_ask = set(self.homeless_shares[:num_shares])
+ self.homeless_shares[:num_shares] = []
+ self.query_count += 1
+ if self._status:
+ self._status.set_status("Contacting Peers [%s] (second query),"
+ " %d shares left.."
+ % (idlib.shortnodeid_b2a(peer.peerid),
+ len(self.homeless_shares)))
+ d = peer.query(shares_to_ask)
+ d.addBoth(self._got_response, peer, shares_to_ask,
+ self.contacted_peers2)
+ return d
+ elif self.contacted_peers2:
+ # we've finished the second-or-later pass. Move all the remaining
+ # peers back into self.contacted_peers for the next pass.
+ self.contacted_peers.extend(self.contacted_peers2)
+            self.contacted_peers2[:] = []
+ return self._loop()
+ else:
+ # no more peers. If we haven't placed enough shares, we fail.
+ placed_shares = self.total_shares - len(self.homeless_shares)
+ if placed_shares < self.shares_of_happiness:
+ msg = ("placed %d shares out of %d total (%d homeless), "
+ "sent %d queries to %d peers, "
+ "%d queries placed some shares, %d placed none, "
+ "got %d errors" %
+ (self.total_shares - len(self.homeless_shares),
+ self.total_shares, len(self.homeless_shares),
+ self.query_count, self.num_peers_contacted,
+ self.good_query_count, self.bad_query_count,
+ self.error_count))
+ msg = "peer selection failed for %s: %s" % (self, msg)
+ if self.last_failure_msg:
+ msg += " (%s)" % (self.last_failure_msg,)
+ log.msg(msg, level=log.UNUSUAL, parent=self._log_parent)
+ raise encode.NotEnoughSharesError(msg)
+ else:
+ # we placed enough to be happy, so we're done
+ if self._status:
+ self._status.set_status("Placed all shares")
+                return (self.use_peers, self.preexisting_shares)
+
+ def _got_response(self, res, peer, shares_to_ask, put_peer_here):
+ if isinstance(res, failure.Failure):
+ # This is unusual, and probably indicates a bug or a network
+ # problem.
+ log.msg("%s got error during peer selection: %s" % (peer, res),
+ level=log.UNUSUAL, parent=self._log_parent)
+ self.error_count += 1
+ self.homeless_shares = list(shares_to_ask) + self.homeless_shares
+ if (self.uncontacted_peers
+ or self.contacted_peers
+ or self.contacted_peers2):
+ # there is still hope, so just loop
+ pass
+ else:
+ # No more peers, so this upload might fail (it depends upon
+ # whether we've hit shares_of_happiness or not). Log the last
+ # failure we got: if a coding error causes all peers to fail
+ # in the same way, this allows the common failure to be seen
+ # by the uploader and should help with debugging
+ msg = ("last failure (from %s) was: %s" % (peer, res))
+ self.last_failure_msg = msg
+ else:
+ (alreadygot, allocated) = res
+ log.msg("response from peer %s: alreadygot=%s, allocated=%s"
+ % (idlib.shortnodeid_b2a(peer.peerid),
+ tuple(sorted(alreadygot)), tuple(sorted(allocated))),
+ level=log.NOISY, parent=self._log_parent)
+ progress = False
+ for s in alreadygot:
+ self.preexisting_shares[s] = peer.peerid
+ if s in self.homeless_shares:
+ self.homeless_shares.remove(s)
+ progress = True
+
+ # the PeerTracker will remember which shares were allocated on
+ # that peer. We just have to remember to use them.
+ if allocated:
+ self.use_peers.add(peer)
+ progress = True
+
+ not_yet_present = set(shares_to_ask) - set(alreadygot)
+ still_homeless = not_yet_present - set(allocated)
+
+ if progress:
+ # they accepted or already had at least one share, so
+ # progress has been made
+ self.good_query_count += 1
+ else:
+ self.bad_query_count += 1
+
+ if still_homeless:
+ # In networks with lots of space, this is very unusual and
+ # probably indicates an error. In networks with peers that
+ # are full, it is merely unusual. In networks that are very
+ # full, it is common, and many uploads will fail. In most
+ # cases, this is obviously not fatal, and we'll just use some
+ # other peers.
+
+ # some shares are still homeless, keep trying to find them a
+ # home. The ones that were rejected get first priority.
+ self.homeless_shares = (list(still_homeless)
+ + self.homeless_shares)
+                # Since they were unable to accept all of our requests, it
+                # is safe to assume that asking them again won't help.
+ else:
+ # if they *were* able to accept everything, they might be
+ # willing to accept even more.
+ put_peer_here.append(peer)
+
+ # now loop
+ return self._loop()
+
+
+class EncryptAnUploadable:
+ """This is a wrapper that takes an IUploadable and provides
+ IEncryptedUploadable."""
+ implements(IEncryptedUploadable)
+ CHUNKSIZE = 50*1024
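+    # A sketch of typical use (see CHKUploader.start/start_encrypted below;
+    # the local names here are placeholders):
+    #   eu = EncryptAnUploadable(uploadable, log_parent)
+    #   eu.set_upload_status(upload_status)
+    #   d = encoder.set_encrypted_uploadable(eu)  # encoder is an encode.Encoder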
+
+ def __init__(self, original, log_parent=None):
+ self.original = IUploadable(original)
+ self._log_number = log_parent
+ self._encryptor = None
+ self._plaintext_hasher = plaintext_hasher()
+ self._plaintext_segment_hasher = None
+ self._plaintext_segment_hashes = []
+ self._encoding_parameters = None
+ self._file_size = None
+ self._ciphertext_bytes_read = 0
+ self._status = None
+
+ def set_upload_status(self, upload_status):
+ self._status = IUploadStatus(upload_status)
+ self.original.set_upload_status(upload_status)
+
+ def log(self, *args, **kwargs):
+ if "facility" not in kwargs:
+ kwargs["facility"] = "upload.encryption"
+ if "parent" not in kwargs:
+ kwargs["parent"] = self._log_number
+ return log.msg(*args, **kwargs)
+
+ def get_size(self):
+ if self._file_size is not None:
+ return defer.succeed(self._file_size)
+ d = self.original.get_size()
+ def _got_size(size):
+ self._file_size = size
+ if self._status:
+ self._status.set_size(size)
+ return size
+ d.addCallback(_got_size)
+ return d
+
+ def get_all_encoding_parameters(self):
+ if self._encoding_parameters is not None:
+ return defer.succeed(self._encoding_parameters)
+ d = self.original.get_all_encoding_parameters()
+ def _got(encoding_parameters):
+ (k, happy, n, segsize) = encoding_parameters
+ self._segment_size = segsize # used by segment hashers
+ self._encoding_parameters = encoding_parameters
+ self.log("my encoding parameters: %s" % (encoding_parameters,),
+ level=log.NOISY)
+ return encoding_parameters
+ d.addCallback(_got)
+ return d
+
+ def _get_encryptor(self):
+ if self._encryptor:
+ return defer.succeed(self._encryptor)
+
+ d = self.original.get_encryption_key()
+ def _got(key):
+ e = AES(key)
+ self._encryptor = e
+
+ storage_index = storage_index_hash(key)
+ assert isinstance(storage_index, str)
+ # There's no point to having the SI be longer than the key, so we
+ # specify that it is truncated to the same 128 bits as the AES key.
+ assert len(storage_index) == 16 # SHA-256 truncated to 128b
+ self._storage_index = storage_index
+ if self._status:
+ self._status.set_storage_index(storage_index)
+ return e
+ d.addCallback(_got)
+ return d
+
+ def get_storage_index(self):
+ d = self._get_encryptor()
+ d.addCallback(lambda res: self._storage_index)
+ return d
+
+ def _get_segment_hasher(self):
+ p = self._plaintext_segment_hasher
+ if p:
+ left = self._segment_size - self._plaintext_segment_hashed_bytes
+ return p, left
+ p = plaintext_segment_hasher()
+ self._plaintext_segment_hasher = p
+ self._plaintext_segment_hashed_bytes = 0
+ return p, self._segment_size
+
+ def _update_segment_hash(self, chunk):
+ offset = 0
+ while offset < len(chunk):
+ p, segment_left = self._get_segment_hasher()
+ chunk_left = len(chunk) - offset
+ this_segment = min(chunk_left, segment_left)
+ p.update(chunk[offset:offset+this_segment])
+ self._plaintext_segment_hashed_bytes += this_segment
+
+ if self._plaintext_segment_hashed_bytes == self._segment_size:
+ # we've filled this segment
+ self._plaintext_segment_hashes.append(p.digest())
+ self._plaintext_segment_hasher = None
+ self.log("closed hash [%d]: %dB" %
+ (len(self._plaintext_segment_hashes)-1,
+ self._plaintext_segment_hashed_bytes),
+ level=log.NOISY)
+ self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s",
+ segnum=len(self._plaintext_segment_hashes)-1,
+ hash=base32.b2a(p.digest()),
+ level=log.NOISY)
+
+ offset += this_segment
+
+
+ def read_encrypted(self, length, hash_only):
+ # make sure our parameters have been set up first
+ d = self.get_all_encoding_parameters()
+ # and size
+ d.addCallback(lambda ignored: self.get_size())
+ d.addCallback(lambda ignored: self._get_encryptor())
+ # then fetch and encrypt the plaintext. The unusual structure here
+ # (passing a Deferred *into* a function) is needed to avoid
+ # overflowing the stack: Deferreds don't optimize out tail recursion.
+ # We also pass in a list, to which _read_encrypted will append
+ # ciphertext.
+ ciphertext = []
+ d2 = defer.Deferred()
+ d.addCallback(lambda ignored:
+ self._read_encrypted(length, ciphertext, hash_only, d2))
+ d.addCallback(lambda ignored: d2)
+ return d
+
+ def _read_encrypted(self, remaining, ciphertext, hash_only, fire_when_done):
+ if not remaining:
+ fire_when_done.callback(ciphertext)
+ return None
+ # tolerate large length= values without consuming a lot of RAM by
+ # reading just a chunk (say 50kB) at a time. This only really matters
+ # when hash_only==True (i.e. resuming an interrupted upload), since
+ # that's the case where we will be skipping over a lot of data.
+ size = min(remaining, self.CHUNKSIZE)
+ remaining = remaining - size
+ # read a chunk of plaintext..
+ d = defer.maybeDeferred(self.original.read, size)
+ # N.B.: if read() is synchronous, then since everything else is
+ # actually synchronous too, we'd blow the stack unless we stall for a
+ # tick. Once you accept a Deferred from IUploadable.read(), you must
+ # be prepared to have it fire immediately too.
+ d.addCallback(eventual.fireEventually)
+ def _good(plaintext):
+ # and encrypt it..
+ # o/' over the fields we go, hashing all the way, sHA! sHA! sHA! o/'
+ ct = self._hash_and_encrypt_plaintext(plaintext, hash_only)
+ ciphertext.extend(ct)
+ self._read_encrypted(remaining, ciphertext, hash_only,
+ fire_when_done)
+ def _err(why):
+ fire_when_done.errback(why)
+ d.addCallback(_good)
+ d.addErrback(_err)
+ return None
+
+ def _hash_and_encrypt_plaintext(self, data, hash_only):
+ assert isinstance(data, (tuple, list)), type(data)
+ data = list(data)
+ cryptdata = []
+ # we use data.pop(0) instead of 'for chunk in data' to save
+ # memory: each chunk is destroyed as soon as we're done with it.
+ bytes_processed = 0
+ while data:
+ chunk = data.pop(0)
+ self.log(" read_encrypted handling %dB-sized chunk" % len(chunk),
+ level=log.NOISY)
+ bytes_processed += len(chunk)
+ self._plaintext_hasher.update(chunk)
+ self._update_segment_hash(chunk)
+ # TODO: we have to encrypt the data (even if hash_only==True)
+ # because pycryptopp's AES-CTR implementation doesn't offer a
+ # way to change the counter value. Once pycryptopp acquires
+ # this ability, change this to simply update the counter
+ # before each call to (hash_only==False) _encryptor.process()
+ ciphertext = self._encryptor.process(chunk)
+ if hash_only:
+ self.log(" skipping encryption", level=log.NOISY)
+ else:
+ cryptdata.append(ciphertext)
+ del ciphertext
+ del chunk
+ self._ciphertext_bytes_read += bytes_processed
+ if self._status:
+ progress = float(self._ciphertext_bytes_read) / self._file_size
+ self._status.set_progress(1, progress)
+ return cryptdata
+
+
+ def get_plaintext_hashtree_leaves(self, first, last, num_segments):
+ if len(self._plaintext_segment_hashes) < num_segments:
+ # close out the last one
+ assert len(self._plaintext_segment_hashes) == num_segments-1
+ p, segment_left = self._get_segment_hasher()
+ self._plaintext_segment_hashes.append(p.digest())
+ del self._plaintext_segment_hasher
+ self.log("closing plaintext leaf hasher, hashed %d bytes" %
+ self._plaintext_segment_hashed_bytes,
+ level=log.NOISY)
+ self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s",
+ segnum=len(self._plaintext_segment_hashes)-1,
+ hash=base32.b2a(p.digest()),
+ level=log.NOISY)
+ assert len(self._plaintext_segment_hashes) == num_segments
+ return defer.succeed(tuple(self._plaintext_segment_hashes[first:last]))
+
+ def get_plaintext_hash(self):
+ h = self._plaintext_hasher.digest()
+ return defer.succeed(h)
+
+ def close(self):
+ return self.original.close()
+
+class UploadStatus:
+ implements(IUploadStatus)
+ statusid_counter = itertools.count(0)
+
+ def __init__(self):
+ self.storage_index = None
+ self.size = None
+ self.helper = False
+ self.status = "Not started"
+ self.progress = [0.0, 0.0, 0.0]
+ self.active = True
+ self.results = None
+ self.counter = self.statusid_counter.next()
+ self.started = time.time()
+
+ def get_started(self):
+ return self.started
+ def get_storage_index(self):
+ return self.storage_index
+ def get_size(self):
+ return self.size
+ def using_helper(self):
+ return self.helper
+ def get_status(self):
+ return self.status
+ def get_progress(self):
+ return tuple(self.progress)
+ def get_active(self):
+ return self.active
+ def get_results(self):
+ return self.results
+ def get_counter(self):
+ return self.counter
+
+ def set_storage_index(self, si):
+ self.storage_index = si
+ def set_size(self, size):
+ self.size = size
+ def set_helper(self, helper):
+ self.helper = helper
+ def set_status(self, status):
+ self.status = status
+ def set_progress(self, which, value):
+ # [0]: chk, [1]: ciphertext, [2]: encode+push
+ self.progress[which] = value
+ def set_active(self, value):
+ self.active = value
+ def set_results(self, value):
+ self.results = value
+
+class CHKUploader:
+ peer_selector_class = Tahoe2PeerSelector
+
+ def __init__(self, client):
+ self._client = client
+ self._log_number = self._client.log("CHKUploader starting")
+ self._encoder = None
+ self._results = UploadResults()
+ self._storage_index = None
+ self._upload_status = UploadStatus()
+ self._upload_status.set_helper(False)
+ self._upload_status.set_active(True)
+ self._upload_status.set_results(self._results)
+
+ def log(self, *args, **kwargs):
+ if "parent" not in kwargs:
+ kwargs["parent"] = self._log_number
+ if "facility" not in kwargs:
+ kwargs["facility"] = "tahoe.upload"
+ return self._client.log(*args, **kwargs)
+
+ def start(self, uploadable):
+ """Start uploading the file.
+
+ This method returns a Deferred that will fire with the URI (a
+ string)."""
+
+ self._started = time.time()
+ uploadable = IUploadable(uploadable)
+ self.log("starting upload of %s" % uploadable)
+
+ eu = EncryptAnUploadable(uploadable, self._log_number)
+ eu.set_upload_status(self._upload_status)
+ d = self.start_encrypted(eu)
+ def _uploaded(res):
+ d1 = uploadable.get_encryption_key()
+ d1.addCallback(lambda key: self._compute_uri(res, key))
+ return d1
+ d.addCallback(_uploaded)
+ def _done(res):
+ self._upload_status.set_active(False)
+ return res
+ d.addBoth(_done)
+ return d
+
+ def abort(self):
+ """Call this is the upload must be abandoned before it completes.
+ This will tell the shareholders to delete their partial shares. I
+ return a Deferred that fires when these messages have been acked."""
+ if not self._encoder:
+ # how did you call abort() before calling start() ?
+ return defer.succeed(None)
+ return self._encoder.abort()
+
+ def start_encrypted(self, encrypted):
+ eu = IEncryptedUploadable(encrypted)
+
+ started = time.time()
+ self._encoder = e = encode.Encoder(self._log_number,
+ self._upload_status)
+ d = e.set_encrypted_uploadable(eu)
+ d.addCallback(self.locate_all_shareholders, started)
+ d.addCallback(self.set_shareholders, e)
+ d.addCallback(lambda res: e.start())
+ d.addCallback(self._encrypted_done)
+ # this fires with the uri_extension_hash and other data
+ return d
+
+ def locate_all_shareholders(self, encoder, started):
+ peer_selection_started = now = time.time()
+ self._storage_index_elapsed = now - started
+ storage_index = encoder.get_param("storage_index")
+ self._storage_index = storage_index
+ upload_id = storage.si_b2a(storage_index)[:5]
+ self.log("using storage index %s" % upload_id)
+ peer_selector = self.peer_selector_class(upload_id, self._log_number,
+ self._upload_status)
+
+ share_size = encoder.get_param("share_size")
+ block_size = encoder.get_param("block_size")
+ num_segments = encoder.get_param("num_segments")
+ k,desired,n = encoder.get_param("share_counts")
+
+ self._peer_selection_started = time.time()
+ d = peer_selector.get_shareholders(self._client, storage_index,
+ share_size, block_size,
+ num_segments, n, desired)
+ def _done(res):
+ self._peer_selection_elapsed = time.time() - peer_selection_started
+ return res
+ d.addCallback(_done)
+ return d
+
+ def set_shareholders(self, (used_peers, already_peers), encoder):
+ """
+ @param used_peers: a sequence of PeerTracker objects
+        @param already_peers: a dict mapping sharenum to a peerid that
+ claims to already have this share
+ """
+ self.log("_send_shares, used_peers is %s" % (used_peers,))
+ # record already-present shares in self._results
+ for (shnum, peerid) in already_peers.items():
+ peerid_s = idlib.shortnodeid_b2a(peerid)
+ self._results.sharemap[shnum] = "Found on [%s]" % peerid_s
+ if peerid not in self._results.servermap:
+ self._results.servermap[peerid] = set()
+ self._results.servermap[peerid].add(shnum)
+ self._results.preexisting_shares = len(already_peers)
+
+ self._sharemap = {}
+ for peer in used_peers:
+ assert isinstance(peer, PeerTracker)
+        buckets = {}
+ for peer in used_peers:
+ buckets.update(peer.buckets)
+ for shnum in peer.buckets:
+ self._sharemap[shnum] = peer
+ assert len(buckets) == sum([len(peer.buckets) for peer in used_peers])
+ encoder.set_shareholders(buckets)
+
+ def _encrypted_done(self, res):
+ r = self._results
+ for shnum in self._encoder.get_shares_placed():
+ peer_tracker = self._sharemap[shnum]
+ peerid = peer_tracker.peerid
+ peerid_s = idlib.shortnodeid_b2a(peerid)
+ r.sharemap[shnum] = "Placed on [%s]" % peerid_s
+ if peerid not in r.servermap:
+ r.servermap[peerid] = set()
+ r.servermap[peerid].add(shnum)
+ r.pushed_shares = len(self._encoder.get_shares_placed())
+ now = time.time()
+ r.file_size = self._encoder.file_size
+ r.timings["total"] = now - self._started
+ r.timings["storage_index"] = self._storage_index_elapsed
+ r.timings["peer_selection"] = self._peer_selection_elapsed
+ r.timings.update(self._encoder.get_times())
+ r.uri_extension_data = self._encoder.get_uri_extension_data()
+ return res
+
+ def _compute_uri(self, (uri_extension_hash,
+ needed_shares, total_shares, size),
+ key):
+ u = uri.CHKFileURI(key=key,
+ uri_extension_hash=uri_extension_hash,
+ needed_shares=needed_shares,
+ total_shares=total_shares,
+ size=size,
+ )
+ r = self._results
+ r.uri = u.to_string()
+ return r
+
+ def get_upload_status(self):
+ return self._upload_status
+
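+# read_this_many_bytes: keep calling uploadable.read() until 'size' bytes
+# have been accumulated, returning a Deferred that fires with the list of
+# data pieces. LiteralUploader uses this to read small files entirely into
+# memory.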
+def read_this_many_bytes(uploadable, size, prepend_data=[]):
+ if size == 0:
+ return defer.succeed([])
+ d = uploadable.read(size)
+ def _got(data):
+ assert isinstance(data, list)
+ bytes = sum([len(piece) for piece in data])
+ assert bytes > 0
+ assert bytes <= size
+ remaining = size - bytes
+ if remaining:
+ return read_this_many_bytes(uploadable, remaining,
+ prepend_data + data)
+ return prepend_data + data
+ d.addCallback(_got)
+ return d
+
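+# LiteralUploader handles files small enough to be stored as a LIT URI (see
+# Uploader.URI_LIT_SIZE_THRESHOLD below): the plaintext is embedded directly
+# in the URI, so nothing is pushed to storage servers.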
+class LiteralUploader:
+
+ def __init__(self, client):
+ self._client = client
+ self._results = UploadResults()
+ self._status = s = UploadStatus()
+ s.set_storage_index(None)
+ s.set_helper(False)
+ s.set_progress(0, 1.0)
+ s.set_active(False)
+ s.set_results(self._results)
+
+ def start(self, uploadable):
+ uploadable = IUploadable(uploadable)
+ d = uploadable.get_size()
+ def _got_size(size):
+ self._size = size
+ self._status.set_size(size)
+ self._results.file_size = size
+ return read_this_many_bytes(uploadable, size)
+ d.addCallback(_got_size)
+ d.addCallback(lambda data: uri.LiteralFileURI("".join(data)))
+ d.addCallback(lambda u: u.to_string())
+ d.addCallback(self._build_results)
+ return d
+
+ def _build_results(self, uri):
+ self._results.uri = uri
+ self._status.set_status("Done")
+ self._status.set_progress(1, 1.0)
+ self._status.set_progress(2, 1.0)
+ return self._results
+
+ def close(self):
+ pass
+
+ def get_upload_status(self):
+ return self._status
+
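+# RemoteEncryptedUploadable wraps a local IEncryptedUploadable in a foolscap
+# Referenceable so a remote Helper can pull ciphertext from us. It tracks the
+# current read offset and allows skipping forwards (reading in hash_only mode
+# so the plaintext hash trees are still built), but never seeking backwards.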
+class RemoteEncryptedUploadable(Referenceable):
+ implements(RIEncryptedUploadable)
+
+ def __init__(self, encrypted_uploadable, upload_status):
+ self._eu = IEncryptedUploadable(encrypted_uploadable)
+ self._offset = 0
+ self._bytes_sent = 0
+ self._status = IUploadStatus(upload_status)
+ # we are responsible for updating the status string while we run, and
+ # for setting the ciphertext-fetch progress.
+ self._size = None
+
+ def get_size(self):
+ if self._size is not None:
+ return defer.succeed(self._size)
+ d = self._eu.get_size()
+ def _got_size(size):
+ self._size = size
+ return size
+ d.addCallback(_got_size)
+ return d
+
+ def remote_get_size(self):
+ return self.get_size()
+ def remote_get_all_encoding_parameters(self):
+ return self._eu.get_all_encoding_parameters()
+
+ def _read_encrypted(self, length, hash_only):
+ d = self._eu.read_encrypted(length, hash_only)
+ def _read(strings):
+ if hash_only:
+ self._offset += length
+ else:
+ size = sum([len(data) for data in strings])
+ self._offset += size
+ return strings
+ d.addCallback(_read)
+ return d
+
+ def remote_read_encrypted(self, offset, length):
+ # we don't support seek backwards, but we allow skipping forwards
+ precondition(offset >= 0, offset)
+ precondition(length >= 0, length)
+ lp = log.msg("remote_read_encrypted(%d-%d)" % (offset, offset+length),
+ level=log.NOISY)
+ precondition(offset >= self._offset, offset, self._offset)
+ if offset > self._offset:
+ # read the data from disk anyway, to build up the hash tree
+ skip = offset - self._offset
+ log.msg("remote_read_encrypted skipping ahead from %d to %d, skip=%d" %
+ (self._offset, offset, skip), level=log.UNUSUAL, parent=lp)
+ d = self._read_encrypted(skip, hash_only=True)
+ else:
+ d = defer.succeed(None)
+
+ def _at_correct_offset(res):
+ assert offset == self._offset, "%d != %d" % (offset, self._offset)
+ return self._read_encrypted(length, hash_only=False)
+ d.addCallback(_at_correct_offset)
+
+ def _read(strings):
+ size = sum([len(data) for data in strings])
+ self._bytes_sent += size
+ return strings
+ d.addCallback(_read)
+ return d
+
+ def remote_get_plaintext_hashtree_leaves(self, first, last, num_segments):
+ log.msg("remote_get_plaintext_hashtree_leaves: %d-%d of %d" %
+ (first, last-1, num_segments),
+ level=log.NOISY)
+ d = self._eu.get_plaintext_hashtree_leaves(first, last, num_segments)
+ d.addCallback(list)
+ return d
+ def remote_get_plaintext_hash(self):
+ return self._eu.get_plaintext_hash()
+ def remote_close(self):
+ return self._eu.close()
+
+
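+# AssistedUploader delegates encoding and share placement to a remote Helper.
+# We compute the encryption key and storage index locally, ask the Helper
+# (via upload_chk) whether it already has the ciphertext, stream it through a
+# RemoteEncryptedUploadable if not, and then build the readcap locally from
+# the returned uri_extension data, so the Helper never needs the key.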
+class AssistedUploader:
+
+ def __init__(self, helper):
+ self._helper = helper
+ self._log_number = log.msg("AssistedUploader starting")
+ self._storage_index = None
+ self._upload_status = s = UploadStatus()
+ s.set_helper(True)
+ s.set_active(True)
+
+ def log(self, *args, **kwargs):
+ if "parent" not in kwargs:
+ kwargs["parent"] = self._log_number
+ return log.msg(*args, **kwargs)
+
+ def start(self, uploadable):
+ self._started = time.time()
+ u = IUploadable(uploadable)
+ eu = EncryptAnUploadable(u, self._log_number)
+ eu.set_upload_status(self._upload_status)
+ self._encuploadable = eu
+ d = eu.get_size()
+ d.addCallback(self._got_size)
+ d.addCallback(lambda res: eu.get_all_encoding_parameters())
+ d.addCallback(self._got_all_encoding_parameters)
+ # when we get the encryption key, that will also compute the storage
+ # index, so this only takes one pass.
+ # TODO: I'm not sure it's cool to switch back and forth between
+ # the Uploadable and the IEncryptedUploadable that wraps it.
+ d.addCallback(lambda res: u.get_encryption_key())
+ d.addCallback(self._got_encryption_key)
+ d.addCallback(lambda res: eu.get_storage_index())
+ d.addCallback(self._got_storage_index)
+ d.addCallback(self._contact_helper)
+ d.addCallback(self._build_readcap)
+ def _done(res):
+ self._upload_status.set_active(False)
+ return res
+ d.addBoth(_done)
+ return d
+
+ def _got_size(self, size):
+ self._size = size
+ self._upload_status.set_size(size)
+
+ def _got_all_encoding_parameters(self, params):
+ k, happy, n, segment_size = params
+ # stash these for URI generation later
+ self._needed_shares = k
+ self._total_shares = n
+ self._segment_size = segment_size
+
+ def _got_encryption_key(self, key):
+ self._key = key
+
+ def _got_storage_index(self, storage_index):
+ self._storage_index = storage_index
+
+
+ def _contact_helper(self, res):
+ now = self._time_contacting_helper_start = time.time()
+ self._storage_index_elapsed = now - self._started
+ self.log(format="contacting helper for SI %(si)s..",
+ si=storage.si_b2a(self._storage_index))
+ self._upload_status.set_status("Contacting Helper")
+ d = self._helper.callRemote("upload_chk", self._storage_index)
+ d.addCallback(self._contacted_helper)
+ return d
+
+ def _contacted_helper(self, (upload_results, upload_helper)):
+ now = time.time()
+ elapsed = now - self._time_contacting_helper_start
+ self._elapsed_time_contacting_helper = elapsed
+ if upload_helper:
+ self.log("helper says we need to upload")
+ self._upload_status.set_status("Uploading Ciphertext")
+ # we need to upload the file
+ reu = RemoteEncryptedUploadable(self._encuploadable,
+ self._upload_status)
+ # let it pre-compute the size for progress purposes
+ d = reu.get_size()
+ d.addCallback(lambda ignored:
+ upload_helper.callRemote("upload", reu))
+ # this Deferred will fire with the upload results
+ return d
+ self.log("helper says file is already uploaded")
+ self._upload_status.set_progress(1, 1.0)
+ self._upload_status.set_results(upload_results)
+ return upload_results
+
+ def _build_readcap(self, upload_results):
+ self.log("upload finished, building readcap")
+ self._upload_status.set_status("Building Readcap")
+ r = upload_results
+ assert r.uri_extension_data["needed_shares"] == self._needed_shares
+ assert r.uri_extension_data["total_shares"] == self._total_shares
+ assert r.uri_extension_data["segment_size"] == self._segment_size
+ assert r.uri_extension_data["size"] == self._size
+ u = uri.CHKFileURI(key=self._key,
+ uri_extension_hash=r.uri_extension_hash,
+ needed_shares=self._needed_shares,
+ total_shares=self._total_shares,
+ size=self._size,
+ )
+ r.uri = u.to_string()
+ now = time.time()
+ r.file_size = self._size
+ r.timings["storage_index"] = self._storage_index_elapsed
+ r.timings["contacting_helper"] = self._elapsed_time_contacting_helper
+ if "total" in r.timings:
+ r.timings["helper_total"] = r.timings["total"]
+ r.timings["total"] = now - self._started
+ self._upload_status.set_status("Done")
+ self._upload_status.set_results(r)
+ return r
+
+ def get_upload_status(self):
+ return self._upload_status
+
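+# BaseUploadable carries the default encoding parameters (k, happy, n, and
+# the maximum segment size) and computes the per-file (k, happy, n, segsize)
+# tuple: for small files the segment size shrinks to the file size, rounded
+# up to a multiple of k.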
+class BaseUploadable:
+ default_max_segment_size = 128*KiB # overridden by max_segment_size
+ default_encoding_param_k = 3 # overridden by encoding_parameters
+ default_encoding_param_happy = 7
+ default_encoding_param_n = 10
+
+ max_segment_size = None
+ encoding_param_k = None
+ encoding_param_happy = None
+ encoding_param_n = None
+
+ _all_encoding_parameters = None
+ _status = None
+
+ def set_upload_status(self, upload_status):
+ self._status = IUploadStatus(upload_status)
+
+ def set_default_encoding_parameters(self, default_params):
+ assert isinstance(default_params, dict)
+ for k,v in default_params.items():
+ precondition(isinstance(k, str), k, v)
+ precondition(isinstance(v, int), k, v)
+ if "k" in default_params:
+ self.default_encoding_param_k = default_params["k"]
+ if "happy" in default_params:
+ self.default_encoding_param_happy = default_params["happy"]
+ if "n" in default_params:
+ self.default_encoding_param_n = default_params["n"]
+ if "max_segment_size" in default_params:
+ self.default_max_segment_size = default_params["max_segment_size"]
+
+ def get_all_encoding_parameters(self):
+ if self._all_encoding_parameters:
+ return defer.succeed(self._all_encoding_parameters)
+
+ max_segsize = self.max_segment_size or self.default_max_segment_size
+ k = self.encoding_param_k or self.default_encoding_param_k
+ happy = self.encoding_param_happy or self.default_encoding_param_happy
+ n = self.encoding_param_n or self.default_encoding_param_n
+
+ d = self.get_size()
+ def _got_size(file_size):
+ # for small files, shrink the segment size to avoid wasting space
+ segsize = min(max_segsize, file_size)
+ # this must be a multiple of 'required_shares'==k
+ segsize = mathutil.next_multiple(segsize, k)
+ encoding_parameters = (k, happy, n, segsize)
+ self._all_encoding_parameters = encoding_parameters
+ return encoding_parameters
+ d.addCallback(_got_size)
+ return d
+
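+# FileHandle is the basic IUploadable: it wraps an open file-like object.
+# With a convergence string, the encryption key is derived by hashing the
+# whole plaintext together with that string and the encoding parameters (so
+# identical files encrypt identically); otherwise a random 16-byte AES key
+# is used.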
+class FileHandle(BaseUploadable):
+ implements(IUploadable)
+
+ def __init__(self, filehandle, convergence):
+ """
+ Upload the data from the filehandle. If convergence is None then a
+ random encryption key will be used, else the plaintext will be hashed,
+ then the hash will be hashed together with the string in the
+ "convergence" argument to form the encryption key.
+ """
+ assert convergence is None or isinstance(convergence, str), (convergence, type(convergence))
+ self._filehandle = filehandle
+ self._key = None
+ self.convergence = convergence
+ self._size = None
+
+ def _get_encryption_key_convergent(self):
+ if self._key is not None:
+ return defer.succeed(self._key)
+
+ d = self.get_size()
+ # that sets self._size as a side-effect
+ d.addCallback(lambda size: self.get_all_encoding_parameters())
+ def _got(params):
+ k, happy, n, segsize = params
+ f = self._filehandle
+ enckey_hasher = convergence_hasher(k, n, segsize, self.convergence)
+ f.seek(0)
+ BLOCKSIZE = 64*1024
+ bytes_read = 0
+ while True:
+ data = f.read(BLOCKSIZE)
+ if not data:
+ break
+ enckey_hasher.update(data)
+ # TODO: setting progress in a non-yielding loop is kind of
+ # pointless, but I'm anticipating (perhaps prematurely) the
+ # day when we use a slowjob or twisted's CooperatorService to
+ # make this yield time to other jobs.
+ bytes_read += len(data)
+ if self._status:
+ self._status.set_progress(0, float(bytes_read)/self._size)
+ f.seek(0)
+ self._key = enckey_hasher.digest()
+ if self._status:
+ self._status.set_progress(0, 1.0)
+ assert len(self._key) == 16
+ return self._key
+ d.addCallback(_got)
+ return d
+
+ def _get_encryption_key_random(self):
+ if self._key is None:
+ self._key = os.urandom(16)
+ return defer.succeed(self._key)
+
+ def get_encryption_key(self):
+ if self.convergence is not None:
+ return self._get_encryption_key_convergent()
+ else:
+ return self._get_encryption_key_random()
+
+ def get_size(self):
+ if self._size is not None:
+ return defer.succeed(self._size)
+ self._filehandle.seek(0,2)
+ size = self._filehandle.tell()
+ self._size = size
+ self._filehandle.seek(0)
+ return defer.succeed(size)
+
+ def read(self, length):
+ return defer.succeed([self._filehandle.read(length)])
+
+ def close(self):
+ # the originator of the filehandle reserves the right to close it
+ pass
+
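+# FileName and Data (below) are thin conveniences over FileHandle: one opens
+# a named file, the other wraps an in-memory string via StringIO.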
+class FileName(FileHandle):
+ def __init__(self, filename, convergence):
+ """
+ Upload the data from the filename. If convergence is None then a
+ random encryption key will be used, else the plaintext will be hashed,
+ then the hash will be hashed together with the string in the
+ "convergence" argument to form the encryption key.
+ """
+ assert convergence is None or isinstance(convergence, str), (convergence, type(convergence))
+ FileHandle.__init__(self, open(filename, "rb"), convergence=convergence)
+ def close(self):
+ FileHandle.close(self)
+ self._filehandle.close()
+
+class Data(FileHandle):
+ def __init__(self, data, convergence):
+ """
+ Upload the data from the data argument. If convergence is None then a
+ random encryption key will be used, else the plaintext will be hashed,
+ then the hash will be hashed together with the string in the
+ "convergence" argument to form the encryption key.
+ """
+ assert convergence is None or isinstance(convergence, str), (convergence, type(convergence))
+ FileHandle.__init__(self, StringIO(data), convergence=convergence)
+
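+# Uploader is the client-facing service. upload() picks a strategy by size
+# and helper availability: LiteralUploader for files at or below
+# URI_LIT_SIZE_THRESHOLD bytes, AssistedUploader when a helper connection is
+# available, otherwise a local CHKUploader. Illustrative sketch of a caller
+# (the 'client' instance is assumed here, not part of this patch):
+#   uploader = client.getServiceNamed("uploader")
+#   d = uploader.upload(Data("example contents", convergence=None))
+#   d.addCallback(lambda results: results.uri)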
+class Uploader(service.MultiService):
+ """I am a service that allows file uploading. I am a service-child of the
+ Client.
+ """
+ implements(IUploader)
+ name = "uploader"
+ uploader_class = CHKUploader
+ URI_LIT_SIZE_THRESHOLD = 55
+ MAX_UPLOAD_STATUSES = 10
+
+ def __init__(self, helper_furl=None, stats_provider=None):
+ self._helper_furl = helper_furl
+ self.stats_provider = stats_provider
+ self._helper = None
+ self._all_uploads = weakref.WeakKeyDictionary() # for debugging
+ self._all_upload_statuses = weakref.WeakKeyDictionary()
+ self._recent_upload_statuses = []
+ service.MultiService.__init__(self)
+
+ def startService(self):
+ service.MultiService.startService(self)
+ if self._helper_furl:
+ self.parent.tub.connectTo(self._helper_furl,
+ self._got_helper)
+
+ def _got_helper(self, helper):
+ self._helper = helper
+ helper.notifyOnDisconnect(self._lost_helper)
+ def _lost_helper(self):
+ self._helper = None
+
+ def get_helper_info(self):
+ # return a tuple of (helper_furl_or_None, connected_bool)
+ return (self._helper_furl, bool(self._helper))
+
+ def upload(self, uploadable):
+ # this returns a Deferred that fires with an UploadResults instance,
+ # whose .uri attribute holds the file's URI
+ assert self.parent
+ assert self.running
+
+ uploadable = IUploadable(uploadable)
+ d = uploadable.get_size()
+ def _got_size(size):
+ default_params = self.parent.get_encoding_parameters()
+ precondition(isinstance(default_params, dict), default_params)
+ precondition("max_segment_size" in default_params, default_params)
+ uploadable.set_default_encoding_parameters(default_params)
+
+ if self.stats_provider:
+ self.stats_provider.count('uploader.files_uploaded', 1)
+ self.stats_provider.count('uploader.bytes_uploaded', size)
+
+ if size <= self.URI_LIT_SIZE_THRESHOLD:
+ uploader = LiteralUploader(self.parent)
+ elif self._helper:
+ uploader = AssistedUploader(self._helper)
+ else:
+ uploader = self.uploader_class(self.parent)
+ self._add_upload(uploader)
+ return uploader.start(uploadable)
+ d.addCallback(_got_size)
+ def _done(res):
+ uploadable.close()
+ return res
+ d.addBoth(_done)
+ return d
+
+ def _add_upload(self, uploader):
+ s = uploader.get_upload_status()
+ self._all_uploads[uploader] = None
+ self._all_upload_statuses[s] = None
+ self._recent_upload_statuses.append(s)
+ while len(self._recent_upload_statuses) > self.MAX_UPLOAD_STATUSES:
+ self._recent_upload_statuses.pop(0)
+
+ def list_all_upload_statuses(self):
+ for us in self._all_upload_statuses:
+ yield us
from allmydata.util import hashutil
from allmydata.util.assertutil import precondition
from allmydata.uri import WriteableSSKFileURI
-from allmydata.encode import NotEnoughSharesError
+from allmydata.immutable.encode import NotEnoughSharesError
from pycryptopp.publickey import rsa
from pycryptopp.cipher.aes import AES
from allmydata.interfaces import IRetrieveStatus
from allmydata.util import hashutil, idlib, log
from allmydata import hashtree, codec, storage
-from allmydata.encode import NotEnoughSharesError
+from allmydata.immutable.encode import NotEnoughSharesError
from pycryptopp.cipher.aes import AES
from common import DictOfSets, CorruptShareError, UncoordinatedWriteError
from twisted.internet import defer
from foolscap import Referenceable
from foolscap.eventual import eventually
-from allmydata import upload, interfaces, storage, uri
+from allmydata import interfaces, storage, uri
+from allmydata.immutable import upload
from allmydata.util import idlib, log, observer, fileutil, hashutil
from twisted.internet import defer, reactor, protocol, error
from twisted.application import service, internet
from twisted.web import client as tw_client
-from allmydata import client, introducer, upload
+from allmydata import client, introducer
+from allmydata.immutable import upload
from allmydata.scripts import create_node
from allmydata.util import testutil, fileutil
import foolscap
from twisted.internet import defer
from twisted.python import failure
from twisted.application import service
-from allmydata import uri, dirnode, checker
+from allmydata import uri, dirnode
from allmydata.interfaces import IURI, IMutableFileNode, IFileNode, \
FileTooLargeError
-from allmydata.encode import NotEnoughSharesError
+from allmydata.immutable import checker
+from allmydata.immutable.encode import NotEnoughSharesError
from allmydata.util import log
class FakeCHKFileNode:
import time
from zope.interface import implements
from twisted.trial import unittest
-from allmydata import uri, dirnode, upload
+from allmydata import uri, dirnode
+from allmydata.immutable import upload
from allmydata.interfaces import IURI, IClient, IMutableFileNode, \
INewDirectoryURI, IReadonlyNewDirectoryURI, IFileNode, ExistingChildError
from allmydata.util import hashutil, testutil
from twisted.internet.interfaces import IConsumer
from twisted.python.failure import Failure
from foolscap import eventual
-from allmydata import encode, upload, download, hashtree, uri
+from allmydata import hashtree, uri
+from allmydata.immutable import encode, upload, download
from allmydata.util import hashutil, testutil
from allmydata.util.assertutil import _assert
from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader
from twisted.trial import unittest
-from allmydata import filenode, uri, download
+from allmydata import uri
+from allmydata.immutable import filenode, download
from allmydata.mutable.node import MutableFileNode
from allmydata.util import hashutil
from foolscap import Tub, eventual
from foolscap.logging import log
-from allmydata import offloaded, storage, upload
+from allmydata import offloaded, storage
+from allmydata.immutable import upload
from allmydata.util import hashutil, fileutil, mathutil
from pycryptopp.cipher.aes import AES
from twisted.trial import unittest
from twisted.internet import defer, reactor
from twisted.python import failure
-from allmydata import uri, download, storage
+from allmydata import uri, storage
+from allmydata.immutable import download
+from allmydata.immutable.encode import NotEnoughSharesError
from allmydata.util import base32, testutil, idlib
from allmydata.util.idlib import shortnodeid_b2a
from allmydata.util.hashutil import tagged_hash
from allmydata.util.fileutil import make_dirs
-from allmydata.encode import NotEnoughSharesError
from allmydata.interfaces import IURI, IMutableFileURI, IUploadable, \
FileTooLargeError
from foolscap.eventual import eventually, fireEventually
from twisted.internet.error import ConnectionDone, ConnectionLost
from twisted.application import service
import allmydata
-from allmydata import client, uri, download, upload, storage, offloaded, \
- filenode
+from allmydata import client, uri, storage, offloaded
+from allmydata.immutable import download, upload, filenode
from allmydata.introducer.server import IntroducerNode
from allmydata.util import fileutil, idlib, mathutil, testutil
from allmydata.util import log, base32
from twisted.internet import defer
from cStringIO import StringIO
-from allmydata import upload, encode, uri
+from allmydata import uri
+from allmydata.immutable import upload, encode
from allmydata.interfaces import IFileURI, FileTooLargeError
from allmydata.util.assertutil import precondition
from allmydata.util.deferredutil import DeferredListShouldSucceed
from twisted.internet import defer, reactor
from twisted.web import client, error, http
from twisted.python import failure, log
-from allmydata import interfaces, provisioning, uri, webish, upload, download
+from allmydata import interfaces, provisioning, uri, webish
+from allmydata.immutable import upload, download
from allmydata.web import status, common
from allmydata.util import fileutil
from allmydata.test.common import FakeDirectoryNode, FakeCHKFileNode, \
+++ /dev/null
-
-import os, time, weakref, itertools
-from zope.interface import implements
-from twisted.python import failure
-from twisted.internet import defer
-from twisted.application import service
-from foolscap import Referenceable, Copyable, RemoteCopy
-from foolscap import eventual
-from foolscap.logging import log
-
-from allmydata.util.hashutil import file_renewal_secret_hash, \
- file_cancel_secret_hash, bucket_renewal_secret_hash, \
- bucket_cancel_secret_hash, plaintext_hasher, \
- storage_index_hash, plaintext_segment_hasher, convergence_hasher
-from allmydata import encode, storage, hashtree, uri
-from allmydata.util import base32, idlib, mathutil
-from allmydata.util.assertutil import precondition
-from allmydata.interfaces import IUploadable, IUploader, IUploadResults, \
- IEncryptedUploadable, RIEncryptedUploadable, IUploadStatus
-from pycryptopp.cipher.aes import AES
-
-from cStringIO import StringIO
-
-
-KiB=1024
-MiB=1024*KiB
-GiB=1024*MiB
-TiB=1024*GiB
-PiB=1024*TiB
-
-class HaveAllPeersError(Exception):
- # we use this to jump out of the loop
- pass
-
-# this wants to live in storage, not here
-class TooFullError(Exception):
- pass
-
-class UploadResults(Copyable, RemoteCopy):
- implements(IUploadResults)
- typeToCopy = "allmydata.upload.UploadResults.tahoe.allmydata.com"
- copytype = typeToCopy
-
- def __init__(self):
- self.timings = {} # dict of name to number of seconds
- self.sharemap = {} # dict of shnum to placement string
- self.servermap = {} # dict of peerid to set(shnums)
- self.file_size = None
- self.ciphertext_fetched = None # how much the helper fetched
- self.uri = None
- self.preexisting_shares = None # count of shares already present
- self.pushed_shares = None # count of shares we pushed
-
-
-# our current uri_extension is 846 bytes for small files, a few bytes
-# more for larger ones (since the filesize is encoded in decimal in a
-# few places). Ask for a little bit more just in case we need it. If
-# the extension changes size, we can change EXTENSION_SIZE to
-# allocate a more accurate amount of space.
-EXTENSION_SIZE = 1000
-# TODO: actual extensions are closer to 419 bytes, so we can probably lower
-# this.
-
-class PeerTracker:
- def __init__(self, peerid, storage_server,
- sharesize, blocksize, num_segments, num_share_hashes,
- storage_index,
- bucket_renewal_secret, bucket_cancel_secret):
- precondition(isinstance(peerid, str), peerid)
- precondition(len(peerid) == 20, peerid)
- self.peerid = peerid
- self._storageserver = storage_server # to an RIStorageServer
- self.buckets = {} # k: shareid, v: IRemoteBucketWriter
- self.sharesize = sharesize
- as = storage.allocated_size(sharesize,
- num_segments,
- num_share_hashes,
- EXTENSION_SIZE)
- self.allocated_size = as
-
- self.blocksize = blocksize
- self.num_segments = num_segments
- self.num_share_hashes = num_share_hashes
- self.storage_index = storage_index
-
- self.renew_secret = bucket_renewal_secret
- self.cancel_secret = bucket_cancel_secret
-
- def __repr__(self):
- return ("<PeerTracker for peer %s and SI %s>"
- % (idlib.shortnodeid_b2a(self.peerid),
- storage.si_b2a(self.storage_index)[:5]))
-
- def query(self, sharenums):
- d = self._storageserver.callRemote("allocate_buckets",
- self.storage_index,
- self.renew_secret,
- self.cancel_secret,
- sharenums,
- self.allocated_size,
- canary=Referenceable())
- d.addCallback(self._got_reply)
- return d
-
- def _got_reply(self, (alreadygot, buckets)):
- #log.msg("%s._got_reply(%s)" % (self, (alreadygot, buckets)))
- b = {}
- for sharenum, rref in buckets.iteritems():
- bp = storage.WriteBucketProxy(rref, self.sharesize,
- self.blocksize,
- self.num_segments,
- self.num_share_hashes,
- EXTENSION_SIZE,
- self.peerid)
- b[sharenum] = bp
- self.buckets.update(b)
- return (alreadygot, set(b.keys()))
-
-class Tahoe2PeerSelector:
-
- def __init__(self, upload_id, logparent=None, upload_status=None):
- self.upload_id = upload_id
- self.query_count, self.good_query_count, self.bad_query_count = 0,0,0
- self.error_count = 0
- self.num_peers_contacted = 0
- self.last_failure_msg = None
- self._status = IUploadStatus(upload_status)
- self._log_parent = log.msg("%s starting" % self, parent=logparent)
-
- def __repr__(self):
- return "<Tahoe2PeerSelector for upload %s>" % self.upload_id
-
- def get_shareholders(self, client,
- storage_index, share_size, block_size,
- num_segments, total_shares, shares_of_happiness):
- """
- @return: (used_peers, already_peers), where used_peers is a set of
- PeerTracker instances that have agreed to hold some shares
- for us (the shnum is stashed inside the PeerTracker),
- and already_peers is a dict mapping shnum to a peer
- which claims to already have the share.
- """
-
- if self._status:
- self._status.set_status("Contacting Peers..")
-
- self.total_shares = total_shares
- self.shares_of_happiness = shares_of_happiness
-
- self.homeless_shares = range(total_shares)
- # self.uncontacted_peers = list() # peers we haven't asked yet
- self.contacted_peers = [] # peers worth asking again
- self.contacted_peers2 = [] # peers that we have asked again
- self._started_second_pass = False
- self.use_peers = set() # PeerTrackers that have shares assigned to them
- self.preexisting_shares = {} # sharenum -> peerid holding the share
-
- peers = client.get_permuted_peers("storage", storage_index)
- if not peers:
- raise encode.NotEnoughSharesError("client gave us zero peers")
-
- # figure out how much space to ask for
-
- # this needed_hashes computation should mirror
- # Encoder.send_all_share_hash_trees. We use an IncompleteHashTree
- # (instead of a HashTree) because we don't require actual hashing
- # just to count the levels.
- ht = hashtree.IncompleteHashTree(total_shares)
- num_share_hashes = len(ht.needed_hashes(0, include_leaf=True))
-
- # decide upon the renewal/cancel secrets, to include them in the
- # allocat_buckets query.
- client_renewal_secret = client.get_renewal_secret()
- client_cancel_secret = client.get_cancel_secret()
-
- file_renewal_secret = file_renewal_secret_hash(client_renewal_secret,
- storage_index)
- file_cancel_secret = file_cancel_secret_hash(client_cancel_secret,
- storage_index)
-
- trackers = [ PeerTracker(peerid, conn,
- share_size, block_size,
- num_segments, num_share_hashes,
- storage_index,
- bucket_renewal_secret_hash(file_renewal_secret,
- peerid),
- bucket_cancel_secret_hash(file_cancel_secret,
- peerid),
- )
- for (peerid, conn) in peers ]
- self.uncontacted_peers = trackers
-
- d = defer.maybeDeferred(self._loop)
- return d
-
- def _loop(self):
- if not self.homeless_shares:
- # all done
- msg = ("placed all %d shares, "
- "sent %d queries to %d peers, "
- "%d queries placed some shares, %d placed none, "
- "got %d errors" %
- (self.total_shares,
- self.query_count, self.num_peers_contacted,
- self.good_query_count, self.bad_query_count,
- self.error_count))
- log.msg("peer selection successful for %s: %s" % (self, msg),
- parent=self._log_parent)
- return (self.use_peers, self.preexisting_shares)
-
- if self.uncontacted_peers:
- peer = self.uncontacted_peers.pop(0)
- # TODO: don't pre-convert all peerids to PeerTrackers
- assert isinstance(peer, PeerTracker)
-
- shares_to_ask = set([self.homeless_shares.pop(0)])
- self.query_count += 1
- self.num_peers_contacted += 1
- if self._status:
- self._status.set_status("Contacting Peers [%s] (first query),"
- " %d shares left.."
- % (idlib.shortnodeid_b2a(peer.peerid),
- len(self.homeless_shares)))
- d = peer.query(shares_to_ask)
- d.addBoth(self._got_response, peer, shares_to_ask,
- self.contacted_peers)
- return d
- elif self.contacted_peers:
- # ask a peer that we've already asked.
- if not self._started_second_pass:
- log.msg("starting second pass", parent=self._log_parent,
- level=log.NOISY)
- self._started_second_pass = True
- num_shares = mathutil.div_ceil(len(self.homeless_shares),
- len(self.contacted_peers))
- peer = self.contacted_peers.pop(0)
- shares_to_ask = set(self.homeless_shares[:num_shares])
- self.homeless_shares[:num_shares] = []
- self.query_count += 1
- if self._status:
- self._status.set_status("Contacting Peers [%s] (second query),"
- " %d shares left.."
- % (idlib.shortnodeid_b2a(peer.peerid),
- len(self.homeless_shares)))
- d = peer.query(shares_to_ask)
- d.addBoth(self._got_response, peer, shares_to_ask,
- self.contacted_peers2)
- return d
- elif self.contacted_peers2:
- # we've finished the second-or-later pass. Move all the remaining
- # peers back into self.contacted_peers for the next pass.
- self.contacted_peers.extend(self.contacted_peers2)
- self.contacted_peers[:] = []
- return self._loop()
- else:
- # no more peers. If we haven't placed enough shares, we fail.
- placed_shares = self.total_shares - len(self.homeless_shares)
- if placed_shares < self.shares_of_happiness:
- msg = ("placed %d shares out of %d total (%d homeless), "
- "sent %d queries to %d peers, "
- "%d queries placed some shares, %d placed none, "
- "got %d errors" %
- (self.total_shares - len(self.homeless_shares),
- self.total_shares, len(self.homeless_shares),
- self.query_count, self.num_peers_contacted,
- self.good_query_count, self.bad_query_count,
- self.error_count))
- msg = "peer selection failed for %s: %s" % (self, msg)
- if self.last_failure_msg:
- msg += " (%s)" % (self.last_failure_msg,)
- log.msg(msg, level=log.UNUSUAL, parent=self._log_parent)
- raise encode.NotEnoughSharesError(msg)
- else:
- # we placed enough to be happy, so we're done
- if self._status:
- self._status.set_status("Placed all shares")
- return self.use_peers
-
- def _got_response(self, res, peer, shares_to_ask, put_peer_here):
- if isinstance(res, failure.Failure):
- # This is unusual, and probably indicates a bug or a network
- # problem.
- log.msg("%s got error during peer selection: %s" % (peer, res),
- level=log.UNUSUAL, parent=self._log_parent)
- self.error_count += 1
- self.homeless_shares = list(shares_to_ask) + self.homeless_shares
- if (self.uncontacted_peers
- or self.contacted_peers
- or self.contacted_peers2):
- # there is still hope, so just loop
- pass
- else:
- # No more peers, so this upload might fail (it depends upon
- # whether we've hit shares_of_happiness or not). Log the last
- # failure we got: if a coding error causes all peers to fail
- # in the same way, this allows the common failure to be seen
- # by the uploader and should help with debugging
- msg = ("last failure (from %s) was: %s" % (peer, res))
- self.last_failure_msg = msg
- else:
- (alreadygot, allocated) = res
- log.msg("response from peer %s: alreadygot=%s, allocated=%s"
- % (idlib.shortnodeid_b2a(peer.peerid),
- tuple(sorted(alreadygot)), tuple(sorted(allocated))),
- level=log.NOISY, parent=self._log_parent)
- progress = False
- for s in alreadygot:
- self.preexisting_shares[s] = peer.peerid
- if s in self.homeless_shares:
- self.homeless_shares.remove(s)
- progress = True
-
- # the PeerTracker will remember which shares were allocated on
- # that peer. We just have to remember to use them.
- if allocated:
- self.use_peers.add(peer)
- progress = True
-
- not_yet_present = set(shares_to_ask) - set(alreadygot)
- still_homeless = not_yet_present - set(allocated)
-
- if progress:
- # they accepted or already had at least one share, so
- # progress has been made
- self.good_query_count += 1
- else:
- self.bad_query_count += 1
-
- if still_homeless:
- # In networks with lots of space, this is very unusual and
- # probably indicates an error. In networks with peers that
- # are full, it is merely unusual. In networks that are very
- # full, it is common, and many uploads will fail. In most
- # cases, this is obviously not fatal, and we'll just use some
- # other peers.
-
- # some shares are still homeless, keep trying to find them a
- # home. The ones that were rejected get first priority.
- self.homeless_shares = (list(still_homeless)
- + self.homeless_shares)
- # Since they were unable to accept all of our requests, so it
- # is safe to assume that asking them again won't help.
- else:
- # if they *were* able to accept everything, they might be
- # willing to accept even more.
- put_peer_here.append(peer)
-
- # now loop
- return self._loop()
-
-
-class EncryptAnUploadable:
- """This is a wrapper that takes an IUploadable and provides
- IEncryptedUploadable."""
- implements(IEncryptedUploadable)
- CHUNKSIZE = 50*1024
-
- def __init__(self, original, log_parent=None):
- self.original = IUploadable(original)
- self._log_number = log_parent
- self._encryptor = None
- self._plaintext_hasher = plaintext_hasher()
- self._plaintext_segment_hasher = None
- self._plaintext_segment_hashes = []
- self._encoding_parameters = None
- self._file_size = None
- self._ciphertext_bytes_read = 0
- self._status = None
-
- def set_upload_status(self, upload_status):
- self._status = IUploadStatus(upload_status)
- self.original.set_upload_status(upload_status)
-
- def log(self, *args, **kwargs):
- if "facility" not in kwargs:
- kwargs["facility"] = "upload.encryption"
- if "parent" not in kwargs:
- kwargs["parent"] = self._log_number
- return log.msg(*args, **kwargs)
-
- def get_size(self):
- if self._file_size is not None:
- return defer.succeed(self._file_size)
- d = self.original.get_size()
- def _got_size(size):
- self._file_size = size
- if self._status:
- self._status.set_size(size)
- return size
- d.addCallback(_got_size)
- return d
-
- def get_all_encoding_parameters(self):
- if self._encoding_parameters is not None:
- return defer.succeed(self._encoding_parameters)
- d = self.original.get_all_encoding_parameters()
- def _got(encoding_parameters):
- (k, happy, n, segsize) = encoding_parameters
- self._segment_size = segsize # used by segment hashers
- self._encoding_parameters = encoding_parameters
- self.log("my encoding parameters: %s" % (encoding_parameters,),
- level=log.NOISY)
- return encoding_parameters
- d.addCallback(_got)
- return d
-
- def _get_encryptor(self):
- if self._encryptor:
- return defer.succeed(self._encryptor)
-
- d = self.original.get_encryption_key()
- def _got(key):
- e = AES(key)
- self._encryptor = e
-
- storage_index = storage_index_hash(key)
- assert isinstance(storage_index, str)
- # There's no point to having the SI be longer than the key, so we
- # specify that it is truncated to the same 128 bits as the AES key.
- assert len(storage_index) == 16 # SHA-256 truncated to 128b
- self._storage_index = storage_index
- if self._status:
- self._status.set_storage_index(storage_index)
- return e
- d.addCallback(_got)
- return d
-
- def get_storage_index(self):
- d = self._get_encryptor()
- d.addCallback(lambda res: self._storage_index)
- return d
-
- def _get_segment_hasher(self):
- p = self._plaintext_segment_hasher
- if p:
- left = self._segment_size - self._plaintext_segment_hashed_bytes
- return p, left
- p = plaintext_segment_hasher()
- self._plaintext_segment_hasher = p
- self._plaintext_segment_hashed_bytes = 0
- return p, self._segment_size
-
- def _update_segment_hash(self, chunk):
- offset = 0
- while offset < len(chunk):
- p, segment_left = self._get_segment_hasher()
- chunk_left = len(chunk) - offset
- this_segment = min(chunk_left, segment_left)
- p.update(chunk[offset:offset+this_segment])
- self._plaintext_segment_hashed_bytes += this_segment
-
- if self._plaintext_segment_hashed_bytes == self._segment_size:
- # we've filled this segment
- self._plaintext_segment_hashes.append(p.digest())
- self._plaintext_segment_hasher = None
- self.log("closed hash [%d]: %dB" %
- (len(self._plaintext_segment_hashes)-1,
- self._plaintext_segment_hashed_bytes),
- level=log.NOISY)
- self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s",
- segnum=len(self._plaintext_segment_hashes)-1,
- hash=base32.b2a(p.digest()),
- level=log.NOISY)
-
- offset += this_segment
-
-
- def read_encrypted(self, length, hash_only):
- # make sure our parameters have been set up first
- d = self.get_all_encoding_parameters()
- # and size
- d.addCallback(lambda ignored: self.get_size())
- d.addCallback(lambda ignored: self._get_encryptor())
- # then fetch and encrypt the plaintext. The unusual structure here
- # (passing a Deferred *into* a function) is needed to avoid
- # overflowing the stack: Deferreds don't optimize out tail recursion.
- # We also pass in a list, to which _read_encrypted will append
- # ciphertext.
- ciphertext = []
- d2 = defer.Deferred()
- d.addCallback(lambda ignored:
- self._read_encrypted(length, ciphertext, hash_only, d2))
- d.addCallback(lambda ignored: d2)
- return d
-
- def _read_encrypted(self, remaining, ciphertext, hash_only, fire_when_done):
- if not remaining:
- fire_when_done.callback(ciphertext)
- return None
- # tolerate large length= values without consuming a lot of RAM by
- # reading just a chunk (say 50kB) at a time. This only really matters
- # when hash_only==True (i.e. resuming an interrupted upload), since
- # that's the case where we will be skipping over a lot of data.
- size = min(remaining, self.CHUNKSIZE)
- remaining = remaining - size
- # read a chunk of plaintext..
- d = defer.maybeDeferred(self.original.read, size)
- # N.B.: if read() is synchronous, then since everything else is
- # actually synchronous too, we'd blow the stack unless we stall for a
- # tick. Once you accept a Deferred from IUploadable.read(), you must
- # be prepared to have it fire immediately too.
- d.addCallback(eventual.fireEventually)
- def _good(plaintext):
- # and encrypt it..
- # o/' over the fields we go, hashing all the way, sHA! sHA! sHA! o/'
- ct = self._hash_and_encrypt_plaintext(plaintext, hash_only)
- ciphertext.extend(ct)
- self._read_encrypted(remaining, ciphertext, hash_only,
- fire_when_done)
- def _err(why):
- fire_when_done.errback(why)
- d.addCallback(_good)
- d.addErrback(_err)
- return None
-
- def _hash_and_encrypt_plaintext(self, data, hash_only):
- assert isinstance(data, (tuple, list)), type(data)
- data = list(data)
- cryptdata = []
- # we use data.pop(0) instead of 'for chunk in data' to save
- # memory: each chunk is destroyed as soon as we're done with it.
- bytes_processed = 0
- while data:
- chunk = data.pop(0)
- self.log(" read_encrypted handling %dB-sized chunk" % len(chunk),
- level=log.NOISY)
- bytes_processed += len(chunk)
- self._plaintext_hasher.update(chunk)
- self._update_segment_hash(chunk)
- # TODO: we have to encrypt the data (even if hash_only==True)
- # because pycryptopp's AES-CTR implementation doesn't offer a
- # way to change the counter value. Once pycryptopp acquires
- # this ability, change this to simply update the counter
- # before each call to (hash_only==False) _encryptor.process()
- ciphertext = self._encryptor.process(chunk)
- if hash_only:
- self.log(" skipping encryption", level=log.NOISY)
- else:
- cryptdata.append(ciphertext)
- del ciphertext
- del chunk
- self._ciphertext_bytes_read += bytes_processed
- if self._status:
- progress = float(self._ciphertext_bytes_read) / self._file_size
- self._status.set_progress(1, progress)
- return cryptdata
-
-
- def get_plaintext_hashtree_leaves(self, first, last, num_segments):
- if len(self._plaintext_segment_hashes) < num_segments:
- # close out the last one
- assert len(self._plaintext_segment_hashes) == num_segments-1
- p, segment_left = self._get_segment_hasher()
- self._plaintext_segment_hashes.append(p.digest())
- del self._plaintext_segment_hasher
- self.log("closing plaintext leaf hasher, hashed %d bytes" %
- self._plaintext_segment_hashed_bytes,
- level=log.NOISY)
- self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s",
- segnum=len(self._plaintext_segment_hashes)-1,
- hash=base32.b2a(p.digest()),
- level=log.NOISY)
- assert len(self._plaintext_segment_hashes) == num_segments
- return defer.succeed(tuple(self._plaintext_segment_hashes[first:last]))
-
- def get_plaintext_hash(self):
- h = self._plaintext_hasher.digest()
- return defer.succeed(h)
-
- def close(self):
- return self.original.close()
-
-class UploadStatus:
- implements(IUploadStatus)
- statusid_counter = itertools.count(0)
-
- def __init__(self):
- self.storage_index = None
- self.size = None
- self.helper = False
- self.status = "Not started"
- self.progress = [0.0, 0.0, 0.0]
- self.active = True
- self.results = None
- self.counter = self.statusid_counter.next()
- self.started = time.time()
-
- def get_started(self):
- return self.started
- def get_storage_index(self):
- return self.storage_index
- def get_size(self):
- return self.size
- def using_helper(self):
- return self.helper
- def get_status(self):
- return self.status
- def get_progress(self):
- return tuple(self.progress)
- def get_active(self):
- return self.active
- def get_results(self):
- return self.results
- def get_counter(self):
- return self.counter
-
- def set_storage_index(self, si):
- self.storage_index = si
- def set_size(self, size):
- self.size = size
- def set_helper(self, helper):
- self.helper = helper
- def set_status(self, status):
- self.status = status
- def set_progress(self, which, value):
- # [0]: chk, [1]: ciphertext, [2]: encode+push
- self.progress[which] = value
- def set_active(self, value):
- self.active = value
- def set_results(self, value):
- self.results = value
-
-class CHKUploader:
- peer_selector_class = Tahoe2PeerSelector
-
- def __init__(self, client):
- self._client = client
- self._log_number = self._client.log("CHKUploader starting")
- self._encoder = None
- self._results = UploadResults()
- self._storage_index = None
- self._upload_status = UploadStatus()
- self._upload_status.set_helper(False)
- self._upload_status.set_active(True)
- self._upload_status.set_results(self._results)
-
- def log(self, *args, **kwargs):
- if "parent" not in kwargs:
- kwargs["parent"] = self._log_number
- if "facility" not in kwargs:
- kwargs["facility"] = "tahoe.upload"
- return self._client.log(*args, **kwargs)
-
- def start(self, uploadable):
- """Start uploading the file.
-
- This method returns a Deferred that will fire with the URI (a
- string)."""
-
- self._started = time.time()
- uploadable = IUploadable(uploadable)
- self.log("starting upload of %s" % uploadable)
-
- eu = EncryptAnUploadable(uploadable, self._log_number)
- eu.set_upload_status(self._upload_status)
- d = self.start_encrypted(eu)
- def _uploaded(res):
- d1 = uploadable.get_encryption_key()
- d1.addCallback(lambda key: self._compute_uri(res, key))
- return d1
- d.addCallback(_uploaded)
- def _done(res):
- self._upload_status.set_active(False)
- return res
- d.addBoth(_done)
- return d
-
- def abort(self):
- """Call this is the upload must be abandoned before it completes.
- This will tell the shareholders to delete their partial shares. I
- return a Deferred that fires when these messages have been acked."""
- if not self._encoder:
- # how did you call abort() before calling start() ?
- return defer.succeed(None)
- return self._encoder.abort()
-
- def start_encrypted(self, encrypted):
- eu = IEncryptedUploadable(encrypted)
-
- started = time.time()
- self._encoder = e = encode.Encoder(self._log_number,
- self._upload_status)
- d = e.set_encrypted_uploadable(eu)
- d.addCallback(self.locate_all_shareholders, started)
- d.addCallback(self.set_shareholders, e)
- d.addCallback(lambda res: e.start())
- d.addCallback(self._encrypted_done)
- # this fires with the uri_extension_hash and other data
- return d
-
- def locate_all_shareholders(self, encoder, started):
- peer_selection_started = now = time.time()
- self._storage_index_elapsed = now - started
- storage_index = encoder.get_param("storage_index")
- self._storage_index = storage_index
- upload_id = storage.si_b2a(storage_index)[:5]
- self.log("using storage index %s" % upload_id)
- peer_selector = self.peer_selector_class(upload_id, self._log_number,
- self._upload_status)
-
- share_size = encoder.get_param("share_size")
- block_size = encoder.get_param("block_size")
- num_segments = encoder.get_param("num_segments")
- k,desired,n = encoder.get_param("share_counts")
-
- self._peer_selection_started = time.time()
- d = peer_selector.get_shareholders(self._client, storage_index,
- share_size, block_size,
- num_segments, n, desired)
- def _done(res):
- self._peer_selection_elapsed = time.time() - peer_selection_started
- return res
- d.addCallback(_done)
- return d
-
- def set_shareholders(self, (used_peers, already_peers), encoder):
- """
- @param used_peers: a sequence of PeerTracker objects
- @paran already_peers: a dict mapping sharenum to a peerid that
- claims to already have this share
- """
- self.log("_send_shares, used_peers is %s" % (used_peers,))
- # record already-present shares in self._results
- for (shnum, peerid) in already_peers.items():
- peerid_s = idlib.shortnodeid_b2a(peerid)
- self._results.sharemap[shnum] = "Found on [%s]" % peerid_s
- if peerid not in self._results.servermap:
- self._results.servermap[peerid] = set()
- self._results.servermap[peerid].add(shnum)
- self._results.preexisting_shares = len(already_peers)
-
- self._sharemap = {}
- for peer in used_peers:
- assert isinstance(peer, PeerTracker)
- buckets = {}
- for peer in used_peers:
- buckets.update(peer.buckets)
- for shnum in peer.buckets:
- self._sharemap[shnum] = peer
- assert len(buckets) == sum([len(peer.buckets) for peer in used_peers])
- encoder.set_shareholders(buckets)
-
- def _encrypted_done(self, res):
- r = self._results
- for shnum in self._encoder.get_shares_placed():
- peer_tracker = self._sharemap[shnum]
- peerid = peer_tracker.peerid
- peerid_s = idlib.shortnodeid_b2a(peerid)
- r.sharemap[shnum] = "Placed on [%s]" % peerid_s
- if peerid not in r.servermap:
- r.servermap[peerid] = set()
- r.servermap[peerid].add(shnum)
- r.pushed_shares = len(self._encoder.get_shares_placed())
- now = time.time()
- r.file_size = self._encoder.file_size
- r.timings["total"] = now - self._started
- r.timings["storage_index"] = self._storage_index_elapsed
- r.timings["peer_selection"] = self._peer_selection_elapsed
- r.timings.update(self._encoder.get_times())
- r.uri_extension_data = self._encoder.get_uri_extension_data()
- return res
-
- def _compute_uri(self, (uri_extension_hash,
- needed_shares, total_shares, size),
- key):
- u = uri.CHKFileURI(key=key,
- uri_extension_hash=uri_extension_hash,
- needed_shares=needed_shares,
- total_shares=total_shares,
- size=size,
- )
- r = self._results
- r.uri = u.to_string()
- return r
-
- def get_upload_status(self):
- return self._upload_status
-
-def read_this_many_bytes(uploadable, size, prepend_data=[]):
- if size == 0:
- return defer.succeed([])
- d = uploadable.read(size)
- def _got(data):
- assert isinstance(data, list)
- bytes = sum([len(piece) for piece in data])
- assert bytes > 0
- assert bytes <= size
- remaining = size - bytes
- if remaining:
- return read_this_many_bytes(uploadable, remaining,
- prepend_data + data)
- return prepend_data + data
- d.addCallback(_got)
- return d
-
-class LiteralUploader:
-
- def __init__(self, client):
- self._client = client
- self._results = UploadResults()
- self._status = s = UploadStatus()
- s.set_storage_index(None)
- s.set_helper(False)
- s.set_progress(0, 1.0)
- s.set_active(False)
- s.set_results(self._results)
-
- def start(self, uploadable):
- uploadable = IUploadable(uploadable)
- d = uploadable.get_size()
- def _got_size(size):
- self._size = size
- self._status.set_size(size)
- self._results.file_size = size
- return read_this_many_bytes(uploadable, size)
- d.addCallback(_got_size)
- d.addCallback(lambda data: uri.LiteralFileURI("".join(data)))
- d.addCallback(lambda u: u.to_string())
- d.addCallback(self._build_results)
- return d
-
- def _build_results(self, uri):
- self._results.uri = uri
- self._status.set_status("Done")
- self._status.set_progress(1, 1.0)
- self._status.set_progress(2, 1.0)
- return self._results
-
- def close(self):
- pass
-
- def get_upload_status(self):
- return self._status
-
-class RemoteEncryptedUploadable(Referenceable):
- implements(RIEncryptedUploadable)
-
- def __init__(self, encrypted_uploadable, upload_status):
- self._eu = IEncryptedUploadable(encrypted_uploadable)
- self._offset = 0
- self._bytes_sent = 0
- self._status = IUploadStatus(upload_status)
- # we are responsible for updating the status string while we run, and
- # for setting the ciphertext-fetch progress.
- self._size = None
-
- def get_size(self):
- if self._size is not None:
- return defer.succeed(self._size)
- d = self._eu.get_size()
- def _got_size(size):
- self._size = size
- return size
- d.addCallback(_got_size)
- return d
-
- def remote_get_size(self):
- return self.get_size()
- def remote_get_all_encoding_parameters(self):
- return self._eu.get_all_encoding_parameters()
-
- def _read_encrypted(self, length, hash_only):
- d = self._eu.read_encrypted(length, hash_only)
- def _read(strings):
- if hash_only:
- self._offset += length
- else:
- size = sum([len(data) for data in strings])
- self._offset += size
- return strings
- d.addCallback(_read)
- return d
-
- def remote_read_encrypted(self, offset, length):
- # we don't support seek backwards, but we allow skipping forwards
- precondition(offset >= 0, offset)
- precondition(length >= 0, length)
- lp = log.msg("remote_read_encrypted(%d-%d)" % (offset, offset+length),
- level=log.NOISY)
- precondition(offset >= self._offset, offset, self._offset)
- if offset > self._offset:
- # read the data from disk anyways, to build up the hash tree
- skip = offset - self._offset
- log.msg("remote_read_encrypted skipping ahead from %d to %d, skip=%d" %
- (self._offset, offset, skip), level=log.UNUSUAL, parent=lp)
- d = self._read_encrypted(skip, hash_only=True)
- else:
- d = defer.succeed(None)
-
- def _at_correct_offset(res):
- assert offset == self._offset, "%d != %d" % (offset, self._offset)
- return self._read_encrypted(length, hash_only=False)
- d.addCallback(_at_correct_offset)
-
- def _read(strings):
- size = sum([len(data) for data in strings])
- self._bytes_sent += size
- return strings
- d.addCallback(_read)
- return d
-
- def remote_get_plaintext_hashtree_leaves(self, first, last, num_segments):
- log.msg("remote_get_plaintext_hashtree_leaves: %d-%d of %d" %
- (first, last-1, num_segments),
- level=log.NOISY)
- d = self._eu.get_plaintext_hashtree_leaves(first, last, num_segments)
- d.addCallback(list)
- return d
- def remote_get_plaintext_hash(self):
- return self._eu.get_plaintext_hash()
- def remote_close(self):
- return self._eu.close()
-
-
-class AssistedUploader:
-
- def __init__(self, helper):
- self._helper = helper
- self._log_number = log.msg("AssistedUploader starting")
- self._storage_index = None
- self._upload_status = s = UploadStatus()
- s.set_helper(True)
- s.set_active(True)
-
- def log(self, *args, **kwargs):
- if "parent" not in kwargs:
- kwargs["parent"] = self._log_number
- return log.msg(*args, **kwargs)
-
- def start(self, uploadable):
- self._started = time.time()
- u = IUploadable(uploadable)
- eu = EncryptAnUploadable(u, self._log_number)
- eu.set_upload_status(self._upload_status)
- self._encuploadable = eu
- d = eu.get_size()
- d.addCallback(self._got_size)
- d.addCallback(lambda res: eu.get_all_encoding_parameters())
- d.addCallback(self._got_all_encoding_parameters)
- # when we get the encryption key, that will also compute the storage
- # index, so this only takes one pass.
- # TODO: I'm not sure it's cool to switch back and forth between
- # the Uploadable and the IEncryptedUploadable that wraps it.
- d.addCallback(lambda res: u.get_encryption_key())
- d.addCallback(self._got_encryption_key)
- d.addCallback(lambda res: eu.get_storage_index())
- d.addCallback(self._got_storage_index)
- d.addCallback(self._contact_helper)
- d.addCallback(self._build_readcap)
- def _done(res):
- self._upload_status.set_active(False)
- return res
- d.addBoth(_done)
- return d
-
- def _got_size(self, size):
- self._size = size
- self._upload_status.set_size(size)
-
- def _got_all_encoding_parameters(self, params):
- k, happy, n, segment_size = params
- # stash these for URI generation later
- self._needed_shares = k
- self._total_shares = n
- self._segment_size = segment_size
-
- def _got_encryption_key(self, key):
- self._key = key
-
- def _got_storage_index(self, storage_index):
- self._storage_index = storage_index
-
-
- def _contact_helper(self, res):
- now = self._time_contacting_helper_start = time.time()
- self._storage_index_elapsed = now - self._started
- self.log(format="contacting helper for SI %(si)s..",
- si=storage.si_b2a(self._storage_index))
- self._upload_status.set_status("Contacting Helper")
- d = self._helper.callRemote("upload_chk", self._storage_index)
- d.addCallback(self._contacted_helper)
- return d
-
- def _contacted_helper(self, (upload_results, upload_helper)):
- now = time.time()
- elapsed = now - self._time_contacting_helper_start
- self._elapsed_time_contacting_helper = elapsed
- if upload_helper:
- self.log("helper says we need to upload")
- self._upload_status.set_status("Uploading Ciphertext")
- # we need to upload the file
- reu = RemoteEncryptedUploadable(self._encuploadable,
- self._upload_status)
- # let it pre-compute the size for progress purposes
- d = reu.get_size()
- d.addCallback(lambda ignored:
- upload_helper.callRemote("upload", reu))
- # this Deferred will fire with the upload results
- return d
- self.log("helper says file is already uploaded")
- self._upload_status.set_progress(1, 1.0)
- self._upload_status.set_results(upload_results)
- return upload_results
-
- def _build_readcap(self, upload_results):
- self.log("upload finished, building readcap")
- self._upload_status.set_status("Building Readcap")
- r = upload_results
- assert r.uri_extension_data["needed_shares"] == self._needed_shares
- assert r.uri_extension_data["total_shares"] == self._total_shares
- assert r.uri_extension_data["segment_size"] == self._segment_size
- assert r.uri_extension_data["size"] == self._size
- u = uri.CHKFileURI(key=self._key,
- uri_extension_hash=r.uri_extension_hash,
- needed_shares=self._needed_shares,
- total_shares=self._total_shares,
- size=self._size,
- )
- r.uri = u.to_string()
- now = time.time()
- r.file_size = self._size
- r.timings["storage_index"] = self._storage_index_elapsed
- r.timings["contacting_helper"] = self._elapsed_time_contacting_helper
- if "total" in r.timings:
- r.timings["helper_total"] = r.timings["total"]
- r.timings["total"] = now - self._started
- self._upload_status.set_status("Done")
- self._upload_status.set_results(r)
- return r
-
- def get_upload_status(self):
- return self._upload_status
-
-class BaseUploadable:
- default_max_segment_size = 128*KiB # overridden by max_segment_size
- default_encoding_param_k = 3 # overridden by encoding_parameters
- default_encoding_param_happy = 7
- default_encoding_param_n = 10
-
- max_segment_size = None
- encoding_param_k = None
- encoding_param_happy = None
- encoding_param_n = None
-
- _all_encoding_parameters = None
- _status = None
-
- def set_upload_status(self, upload_status):
- self._status = IUploadStatus(upload_status)
-
- def set_default_encoding_parameters(self, default_params):
- assert isinstance(default_params, dict)
- for k,v in default_params.items():
- precondition(isinstance(k, str), k, v)
- precondition(isinstance(v, int), k, v)
- if "k" in default_params:
- self.default_encoding_param_k = default_params["k"]
- if "happy" in default_params:
- self.default_encoding_param_happy = default_params["happy"]
- if "n" in default_params:
- self.default_encoding_param_n = default_params["n"]
- if "max_segment_size" in default_params:
- self.default_max_segment_size = default_params["max_segment_size"]
-
- def get_all_encoding_parameters(self):
- if self._all_encoding_parameters:
- return defer.succeed(self._all_encoding_parameters)
-
- max_segsize = self.max_segment_size or self.default_max_segment_size
- k = self.encoding_param_k or self.default_encoding_param_k
- happy = self.encoding_param_happy or self.default_encoding_param_happy
- n = self.encoding_param_n or self.default_encoding_param_n
-
- d = self.get_size()
- def _got_size(file_size):
- # for small files, shrink the segment size to avoid wasting space
- segsize = min(max_segsize, file_size)
- # this must be a multiple of 'required_shares'==k
- segsize = mathutil.next_multiple(segsize, k)
- encoding_parameters = (k, happy, n, segsize)
- self._all_encoding_parameters = encoding_parameters
- return encoding_parameters
- d.addCallback(_got_size)
- return d
-
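
For reference, get_all_encoding_parameters above shrinks the segment size down to the file size and then rounds up to a multiple of k, so each segment splits evenly across the required shares; a small worked example (next_multiple is re-stated locally for illustration, the real one lives in allmydata.util.mathutil):

    def next_multiple(n, k):
        # smallest multiple of k that is >= n
        div, mod = divmod(n, k)
        return n if mod == 0 else (div + 1) * k

    max_segsize = 128 * 1024                 # default_max_segment_size
    k = 3                                    # default_encoding_param_k
    file_size = 10000
    segsize = min(max_segsize, file_size)    # 10000: no point in 128 KiB segments
    segsize = next_multiple(segsize, k)      # 10002, evenly divisible by k=3
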
-class FileHandle(BaseUploadable):
- implements(IUploadable)
-
- def __init__(self, filehandle, convergence):
- """
- Upload the data from the filehandle. If convergence is None then a
- random encryption key will be used, else the plaintext will be hashed,
- then the hash will be hashed together with the string in the
- "convergence" argument to form the encryption key.
- """
- assert convergence is None or isinstance(convergence, str), (convergence, type(convergence))
- self._filehandle = filehandle
- self._key = None
- self.convergence = convergence
- self._size = None
-
- def _get_encryption_key_convergent(self):
- if self._key is not None:
- return defer.succeed(self._key)
-
- d = self.get_size()
- # that sets self._size as a side-effect
- d.addCallback(lambda size: self.get_all_encoding_parameters())
- def _got(params):
- k, happy, n, segsize = params
- f = self._filehandle
- enckey_hasher = convergence_hasher(k, n, segsize, self.convergence)
- f.seek(0)
- BLOCKSIZE = 64*1024
- bytes_read = 0
- while True:
- data = f.read(BLOCKSIZE)
- if not data:
- break
- enckey_hasher.update(data)
- # TODO: setting progress in a non-yielding loop is kind of
- # pointless, but I'm anticipating (perhaps prematurely) the
- # day when we use a slowjob or twisted's CooperatorService to
- # make this yield time to other jobs.
- bytes_read += len(data)
- if self._status:
- self._status.set_progress(0, float(bytes_read)/self._size)
- f.seek(0)
- self._key = enckey_hasher.digest()
- if self._status:
- self._status.set_progress(0, 1.0)
- assert len(self._key) == 16
- return self._key
- d.addCallback(_got)
- return d
-
- def _get_encryption_key_random(self):
- if self._key is None:
- self._key = os.urandom(16)
- return defer.succeed(self._key)
-
- def get_encryption_key(self):
- if self.convergence is not None:
- return self._get_encryption_key_convergent()
- else:
- return self._get_encryption_key_random()
-
- def get_size(self):
- if self._size is not None:
- return defer.succeed(self._size)
- self._filehandle.seek(0,2)
- size = self._filehandle.tell()
- self._size = size
- self._filehandle.seek(0)
- return defer.succeed(size)
-
- def read(self, length):
- return defer.succeed([self._filehandle.read(length)])
-
- def close(self):
- # the originator of the filehandle reserves the right to close it
- pass
-
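
Because FileHandle derives the convergent key only from the plaintext, the encoding parameters, and the convergence string, two uploads of identical data with the same convergence secret yield the same key (and therefore the same storage index). A usage sketch, assuming these classes end up importable from allmydata.immutable.upload as the import changes at the end of this patch suggest:

    from StringIO import StringIO
    from allmydata.immutable.upload import FileHandle

    secret = "example convergence secret"   # placeholder value
    d1 = FileHandle(StringIO("hello world"), secret).get_encryption_key()
    d2 = FileHandle(StringIO("hello world"), secret).get_encryption_key()
    # both Deferreds fire with the same 16-byte AES key, so the uploads converge

    d3 = FileHandle(StringIO("hello world"), None).get_encryption_key()
    # convergence=None: an independent os.urandom(16) key, no deduplication
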
-class FileName(FileHandle):
- def __init__(self, filename, convergence):
- """
- Upload the data from the filename. If convergence is None then a
- random encryption key will be used, else the plaintext will be hashed,
- then the hash will be hashed together with the string in the
- "convergence" argument to form the encryption key.
- """
- assert convergence is None or isinstance(convergence, str), (convergence, type(convergence))
- FileHandle.__init__(self, open(filename, "rb"), convergence=convergence)
- def close(self):
- FileHandle.close(self)
- self._filehandle.close()
-
-class Data(FileHandle):
- def __init__(self, data, convergence):
- """
- Upload the data from the data argument. If convergence is None then a
- random encryption key will be used, else the plaintext will be hashed,
- then the hash will be hashed together with the string in the
- "convergence" argument to form the encryption key.
- """
- assert convergence is None or isinstance(convergence, str), (convergence, type(convergence))
- FileHandle.__init__(self, StringIO(data), convergence=convergence)
-
-class Uploader(service.MultiService):
- """I am a service that allows file uploading. I am a service-child of the
- Client.
- """
- implements(IUploader)
- name = "uploader"
- uploader_class = CHKUploader
- URI_LIT_SIZE_THRESHOLD = 55
- MAX_UPLOAD_STATUSES = 10
-
- def __init__(self, helper_furl=None, stats_provider=None):
- self._helper_furl = helper_furl
- self.stats_provider = stats_provider
- self._helper = None
- self._all_uploads = weakref.WeakKeyDictionary() # for debugging
- self._all_upload_statuses = weakref.WeakKeyDictionary()
- self._recent_upload_statuses = []
- service.MultiService.__init__(self)
-
- def startService(self):
- service.MultiService.startService(self)
- if self._helper_furl:
- self.parent.tub.connectTo(self._helper_furl,
- self._got_helper)
-
- def _got_helper(self, helper):
- self._helper = helper
- helper.notifyOnDisconnect(self._lost_helper)
- def _lost_helper(self):
- self._helper = None
-
- def get_helper_info(self):
- # return a tuple of (helper_furl_or_None, connected_bool)
- return (self._helper_furl, bool(self._helper))
-
- def upload(self, uploadable):
- # this returns the URI
- assert self.parent
- assert self.running
-
- uploadable = IUploadable(uploadable)
- d = uploadable.get_size()
- def _got_size(size):
- default_params = self.parent.get_encoding_parameters()
- precondition(isinstance(default_params, dict), default_params)
- precondition("max_segment_size" in default_params, default_params)
- uploadable.set_default_encoding_parameters(default_params)
-
- if self.stats_provider:
- self.stats_provider.count('uploader.files_uploaded', 1)
- self.stats_provider.count('uploader.bytes_uploaded', size)
-
- if size <= self.URI_LIT_SIZE_THRESHOLD:
- uploader = LiteralUploader(self.parent)
- elif self._helper:
- uploader = AssistedUploader(self._helper)
- else:
- uploader = self.uploader_class(self.parent)
- self._add_upload(uploader)
- return uploader.start(uploadable)
- d.addCallback(_got_size)
- def _done(res):
- uploadable.close()
- return res
- d.addBoth(_done)
- return d
-
- def _add_upload(self, uploader):
- s = uploader.get_upload_status()
- self._all_uploads[uploader] = None
- self._all_upload_statuses[s] = None
- self._recent_upload_statuses.append(s)
- while len(self._recent_upload_statuses) > self.MAX_UPLOAD_STATUSES:
- self._recent_upload_statuses.pop(0)
-
- def list_all_upload_statuses(self):
- for us in self._all_upload_statuses:
- yield us
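
Uploader.upload picks one of three front-ends purely from the file size and helper availability, using the class constants above; a condensed restatement of that decision (the string return values are stand-ins for the real uploader classes):

    URI_LIT_SIZE_THRESHOLD = 55   # bytes small enough to inline in a LIT URI

    def choose_uploader(size, helper_connected):
        if size <= URI_LIT_SIZE_THRESHOLD:
            return "LiteralUploader"    # data is embedded in the URI itself
        if helper_connected:
            return "AssistedUploader"   # ship ciphertext to the helper
        return "CHKUploader"            # encode and push shares ourselves

    assert choose_uploader(40, False) == "LiteralUploader"
    assert choose_uploader(10**6, True) == "AssistedUploader"
    assert choose_uploader(10**6, False) == "CHKUploader"
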
from nevow import url, rend
from nevow.inevow import IRequest
-from allmydata.upload import FileHandle
from allmydata.interfaces import IDownloadTarget, ExistingChildError
from allmydata.mutable.common import MODE_READ
+from allmydata.immutable.upload import FileHandle
from allmydata.util import log
from allmydata.web.common import text_plain, WebError, IClient, RenderMixin, \
from twisted.internet import defer
from nevow import rend, url, tags as T
from nevow.inevow import IRequest
-from allmydata.upload import FileHandle
+from allmydata.immutable.upload import FileHandle
from allmydata.web.common import IClient, getxmlfile, get_arg, boolean_of_arg
from allmydata.web import status