]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/immutable/encode.py
Fix up the behavior of #778, per reviewers' comments
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / immutable / encode.py
1 # -*- test-case-name: allmydata.test.test_encode -*-
2
3 import time
4 from zope.interface import implements
5 from twisted.internet import defer
6 from foolscap.api import fireEventually
7 from allmydata import uri
8 from allmydata.storage.server import si_b2a
9 from allmydata.hashtree import HashTree
10 from allmydata.util import mathutil, hashutil, base32, log, happinessutil
11 from allmydata.util.assertutil import _assert, precondition
12 from allmydata.codec import CRSEncoder
13 from allmydata.interfaces import IEncoder, IStorageBucketWriter, \
14      IEncryptedUploadable, IUploadStatus, UploadUnhappinessError
15
16
17 """
18 The goal of the encoder is to turn the original file into a series of
19 'shares'. Each share is going to a 'shareholder' (nominally each shareholder
20 is a different host, but for small grids there may be overlap). The number
21 of shares is chosen to hit our reliability goals (more shares on more
22 machines means more reliability), and is limited by overhead (proportional to
23 numshares or log(numshares)) and the encoding technology in use (zfec permits
24 only 256 shares total). It is also constrained by the amount of data
25 we want to send to each host. For estimating purposes, think of 10 shares
26 out of which we need 3 to reconstruct the file.
27
28 The encoder starts by cutting the original file into segments. All segments
29 except the last are of equal size. The segment size is chosen to constrain
30 the memory footprint (which will probably vary between 1x and 4x segment
31 size) and to constrain the overhead (which will be proportional to
32 log(number of segments)).
33
34
35 Each segment (A,B,C) is read into memory, encrypted, and encoded into
36 blocks. The 'share' (say, share #1) that makes it out to a host is a
37 collection of these blocks (block A1, B1, C1), plus some hash-tree
38 information necessary to validate the data upon retrieval. Only one segment
39 is handled at a time: all blocks for segment A are delivered before any
40 work is begun on segment B.
41
42 As blocks are created, we retain the hash of each one. The list of block hashes
43 for a single share (say, hash(A1), hash(B1), hash(C1)) is used to form the base
44 of a Merkle hash tree for that share, called the block hash tree.
45
46 This hash tree has one terminal leaf per block. The complete block hash
47 tree is sent to the shareholder after all the data has been sent. At
48 retrieval time, the decoder will ask for specific pieces of this tree before
49 asking for blocks, whichever it needs to validate those blocks.
50
51 (Note: we don't really need to generate this whole block hash tree
52 ourselves. It would be sufficient to have the shareholder generate it and
53 just tell us the root. This gives us an extra level of validation on the
54 transfer, though, and it is relatively cheap to compute.)
55
Each of these block hash trees has a root hash. The root hashes for all
shares are collected into the 'share hash tree', which
58 has one terminal leaf per share. After sending the blocks and the complete
59 block hash tree to each shareholder, we send them the portion of the share
60 hash tree that is necessary to validate their share. The root of the share
61 hash tree is put into the URI.
62
63 """
64
class UploadAborted(Exception):
    """Raised while gathering data once the upload has been aborted."""
67
# Binary size units: each one is 1024 times the previous one.
KiB = 1 << 10
MiB = KiB << 10
GiB = MiB << 10
TiB = GiB << 10
PiB = TiB << 10
73
74 class Encoder(object):
75     implements(IEncoder)
76
77     def __init__(self, log_parent=None, upload_status=None):
78         object.__init__(self)
79         self.uri_extension_data = {}
80         self._codec = None
81         self._status = None
82         if upload_status:
83             self._status = IUploadStatus(upload_status)
84         precondition(log_parent is None or isinstance(log_parent, int),
85                      log_parent)
86         self._log_number = log.msg("creating Encoder %s" % self,
87                                    facility="tahoe.encoder", parent=log_parent)
88         self._aborted = False
89
90     def __repr__(self):
91         if hasattr(self, "_storage_index"):
92             return "<Encoder for %s>" % si_b2a(self._storage_index)[:5]
93         return "<Encoder for unknown storage index>"
94
95     def log(self, *args, **kwargs):
96         if "parent" not in kwargs:
97             kwargs["parent"] = self._log_number
98         if "facility" not in kwargs:
99             kwargs["facility"] = "tahoe.encoder"
100         return log.msg(*args, **kwargs)
101
102     def set_encrypted_uploadable(self, uploadable):
103         eu = self._uploadable = IEncryptedUploadable(uploadable)
104         d = eu.get_size()
105         def _got_size(size):
106             self.log(format="file size: %(size)d", size=size)
107             self.file_size = size
108         d.addCallback(_got_size)
109         d.addCallback(lambda res: eu.get_all_encoding_parameters())
110         d.addCallback(self._got_all_encoding_parameters)
111         d.addCallback(lambda res: eu.get_storage_index())
112         def _done(storage_index):
113             self._storage_index = storage_index
114             return self
115         d.addCallback(_done)
116         return d
117
118     def _got_all_encoding_parameters(self, params):
119         assert not self._codec
120         k, happy, n, segsize = params
121         self.required_shares = k
122         self.servers_of_happiness = happy
123         self.num_shares = n
124         self.segment_size = segsize
125         self.log("got encoding parameters: %d/%d/%d %d" % (k,happy,n, segsize))
126         self.log("now setting up codec")
127
128         assert self.segment_size % self.required_shares == 0
129
130         self.num_segments = mathutil.div_ceil(self.file_size,
131                                               self.segment_size)
132
133         self._codec = CRSEncoder()
134         self._codec.set_params(self.segment_size,
135                                self.required_shares, self.num_shares)
136
137         data = self.uri_extension_data
138         data['codec_name'] = self._codec.get_encoder_type()
139         data['codec_params'] = self._codec.get_serialized_params()
140
141         data['size'] = self.file_size
142         data['segment_size'] = self.segment_size
143         self.share_size = mathutil.div_ceil(self.file_size,
144                                             self.required_shares)
145         data['num_segments'] = self.num_segments
146         data['needed_shares'] = self.required_shares
147         data['total_shares'] = self.num_shares
148
149         # the "tail" is the last segment. This segment may or may not be
150         # shorter than all other segments. We use the "tail codec" to handle
151         # it. If the tail is short, we use a different codec instance. In
152         # addition, the tail codec must be fed data which has been padded out
153         # to the right size.
154         tail_size = self.file_size % self.segment_size
155         if not tail_size:
156             tail_size = self.segment_size
157
158         # the tail codec is responsible for encoding tail_size bytes
159         padded_tail_size = mathutil.next_multiple(tail_size,
160                                                   self.required_shares)
161         self._tail_codec = CRSEncoder()
162         self._tail_codec.set_params(padded_tail_size,
163                                     self.required_shares, self.num_shares)
164         data['tail_codec_params'] = self._tail_codec.get_serialized_params()
165
166     def _get_share_size(self):
167         share_size = mathutil.div_ceil(self.file_size, self.required_shares)
168         overhead = self._compute_overhead()
169         return share_size + overhead
170
171     def _compute_overhead(self):
172         return 0
173
174     def get_param(self, name):
175         assert self._codec
176
177         if name == "storage_index":
178             return self._storage_index
179         elif name == "share_counts":
180             return (self.required_shares, self.servers_of_happiness,
181                     self.num_shares)
182         elif name == "num_segments":
183             return self.num_segments
184         elif name == "segment_size":
185             return self.segment_size
186         elif name == "block_size":
187             return self._codec.get_block_size()
188         elif name == "share_size":
189             return self._get_share_size()
190         elif name == "serialized_params":
191             return self._codec.get_serialized_params()
192         else:
193             raise KeyError("unknown parameter name '%s'" % name)
194
195     def set_shareholders(self, landlords, servermap):
196         assert isinstance(landlords, dict)
197         for k in landlords:
198             assert IStorageBucketWriter.providedBy(landlords[k])
199         self.landlords = landlords.copy()
200         assert isinstance(servermap, dict)
201         for v in servermap.itervalues():
202             assert isinstance(v, set)
203         self.servermap = servermap.copy()
204
205     def start(self):
206         """ Returns a Deferred that will fire with the verify cap (an instance of
207         uri.CHKFileVerifierURI)."""
208         self.log("%s starting" % (self,))
209         #paddedsize = self._size + mathutil.pad_size(self._size, self.needed_shares)
210         assert self._codec
211         self._crypttext_hasher = hashutil.crypttext_hasher()
212         self._crypttext_hashes = []
213         self.segment_num = 0
214         self.block_hashes = [[] for x in range(self.num_shares)]
215         # block_hashes[i] is a list that will be accumulated and then send
216         # to landlord[i]. This list contains a hash of each segment_share
217         # that we sent to that landlord.
218         self.share_root_hashes = [None] * self.num_shares
219
220         self._times = {
221             "cumulative_encoding": 0.0,
222             "cumulative_sending": 0.0,
223             "hashes_and_close": 0.0,
224             "total_encode_and_push": 0.0,
225             }
226         self._start_total_timestamp = time.time()
227
228         d = fireEventually()
229
230         d.addCallback(lambda res: self.start_all_shareholders())
231
232         for i in range(self.num_segments-1):
233             # note to self: this form doesn't work, because lambda only
234             # captures the slot, not the value
235             #d.addCallback(lambda res: self.do_segment(i))
236             # use this form instead:
237             d.addCallback(lambda res, i=i: self._encode_segment(i))
238             d.addCallback(self._send_segment, i)
239             d.addCallback(self._turn_barrier)
240         last_segnum = self.num_segments - 1
241         d.addCallback(lambda res: self._encode_tail_segment(last_segnum))
242         d.addCallback(self._send_segment, last_segnum)
243         d.addCallback(self._turn_barrier)
244
245         d.addCallback(lambda res: self.finish_hashing())
246
247         d.addCallback(lambda res:
248                       self.send_crypttext_hash_tree_to_all_shareholders())
249         d.addCallback(lambda res: self.send_all_block_hash_trees())
250         d.addCallback(lambda res: self.send_all_share_hash_trees())
251         d.addCallback(lambda res: self.send_uri_extension_to_all_shareholders())
252
253         d.addCallback(lambda res: self.close_all_shareholders())
254         d.addCallbacks(self.done, self.err)
255         return d
256
257     def set_status(self, status):
258         if self._status:
259             self._status.set_status(status)
260
261     def set_encode_and_push_progress(self, sent_segments=None, extra=0.0):
262         if self._status:
263             # we treat the final hash+close as an extra segment
264             if sent_segments is None:
265                 sent_segments = self.num_segments
266             progress = float(sent_segments + extra) / (self.num_segments + 1)
267             self._status.set_progress(2, progress)
268
269     def abort(self):
270         self.log("aborting upload", level=log.UNUSUAL)
271         assert self._codec, "don't call abort before start"
272         self._aborted = True
273         # the next segment read (in _gather_data inside _encode_segment) will
274         # raise UploadAborted(), which will bypass the rest of the upload
275         # chain. If we've sent the final segment's shares, it's too late to
276         # abort. TODO: allow abort any time up to close_all_shareholders.
277
278     def _turn_barrier(self, res):
279         # putting this method in a Deferred chain imposes a guaranteed
280         # reactor turn between the pre- and post- portions of that chain.
281         # This can be useful to limit memory consumption: since Deferreds do
282         # not do tail recursion, code which uses defer.succeed(result) for
283         # consistency will cause objects to live for longer than you might
284         # normally expect.
285
286         return fireEventually(res)
287
288
289     def start_all_shareholders(self):
290         self.log("starting shareholders", level=log.NOISY)
291         self.set_status("Starting shareholders")
292         dl = []
293         for shareid in list(self.landlords):
294             d = self.landlords[shareid].put_header()
295             d.addErrback(self._remove_shareholder, shareid, "start")
296             dl.append(d)
297         return self._gather_responses(dl)
298
299     def _encode_segment(self, segnum):
300         codec = self._codec
301         start = time.time()
302
303         # the ICodecEncoder API wants to receive a total of self.segment_size
304         # bytes on each encode() call, broken up into a number of
305         # identically-sized pieces. Due to the way the codec algorithm works,
306         # these pieces need to be the same size as the share which the codec
307         # will generate. Therefore we must feed it with input_piece_size that
308         # equals the output share size.
309         input_piece_size = codec.get_block_size()
310
311         # as a result, the number of input pieces per encode() call will be
312         # equal to the number of required shares with which the codec was
313         # constructed. You can think of the codec as chopping up a
314         # 'segment_size' of data into 'required_shares' shares (not doing any
315         # fancy math at all, just doing a split), then creating some number
316         # of additional shares which can be substituted if the primary ones
317         # are unavailable
318
319         crypttext_segment_hasher = hashutil.crypttext_segment_hasher()
320
321         # memory footprint: we only hold a tiny piece of the plaintext at any
322         # given time. We build up a segment's worth of cryptttext, then hand
323         # it to the encoder. Assuming 3-of-10 encoding (3.3x expansion) and
324         # 1MiB max_segment_size, we get a peak memory footprint of 4.3*1MiB =
325         # 4.3MiB. Lowering max_segment_size to, say, 100KiB would drop the
326         # footprint to 430KiB at the expense of more hash-tree overhead.
327
328         d = self._gather_data(self.required_shares, input_piece_size,
329                               crypttext_segment_hasher)
330         def _done_gathering(chunks):
331             for c in chunks:
332                 assert len(c) == input_piece_size
333             self._crypttext_hashes.append(crypttext_segment_hasher.digest())
334             # during this call, we hit 5*segsize memory
335             return codec.encode(chunks)
336         d.addCallback(_done_gathering)
337         def _done(res):
338             elapsed = time.time() - start
339             self._times["cumulative_encoding"] += elapsed
340             return res
341         d.addCallback(_done)
342         return d
343
344     def _encode_tail_segment(self, segnum):
345
346         start = time.time()
347         codec = self._tail_codec
348         input_piece_size = codec.get_block_size()
349
350         crypttext_segment_hasher = hashutil.crypttext_segment_hasher()
351
352         d = self._gather_data(self.required_shares, input_piece_size,
353                               crypttext_segment_hasher,
354                               allow_short=True)
355         def _done_gathering(chunks):
356             for c in chunks:
357                 # a short trailing chunk will have been padded by
358                 # _gather_data
359                 assert len(c) == input_piece_size
360             self._crypttext_hashes.append(crypttext_segment_hasher.digest())
361             return codec.encode(chunks)
362         d.addCallback(_done_gathering)
363         def _done(res):
364             elapsed = time.time() - start
365             self._times["cumulative_encoding"] += elapsed
366             return res
367         d.addCallback(_done)
368         return d
369
370     def _gather_data(self, num_chunks, input_chunk_size,
371                      crypttext_segment_hasher,
372                      allow_short=False,
373                      previous_chunks=[]):
374         """Return a Deferred that will fire when the required number of
375         chunks have been read (and hashed and encrypted). The Deferred fires
376         with the combination of any 'previous_chunks' and the new chunks
377         which were gathered."""
378
379         if self._aborted:
380             raise UploadAborted()
381
382         if not num_chunks:
383             return defer.succeed(previous_chunks)
384
385         d = self._uploadable.read_encrypted(input_chunk_size, False)
386         def _got(data):
387             if self._aborted:
388                 raise UploadAborted()
389             encrypted_pieces = []
390             length = 0
391             while data:
392                 encrypted_piece = data.pop(0)
393                 length += len(encrypted_piece)
394                 crypttext_segment_hasher.update(encrypted_piece)
395                 self._crypttext_hasher.update(encrypted_piece)
396                 encrypted_pieces.append(encrypted_piece)
397
398             precondition(length <= input_chunk_size,
399                          "length=%d > input_chunk_size=%d" %
400                          (length, input_chunk_size))
401             if allow_short:
402                 if length < input_chunk_size:
403                     # padding
404                     pad_size = input_chunk_size - length
405                     encrypted_pieces.append('\x00' * pad_size)
406             else:
407                 # non-tail segments should be the full segment size
408                 if length != input_chunk_size:
409                     log.msg("non-tail segment should be full segment size: %d!=%d"
410                             % (length, input_chunk_size),
411                             level=log.BAD, umid="jNk5Yw")
412                 precondition(length == input_chunk_size,
413                              "length=%d != input_chunk_size=%d" %
414                              (length, input_chunk_size))
415
416             encrypted_piece = "".join(encrypted_pieces)
417             return previous_chunks + [encrypted_piece]
418
419         d.addCallback(_got)
420         d.addCallback(lambda chunks:
421                       self._gather_data(num_chunks-1, input_chunk_size,
422                                         crypttext_segment_hasher,
423                                         allow_short, chunks))
424         return d
425
426     def _send_segment(self, (shares, shareids), segnum):
427         # To generate the URI, we must generate the roothash, so we must
428         # generate all shares, even if we aren't actually giving them to
429         # anybody. This means that the set of shares we create will be equal
430         # to or larger than the set of landlords. If we have any landlord who
431         # *doesn't* have a share, that's an error.
432         _assert(set(self.landlords.keys()).issubset(set(shareids)),
433                 shareids=shareids, landlords=self.landlords)
434         start = time.time()
435         dl = []
436         self.set_status("Sending segment %d of %d" % (segnum+1,
437                                                       self.num_segments))
438         self.set_encode_and_push_progress(segnum)
439         lognum = self.log("send_segment(%d)" % segnum, level=log.NOISY)
440         for i in range(len(shares)):
441             block = shares[i]
442             shareid = shareids[i]
443             d = self.send_block(shareid, segnum, block, lognum)
444             dl.append(d)
445             block_hash = hashutil.block_hash(block)
446             #from allmydata.util import base32
447             #log.msg("creating block (shareid=%d, blocknum=%d) "
448             #        "len=%d %r .. %r: %s" %
449             #        (shareid, segnum, len(block),
450             #         block[:50], block[-50:], base32.b2a(block_hash)))
451             self.block_hashes[shareid].append(block_hash)
452
453         dl = self._gather_responses(dl)
454         def _logit(res):
455             self.log("%s uploaded %s / %s bytes (%d%%) of your file." %
456                      (self,
457                       self.segment_size*(segnum+1),
458                       self.segment_size*self.num_segments,
459                       100 * (segnum+1) / self.num_segments,
460                       ),
461                      level=log.OPERATIONAL)
462             elapsed = time.time() - start
463             self._times["cumulative_sending"] += elapsed
464             return res
465         dl.addCallback(_logit)
466         return dl
467
468     def send_block(self, shareid, segment_num, block, lognum):
469         if shareid not in self.landlords:
470             return defer.succeed(None)
471         sh = self.landlords[shareid]
472         lognum2 = self.log("put_block to %s" % self.landlords[shareid],
473                            parent=lognum, level=log.NOISY)
474         d = sh.put_block(segment_num, block)
475         def _done(res):
476             self.log("put_block done", parent=lognum2, level=log.NOISY)
477             return res
478         d.addCallback(_done)
479         d.addErrback(self._remove_shareholder, shareid,
480                      "segnum=%d" % segment_num)
481         return d
482
483     def _remove_shareholder(self, why, shareid, where):
484         ln = self.log(format="error while sending %(method)s to shareholder=%(shnum)d",
485                       method=where, shnum=shareid,
486                       level=log.UNUSUAL, failure=why)
487         if shareid in self.landlords:
488             self.landlords[shareid].abort()
489             peerid = self.landlords[shareid].get_peerid()
490             assert peerid
491             del self.landlords[shareid]
492             self.servermap[shareid].remove(peerid)
493             if not self.servermap[shareid]:
494                 del self.servermap[shareid]
495         else:
496             # even more UNUSUAL
497             self.log("they weren't in our list of landlords", parent=ln,
498                      level=log.WEIRD, umid="TQGFRw")
499         happiness = happinessutil.servers_of_happiness(self.servermap)
500         if happiness < self.servers_of_happiness:
501             peerids = set(happinessutil.shares_by_server(self.servermap).keys())
502             msg = happinessutil.failure_message(len(peerids),
503                                                 self.required_shares,
504                                                 self.servers_of_happiness,
505                                                 happiness)
506             msg = "%s: %s" % (msg, why)
507             raise UploadUnhappinessError(msg)
508         self.log("but we can still continue with %s shares, we'll be happy "
509                  "with at least %s" % (happiness,
510                                        self.servers_of_happiness),
511                  parent=ln)
512
513     def _gather_responses(self, dl):
514         d = defer.DeferredList(dl, fireOnOneErrback=True)
515         def _eatUploadUnhappinessError(f):
516             # all exceptions that occur while talking to a peer are handled
517             # in _remove_shareholder. That might raise UploadUnhappinessError,
518             # which will cause the DeferredList to errback but which should
519             # otherwise be consumed. Allow non-UploadUnhappinessError exceptions
520             # to pass through as an unhandled errback. We use this in lieu of
521             # consumeErrors=True to allow coding errors to be logged.
522             f.trap(UploadUnhappinessError)
523             return None
524         for d0 in dl:
525             d0.addErrback(_eatUploadUnhappinessError)
526         return d
527
528     def finish_hashing(self):
529         self._start_hashing_and_close_timestamp = time.time()
530         self.set_status("Finishing hashes")
531         self.set_encode_and_push_progress(extra=0.0)
532         crypttext_hash = self._crypttext_hasher.digest()
533         self.uri_extension_data["crypttext_hash"] = crypttext_hash
534         self._uploadable.close()
535
536     def send_crypttext_hash_tree_to_all_shareholders(self):
537         self.log("sending crypttext hash tree", level=log.NOISY)
538         self.set_status("Sending Crypttext Hash Tree")
539         self.set_encode_and_push_progress(extra=0.3)
540         t = HashTree(self._crypttext_hashes)
541         all_hashes = list(t)
542         self.uri_extension_data["crypttext_root_hash"] = t[0]
543         dl = []
544         for shareid in list(self.landlords):
545             dl.append(self.send_crypttext_hash_tree(shareid, all_hashes))
546         return self._gather_responses(dl)
547
548     def send_crypttext_hash_tree(self, shareid, all_hashes):
549         if shareid not in self.landlords:
550             return defer.succeed(None)
551         sh = self.landlords[shareid]
552         d = sh.put_crypttext_hashes(all_hashes)
553         d.addErrback(self._remove_shareholder, shareid, "put_crypttext_hashes")
554         return d
555
556     def send_all_block_hash_trees(self):
557         self.log("sending block hash trees", level=log.NOISY)
558         self.set_status("Sending Subshare Hash Trees")
559         self.set_encode_and_push_progress(extra=0.4)
560         dl = []
561         for shareid,hashes in enumerate(self.block_hashes):
562             # hashes is a list of the hashes of all blocks that were sent
563             # to shareholder[shareid].
564             dl.append(self.send_one_block_hash_tree(shareid, hashes))
565         return self._gather_responses(dl)
566
567     def send_one_block_hash_tree(self, shareid, block_hashes):
568         t = HashTree(block_hashes)
569         all_hashes = list(t)
570         # all_hashes[0] is the root hash, == hash(ah[1]+ah[2])
571         # all_hashes[1] is the left child, == hash(ah[3]+ah[4])
572         # all_hashes[n] == hash(all_hashes[2*n+1] + all_hashes[2*n+2])
573         self.share_root_hashes[shareid] = t[0]
574         if shareid not in self.landlords:
575             return defer.succeed(None)
576         sh = self.landlords[shareid]
577         d = sh.put_block_hashes(all_hashes)
578         d.addErrback(self._remove_shareholder, shareid, "put_block_hashes")
579         return d
580
581     def send_all_share_hash_trees(self):
582         # Each bucket gets a set of share hash tree nodes that are needed to validate their
583         # share. This includes the share hash itself, but does not include the top-level hash
584         # root (which is stored securely in the URI instead).
585         self.log("sending all share hash trees", level=log.NOISY)
586         self.set_status("Sending Share Hash Trees")
587         self.set_encode_and_push_progress(extra=0.6)
588         dl = []
589         for h in self.share_root_hashes:
590             assert h
591         # create the share hash tree
592         t = HashTree(self.share_root_hashes)
593         # the root of this hash tree goes into our URI
594         self.uri_extension_data['share_root_hash'] = t[0]
595         # now send just the necessary pieces out to each shareholder
596         for i in range(self.num_shares):
597             # the HashTree is given a list of leaves: 0,1,2,3..n .
598             # These become nodes A+0,A+1,A+2.. of the tree, where A=n-1
599             needed_hash_indices = t.needed_hashes(i, include_leaf=True)
600             hashes = [(hi, t[hi]) for hi in needed_hash_indices]
601             dl.append(self.send_one_share_hash_tree(i, hashes))
602         return self._gather_responses(dl)
603
604     def send_one_share_hash_tree(self, shareid, needed_hashes):
605         if shareid not in self.landlords:
606             return defer.succeed(None)
607         sh = self.landlords[shareid]
608         d = sh.put_share_hashes(needed_hashes)
609         d.addErrback(self._remove_shareholder, shareid, "put_share_hashes")
610         return d
611
612     def send_uri_extension_to_all_shareholders(self):
613         lp = self.log("sending uri_extension", level=log.NOISY)
614         self.set_status("Sending URI Extensions")
615         self.set_encode_and_push_progress(extra=0.8)
616         for k in ('crypttext_root_hash', 'crypttext_hash',
617                   ):
618             assert k in self.uri_extension_data
619         uri_extension = uri.pack_extension(self.uri_extension_data)
620         ed = {}
621         for k,v in self.uri_extension_data.items():
622             if k.endswith("hash"):
623                 ed[k] = base32.b2a(v)
624             else:
625                 ed[k] = v
626         self.log("uri_extension_data is %s" % (ed,), level=log.NOISY, parent=lp)
627         self.uri_extension_hash = hashutil.uri_extension_hash(uri_extension)
628         dl = []
629         for shareid in list(self.landlords):
630             dl.append(self.send_uri_extension(shareid, uri_extension))
631         return self._gather_responses(dl)
632
633     def send_uri_extension(self, shareid, uri_extension):
634         sh = self.landlords[shareid]
635         d = sh.put_uri_extension(uri_extension)
636         d.addErrback(self._remove_shareholder, shareid, "put_uri_extension")
637         return d
638
639     def close_all_shareholders(self):
640         self.log("closing shareholders", level=log.NOISY)
641         self.set_status("Closing Shareholders")
642         self.set_encode_and_push_progress(extra=0.9)
643         dl = []
644         for shareid in list(self.landlords):
645             d = self.landlords[shareid].close()
646             d.addErrback(self._remove_shareholder, shareid, "close")
647             dl.append(d)
648         return self._gather_responses(dl)
649
650     def done(self, res):
651         self.log("upload done", level=log.OPERATIONAL)
652         self.set_status("Finished")
653         self.set_encode_and_push_progress(extra=1.0) # done
654         now = time.time()
655         h_and_c_elapsed = now - self._start_hashing_and_close_timestamp
656         self._times["hashes_and_close"] = h_and_c_elapsed
657         total_elapsed = now - self._start_total_timestamp
658         self._times["total_encode_and_push"] = total_elapsed
659
660         # update our sharemap
661         self._shares_placed = set(self.landlords.keys())
662         return uri.CHKFileVerifierURI(self._storage_index, self.uri_extension_hash,
663                                       self.required_shares, self.num_shares, self.file_size)
664
665     def err(self, f):
666         self.log("upload failed", failure=f, level=log.UNUSUAL)
667         self.set_status("Failed")
668         # we need to abort any remaining shareholders, so they'll delete the
669         # partial share, allowing someone else to upload it again.
670         self.log("aborting shareholders", level=log.UNUSUAL)
671         for shareid in list(self.landlords):
672             self.landlords[shareid].abort()
673         if f.check(defer.FirstError):
674             return f.value.subFailure
675         return f
676
677     def get_shares_placed(self):
678         # return a set of share numbers that were successfully placed.
679         return self._shares_placed
680
681     def get_times(self):
682         # return a dictionary of encode+push timings
683         return self._times
684
685     def get_uri_extension_data(self):
686         return self.uri_extension_data