]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/mutable/layout.py
mutable: fix shape of 'verinfo' tuple returned from MDMFSlotWriteProxy.get_verinfo...
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / mutable / layout.py
1
2 import struct
3 from allmydata.mutable.common import NeedMoreDataError, UnknownVersionError, \
4      BadShareError
5 from allmydata.interfaces import HASH_SIZE, SALT_SIZE, SDMF_VERSION, \
6                                  MDMF_VERSION, IMutableSlotWriter
7 from allmydata.util import mathutil
8 from twisted.python import failure
9 from twisted.internet import defer
10 from zope.interface import implements
11
12
13 # These strings describe the format of the packed structs they help process.
14 # Here's what they mean:
15 #
16 #  PREFIX:
17 #    >: Big-endian byte order; the most significant byte is first (leftmost).
18 #    B: The container version information; stored as an unsigned 8-bit integer.
19 #       This is currently either SDMF_VERSION or MDMF_VERSION.
20 #    Q: The sequence number; this is sort of like a revision history for
21 #       mutable files; they start at 1 and increase as they are changed after
22 #       being uploaded. Stored as an unsigned 64-bit integer.
23 #  32s: The root hash of the share hash tree. We use sha-256d, so we use 32 
24 #       bytes to store the value.
25 #  16s: The salt for the readkey. This is a 16-byte random value.
26 #
27 #  SIGNED_PREFIX additions, things that are covered by the signature:
28 #    B: The "k" encoding parameter. We store this as an unsigned 8-bit
29 #       integer, since our erasure coding scheme cannot encode to more than
30 #       255 pieces.
31 #    B: The "N" encoding parameter. Stored as an unsigned 8-bit integer for
32 #       the same reason as above.
33 #    Q: The segment size of the uploaded file. This is an unsigned 64-bit
34 #       integer, to allow handling large segments and files. For SDMF the
35 #       segment size is the data length plus padding; for MDMF it can be
36 #       smaller.
37 #    Q: The data length of the uploaded file. Like the segment size field,
38 #       it is an unsigned 64-bit integer.
39 #
40 #   HEADER additions:
41 #     L: The offset of the signature. An unsigned 32-bit integer.
42 #     L: The offset of the share hash chain. An unsigned 32-bit integer.
43 #     L: The offset of the block hash tree. An unsigned 32-bit integer.
44 #     L: The offset of the share data. An unsigned 32-bit integer.
45 #     Q: The offset of the encrypted private key. An unsigned 64-bit integer,
46 #        to account for the possibility of a lot of share data.
47 #     Q: The offset of the EOF. An unsigned 64-bit integer, to account for
48 #        the possibility of a lot of share data.
49
#  After all of these, we have the following:
#    - The verification key: Occupies the space between the end of the header
#      and the start of the signature (i.e.: data[HEADER_LENGTH:o['signature']]).
#    - The signature, which goes from the signature offset to the share hash
#      chain offset.
#    - The share hash chain, which goes from the share hash chain offset to
#      the block hash tree offset.
#    - The block hash tree, which goes from the block hash tree offset to
#      the share data offset.
#    - The share data, which goes from the share data offset to the encrypted
#      private key offset.
#    - The encrypted private key, which goes from the encrypted private key
#      offset until the end of the file.
60
#  The block hash tree in this encoding holds a single 32-byte hash (SDMF
#  files have only one segment), so the offset of the share data will be 32
#  bytes more than the offset of the block hash tree. Given this, we may need
#  to check to see how many bytes a reasonably sized block hash tree will
#  take up.
65
# struct format strings for SDMF shares. Each longer format is built from
# the shorter one it extends, so the three layouts cannot drift apart.
PREFIX = ">BQ32s16s" # each version may have a different prefix
SIGNED_PREFIX = PREFIX + " BBQQ" # this is covered by the signature
OFFSETS = ">LLLLQQ"
# The header is the signed prefix followed by the offset table; drop the
# byte-order marker from OFFSETS before appending it.
HEADER = SIGNED_PREFIX + " " + OFFSETS[1:] # includes offsets

# Packed sizes, in bytes, of each of the layouts above.
SIGNED_PREFIX_LENGTH = struct.calcsize(SIGNED_PREFIX)
HEADER_LENGTH = struct.calcsize(HEADER)
OFFSETS_LENGTH = struct.calcsize(OFFSETS)
73
74
75 # These are still used for some tests of SDMF files.
def unpack_header(data):
    """
    Parse the fixed-size header at the start of a share.

    Only the first HEADER_LENGTH bytes of 'data' are examined. I return
    the 9-tuple (version, seqnum, root_hash, IV, k, N, segsize, datalen,
    o), where 'o' is a dict mapping each section name ('signature',
    'share_hash_chain', 'block_hash_tree', 'share_data', 'enc_privkey',
    'EOF') to its offset within the share.
    """
    fields = struct.unpack(HEADER, data[:HEADER_LENGTH])
    # The first eight fields are the signed prefix; the remaining six
    # are the offset table, in the order listed below.
    (version, seqnum, root_hash, IV, k, N, segsize, datalen) = fields[:8]
    offset_names = ('signature',
                    'share_hash_chain',
                    'block_hash_tree',
                    'share_data',
                    'enc_privkey',
                    'EOF')
    o = dict(zip(offset_names, fields[8:]))
    return (version, seqnum, root_hash, IV, k, N, segsize, datalen, o)
90
def unpack_share(data):
    """
    Split a complete version-0 share into its constituent pieces.

    I return the 13-tuple (seqnum, root_hash, IV, k, N, segsize,
    datalen, pubkey, signature, share_hash_chain, block_hash_tree,
    share_data, enc_privkey). share_hash_chain is a dict mapping the
    16-bit number stored with each entry to its 32-byte hash, and
    block_hash_tree is a list of 32-byte hashes.

    I raise UnknownVersionError if the version byte is not 0,
    NeedMoreDataError if 'data' is shorter than the EOF offset recorded
    in the header, and BadShareError if either hash section is not a
    whole number of entries.
    """
    assert len(data) >= HEADER_LENGTH
    header_fields = struct.unpack(HEADER, data[:HEADER_LENGTH])
    (version, seqnum, root_hash, IV,
     k, N, segsize, datalen) = header_fields[:8]
    o = dict(zip(('signature',
                  'share_hash_chain',
                  'block_hash_tree',
                  'share_data',
                  'enc_privkey',
                  'EOF'),
                 header_fields[8:]))

    if version != 0:
        raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version)

    eof = o['EOF']
    if len(data) < eof:
        raise NeedMoreDataError(eof,
                                o['enc_privkey'], eof - o['enc_privkey'])

    def section(start_name, end_name):
        # Everything between two adjacent offsets in the offset table.
        return data[o[start_name]:o[end_name]]

    # The verification key has no offset of its own: it starts right
    # after the header.
    pubkey = data[HEADER_LENGTH:o['signature']]
    signature = section('signature', 'share_hash_chain')

    # The share hash chain is a run of (number, hash) records.
    raw_chain = section('share_hash_chain', 'block_hash_tree')
    entry_format = ">H32s"
    entry_size = struct.calcsize(entry_format)
    if len(raw_chain) % entry_size != 0:
        raise BadShareError("hash chain is %d bytes, not multiple of %d"
                            % (len(raw_chain), entry_size))
    share_hash_chain = dict(
        struct.unpack(entry_format, raw_chain[start:start+entry_size])
        for start in range(0, len(raw_chain), entry_size))

    # The block hash tree is a run of bare 32-byte hashes.
    raw_tree = section('block_hash_tree', 'share_data')
    if len(raw_tree) % 32 != 0:
        raise BadShareError("block_hash_tree is %d bytes, not multiple of %d"
                            % (len(raw_tree), 32))
    block_hash_tree = [raw_tree[start:start+32]
                       for start in range(0, len(raw_tree), 32)]

    share_data = section('share_data', 'enc_privkey')
    enc_privkey = section('enc_privkey', 'EOF')

    return (seqnum, root_hash, IV, k, N, segsize, datalen,
            pubkey, signature, share_hash_chain, block_hash_tree,
            share_data, enc_privkey)
141
def get_version_from_checkstring(checkstring):
    """
    Return the version number stored in the first byte of 'checkstring',
    as an integer.
    """
    # struct.unpack always returns a tuple; we want its only element.
    return struct.unpack(">B", checkstring[:1])[0]
145
def unpack_sdmf_checkstring(checkstring):
    """
    Unpack an SDMF checkstring into (seqnum, root_hash, IV).

    Only the leading PREFIX-sized portion of 'checkstring' is read. I
    assert that the version byte is SDMF_VERSION.
    """
    prefix_size = struct.calcsize(PREFIX)
    fields = struct.unpack(PREFIX, checkstring[:prefix_size])
    version = fields[0]
    assert version == SDMF_VERSION, version
    (seqnum, root_hash, IV) = fields[1:]
    return (seqnum, root_hash, IV)
151
def unpack_mdmf_checkstring(checkstring):
    """
    Unpack an MDMF checkstring into (seqnum, root_hash).

    Only the leading MDMFCHECKSTRING-sized portion of 'checkstring' is
    read. I assert that the version byte is MDMF_VERSION.
    """
    checkstring_size = struct.calcsize(MDMFCHECKSTRING)
    fields = struct.unpack(MDMFCHECKSTRING, checkstring[:checkstring_size])
    version = fields[0]
    assert version == MDMF_VERSION, version
    (seqnum, root_hash) = fields[1:]
    return (seqnum, root_hash)
157
def pack_offsets(verification_key_length, signature_length,
                 share_hash_chain_length, block_hash_tree_length,
                 share_data_length, encprivkey_length):
    """
    Build the packed offset table for a share whose sections have the
    given lengths.

    The sections are laid out back to back, starting immediately after
    the header, in the order: verification key, signature, share hash
    chain, block hash tree, share data, encrypted private key. The
    table records where each section after the verification key starts
    ('signature', 'share_hash_chain', 'block_hash_tree', 'share_data',
    'enc_privkey'), plus 'EOF', which is where the encrypted private
    key ends.
    """
    section_lengths = (verification_key_length,
                       signature_length,
                       share_hash_chain_length,
                       block_hash_tree_length,
                       share_data_length,
                       encprivkey_length)
    # Each table entry is the end of one section, which is also the
    # start of the next; keep a running position to compute them.
    position = HEADER_LENGTH
    boundaries = []
    for length in section_lengths:
        position += length
        boundaries.append(position)

    return struct.pack(">LLLLQQ", *boundaries)
177
def pack_share(prefix, verification_key, signature,
               share_hash_chain, block_hash_tree,
               share_data, encprivkey):
    """
    Assemble a complete share string from its pieces.

    'prefix' is the already-packed signed prefix. 'share_hash_chain' is
    a dict mapping numbers to 32-byte hashes; it is serialized in
    ascending key order. 'block_hash_tree' is a sequence of 32-byte
    hashes. The remaining arguments are strings that are stored as
    given. I return the prefix, then the offset table, then the
    sections in on-disk order, joined into one string.
    """
    chain_entries = []
    for node in sorted(share_hash_chain.keys()):
        chain_entries.append(struct.pack(">H32s", node, share_hash_chain[node]))
    share_hash_chain_s = "".join(chain_entries)

    for h in block_hash_tree:
        assert len(h) == 32
    block_hash_tree_s = "".join(block_hash_tree)

    # The sections in the order they appear in the share; the offset
    # table is computed from their lengths in this same order.
    sections = [verification_key,
                signature,
                share_hash_chain_s,
                block_hash_tree_s,
                share_data,
                encprivkey]
    offsets = pack_offsets(*[len(piece) for piece in sections])
    return "".join([prefix, offsets] + sections)
202
def pack_prefix(seqnum, root_hash, IV,
                required_shares, total_shares,
                segment_size, data_length):
    """
    Pack the signed prefix of a share: a version byte of 0 followed by
    the sequence number, root hash, IV, the two encoding parameters,
    the segment size and the data length, in SIGNED_PREFIX format.
    """
    version = 0
    fields = (version,
              seqnum,
              root_hash,
              IV,
              required_shares,
              total_shares,
              segment_size,
              data_length)
    return struct.pack(SIGNED_PREFIX, *fields)
217
218
class SDMFSlotWriteProxy:
    """
    I represent a remote write slot for an SDMF mutable file. I build a
    share in memory, and then write it in one piece to the remote
    server. This mimics how SDMF shares were built before MDMF (and the
    new MDMF uploader), but provides that functionality in a way that
    allows the MDMF uploader to be built without much special-casing for
    file format, which makes the uploader code more readable.
    """
    # The docstring has to be the first statement in the class body to
    # be picked up as __doc__, so the interface declaration follows it.
    implements(IMutableSlotWriter)

    def __init__(self,
                 shnum,
                 rref, # a remote reference to a storage server
                 storage_index,
                 secrets, # (write_enabler, renew_secret, cancel_secret)
                 seqnum, # the sequence number of the mutable file
                 required_shares,
                 total_shares,
                 segment_size,
                 data_length): # the length of the original file
        """
        Remember the parameters of the share that I will build.

        Nothing is sent to the server here. I assert that segment_size
        is data_length rounded up to a multiple of required_shares.
        """
        self.shnum = shnum
        self._rref = rref
        self._storage_index = storage_index
        self._secrets = secrets
        self._seqnum = seqnum
        self._required_shares = required_shares
        self._total_shares = total_shares
        self._segment_size = segment_size
        self._data_length = data_length

        # This is an SDMF file, so it should have only one segment, so,
        # modulo padding of the data length, the segment size and the
        # data length should be the same.
        expected_segment_size = mathutil.next_multiple(data_length,
                                                       self._required_shares)
        assert expected_segment_size == segment_size

        # Explicit floor division: the segment size is a multiple of
        # required_shares (asserted above), and the result is compared
        # against len(data) in put_block, so it must stay an integer
        # even where "/" means true division.
        self._block_size = self._segment_size // self._required_shares

        # This is meant to mimic how SDMF files were built before MDMF
        # entered the picture: we generate each share in its entirety,
        # then push it off to the storage server in one write. When
        # callers call set_*, they are just populating this dict.
        # finish_publishing will stitch these pieces together into a
        # coherent share, and then write the coherent share to the
        # storage server.
        self._share_pieces = {}

        # This tells the write logic what checkstring to use when
        # writing remote shares.
        self._testvs = []

        # The read vector sent along with the write: the first
        # PREFIX-sized bytes of the share.
        self._readvs = [(0, struct.calcsize(PREFIX))]


    def set_checkstring(self, checkstring_or_seqnum,
                              root_hash=None,
                              salt=None):
        """
        Set the checkstring that I will pass to the remote server when
        writing.

            @param checkstring_or_seqnum: A packed checkstring to use,
                   or a sequence number. I will treat this as a
                   checkstring unless root_hash and salt are both given
                   (and non-empty), in which case I treat it as a
                   sequence number and pack it, together with root_hash
                   and salt, into a version-0 checkstring.

        Note that implementations can differ in which semantics they
        wish to support for set_checkstring -- they can, for example,
        build the checkstring themselves from its constituents, or
        some other thing.
        """
        if root_hash and salt:
            checkstring = struct.pack(PREFIX,
                                      0,
                                      checkstring_or_seqnum,
                                      root_hash,
                                      salt)
        else:
            checkstring = checkstring_or_seqnum
        # Test vector: the share must start with exactly this string.
        self._testvs = [(0, len(checkstring), "eq", checkstring)]


    def get_checkstring(self):
        """
        Get the checkstring that I think currently exists on the remote
        server. I return the empty string if no test vector has been
        set yet.
        """
        if self._testvs:
            return self._testvs[0][3]
        return ""


    def put_block(self, data, segnum, salt):
        """
        Add a block and salt to the share.

        segnum must be 0, data must be exactly one block long, and salt
        must be SALT_SIZE bytes long. I return a Deferred that has
        already fired with None.
        """
        # SDMF files have only one segment
        assert segnum == 0
        assert len(data) == self._block_size
        assert len(salt) == SALT_SIZE

        self._share_pieces['sharedata'] = data
        self._share_pieces['salt'] = salt

        # TODO: Figure out something intelligent to return.
        return defer.succeed(None)


    def put_encprivkey(self, encprivkey):
        """
        Add the encrypted private key to the share. I return a Deferred
        that has already fired with None.
        """
        self._share_pieces['encprivkey'] = encprivkey

        return defer.succeed(None)


    def put_blockhashes(self, blockhashes):
        """
        Add the block hash tree to the share.

        blockhashes must be a list of HASH_SIZE-byte hashes. I return a
        Deferred that has already fired with None.
        """
        assert isinstance(blockhashes, list)
        for h in blockhashes:
            assert len(h) == HASH_SIZE

        # serialize the blockhashes, then set them.
        blockhashes_s = "".join(blockhashes)
        self._share_pieces['block_hash_tree'] = blockhashes_s

        return defer.succeed(None)


    def put_sharehashes(self, sharehashes):
        """
        Add the share hash chain to the share.

        sharehashes must be a dict mapping numbers to HASH_SIZE-byte
        hashes; it is serialized in ascending key order. I return a
        Deferred that has already fired with None.
        """
        assert isinstance(sharehashes, dict)
        for h in sharehashes.values():
            assert len(h) == HASH_SIZE

        # serialize the sharehashes, then set them.
        sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i])
                                 for i in sorted(sharehashes.keys())])
        self._share_pieces['share_hash_chain'] = sharehashes_s

        return defer.succeed(None)


    def put_root_hash(self, root_hash):
        """
        Add the root hash to the share.

        root_hash must be HASH_SIZE bytes long. I return a Deferred
        that has already fired with None.
        """
        assert len(root_hash) == HASH_SIZE

        self._share_pieces['root_hash'] = root_hash

        return defer.succeed(None)


    def put_salt(self, salt):
        """
        Add a salt to an empty SDMF file.

        I also record empty share data. Unlike the other put_* methods,
        I return None rather than a Deferred.
        """
        assert len(salt) == SALT_SIZE

        self._share_pieces['salt'] = salt
        self._share_pieces['sharedata'] = ""


    def get_signable(self):
        """
        Return the part of the share that needs to be signed.

        SDMF writers need to sign the packed representation of the
        first eight fields of the remote share, that is:
            - version number (0)
            - sequence number
            - root of the share hash tree
            - salt
            - k
            - n
            - segsize
            - datalen

        This method is responsible for returning that to callers. The
        root hash and the salt must already have been put; I raise
        KeyError otherwise.
        """
        return struct.pack(SIGNED_PREFIX,
                           0,
                           self._seqnum,
                           self._share_pieces['root_hash'],
                           self._share_pieces['salt'],
                           self._required_shares,
                           self._total_shares,
                           self._segment_size,
                           self._data_length)


    def put_signature(self, signature):
        """
        Add the signature to the share. I return a Deferred that has
        already fired with None.
        """
        self._share_pieces['signature'] = signature

        return defer.succeed(None)


    def put_verification_key(self, verification_key):
        """
        Add the verification key to the share. I return a Deferred that
        has already fired with None.
        """
        self._share_pieces['verification_key'] = verification_key

        return defer.succeed(None)


    def get_verinfo(self):
        """
        I return my verinfo tuple. This is used by the ServermapUpdater
        to keep track of versions of mutable files.

        The verinfo tuple that I return contains:
            - seqnum
            - root hash
            - salt
            - segsize
            - datalen
            - k
            - n
            - prefix (the thing that you sign)
            - a tuple of offsets

        Every piece of the share must already have been put, since the
        offsets are computed from the lengths of the pieces.
        """
        return (self._seqnum,
                self._share_pieces['root_hash'],
                self._share_pieces['salt'],
                self._segment_size,
                self._data_length,
                self._required_shares,
                self._total_shares,
                self.get_signable(),
                self._get_offsets_tuple())

    def _get_offsets_dict(self):
        """
        Compute where each section of the share starts, from the
        lengths of the pieces collected so far.

        I return a dict with the keys 'signature', 'share_hash_chain',
        'block_hash_tree', 'share_data', 'enc_privkey' and 'EOF'. I
        raise KeyError if one of the pieces has not been put yet.
        """
        post_offset = HEADER_LENGTH
        offsets = {}

        # The verification key starts right after the header, so the
        # signature starts where the verification key ends, and so on
        # down the share.
        verification_key_length = len(self._share_pieces['verification_key'])
        o1 = offsets['signature'] = post_offset + verification_key_length

        signature_length = len(self._share_pieces['signature'])
        o2 = offsets['share_hash_chain'] = o1 + signature_length

        share_hash_chain_length = len(self._share_pieces['share_hash_chain'])
        o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length

        block_hash_tree_length = len(self._share_pieces['block_hash_tree'])
        o4 = offsets['share_data'] = o3 + block_hash_tree_length

        share_data_length = len(self._share_pieces['sharedata'])
        o5 = offsets['enc_privkey'] = o4 + share_data_length

        encprivkey_length = len(self._share_pieces['encprivkey'])
        offsets['EOF'] = o5 + encprivkey_length
        return offsets


    def _get_offsets_tuple(self):
        """
        Return the offsets as a tuple of (name, offset) pairs, in the
        dict's own iteration order.
        """
        return tuple(self._get_offsets_dict().items())


    def _pack_offsets(self):
        """
        Return the offset table packed the way it is stored in the
        share header.
        """
        offsets = self._get_offsets_dict()
        return struct.pack(">LLLLQQ",
                           offsets['signature'],
                           offsets['share_hash_chain'],
                           offsets['block_hash_tree'],
                           offsets['share_data'],
                           offsets['enc_privkey'],
                           offsets['EOF'])


    def finish_publishing(self):
        """
        Do anything necessary to finish writing the share to a remote
        server. I require that no further publishing needs to take place
        after this method has been called.

        I return whatever the remote slot_testv_and_readv_and_writev
        call returns.
        """
        for k in ["sharedata", "encprivkey", "signature", "verification_key",
                  "share_hash_chain", "block_hash_tree"]:
            assert k in self._share_pieces, (self.shnum, k, self._share_pieces.keys())
        # This is the only method that actually writes something to the
        # remote server.
        # First, we need to pack the share into data that we can write
        # to the remote server in one write.
        offsets = self._pack_offsets()
        prefix = self.get_signable()
        final_share = "".join([prefix,
                               offsets,
                               self._share_pieces['verification_key'],
                               self._share_pieces['signature'],
                               self._share_pieces['share_hash_chain'],
                               self._share_pieces['block_hash_tree'],
                               self._share_pieces['sharedata'],
                               self._share_pieces['encprivkey']])

        # Our only data vector is going to be writing the final share,
        # in its entirety.
        datavs = [(0, final_share)]

        if not self._testvs:
            # Our caller has not provided us with another checkstring
            # yet, so we assume that we are writing a new share, and set
            # a test vector that will allow a new share to be written.
            self._testvs = [(0, 1, "eq", "")]

        tw_vectors = {}
        tw_vectors[self.shnum] = (self._testvs, datavs, None)
        return self._rref.callRemote("slot_testv_and_readv_and_writev",
                                     self._storage_index,
                                     self._secrets,
                                     tw_vectors,
                                     # TODO is it useful to read something?
                                     self._readvs)
548
549
# struct format strings for MDMF shares. The full header is the signed
# part followed by a table of eight 64-bit offsets, so it is built from
# those two pieces rather than spelled out again.
MDMFCHECKSTRING = ">BQ32s"
MDMFSIGNABLEHEADER = ">BQ32sBBQQ"
MDMFHEADERWITHOUTOFFSETS = MDMFSIGNABLEHEADER
MDMFOFFSETS = ">QQQQQQQQ"
# Drop the byte-order marker from MDMFOFFSETS before appending it.
MDMFHEADER = MDMFHEADERWITHOUTOFFSETS + " " + MDMFOFFSETS[1:]

# Packed sizes, in bytes, of the layouts above.
MDMFHEADERSIZE = struct.calcsize(MDMFHEADER)
MDMFHEADERWITHOUTOFFSETSSIZE = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
MDMFOFFSETS_LENGTH = struct.calcsize(MDMFOFFSETS)

PRIVATE_KEY_SIZE = 1220
SIGNATURE_SIZE = 260
VERIFICATION_KEY_SIZE = 292
# We know we won't have more than 256 shares, and we know that we won't need
# to store more than log2(256) hash-chain nodes to validate, so that's our
# bound. Each node requires 2 bytes of node-number plus 32 bytes of hash.
SHARE_HASH_CHAIN_SIZE = (2+HASH_SIZE)*mathutil.log_ceil(256, 2)
566
567 class MDMFSlotWriteProxy:
568     implements(IMutableSlotWriter)
569
570     """
571     I represent a remote write slot for an MDMF mutable file.
572
573     I abstract away from my caller the details of block and salt
574     management, and the implementation of the on-disk format for MDMF
575     shares.
576     """
577     # Expected layout, MDMF:
578     # offset:     size:       name:
579     #-- signed part --
580     # 0           1           version number (01)
581     # 1           8           sequence number
582     # 9           32          share tree root hash
583     # 41          1           The "k" encoding parameter
584     # 42          1           The "N" encoding parameter
585     # 43          8           The segment size of the uploaded file
586     # 51          8           The data length of the original plaintext
587     #-- end signed part --
588     # 59          8           The offset of the encrypted private key
589     # 67          8           The offset of the share hash chain
590     # 75          8           The offset of the signature
591     # 83          8           The offset of the verification key
592     # 91          8           The offset of the end of the v. key.
593     # 99          8           The offset of the share data
594     # 107         8           The offset of the block hash tree
595     # 115         8           The offset of EOF
596     # 123         var         encrypted private key
597     # var         var         share hash chain
598     # var         var         signature
599     # var         var         verification key
600     # var         large       share data
601     # var         var         block hash tree
602     #
    # We order the fields that way to make smart downloaders -- downloaders
    # which preemptively read a big part of the share -- possible.
605     #
    # The checkstring is the first three fields -- the version number,
    # sequence number, and root hash. This is consistent in meaning
    # to what we have with SDMF files, except now instead of
    # using the literal salt, we use a value derived from all of the
    # salts -- the share hash root.
611     # 
612     # The salt is stored before the block for each segment. The block
613     # hash tree is computed over the combination of block and salt for
614     # each segment. In this way, we get integrity checking for both
615     # block and salt with the current block hash tree arrangement.
616     # 
617     # The ordering of the offsets is different to reflect the dependencies
618     # that we'll run into with an MDMF file. The expected write flow is
619     # something like this:
620     #
621     #   0: Initialize with the sequence number, encoding parameters and
622     #      data length. From this, we can deduce the number of segments,
623     #      and where they should go.. We can also figure out where the
624     #      encrypted private key should go, because we can figure out how
625     #      big the share data will be.
626     # 
627     #   1: Encrypt, encode, and upload the file in chunks. Do something
628     #      like 
629     #
630     #       put_block(data, segnum, salt)
631     #
632     #      to write a block and a salt to the disk. We can do both of
633     #      these operations now because we have enough of the offsets to
634     #      know where to put them.
635     # 
636     #   2: Put the encrypted private key. Use:
637     #
638     #        put_encprivkey(encprivkey)
639     #
640     #      Now that we know the length of the private key, we can fill
641     #      in the offset for the block hash tree.
642     #
643     #   3: We're now in a position to upload the block hash tree for
644     #      a share. Put that using something like:
645     #       
646     #        put_blockhashes(block_hash_tree)
647     #
648     #      Note that block_hash_tree is a list of hashes -- we'll take
649     #      care of the details of serializing that appropriately. When
650     #      we get the block hash tree, we are also in a position to
651     #      calculate the offset for the share hash chain, and fill that
652     #      into the offsets table.
653     #
654     #   4: We're now in a position to upload the share hash chain for
655     #      a share. Do that with something like:
656     #      
657     #        put_sharehashes(share_hash_chain) 
658     #
659     #      share_hash_chain should be a dictionary mapping shnums to 
660     #      32-byte hashes -- the wrapper handles serialization.
661     #      We'll know where to put the signature at this point, also.
662     #      The root of this tree will be put explicitly in the next
663     #      step.
664     # 
665     #   5: Before putting the signature, we must first put the
666     #      root_hash. Do this with:
667     # 
668     #        put_root_hash(root_hash).
669     #      
670     #      In terms of knowing where to put this value, it was always
671     #      possible to place it, but it makes sense semantically to
672     #      place it after the share hash tree, so that's why you do it
673     #      in this order.
674     #
675     #   6: With the root hash put, we can now sign the header. Use:
676     #
677     #        get_signable()
678     #
679     #      to get the part of the header that you want to sign, and use:
680     #       
681     #        put_signature(signature)
682     #
683     #      to write your signature to the remote server.
684     #
    #   7: Add the verification key, and finish. Do:
    #
    #        put_verification_key(key)
    #
    #      and
    #
    #        finish_publishing()
692     #
693     # Checkstring management:
694     # 
695     # To write to a mutable slot, we have to provide test vectors to ensure
696     # that we are writing to the same data that we think we are. These
697     # vectors allow us to detect uncoordinated writes; that is, writes
698     # where both we and some other shareholder are writing to the
699     # mutable slot, and to report those back to the parts of the program
700     # doing the writing. 
701     #
702     # With SDMF, this was easy -- all of the share data was written in
703     # one go, so it was easy to detect uncoordinated writes, and we only
704     # had to do it once. With MDMF, not all of the file is written at
705     # once.
706     #
707     # If a share is new, we write out as much of the header as we can
708     # before writing out anything else. This gives other writers a
709     # canary that they can use to detect uncoordinated writes, and, if
    # they do the same thing, gives us the same canary. We then update
711     # the share. We won't be able to write out two fields of the header
712     # -- the share tree hash and the salt hash -- until we finish
713     # writing out the share. We only require the writer to provide the
714     # initial checkstring, and keep track of what it should be after
715     # updates ourselves.
716     #
717     # If we haven't written anything yet, then on the first write (which
718     # will probably be a block + salt of a share), we'll also write out
719     # the header. On subsequent passes, we'll expect to see the header.
720     # This changes in two places:
721     #
722     #   - When we write out the salt hash
723     #   - When we write out the root of the share hash tree
724     #
725     # since these values will change the header. It is possible that we 
726     # can just make those be written in one operation to minimize
727     # disruption.
728     def __init__(self,
729                  shnum,
730                  rref, # a remote reference to a storage server
731                  storage_index,
732                  secrets, # (write_enabler, renew_secret, cancel_secret)
733                  seqnum, # the sequence number of the mutable file
734                  required_shares,
735                  total_shares,
736                  segment_size,
737                  data_length): # the length of the original file
738         self.shnum = shnum
739         self._rref = rref
740         self._storage_index = storage_index
741         self._seqnum = seqnum
742         self._required_shares = required_shares
743         assert self.shnum >= 0 and self.shnum < total_shares
744         self._total_shares = total_shares
745         # We build up the offset table as we write things. It is the
746         # last thing we write to the remote server. 
747         self._offsets = {}
748         self._testvs = []
749         # This is a list of write vectors that will be sent to our
750         # remote server once we are directed to write things there.
751         self._writevs = []
752         self._secrets = secrets
753         # The segment size needs to be a multiple of the k parameter --
754         # any padding should have been carried out by the publisher
755         # already.
756         assert segment_size % required_shares == 0
757         self._segment_size = segment_size
758         self._data_length = data_length
759
760         # These are set later -- we define them here so that we can
761         # check for their existence easily
762
763         # This is the root of the share hash tree -- the Merkle tree
764         # over the roots of the block hash trees computed for shares in
765         # this upload.
766         self._root_hash = None
767
768         # We haven't yet written anything to the remote bucket. By
769         # setting this, we tell the _write method as much. The write
770         # method will then know that it also needs to add a write vector
771         # for the checkstring (or what we have of it) to the first write
772         # request. We'll then record that value for future use.  If
773         # we're expecting something to be there already, we need to call
774         # set_checkstring before we write anything to tell the first
775         # write about that.
776         self._written = False
777
778         # When writing data to the storage servers, we get a read vector
779         # for free. We'll read the checkstring, which will help us
780         # figure out what's gone wrong if a write fails.
781         self._readv = [(0, struct.calcsize(MDMFCHECKSTRING))]
782
783         # We calculate the number of segments because it tells us
784         # where the salt part of the file ends/share segment begins,
785         # and also because it provides a useful amount of bounds checking.
786         self._num_segments = mathutil.div_ceil(self._data_length,
787                                                self._segment_size)
788         self._block_size = self._segment_size / self._required_shares
789         # We also calculate the share size, to help us with block
790         # constraints later.
791         tail_size = self._data_length % self._segment_size
792         if not tail_size:
793             self._tail_block_size = self._block_size
794         else:
795             self._tail_block_size = mathutil.next_multiple(tail_size,
796                                                            self._required_shares)
797             self._tail_block_size /= self._required_shares
798
799         # We already know where the sharedata starts; right after the end
800         # of the header (which is defined as the signable part + the offsets)
801         # We can also calculate where the encrypted private key begins
802         # from what we know know.
803         self._actual_block_size = self._block_size + SALT_SIZE
804         data_size = self._actual_block_size * (self._num_segments - 1)
805         data_size += self._tail_block_size
806         data_size += SALT_SIZE
807         self._offsets['enc_privkey'] = MDMFHEADERSIZE
808
809         # We don't define offsets for these because we want them to be
810         # tightly packed -- this allows us to ignore the responsibility
811         # of padding individual values, and of removing that padding
812         # later. So nonconstant_start is where we start writing
813         # nonconstant data.
814         nonconstant_start = self._offsets['enc_privkey']
815         nonconstant_start += PRIVATE_KEY_SIZE
816         nonconstant_start += SIGNATURE_SIZE
817         nonconstant_start += VERIFICATION_KEY_SIZE
818         nonconstant_start += SHARE_HASH_CHAIN_SIZE
819
820         self._offsets['share_data'] = nonconstant_start
821
822         # Finally, we know how big the share data will be, so we can
823         # figure out where the block hash tree needs to go.
824         # XXX: But this will go away if Zooko wants to make it so that
825         # you don't need to know the size of the file before you start
826         # uploading it.
827         self._offsets['block_hash_tree'] = self._offsets['share_data'] + \
828                     data_size
829
830         # Done. We can snow start writing.
831
832
833     def set_checkstring(self,
834                         seqnum_or_checkstring,
835                         root_hash=None,
836                         salt=None):
837         """
838         Set checkstring checkstring for the given shnum.
839
840         This can be invoked in one of two ways.
841
842         With one argument, I assume that you are giving me a literal
843         checkstring -- e.g., the output of get_checkstring. I will then
844         set that checkstring as it is. This form is used by unit tests.
845
846         With two arguments, I assume that you are giving me a sequence
847         number and root hash to make a checkstring from. In that case, I
848         will build a checkstring and set it for you. This form is used
849         by the publisher.
850
851         By default, I assume that I am writing new shares to the grid.
852         If you don't explcitly set your own checkstring, I will use
853         one that requires that the remote share not exist. You will want
854         to use this method if you are updating a share in-place;
855         otherwise, writes will fail.
856         """
857         # You're allowed to overwrite checkstrings with this method;
858         # I assume that users know what they are doing when they call
859         # it.
860         if root_hash:
861             checkstring = struct.pack(MDMFCHECKSTRING,
862                                       1,
863                                       seqnum_or_checkstring,
864                                       root_hash)
865         else:
866             checkstring = seqnum_or_checkstring
867
868         if checkstring == "":
869             # We special-case this, since len("") = 0, but we need
870             # length of 1 for the case of an empty share to work on the
871             # storage server, which is what a checkstring that is the
872             # empty string means.
873             self._testvs = []
874         else:
875             self._testvs = []
876             self._testvs.append((0, len(checkstring), "eq", checkstring))
877
878
879     def __repr__(self):
880         return "MDMFSlotWriteProxy for share %d" % self.shnum
881
882
883     def get_checkstring(self):
884         """
885         Given a share number, I return a representation of what the
886         checkstring for that share on the server will look like.
887
888         I am mostly used for tests.
889         """
890         if self._root_hash:
891             roothash = self._root_hash
892         else:
893             roothash = "\x00" * 32
894         return struct.pack(MDMFCHECKSTRING,
895                            1,
896                            self._seqnum,
897                            roothash)
898
899
900     def put_block(self, data, segnum, salt):
901         """
902         I queue a write vector for the data, salt, and segment number
903         provided to me. I return None, as I do not actually cause
904         anything to be written yet.
905         """
906         if segnum >= self._num_segments:
907             raise LayoutInvalid("I won't overwrite the block hash tree")
908         if len(salt) != SALT_SIZE:
909             raise LayoutInvalid("I was given a salt of size %d, but "
910                                 "I wanted a salt of size %d")
911         if segnum + 1 == self._num_segments:
912             if len(data) != self._tail_block_size:
913                 raise LayoutInvalid("I was given the wrong size block to write")
914         elif len(data) != self._block_size:
915             raise LayoutInvalid("I was given the wrong size block to write")
916
917         # We want to write at len(MDMFHEADER) + segnum * block_size.
918         offset = self._offsets['share_data'] + \
919             (self._actual_block_size * segnum)
920         data = salt + data
921
922         self._writevs.append(tuple([offset, data]))
923
924
925     def put_encprivkey(self, encprivkey):
926         """
927         I queue a write vector for the encrypted private key provided to
928         me.
929         """
930         assert self._offsets
931         assert self._offsets['enc_privkey']
932         # You shouldn't re-write the encprivkey after the block hash
933         # tree is written, since that could cause the private key to run
934         # into the block hash tree. Before it writes the block hash
935         # tree, the block hash tree writing method writes the offset of
936         # the share hash chain. So that's a good indicator of whether or
937         # not the block hash tree has been written.
938         if "signature" in self._offsets:
939             raise LayoutInvalid("You can't put the encrypted private key "
940                                 "after putting the share hash chain")
941
942         self._offsets['share_hash_chain'] = self._offsets['enc_privkey'] + \
943                 len(encprivkey)
944
945         self._writevs.append(tuple([self._offsets['enc_privkey'], encprivkey]))
946
947
948     def put_blockhashes(self, blockhashes):
949         """
950         I queue a write vector to put the block hash tree in blockhashes
951         onto the remote server.
952
953         The encrypted private key must be queued before the block hash
954         tree, since we need to know how large it is to know where the
955         block hash tree should go. The block hash tree must be put
956         before the share hash chain, since its size determines the
957         offset of the share hash chain.
958         """
959         assert self._offsets
960         assert "block_hash_tree" in self._offsets
961
962         assert isinstance(blockhashes, list)
963
964         blockhashes_s = "".join(blockhashes)
965         self._offsets['EOF'] = self._offsets['block_hash_tree'] + len(blockhashes_s)
966
967         self._writevs.append(tuple([self._offsets['block_hash_tree'],
968                                   blockhashes_s]))
969
970
971     def put_sharehashes(self, sharehashes):
972         """
973         I queue a write vector to put the share hash chain in my
974         argument onto the remote server.
975
976         The block hash tree must be queued before the share hash chain,
977         since we need to know where the block hash tree ends before we
978         can know where the share hash chain starts. The share hash chain
979         must be put before the signature, since the length of the packed
980         share hash chain determines the offset of the signature. Also,
981         semantically, you must know what the root of the block hash tree
982         is before you can generate a valid signature.
983         """
984         assert isinstance(sharehashes, dict)
985         assert self._offsets
986         if "share_hash_chain" not in self._offsets:
987             raise LayoutInvalid("You must put the block hash tree before "
988                                 "putting the share hash chain")
989
990         # The signature comes after the share hash chain. If the
991         # signature has already been written, we must not write another
992         # share hash chain. The signature writes the verification key
993         # offset when it gets sent to the remote server, so we look for
994         # that.
995         if "verification_key" in self._offsets:
996             raise LayoutInvalid("You must write the share hash chain "
997                                 "before you write the signature")
998         sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i])
999                                   for i in sorted(sharehashes.keys())])
1000         self._offsets['signature'] = self._offsets['share_hash_chain'] + \
1001             len(sharehashes_s)
1002         self._writevs.append(tuple([self._offsets['share_hash_chain'],
1003                             sharehashes_s]))
1004
1005
1006     def put_root_hash(self, roothash):
1007         """
1008         Put the root hash (the root of the share hash tree) in the
1009         remote slot.
1010         """
1011         # It does not make sense to be able to put the root 
1012         # hash without first putting the share hashes, since you need
1013         # the share hashes to generate the root hash.
1014         #
1015         # Signature is defined by the routine that places the share hash
1016         # chain, so it's a good thing to look for in finding out whether
1017         # or not the share hash chain exists on the remote server.
1018         if len(roothash) != HASH_SIZE:
1019             raise LayoutInvalid("hashes and salts must be exactly %d bytes"
1020                                  % HASH_SIZE)
1021         self._root_hash = roothash
1022         # To write both of these values, we update the checkstring on
1023         # the remote server, which includes them
1024         checkstring = self.get_checkstring()
1025         self._writevs.append(tuple([0, checkstring]))
1026         # This write, if successful, changes the checkstring, so we need
1027         # to update our internal checkstring to be consistent with the
1028         # one on the server.
1029
1030
1031     def get_signable(self):
1032         """
1033         Get the first seven fields of the mutable file; the parts that
1034         are signed.
1035         """
1036         if not self._root_hash:
1037             raise LayoutInvalid("You need to set the root hash "
1038                                 "before getting something to "
1039                                 "sign")
1040         return struct.pack(MDMFSIGNABLEHEADER,
1041                            1,
1042                            self._seqnum,
1043                            self._root_hash,
1044                            self._required_shares,
1045                            self._total_shares,
1046                            self._segment_size,
1047                            self._data_length)
1048
1049
1050     def put_signature(self, signature):
1051         """
1052         I queue a write vector for the signature of the MDMF share.
1053
1054         I require that the root hash and share hash chain have been put
1055         to the grid before I will write the signature to the grid.
1056         """
1057         if "signature" not in self._offsets:
1058             raise LayoutInvalid("You must put the share hash chain "
1059         # It does not make sense to put a signature without first
1060         # putting the root hash and the salt hash (since otherwise
1061         # the signature would be incomplete), so we don't allow that.
1062                        "before putting the signature")
1063         if not self._root_hash:
1064             raise LayoutInvalid("You must complete the signed prefix "
1065                                 "before computing a signature")
1066         # If we put the signature after we put the verification key, we
1067         # could end up running into the verification key, and will
1068         # probably screw up the offsets as well. So we don't allow that.
1069         if "verification_key_end" in self._offsets:
1070             raise LayoutInvalid("You can't put the signature after the "
1071                                 "verification key")
1072         # The method that writes the verification key defines the EOF
1073         # offset before writing the verification key, so look for that.
1074         self._offsets['verification_key'] = self._offsets['signature'] +\
1075             len(signature)
1076         self._writevs.append(tuple([self._offsets['signature'], signature]))
1077
1078
1079     def put_verification_key(self, verification_key):
1080         """
1081         I queue a write vector for the verification key.
1082
1083         I require that the signature have been written to the storage
1084         server before I allow the verification key to be written to the
1085         remote server.
1086         """
1087         if "verification_key" not in self._offsets:
1088             raise LayoutInvalid("You must put the signature before you "
1089                                 "can put the verification key")
1090
1091         self._offsets['verification_key_end'] = \
1092             self._offsets['verification_key'] + len(verification_key)
1093         assert self._offsets['verification_key_end'] <= self._offsets['share_data']
1094         self._writevs.append(tuple([self._offsets['verification_key'],
1095                             verification_key]))
1096
1097
1098     def _get_offsets_tuple(self):
1099         return tuple([(key, value) for key, value in self._offsets.items()])
1100
1101
1102     def get_verinfo(self):
1103         return (self._seqnum,
1104                 self._root_hash,
1105                 None,
1106                 self._segment_size,
1107                 self._data_length,
1108                 self._required_shares,
1109                 self._total_shares,
1110                 self.get_signable(),
1111                 self._get_offsets_tuple())
1112
1113
1114     def finish_publishing(self):
1115         """
1116         I add a write vector for the offsets table, and then cause all
1117         of the write vectors that I've dealt with so far to be published
1118         to the remote server, ending the write process.
1119         """
1120         if "verification_key_end" not in self._offsets:
1121             raise LayoutInvalid("You must put the verification key before "
1122                                 "you can publish the offsets")
1123         offsets_offset = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
1124         offsets = struct.pack(MDMFOFFSETS,
1125                               self._offsets['enc_privkey'],
1126                               self._offsets['share_hash_chain'],
1127                               self._offsets['signature'],
1128                               self._offsets['verification_key'],
1129                               self._offsets['verification_key_end'],
1130                               self._offsets['share_data'],
1131                               self._offsets['block_hash_tree'],
1132                               self._offsets['EOF'])
1133         self._writevs.append(tuple([offsets_offset, offsets]))
1134         encoding_parameters_offset = struct.calcsize(MDMFCHECKSTRING)
1135         params = struct.pack(">BBQQ",
1136                              self._required_shares,
1137                              self._total_shares,
1138                              self._segment_size,
1139                              self._data_length)
1140         self._writevs.append(tuple([encoding_parameters_offset, params]))
1141         return self._write(self._writevs)
1142
1143
1144     def _write(self, datavs, on_failure=None, on_success=None):
1145         """I write the data vectors in datavs to the remote slot."""
1146         tw_vectors = {}
1147         if not self._testvs:
1148             self._testvs = []
1149             self._testvs.append(tuple([0, 1, "eq", ""]))
1150         if not self._written:
1151             # Write a new checkstring to the share when we write it, so
1152             # that we have something to check later.
1153             new_checkstring = self.get_checkstring()
1154             datavs.append((0, new_checkstring))
1155             def _first_write():
1156                 self._written = True
1157                 self._testvs = [(0, len(new_checkstring), "eq", new_checkstring)]
1158             on_success = _first_write
1159         tw_vectors[self.shnum] = (self._testvs, datavs, None)
1160         d = self._rref.callRemote("slot_testv_and_readv_and_writev",
1161                                   self._storage_index,
1162                                   self._secrets,
1163                                   tw_vectors,
1164                                   self._readv)
1165         def _result(results):
1166             if isinstance(results, failure.Failure) or not results[0]:
1167                 # Do nothing; the write was unsuccessful.
1168                 if on_failure: on_failure()
1169             else:
1170                 if on_success: on_success()
1171             return results
1172         d.addCallback(_result)
1173         return d
1174
def _handle_bad_struct(f):
    """
    Errback that turns a struct.error failure into a BadShareError
    carrying the first argument of the original error. Any other kind
    of failure is passed on by f.trap.
    """
    # A struct.unpack error means the server didn't give us enough
    # data, so this share is bad.
    f.trap(struct.error)
    reason = f.value.args[0]
    raise BadShareError(reason)
1180
1181 class MDMFSlotReadProxy:
1182     """
1183     I read from a mutable slot filled with data written in the MDMF data
1184     format (which is described above).
1185
1186     I can be initialized with some amount of data, which I will use (if
1187     it is valid) to eliminate some of the need to fetch it from servers.
1188     """
1189     def __init__(self,
1190                  rref,
1191                  storage_index,
1192                  shnum,
1193                  data=""):
1194         # Start the initialization process.
1195         self._rref = rref
1196         self._storage_index = storage_index
1197         self.shnum = shnum
1198
1199         # Before doing anything, the reader is probably going to want to
1200         # verify that the signature is correct. To do that, they'll need
1201         # the verification key, and the signature. To get those, we'll
1202         # need the offset table. So fetch the offset table on the
1203         # assumption that that will be the first thing that a reader is
1204         # going to do.
1205
1206         # The fact that these encoding parameters are None tells us
1207         # that we haven't yet fetched them from the remote share, so we
1208         # should. We could just not set them, but the checks will be
1209         # easier to read if we don't have to use hasattr.
1210         self._version_number = None
1211         self._sequence_number = None
1212         self._root_hash = None
1213         # Filled in if we're dealing with an SDMF file. Unused
1214         # otherwise.
1215         self._salt = None
1216         self._required_shares = None
1217         self._total_shares = None
1218         self._segment_size = None
1219         self._data_length = None
1220         self._offsets = None
1221
1222         # If the user has chosen to initialize us with some data, we'll
1223         # try to satisfy subsequent data requests with that data before
1224         # asking the storage server for it. If 
1225         self._data = data
1226         # The way callers interact with cache in the filenode returns
1227         # None if there isn't any cached data, but the way we index the
1228         # cached data requires a string, so convert None to "".
1229         if self._data == None:
1230             self._data = ""
1231
1232
1233     def _maybe_fetch_offsets_and_header(self, force_remote=False):
1234         """
1235         I fetch the offset table and the header from the remote slot if
1236         I don't already have them. If I do have them, I do nothing and
1237         return an empty Deferred.
1238         """
1239         if self._offsets:
1240             return defer.succeed(None)
1241         # At this point, we may be either SDMF or MDMF. Fetching 107 
1242         # bytes will be enough to get header and offsets for both SDMF and
1243         # MDMF, though we'll be left with 4 more bytes than we
1244         # need if this ends up being MDMF. This is probably less
1245         # expensive than the cost of a second roundtrip.
1246         readvs = [(0, 123)]
1247         d = self._read(readvs, force_remote)
1248         d.addCallback(self._process_encoding_parameters)
1249         d.addCallback(self._process_offsets)
1250         d.addErrback(_handle_bad_struct)
1251         return d
1252
1253
1254     def _process_encoding_parameters(self, encoding_parameters):
1255         if self.shnum not in encoding_parameters:
1256             raise BadShareError("no data for shnum %d" % self.shnum)
1257         encoding_parameters = encoding_parameters[self.shnum][0]
1258         # The first byte is the version number. It will tell us what
1259         # to do next.
1260         (verno,) = struct.unpack(">B", encoding_parameters[:1])
1261         if verno == MDMF_VERSION:
1262             read_size = MDMFHEADERWITHOUTOFFSETSSIZE
1263             (verno,
1264              seqnum,
1265              root_hash,
1266              k,
1267              n,
1268              segsize,
1269              datalen) = struct.unpack(MDMFHEADERWITHOUTOFFSETS,
1270                                       encoding_parameters[:read_size])
1271             if segsize == 0 and datalen == 0:
1272                 # Empty file, no segments.
1273                 self._num_segments = 0
1274             else:
1275                 self._num_segments = mathutil.div_ceil(datalen, segsize)
1276
1277         elif verno == SDMF_VERSION:
1278             read_size = SIGNED_PREFIX_LENGTH
1279             (verno,
1280              seqnum,
1281              root_hash,
1282              salt,
1283              k,
1284              n,
1285              segsize,
1286              datalen) = struct.unpack(">BQ32s16s BBQQ",
1287                                 encoding_parameters[:SIGNED_PREFIX_LENGTH])
1288             self._salt = salt
1289             if segsize == 0 and datalen == 0:
1290                 # empty file
1291                 self._num_segments = 0
1292             else:
1293                 # non-empty SDMF files have one segment.
1294                 self._num_segments = 1
1295         else:
1296             raise UnknownVersionError("You asked me to read mutable file "
1297                                       "version %d, but I only understand "
1298                                       "%d and %d" % (verno, SDMF_VERSION,
1299                                                      MDMF_VERSION))
1300
1301         self._version_number = verno
1302         self._sequence_number = seqnum
1303         self._root_hash = root_hash
1304         self._required_shares = k
1305         self._total_shares = n
1306         self._segment_size = segsize
1307         self._data_length = datalen
1308
1309         self._block_size = self._segment_size / self._required_shares
1310         # We can upload empty files, and need to account for this fact
1311         # so as to avoid zero-division and zero-modulo errors.
1312         if datalen > 0:
1313             tail_size = self._data_length % self._segment_size
1314         else:
1315             tail_size = 0
1316         if not tail_size:
1317             self._tail_block_size = self._block_size
1318         else:
1319             self._tail_block_size = mathutil.next_multiple(tail_size,
1320                                                     self._required_shares)
1321             self._tail_block_size /= self._required_shares
1322
1323         return encoding_parameters
1324
1325
1326     def _process_offsets(self, offsets):
1327         if self._version_number == 0:
1328             read_size = OFFSETS_LENGTH
1329             read_offset = SIGNED_PREFIX_LENGTH
1330             end = read_size + read_offset
1331             (signature,
1332              share_hash_chain,
1333              block_hash_tree,
1334              share_data,
1335              enc_privkey,
1336              EOF) = struct.unpack(">LLLLQQ",
1337                                   offsets[read_offset:end])
1338             self._offsets = {}
1339             self._offsets['signature'] = signature
1340             self._offsets['share_data'] = share_data
1341             self._offsets['block_hash_tree'] = block_hash_tree
1342             self._offsets['share_hash_chain'] = share_hash_chain
1343             self._offsets['enc_privkey'] = enc_privkey
1344             self._offsets['EOF'] = EOF
1345
1346         elif self._version_number == 1:
1347             read_offset = MDMFHEADERWITHOUTOFFSETSSIZE
1348             read_length = MDMFOFFSETS_LENGTH
1349             end = read_offset + read_length
1350             (encprivkey,
1351              sharehashes,
1352              signature,
1353              verification_key,
1354              verification_key_end,
1355              sharedata,
1356              blockhashes,
1357              eof) = struct.unpack(MDMFOFFSETS,
1358                                   offsets[read_offset:end])
1359             self._offsets = {}
1360             self._offsets['enc_privkey'] = encprivkey
1361             self._offsets['block_hash_tree'] = blockhashes
1362             self._offsets['share_hash_chain'] = sharehashes
1363             self._offsets['signature'] = signature
1364             self._offsets['verification_key'] = verification_key
1365             self._offsets['verification_key_end']= \
1366                 verification_key_end
1367             self._offsets['EOF'] = eof
1368             self._offsets['share_data'] = sharedata
1369
1370
1371     def get_block_and_salt(self, segnum):
1372         """
1373         I return (block, salt), where block is the block data and
1374         salt is the salt used to encrypt that segment.
1375         """
1376         d = self._maybe_fetch_offsets_and_header()
1377         def _then(ignored):
1378             base_share_offset = self._offsets['share_data']
1379
1380             if segnum + 1 > self._num_segments:
1381                 raise LayoutInvalid("Not a valid segment number")
1382
1383             if self._version_number == 0:
1384                 share_offset = base_share_offset + self._block_size * segnum
1385             else:
1386                 share_offset = base_share_offset + (self._block_size + \
1387                                                     SALT_SIZE) * segnum
1388             if segnum + 1 == self._num_segments:
1389                 data = self._tail_block_size
1390             else:
1391                 data = self._block_size
1392
1393             if self._version_number == 1:
1394                 data += SALT_SIZE
1395
1396             readvs = [(share_offset, data)]
1397             return readvs
1398         d.addCallback(_then)
1399         d.addCallback(lambda readvs: self._read(readvs))
1400         def _process_results(results):
1401             if self.shnum not in results:
1402                 raise BadShareError("no data for shnum %d" % self.shnum)
1403             if self._version_number == 0:
1404                 # We only read the share data, but we know the salt from
1405                 # when we fetched the header
1406                 data = results[self.shnum]
1407                 if not data:
1408                     data = ""
1409                 else:
1410                     if len(data) != 1:
1411                         raise BadShareError("got %d vectors, not 1" % len(data))
1412                     data = data[0]
1413                 salt = self._salt
1414             else:
1415                 data = results[self.shnum]
1416                 if not data:
1417                     salt = data = ""
1418                 else:
1419                     salt_and_data = results[self.shnum][0]
1420                     salt = salt_and_data[:SALT_SIZE]
1421                     data = salt_and_data[SALT_SIZE:]
1422             return data, salt
1423         d.addCallback(_process_results)
1424         return d
1425
1426
1427     def get_blockhashes(self, needed=None, force_remote=False):
1428         """
1429         I return the block hash tree
1430
1431         I take an optional argument, needed, which is a set of indices
1432         correspond to hashes that I should fetch. If this argument is
1433         missing, I will fetch the entire block hash tree; otherwise, I
1434         may attempt to fetch fewer hashes, based on what needed says
1435         that I should do. Note that I may fetch as many hashes as I
1436         want, so long as the set of hashes that I do fetch is a superset
1437         of the ones that I am asked for, so callers should be prepared
1438         to tolerate additional hashes.
1439         """
1440         # TODO: Return only the parts of the block hash tree necessary
1441         # to validate the blocknum provided?
1442         # This is a good idea, but it is hard to implement correctly. It
1443         # is bad to fetch any one block hash more than once, so we
1444         # probably just want to fetch the whole thing at once and then
1445         # serve it.
1446         if needed == set([]):
1447             return defer.succeed([])
1448         d = self._maybe_fetch_offsets_and_header()
1449         def _then(ignored):
1450             blockhashes_offset = self._offsets['block_hash_tree']
1451             if self._version_number == 1:
1452                 blockhashes_length = self._offsets['EOF'] - blockhashes_offset
1453             else:
1454                 blockhashes_length = self._offsets['share_data'] - blockhashes_offset
1455             readvs = [(blockhashes_offset, blockhashes_length)]
1456             return readvs
1457         d.addCallback(_then)
1458         d.addCallback(lambda readvs:
1459             self._read(readvs, force_remote=force_remote))
1460         def _build_block_hash_tree(results):
1461             if self.shnum not in results:
1462                 raise BadShareError("no data for shnum %d" % self.shnum)
1463
1464             rawhashes = results[self.shnum][0]
1465             results = [rawhashes[i:i+HASH_SIZE]
1466                        for i in range(0, len(rawhashes), HASH_SIZE)]
1467             return results
1468         d.addCallback(_build_block_hash_tree)
1469         return d
1470
1471
1472     def get_sharehashes(self, needed=None, force_remote=False):
1473         """
1474         I return the part of the share hash chain placed to validate
1475         this share.
1476
1477         I take an optional argument, needed. Needed is a set of indices
1478         that correspond to the hashes that I should fetch. If needed is
1479         not present, I will fetch and return the entire share hash
1480         chain. Otherwise, I may fetch and return any part of the share
1481         hash chain that is a superset of the part that I am asked to
1482         fetch. Callers should be prepared to deal with more hashes than
1483         they've asked for.
1484         """
1485         if needed == set([]):
1486             return defer.succeed([])
1487         d = self._maybe_fetch_offsets_and_header()
1488
1489         def _make_readvs(ignored):
1490             sharehashes_offset = self._offsets['share_hash_chain']
1491             if self._version_number == 0:
1492                 sharehashes_length = self._offsets['block_hash_tree'] - sharehashes_offset
1493             else:
1494                 sharehashes_length = self._offsets['signature'] - sharehashes_offset
1495             readvs = [(sharehashes_offset, sharehashes_length)]
1496             return readvs
1497         d.addCallback(_make_readvs)
1498         d.addCallback(lambda readvs:
1499             self._read(readvs, force_remote=force_remote))
1500         def _build_share_hash_chain(results):
1501             if self.shnum not in results:
1502                 raise BadShareError("no data for shnum %d" % self.shnum)
1503
1504             sharehashes = results[self.shnum][0]
1505             results = [sharehashes[i:i+(HASH_SIZE + 2)]
1506                        for i in range(0, len(sharehashes), HASH_SIZE + 2)]
1507             results = dict([struct.unpack(">H32s", data)
1508                             for data in results])
1509             return results
1510         d.addCallback(_build_share_hash_chain)
1511         d.addErrback(_handle_bad_struct)
1512         return d
1513
1514
1515     def get_encprivkey(self):
1516         """
1517         I return the encrypted private key.
1518         """
1519         d = self._maybe_fetch_offsets_and_header()
1520
1521         def _make_readvs(ignored):
1522             privkey_offset = self._offsets['enc_privkey']
1523             if self._version_number == 0:
1524                 privkey_length = self._offsets['EOF'] - privkey_offset
1525             else:
1526                 privkey_length = self._offsets['share_hash_chain'] - privkey_offset
1527             readvs = [(privkey_offset, privkey_length)]
1528             return readvs
1529         d.addCallback(_make_readvs)
1530         d.addCallback(lambda readvs: self._read(readvs))
1531         def _process_results(results):
1532             if self.shnum not in results:
1533                 raise BadShareError("no data for shnum %d" % self.shnum)
1534             privkey = results[self.shnum][0]
1535             return privkey
1536         d.addCallback(_process_results)
1537         return d
1538
1539
1540     def get_signature(self):
1541         """
1542         I return the signature of my share.
1543         """
1544         d = self._maybe_fetch_offsets_and_header()
1545
1546         def _make_readvs(ignored):
1547             signature_offset = self._offsets['signature']
1548             if self._version_number == 1:
1549                 signature_length = self._offsets['verification_key'] - signature_offset
1550             else:
1551                 signature_length = self._offsets['share_hash_chain'] - signature_offset
1552             readvs = [(signature_offset, signature_length)]
1553             return readvs
1554         d.addCallback(_make_readvs)
1555         d.addCallback(lambda readvs: self._read(readvs))
1556         def _process_results(results):
1557             if self.shnum not in results:
1558                 raise BadShareError("no data for shnum %d" % self.shnum)
1559             signature = results[self.shnum][0]
1560             return signature
1561         d.addCallback(_process_results)
1562         return d
1563
1564
1565     def get_verification_key(self):
1566         """
1567         I return the verification key.
1568         """
1569         d = self._maybe_fetch_offsets_and_header()
1570
1571         def _make_readvs(ignored):
1572             if self._version_number == 1:
1573                 vk_offset = self._offsets['verification_key']
1574                 vk_length = self._offsets['verification_key_end'] - vk_offset
1575             else:
1576                 vk_offset = struct.calcsize(">BQ32s16sBBQQLLLLQQ")
1577                 vk_length = self._offsets['signature'] - vk_offset
1578             readvs = [(vk_offset, vk_length)]
1579             return readvs
1580         d.addCallback(_make_readvs)
1581         d.addCallback(lambda readvs: self._read(readvs))
1582         def _process_results(results):
1583             if self.shnum not in results:
1584                 raise BadShareError("no data for shnum %d" % self.shnum)
1585             verification_key = results[self.shnum][0]
1586             return verification_key
1587         d.addCallback(_process_results)
1588         return d
1589
1590
1591     def get_encoding_parameters(self):
1592         """
1593         I return (k, n, segsize, datalen)
1594         """
1595         d = self._maybe_fetch_offsets_and_header()
1596         d.addCallback(lambda ignored:
1597             (self._required_shares,
1598              self._total_shares,
1599              self._segment_size,
1600              self._data_length))
1601         return d
1602
1603
1604     def get_seqnum(self):
1605         """
1606         I return the sequence number for this share.
1607         """
1608         d = self._maybe_fetch_offsets_and_header()
1609         d.addCallback(lambda ignored:
1610             self._sequence_number)
1611         return d
1612
1613
1614     def get_root_hash(self):
1615         """
1616         I return the root of the block hash tree
1617         """
1618         d = self._maybe_fetch_offsets_and_header()
1619         d.addCallback(lambda ignored: self._root_hash)
1620         return d
1621
1622
1623     def get_checkstring(self):
1624         """
1625         I return the packed representation of the following:
1626
1627             - version number
1628             - sequence number
1629             - root hash
1630             - salt hash
1631
1632         which my users use as a checkstring to detect other writers.
1633         """
1634         d = self._maybe_fetch_offsets_and_header()
1635         def _build_checkstring(ignored):
1636             if self._salt:
1637                 checkstring = struct.pack(PREFIX,
1638                                           self._version_number,
1639                                           self._sequence_number,
1640                                           self._root_hash,
1641                                           self._salt)
1642             else:
1643                 checkstring = struct.pack(MDMFCHECKSTRING,
1644                                           self._version_number,
1645                                           self._sequence_number,
1646                                           self._root_hash)
1647
1648             return checkstring
1649         d.addCallback(_build_checkstring)
1650         return d
1651
1652
1653     def get_prefix(self, force_remote):
1654         d = self._maybe_fetch_offsets_and_header(force_remote)
1655         d.addCallback(lambda ignored:
1656             self._build_prefix())
1657         return d
1658
1659
1660     def _build_prefix(self):
1661         # The prefix is another name for the part of the remote share
1662         # that gets signed. It consists of everything up to and
1663         # including the datalength, packed by struct.
1664         if self._version_number == SDMF_VERSION:
1665             return struct.pack(SIGNED_PREFIX,
1666                            self._version_number,
1667                            self._sequence_number,
1668                            self._root_hash,
1669                            self._salt,
1670                            self._required_shares,
1671                            self._total_shares,
1672                            self._segment_size,
1673                            self._data_length)
1674
1675         else:
1676             return struct.pack(MDMFSIGNABLEHEADER,
1677                            self._version_number,
1678                            self._sequence_number,
1679                            self._root_hash,
1680                            self._required_shares,
1681                            self._total_shares,
1682                            self._segment_size,
1683                            self._data_length)
1684
1685
1686     def _get_offsets_tuple(self):
1687         # The offsets tuple is another component of the version
1688         # information tuple. It is basically our offsets dictionary,
1689         # itemized and in a tuple.
1690         return self._offsets.copy()
1691
1692
1693     def get_verinfo(self):
1694         """
1695         I return my verinfo tuple. This is used by the ServermapUpdater
1696         to keep track of versions of mutable files.
1697
1698         The verinfo tuple for MDMF files contains:
1699             - seqnum
1700             - root hash
1701             - a blank (nothing)
1702             - segsize
1703             - datalen
1704             - k
1705             - n
1706             - prefix (the thing that you sign)
1707             - a tuple of offsets
1708
1709         We include the nonce in MDMF to simplify processing of version
1710         information tuples.
1711
1712         The verinfo tuple for SDMF files is the same, but contains a
1713         16-byte IV instead of a hash of salts.
1714         """
1715         d = self._maybe_fetch_offsets_and_header()
1716         def _build_verinfo(ignored):
1717             if self._version_number == SDMF_VERSION:
1718                 salt_to_use = self._salt
1719             else:
1720                 salt_to_use = None
1721             return (self._sequence_number,
1722                     self._root_hash,
1723                     salt_to_use,
1724                     self._segment_size,
1725                     self._data_length,
1726                     self._required_shares,
1727                     self._total_shares,
1728                     self._build_prefix(),
1729                     self._get_offsets_tuple())
1730         d.addCallback(_build_verinfo)
1731         return d
1732
1733
1734     def _read(self, readvs, force_remote=False):
1735         unsatisfiable = filter(lambda x: x[0] + x[1] > len(self._data), readvs)
1736         # TODO: It's entirely possible to tweak this so that it just
1737         # fulfills the requests that it can, and not demand that all
1738         # requests are satisfiable before running it.
1739         if not unsatisfiable and not force_remote:
1740             results = [self._data[offset:offset+length]
1741                        for (offset, length) in readvs]
1742             results = {self.shnum: results}
1743             return defer.succeed(results)
1744         else:
1745             return self._rref.callRemote("slot_readv",
1746                                          self._storage_index,
1747                                          [self.shnum],
1748                                          readvs)
1749
1750
1751     def is_sdmf(self):
1752         """I tell my caller whether or not my remote file is SDMF or MDMF
1753         """
1754         d = self._maybe_fetch_offsets_and_header()
1755         d.addCallback(lambda ignored:
1756             self._version_number == 0)
1757         return d
1758
1759
class LayoutInvalid(BadShareError):
    """
    This isn't a valid MDMF mutable file.

    I am a kind of BadShareError, so code that handles BadShareError
    handles me too. In this file I am raised when a caller asks for a
    segment number that is beyond the number of segments in the share.
    """