]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/mutable/layout.py
Fix an error handling path that would never have been reached. fixes ticket:2543
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / mutable / layout.py
1
2 import struct
3 from allmydata.mutable.common import NeedMoreDataError, UnknownVersionError, \
4      BadShareError
5 from allmydata.interfaces import HASH_SIZE, SALT_SIZE, SDMF_VERSION, \
6                                  MDMF_VERSION, IMutableSlotWriter
7 from allmydata.util import mathutil
8 from twisted.python import failure
9 from twisted.internet import defer
10 from zope.interface import implements
11
12
13 # These strings describe the format of the packed structs they help process.
14 # Here's what they mean:
15 #
16 #  PREFIX:
17 #    >: Big-endian byte order; the most significant byte is first (leftmost).
18 #    B: The container version information; stored as an unsigned 8-bit integer.
19 #       This is currently either SDMF_VERSION or MDMF_VERSION.
20 #    Q: The sequence number; this is sort of like a revision history for
21 #       mutable files; they start at 1 and increase as they are changed after
22 #       being uploaded. Stored as an unsigned 64-bit integer.
23 #  32s: The root hash of the share hash tree. We use sha-256d, so we use 32
24 #       bytes to store the value.
25 #  16s: The salt for the readkey. This is a 16-byte random value.
26 #
27 #  SIGNED_PREFIX additions, things that are covered by the signature:
28 #    B: The "k" encoding parameter. We store this as an unsigned 8-bit
29 #       integer, since our erasure coding scheme cannot encode to more than
30 #       255 pieces.
31 #    B: The "N" encoding parameter. Stored as an unsigned 8-bit integer for
32 #       the same reason as above.
33 #    Q: The segment size of the uploaded file. This is an unsigned 64-bit
34 #       integer, to allow handling large segments and files. For SDMF the
35 #       segment size is the data length plus padding; for MDMF it can be
36 #       smaller.
37 #    Q: The data length of the uploaded file. Like the segment size field,
38 #       it is an unsigned 64-bit integer.
39 #
40 #   HEADER additions:
41 #     L: The offset of the signature. An unsigned 32-bit integer.
42 #     L: The offset of the share hash chain. An unsigned 32-bit integer.
43 #     L: The offset of the block hash tree. An unsigned 32-bit integer.
44 #     L: The offset of the share data. An unsigned 32-bit integer.
45 #     Q: The offset of the encrypted private key. An unsigned 64-bit integer,
46 #        to account for the possibility of a lot of share data.
47 #     Q: The offset of the EOF. An unsigned 64-bit integer, to account for
48 #        the possibility of a lot of share data.
49 #
50 #  After all of these, we have the following:
51 #    - The verification key: Occupies the space between the end of the header
52 #      and the start of the signature (i.e.: data[HEADER_LENGTH:o['signature']]).
53 #    - The signature, which goes from the signature offset to the share hash
54 #      chain offset.
55 #    - The share hash chain, which goes from the share hash chain offset to
56 #      the block hash tree offset.
57 #    - The share data, which goes from the share data offset to the encrypted
58 #      private key offset.
59 #    - The encrypted private key, which goes until the end of the file.
60 #
61 #  The block hash tree in this encoding has only one hash, so the offset of
62 #  the share data will be 32 bytes more than the offset of the block hash tree.
63 #  Given this, we may need to check to see how many bytes a reasonably sized
64 #  block hash tree will take up.
65
# struct format strings for the SDMF share layout; the field-by-field
# meaning of each format character is described in the comment above.
PREFIX = ">BQ32s16s"  # each version may have a different prefix
SIGNED_PREFIX = ">BQ32s16s BBQQ"  # this is covered by the signature
HEADER = ">BQ32s16s BBQQ LLLLQQ"  # includes offsets
OFFSETS = ">LLLLQQ"  # just the offset table at the end of HEADER

# Packed sizes, in bytes, of the formats above.
SIGNED_PREFIX_LENGTH = struct.calcsize(SIGNED_PREFIX)
HEADER_LENGTH = struct.calcsize(HEADER)
OFFSETS_LENGTH = struct.calcsize(OFFSETS)

# 69105 TB, kind of arbitrary
MAX_MUTABLE_SHARE_SIZE = 69105 * 1000 ** 4
75
76
77 # These are still used for some tests of SDMF files.
def unpack_header(data):
    """
    Unpack the fixed-size header found at the start of an SDMF share.

    Only the first HEADER_LENGTH bytes of data are examined.

    I return a tuple of (version, seqnum, root_hash, IV, k, N, segsize,
    datalen, offsets), where offsets is a dict mapping each of
    'signature', 'share_hash_chain', 'block_hash_tree', 'share_data',
    'enc_privkey' and 'EOF' to the offset stored in the header.
    """
    fields = struct.unpack(HEADER, data[:HEADER_LENGTH])

    # The first eight fields are the signed prefix; the remaining six
    # are the offset table, in the order in which HEADER packs them.
    (version, seqnum, root_hash, IV, k, N, segsize, datalen) = fields[:8]
    offset_names = ('signature', 'share_hash_chain', 'block_hash_tree',
                    'share_data', 'enc_privkey', 'EOF')
    offsets = dict(zip(offset_names, fields[8:]))

    return (version, seqnum, root_hash, IV, k, N, segsize, datalen, offsets)
92
def unpack_share(data):
    """
    Split a complete version-0 (SDMF) share into its component fields.

    I return a tuple of (seqnum, root_hash, IV, k, N, segsize, datalen,
    pubkey, signature, share_hash_chain, block_hash_tree, share_data,
    enc_privkey). share_hash_chain is a dict mapping the 16-bit number
    stored with each entry to its 32-byte hash; block_hash_tree is a
    list of 32-byte hashes.

    I raise UnknownVersionError if the version byte is not 0,
    NeedMoreDataError if data is shorter than the EOF offset recorded
    in the header, and BadShareError if the share hash chain or block
    hash tree is not a whole number of entries long. I assert that data
    holds at least a full header.
    """
    assert len(data) >= HEADER_LENGTH
    (version, seqnum, root_hash, IV,
     k, N, segsize, datalen, o) = unpack_header(data)

    if version != 0:
        raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version)

    eof = o['EOF']
    privkey_start = o['enc_privkey']
    if len(data) < eof:
        raise NeedMoreDataError(eof, privkey_start, eof - privkey_start)

    # The verification key sits directly after the header.
    pubkey = data[HEADER_LENGTH:o['signature']]
    signature = data[o['signature']:o['share_hash_chain']]

    # Each share hash chain entry is a 2-byte number followed by a
    # 32-byte hash.
    entry_format = ">H32s"
    entry_size = struct.calcsize(entry_format)
    share_hash_chain_s = data[o['share_hash_chain']:o['block_hash_tree']]
    if len(share_hash_chain_s) % entry_size != 0:
        raise BadShareError("hash chain is %d bytes, not multiple of %d"
                            % (len(share_hash_chain_s), entry_size))
    share_hash_chain = dict(
        [struct.unpack(entry_format, share_hash_chain_s[start:start+entry_size])
         for start in range(0, len(share_hash_chain_s), entry_size)])

    # The block hash tree is a plain concatenation of 32-byte hashes.
    block_hash_tree_s = data[o['block_hash_tree']:o['share_data']]
    if len(block_hash_tree_s) % 32 != 0:
        raise BadShareError("block_hash_tree is %d bytes, not multiple of %d"
                            % (len(block_hash_tree_s), 32))
    block_hash_tree = [block_hash_tree_s[start:start+32]
                       for start in range(0, len(block_hash_tree_s), 32)]

    share_data = data[o['share_data']:privkey_start]
    enc_privkey = data[privkey_start:eof]

    return (seqnum, root_hash, IV, k, N, segsize, datalen,
            pubkey, signature, share_hash_chain, block_hash_tree,
            share_data, enc_privkey)
143
def get_version_from_checkstring(checkstring):
    """
    Return the version number stored in the first byte of checkstring,
    as an integer.
    """
    first_byte = checkstring[:1]
    return struct.unpack(">B", first_byte)[0]
147
def unpack_sdmf_checkstring(checkstring):
    """
    Split an SDMF checkstring into (seqnum, root_hash, IV).

    Only the leading bytes covered by PREFIX are examined. I assert
    that the version byte is SDMF_VERSION.
    """
    packed = checkstring[:struct.calcsize(PREFIX)]
    (version, seqnum, root_hash, IV) = struct.unpack(PREFIX, packed)
    assert version == SDMF_VERSION, version
    return (seqnum, root_hash, IV)
153
def unpack_mdmf_checkstring(checkstring):
    """
    Split an MDMF checkstring into (seqnum, root_hash).

    Only the leading bytes covered by MDMFCHECKSTRING are examined. I
    assert that the version byte is MDMF_VERSION.
    """
    packed = checkstring[:struct.calcsize(MDMFCHECKSTRING)]
    (version, seqnum, root_hash) = struct.unpack(MDMFCHECKSTRING, packed)
    assert version == MDMF_VERSION, version
    return (seqnum, root_hash)
159
def pack_offsets(verification_key_length, signature_length,
                 share_hash_chain_length, block_hash_tree_length,
                 share_data_length, encprivkey_length):
    """
    Build the packed offset table for an SDMF share.

    The sections follow the header in the order verification key,
    signature, share hash chain, block hash tree, share data, encrypted
    private key. Given the length of each, I return the packed offsets
    of the signature, share hash chain, block hash tree, share data,
    encrypted private key, and EOF.
    """
    section_lengths = (verification_key_length,
                       signature_length,
                       share_hash_chain_length,
                       block_hash_tree_length,
                       share_data_length,
                       encprivkey_length)

    # Every offset in the table is the position at which the
    # corresponding section ends, i.e. where the next one begins. The
    # first section starts right after the header.
    boundaries = []
    position = HEADER_LENGTH
    for length in section_lengths:
        position += length
        boundaries.append(position)

    return struct.pack(">LLLLQQ", *boundaries)
179
def pack_share(prefix, verification_key, signature,
               share_hash_chain, block_hash_tree,
               share_data, encprivkey):
    """
    Assemble a complete SDMF share from its pieces.

    prefix is the already-packed signed prefix. share_hash_chain is a
    dict whose keys are packed as 16-bit numbers and whose values are
    packed as 32-byte hashes, in ascending key order. block_hash_tree
    is a sequence of hashes; I assert that each is 32 bytes long.

    I return the concatenation of the prefix, the offset table, and
    then each section in on-disk order.
    """
    hash_chain_entries = [struct.pack(">H32s", num, share_hash_chain[num])
                          for num in sorted(share_hash_chain.keys())]
    share_hash_chain_s = "".join(hash_chain_entries)

    for block_hash in block_hash_tree:
        assert len(block_hash) == 32
    block_hash_tree_s = "".join(block_hash_tree)

    # The sections, in the order in which they follow the offset table.
    sections = [verification_key,
                signature,
                share_hash_chain_s,
                block_hash_tree_s,
                share_data,
                encprivkey]
    packed_offsets = pack_offsets(*[len(section) for section in sections])

    return "".join([prefix, packed_offsets] + sections)
204
def pack_prefix(seqnum, root_hash, IV,
                required_shares, total_shares,
                segment_size, data_length):
    """
    Pack the signed prefix of a version-0 share: the version byte,
    sequence number, root hash, IV, the two encoding parameters, the
    segment size and the data length, in SIGNED_PREFIX format.
    """
    version = 0
    return struct.pack(SIGNED_PREFIX,
                       version,
                       seqnum,
                       root_hash,
                       IV,
                       required_shares,
                       total_shares,
                       segment_size,
                       data_length)
219
220
class SDMFSlotWriteProxy:
    """
    I represent a remote write slot for an SDMF mutable file. I build a
    share in memory, and then write it in one piece to the remote
    server. This mimics how SDMF shares were built before MDMF (and the
    new MDMF uploader), but provides that functionality in a way that
    allows the MDMF uploader to be built without much special-casing for
    file format, which makes the uploader code more readable.
    """
    # The docstring has to be the first statement in the class body to
    # become __doc__, so the interface declaration comes after it.
    implements(IMutableSlotWriter)

    def __init__(self,
                 shnum,
                 rref, # a remote reference to a storage server
                 storage_index,
                 secrets, # (write_enabler, renew_secret, cancel_secret)
                 seqnum, # the sequence number of the mutable file
                 required_shares,
                 total_shares,
                 segment_size,
                 data_length): # the length of the original file
        self.shnum = shnum
        self._rref = rref
        self._storage_index = storage_index
        self._secrets = secrets
        self._seqnum = seqnum
        self._required_shares = required_shares
        self._total_shares = total_shares
        self._segment_size = segment_size
        self._data_length = data_length

        # This is an SDMF file, so it should have only one segment, so,
        # modulo padding of the data length, the segment size and the
        # data length should be the same.
        expected_segment_size = mathutil.next_multiple(data_length,
                                                       self._required_shares)
        assert expected_segment_size == segment_size

        # The segment size is a multiple of required_shares (checked
        # just above), so floor division is exact. Using // keeps the
        # block size an integer even under true division; put_block
        # compares it against len(data).
        self._block_size = self._segment_size // self._required_shares

        # This is meant to mimic how SDMF files were built before MDMF
        # entered the picture: we generate each share in its entirety,
        # then push it off to the storage server in one write. When
        # callers call set_*, they are just populating this dict.
        # finish_publishing will stitch these pieces together into a
        # coherent share, and then write the coherent share to the
        # storage server.
        self._share_pieces = {}

        # This tells the write logic what checkstring to use when
        # writing remote shares.
        self._testvs = []

        self._readvs = [(0, struct.calcsize(PREFIX))]


    def set_checkstring(self, checkstring_or_seqnum,
                              root_hash=None,
                              salt=None):
        """
        Set the checkstring that I will pass to the remote server when
        writing.

            @param checkstring_or_seqnum: A packed checkstring to use,
                   or a sequence number. I will treat this as a
                   sequence number if root_hash and salt are both
                   given, and build the checkstring from the three
                   values; otherwise I will treat it as an
                   already-packed checkstring and use it as-is.

        Note that implementations can differ in which semantics they
        wish to support for set_checkstring -- they can, for example,
        build the checkstring themselves from its constituents, or
        some other thing.
        """
        if root_hash and salt:
            checkstring = struct.pack(PREFIX,
                                      0,
                                      checkstring_or_seqnum,
                                      root_hash,
                                      salt)
        else:
            checkstring = checkstring_or_seqnum
        self._testvs = [(0, len(checkstring), "eq", checkstring)]


    def get_checkstring(self):
        """
        Get the checkstring that I think currently exists on the remote
        server. I return the empty string if no checkstring has been
        set.
        """
        if self._testvs:
            return self._testvs[0][3]
        return ""


    def put_block(self, data, segnum, salt):
        """
        Add a block and salt to the share.
        """
        # SDMF files have only one segment
        assert segnum == 0
        assert len(data) == self._block_size
        assert len(salt) == SALT_SIZE

        self._share_pieces['sharedata'] = data
        self._share_pieces['salt'] = salt

        # TODO: Figure out something intelligent to return.
        return defer.succeed(None)


    def put_encprivkey(self, encprivkey):
        """
        Add the encrypted private key to the share.
        """
        self._share_pieces['encprivkey'] = encprivkey

        return defer.succeed(None)


    def put_blockhashes(self, blockhashes):
        """
        Add the block hash tree to the share. blockhashes must be a
        list of HASH_SIZE-byte hashes.
        """
        assert isinstance(blockhashes, list)
        for h in blockhashes:
            assert len(h) == HASH_SIZE

        # serialize the blockhashes, then set them.
        blockhashes_s = "".join(blockhashes)
        self._share_pieces['block_hash_tree'] = blockhashes_s

        return defer.succeed(None)


    def put_sharehashes(self, sharehashes):
        """
        Add the share hash chain to the share. sharehashes must be a
        dict whose values are HASH_SIZE-byte hashes; each entry is
        stored as a 16-bit key followed by its hash, in ascending key
        order.
        """
        assert isinstance(sharehashes, dict)
        for h in sharehashes.values():
            assert len(h) == HASH_SIZE

        # serialize the sharehashes, then set them.
        sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i])
                                 for i in sorted(sharehashes.keys())])
        self._share_pieces['share_hash_chain'] = sharehashes_s

        return defer.succeed(None)


    def put_root_hash(self, root_hash):
        """
        Add the root hash to the share.
        """
        assert len(root_hash) == HASH_SIZE

        self._share_pieces['root_hash'] = root_hash

        return defer.succeed(None)


    def put_salt(self, salt):
        """
        Add a salt to an empty SDMF file. This also records empty share
        data, since no block will be put for such a file.
        """
        assert len(salt) == SALT_SIZE

        self._share_pieces['salt'] = salt
        self._share_pieces['sharedata'] = ""

        # Return an already-fired Deferred, like every other put_*
        # method, so that callers can treat them all uniformly.
        return defer.succeed(None)


    def get_signable(self):
        """
        Return the part of the share that needs to be signed.

        SDMF writers need to sign the packed representation of the
        first eight fields of the remote share, that is:
            - version number (0)
            - sequence number
            - root of the share hash tree
            - salt
            - k
            - n
            - segsize
            - datalen

        This method is responsible for returning that to callers. The
        root hash and the salt must already have been put.
        """
        return struct.pack(SIGNED_PREFIX,
                           0,
                           self._seqnum,
                           self._share_pieces['root_hash'],
                           self._share_pieces['salt'],
                           self._required_shares,
                           self._total_shares,
                           self._segment_size,
                           self._data_length)


    def put_signature(self, signature):
        """
        Add the signature to the share.
        """
        self._share_pieces['signature'] = signature

        return defer.succeed(None)


    def put_verification_key(self, verification_key):
        """
        Add the verification key to the share.
        """
        self._share_pieces['verification_key'] = verification_key

        return defer.succeed(None)


    def get_verinfo(self):
        """
        I return my verinfo tuple. This is used by the ServermapUpdater
        to keep track of versions of mutable files.

        The verinfo tuple for MDMF files contains:
            - seqnum
            - root hash
            - a blank (nothing)
            - segsize
            - datalen
            - k
            - n
            - prefix (the thing that you sign)
            - a tuple of offsets

        We include the nonce in MDMF to simplify processing of version
        information tuples.

        The verinfo tuple for SDMF files (which is what I return) is
        the same, but contains the 16-byte IV (the salt) in the third
        position instead of a blank.

        All of the pieces of the share must have been put before I am
        called, since both the signable prefix and the offsets are
        computed from them.
        """
        return (self._seqnum,
                self._share_pieces['root_hash'],
                self._share_pieces['salt'],
                self._segment_size,
                self._data_length,
                self._required_shares,
                self._total_shares,
                self.get_signable(),
                self._get_offsets_tuple())

    def _get_offsets_dict(self):
        """
        Compute the offset table from the pieces collected so far.

        I return a dict mapping 'signature', 'share_hash_chain',
        'block_hash_tree', 'share_data', 'enc_privkey' and 'EOF' to
        their offsets within the share. Every piece other than the root
        hash and salt must already have been put.
        """
        # (name of the offset, piece that ends at that offset). The
        # verification key comes directly after the header, and each
        # section starts where the one before it ends.
        layout = [('signature', 'verification_key'),
                  ('share_hash_chain', 'signature'),
                  ('block_hash_tree', 'share_hash_chain'),
                  ('share_data', 'block_hash_tree'),
                  ('enc_privkey', 'sharedata'),
                  ('EOF', 'encprivkey')]

        offsets = {}
        position = HEADER_LENGTH
        for (offset_name, preceding_piece) in layout:
            position += len(self._share_pieces[preceding_piece])
            offsets[offset_name] = position
        return offsets


    def _get_offsets_tuple(self):
        """
        Return the offset table as a tuple of (name, offset) pairs.
        """
        return tuple(self._get_offsets_dict().items())


    def _pack_offsets(self):
        """
        Return the offset table packed in the on-disk OFFSETS format.
        """
        offsets = self._get_offsets_dict()
        return struct.pack(OFFSETS,
                           offsets['signature'],
                           offsets['share_hash_chain'],
                           offsets['block_hash_tree'],
                           offsets['share_data'],
                           offsets['enc_privkey'],
                           offsets['EOF'])


    def finish_publishing(self):
        """
        Do anything necessary to finish writing the share to a remote
        server. I require that no further publishing needs to take place
        after this method has been called.

        I return whatever the remote slot_testv_and_readv_and_writev
        call returns.
        """
        # The root hash and salt are checked too: they are needed to
        # build the signed prefix below, and a missing one would
        # otherwise surface as a bare KeyError.
        for k in ["sharedata", "encprivkey", "signature", "verification_key",
                  "share_hash_chain", "block_hash_tree",
                  "root_hash", "salt"]:
            assert k in self._share_pieces, (self.shnum, k, self._share_pieces.keys())
        # This is the only method that actually writes something to the
        # remote server.
        # First, we need to pack the share into data that we can write
        # to the remote server in one write.
        offsets = self._pack_offsets()
        prefix = self.get_signable()
        final_share = "".join([prefix,
                               offsets,
                               self._share_pieces['verification_key'],
                               self._share_pieces['signature'],
                               self._share_pieces['share_hash_chain'],
                               self._share_pieces['block_hash_tree'],
                               self._share_pieces['sharedata'],
                               self._share_pieces['encprivkey']])

        # Our only data vector is going to be writing the final share,
        # in its entirety.
        datavs = [(0, final_share)]

        if not self._testvs:
            # Our caller has not provided us with another checkstring
            # yet, so we assume that we are writing a new share, and set
            # a test vector that will allow a new share to be written.
            self._testvs = [(0, 1, "eq", "")]

        tw_vectors = {self.shnum: (self._testvs, datavs, None)}
        return self._rref.callRemote("slot_testv_and_readv_and_writev",
                                     self._storage_index,
                                     self._secrets,
                                     tw_vectors,
                                     # TODO is it useful to read something?
                                     self._readvs)
550
551
# struct format strings for the MDMF share header.
MDMFCHECKSTRING = ">BQ32s"
MDMFSIGNABLEHEADER = ">BQ32sBBQQ"
MDMFHEADERWITHOUTOFFSETS = ">BQ32sBBQQ"
MDMFOFFSETS = ">QQQQQQQQ"
MDMFHEADER = ">BQ32sBBQQ QQQQQQQQ"

# Packed sizes, in bytes, of the formats above.
MDMFHEADERSIZE = struct.calcsize(MDMFHEADER)
MDMFHEADERWITHOUTOFFSETSSIZE = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
MDMFOFFSETS_LENGTH = struct.calcsize(MDMFOFFSETS)

PRIVATE_KEY_SIZE = 1220
SIGNATURE_SIZE = 260
VERIFICATION_KEY_SIZE = 292
# We know we won't have more than 256 shares, and we know that we won't need
# to store more than ln2(256) hash-chain nodes to validate, so that's our
# bound. Each node requires 2 bytes of node-number plus 32 bytes of hash.
SHARE_HASH_CHAIN_SIZE = (2 + HASH_SIZE) * mathutil.log_ceil(256, 2)
568
569 class MDMFSlotWriteProxy:
570     implements(IMutableSlotWriter)
571
572     """
573     I represent a remote write slot for an MDMF mutable file.
574
575     I abstract away from my caller the details of block and salt
576     management, and the implementation of the on-disk format for MDMF
577     shares.
578     """
579     # Expected layout, MDMF:
580     # offset:     size:       name:
581     #-- signed part --
582     # 0           1           version number (01)
583     # 1           8           sequence number
584     # 9           32          share tree root hash
585     # 41          1           The "k" encoding parameter
586     # 42          1           The "N" encoding parameter
587     # 43          8           The segment size of the uploaded file
588     # 51          8           The data length of the original plaintext
589     #-- end signed part --
590     # 59          8           The offset of the encrypted private key
591     # 67          8           The offset of the share hash chain
592     # 75          8           The offset of the signature
593     # 83          8           The offset of the verification key
594     # 91          8           The offset of the end of the v. key.
595     # 99          8           The offset of the share data
596     # 107         8           The offset of the block hash tree
597     # 115         8           The offset of EOF
598     # 123         var         encrypted private key
599     # var         var         share hash chain
600     # var         var         signature
601     # var         var         verification key
602     # var         large       share data
603     # var         var         block hash tree
604     #
605     # We order the fields that way to make smart downloaders -- downloaders
606     # which preemptively read a big part of the share -- possible.
607     #
608     # The checkstring is the first three fields -- the version number,
609     # sequence number, root hash and root salt hash. This is consistent
610     # in meaning to what we have with SDMF files, except now instead of
611     # using the literal salt, we use a value derived from all of the
612     # salts -- the share hash root.
613     #
614     # The salt is stored before the block for each segment. The block
615     # hash tree is computed over the combination of block and salt for
616     # each segment. In this way, we get integrity checking for both
617     # block and salt with the current block hash tree arrangement.
618     #
619     # The ordering of the offsets is different to reflect the dependencies
620     # that we'll run into with an MDMF file. The expected write flow is
621     # something like this:
622     #
623     #   0: Initialize with the sequence number, encoding parameters and
624     #      data length. From this, we can deduce the number of segments,
625     #      and where they should go.. We can also figure out where the
626     #      encrypted private key should go, because we can figure out how
627     #      big the share data will be.
628     #
629     #   1: Encrypt, encode, and upload the file in chunks. Do something
630     #      like
631     #
632     #       put_block(data, segnum, salt)
633     #
634     #      to write a block and a salt to the disk. We can do both of
635     #      these operations now because we have enough of the offsets to
636     #      know where to put them.
637     #
638     #   2: Put the encrypted private key. Use:
639     #
640     #        put_encprivkey(encprivkey)
641     #
642     #      Now that we know the length of the private key, we can fill
643     #      in the offset for the block hash tree.
644     #
645     #   3: We're now in a position to upload the block hash tree for
646     #      a share. Put that using something like:
647     #
648     #        put_blockhashes(block_hash_tree)
649     #
650     #      Note that block_hash_tree is a list of hashes -- we'll take
651     #      care of the details of serializing that appropriately. When
652     #      we get the block hash tree, we are also in a position to
653     #      calculate the offset for the share hash chain, and fill that
654     #      into the offsets table.
655     #
656     #   4: We're now in a position to upload the share hash chain for
657     #      a share. Do that with something like:
658     #
659     #        put_sharehashes(share_hash_chain)
660     #
661     #      share_hash_chain should be a dictionary mapping shnums to
662     #      32-byte hashes -- the wrapper handles serialization.
663     #      We'll know where to put the signature at this point, also.
664     #      The root of this tree will be put explicitly in the next
665     #      step.
666     #
667     #   5: Before putting the signature, we must first put the
668     #      root_hash. Do this with:
669     #
670     #        put_root_hash(root_hash).
671     #
672     #      In terms of knowing where to put this value, it was always
673     #      possible to place it, but it makes sense semantically to
674     #      place it after the share hash tree, so that's why you do it
675     #      in this order.
676     #
677     #   6: With the root hash put, we can now sign the header. Use:
678     #
679     #        get_signable()
680     #
681     #      to get the part of the header that you want to sign, and use:
682     #
683     #        put_signature(signature)
684     #
685     #      to write your signature to the remote server.
686     #
687     #   7: Add the verification key, and finish. Do:
688     #
689     #        put_verification_key(key)
690     #
691     #      and
692     #
693     #        finish_publish()
694     #
695     # Checkstring management:
696     #
697     # To write to a mutable slot, we have to provide test vectors to ensure
698     # that we are writing to the same data that we think we are. These
699     # vectors allow us to detect uncoordinated writes; that is, writes
700     # where both we and some other shareholder are writing to the
701     # mutable slot, and to report those back to the parts of the program
702     # doing the writing.
703     #
704     # With SDMF, this was easy -- all of the share data was written in
705     # one go, so it was easy to detect uncoordinated writes, and we only
706     # had to do it once. With MDMF, not all of the file is written at
707     # once.
708     #
709     # If a share is new, we write out as much of the header as we can
710     # before writing out anything else. This gives other writers a
711     # canary that they can use to detect uncoordinated writes, and, if
712     # they do the same thing, gives us the same canary. We then update
713     # the share. We won't be able to write out two fields of the header
714     # -- the share tree hash and the salt hash -- until we finish
715     # writing out the share. We only require the writer to provide the
716     # initial checkstring, and keep track of what it should be after
717     # updates ourselves.
718     #
719     # If we haven't written anything yet, then on the first write (which
720     # will probably be a block + salt of a share), we'll also write out
721     # the header. On subsequent passes, we'll expect to see the header.
722     # This changes in two places:
723     #
724     #   - When we write out the salt hash
725     #   - When we write out the root of the share hash tree
726     #
727     # since these values will change the header. It is possible that we
728     # can just make those be written in one operation to minimize
729     # disruption.
730     def __init__(self,
731                  shnum,
732                  rref, # a remote reference to a storage server
733                  storage_index,
734                  secrets, # (write_enabler, renew_secret, cancel_secret)
735                  seqnum, # the sequence number of the mutable file
736                  required_shares,
737                  total_shares,
738                  segment_size,
739                  data_length): # the length of the original file
740         self.shnum = shnum
741         self._rref = rref
742         self._storage_index = storage_index
743         self._seqnum = seqnum
744         self._required_shares = required_shares
745         assert self.shnum >= 0 and self.shnum < total_shares
746         self._total_shares = total_shares
747         # We build up the offset table as we write things. It is the
748         # last thing we write to the remote server.
749         self._offsets = {}
750         self._testvs = []
751         # This is a list of write vectors that will be sent to our
752         # remote server once we are directed to write things there.
753         self._writevs = []
754         self._secrets = secrets
755         # The segment size needs to be a multiple of the k parameter --
756         # any padding should have been carried out by the publisher
757         # already.
758         assert segment_size % required_shares == 0
759         self._segment_size = segment_size
760         self._data_length = data_length
761
762         # These are set later -- we define them here so that we can
763         # check for their existence easily
764
765         # This is the root of the share hash tree -- the Merkle tree
766         # over the roots of the block hash trees computed for shares in
767         # this upload.
768         self._root_hash = None
769
770         # We haven't yet written anything to the remote bucket. By
771         # setting this, we tell the _write method as much. The write
772         # method will then know that it also needs to add a write vector
773         # for the checkstring (or what we have of it) to the first write
774         # request. We'll then record that value for future use.  If
775         # we're expecting something to be there already, we need to call
776         # set_checkstring before we write anything to tell the first
777         # write about that.
778         self._written = False
779
780         # When writing data to the storage servers, we get a read vector
781         # for free. We'll read the checkstring, which will help us
782         # figure out what's gone wrong if a write fails.
783         self._readv = [(0, struct.calcsize(MDMFCHECKSTRING))]
784
785         # We calculate the number of segments because it tells us
786         # where the salt part of the file ends/share segment begins,
787         # and also because it provides a useful amount of bounds checking.
788         self._num_segments = mathutil.div_ceil(self._data_length,
789                                                self._segment_size)
790         self._block_size = self._segment_size / self._required_shares
791         # We also calculate the share size, to help us with block
792         # constraints later.
793         tail_size = self._data_length % self._segment_size
794         if not tail_size:
795             self._tail_block_size = self._block_size
796         else:
797             self._tail_block_size = mathutil.next_multiple(tail_size,
798                                                            self._required_shares)
799             self._tail_block_size /= self._required_shares
800
801         # We already know where the sharedata starts; right after the end
802         # of the header (which is defined as the signable part + the offsets)
803         # We can also calculate where the encrypted private key begins
804         # from what we know know.
805         self._actual_block_size = self._block_size + SALT_SIZE
806         data_size = self._actual_block_size * (self._num_segments - 1)
807         data_size += self._tail_block_size
808         data_size += SALT_SIZE
809         self._offsets['enc_privkey'] = MDMFHEADERSIZE
810
811         # We don't define offsets for these because we want them to be
812         # tightly packed -- this allows us to ignore the responsibility
813         # of padding individual values, and of removing that padding
814         # later. So nonconstant_start is where we start writing
815         # nonconstant data.
816         nonconstant_start = self._offsets['enc_privkey']
817         nonconstant_start += PRIVATE_KEY_SIZE
818         nonconstant_start += SIGNATURE_SIZE
819         nonconstant_start += VERIFICATION_KEY_SIZE
820         nonconstant_start += SHARE_HASH_CHAIN_SIZE
821
822         self._offsets['share_data'] = nonconstant_start
823
824         # Finally, we know how big the share data will be, so we can
825         # figure out where the block hash tree needs to go.
826         # XXX: But this will go away if Zooko wants to make it so that
827         # you don't need to know the size of the file before you start
828         # uploading it.
829         self._offsets['block_hash_tree'] = self._offsets['share_data'] + \
830                     data_size
831
832         # Done. We can snow start writing.
833
834
835     def set_checkstring(self,
836                         seqnum_or_checkstring,
837                         root_hash=None,
838                         salt=None):
839         """
840         Set checkstring checkstring for the given shnum.
841
842         This can be invoked in one of two ways.
843
844         With one argument, I assume that you are giving me a literal
845         checkstring -- e.g., the output of get_checkstring. I will then
846         set that checkstring as it is. This form is used by unit tests.
847
848         With two arguments, I assume that you are giving me a sequence
849         number and root hash to make a checkstring from. In that case, I
850         will build a checkstring and set it for you. This form is used
851         by the publisher.
852
853         By default, I assume that I am writing new shares to the grid.
854         If you don't explcitly set your own checkstring, I will use
855         one that requires that the remote share not exist. You will want
856         to use this method if you are updating a share in-place;
857         otherwise, writes will fail.
858         """
859         # You're allowed to overwrite checkstrings with this method;
860         # I assume that users know what they are doing when they call
861         # it.
862         if root_hash:
863             checkstring = struct.pack(MDMFCHECKSTRING,
864                                       1,
865                                       seqnum_or_checkstring,
866                                       root_hash)
867         else:
868             checkstring = seqnum_or_checkstring
869
870         if checkstring == "":
871             # We special-case this, since len("") = 0, but we need
872             # length of 1 for the case of an empty share to work on the
873             # storage server, which is what a checkstring that is the
874             # empty string means.
875             self._testvs = []
876         else:
877             self._testvs = []
878             self._testvs.append((0, len(checkstring), "eq", checkstring))
879
880
881     def __repr__(self):
882         return "MDMFSlotWriteProxy for share %d" % self.shnum
883
884
885     def get_checkstring(self):
886         """
887         Given a share number, I return a representation of what the
888         checkstring for that share on the server will look like.
889
890         I am mostly used for tests.
891         """
892         if self._root_hash:
893             roothash = self._root_hash
894         else:
895             roothash = "\x00" * 32
896         return struct.pack(MDMFCHECKSTRING,
897                            1,
898                            self._seqnum,
899                            roothash)
900
901
902     def put_block(self, data, segnum, salt):
903         """
904         I queue a write vector for the data, salt, and segment number
905         provided to me. I return None, as I do not actually cause
906         anything to be written yet.
907         """
908         if segnum >= self._num_segments:
909             raise LayoutInvalid("I won't overwrite the block hash tree")
910         if len(salt) != SALT_SIZE:
911             raise LayoutInvalid("I was given a salt of size %d, but "
912                                 "I wanted a salt of size %d")
913         if segnum + 1 == self._num_segments:
914             if len(data) != self._tail_block_size:
915                 raise LayoutInvalid("I was given the wrong size block to write")
916         elif len(data) != self._block_size:
917             raise LayoutInvalid("I was given the wrong size block to write")
918
919         # We want to write at len(MDMFHEADER) + segnum * block_size.
920         offset = self._offsets['share_data'] + \
921             (self._actual_block_size * segnum)
922         data = salt + data
923
924         self._writevs.append(tuple([offset, data]))
925
926
927     def put_encprivkey(self, encprivkey):
928         """
929         I queue a write vector for the encrypted private key provided to
930         me.
931         """
932         assert self._offsets
933         assert self._offsets['enc_privkey']
934         # You shouldn't re-write the encprivkey after the block hash
935         # tree is written, since that could cause the private key to run
936         # into the block hash tree. Before it writes the block hash
937         # tree, the block hash tree writing method writes the offset of
938         # the share hash chain. So that's a good indicator of whether or
939         # not the block hash tree has been written.
940         if "signature" in self._offsets:
941             raise LayoutInvalid("You can't put the encrypted private key "
942                                 "after putting the share hash chain")
943
944         self._offsets['share_hash_chain'] = self._offsets['enc_privkey'] + \
945                 len(encprivkey)
946
947         self._writevs.append(tuple([self._offsets['enc_privkey'], encprivkey]))
948
949
950     def put_blockhashes(self, blockhashes):
951         """
952         I queue a write vector to put the block hash tree in blockhashes
953         onto the remote server.
954
955         The encrypted private key must be queued before the block hash
956         tree, since we need to know how large it is to know where the
957         block hash tree should go. The block hash tree must be put
958         before the share hash chain, since its size determines the
959         offset of the share hash chain.
960         """
961         assert self._offsets
962         assert "block_hash_tree" in self._offsets
963
964         assert isinstance(blockhashes, list)
965
966         blockhashes_s = "".join(blockhashes)
967         self._offsets['EOF'] = self._offsets['block_hash_tree'] + len(blockhashes_s)
968
969         self._writevs.append(tuple([self._offsets['block_hash_tree'],
970                                   blockhashes_s]))
971
972
973     def put_sharehashes(self, sharehashes):
974         """
975         I queue a write vector to put the share hash chain in my
976         argument onto the remote server.
977
978         The block hash tree must be queued before the share hash chain,
979         since we need to know where the block hash tree ends before we
980         can know where the share hash chain starts. The share hash chain
981         must be put before the signature, since the length of the packed
982         share hash chain determines the offset of the signature. Also,
983         semantically, you must know what the root of the block hash tree
984         is before you can generate a valid signature.
985         """
986         assert isinstance(sharehashes, dict)
987         assert self._offsets
988         if "share_hash_chain" not in self._offsets:
989             raise LayoutInvalid("You must put the block hash tree before "
990                                 "putting the share hash chain")
991
992         # The signature comes after the share hash chain. If the
993         # signature has already been written, we must not write another
994         # share hash chain. The signature writes the verification key
995         # offset when it gets sent to the remote server, so we look for
996         # that.
997         if "verification_key" in self._offsets:
998             raise LayoutInvalid("You must write the share hash chain "
999                                 "before you write the signature")
1000         sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i])
1001                                   for i in sorted(sharehashes.keys())])
1002         self._offsets['signature'] = self._offsets['share_hash_chain'] + \
1003             len(sharehashes_s)
1004         self._writevs.append(tuple([self._offsets['share_hash_chain'],
1005                             sharehashes_s]))
1006
1007
1008     def put_root_hash(self, roothash):
1009         """
1010         Put the root hash (the root of the share hash tree) in the
1011         remote slot.
1012         """
1013         # It does not make sense to be able to put the root
1014         # hash without first putting the share hashes, since you need
1015         # the share hashes to generate the root hash.
1016         #
1017         # Signature is defined by the routine that places the share hash
1018         # chain, so it's a good thing to look for in finding out whether
1019         # or not the share hash chain exists on the remote server.
1020         if len(roothash) != HASH_SIZE:
1021             raise LayoutInvalid("hashes and salts must be exactly %d bytes"
1022                                  % HASH_SIZE)
1023         self._root_hash = roothash
1024         # To write both of these values, we update the checkstring on
1025         # the remote server, which includes them
1026         checkstring = self.get_checkstring()
1027         self._writevs.append(tuple([0, checkstring]))
1028         # This write, if successful, changes the checkstring, so we need
1029         # to update our internal checkstring to be consistent with the
1030         # one on the server.
1031
1032
1033     def get_signable(self):
1034         """
1035         Get the first seven fields of the mutable file; the parts that
1036         are signed.
1037         """
1038         if not self._root_hash:
1039             raise LayoutInvalid("You need to set the root hash "
1040                                 "before getting something to "
1041                                 "sign")
1042         return struct.pack(MDMFSIGNABLEHEADER,
1043                            1,
1044                            self._seqnum,
1045                            self._root_hash,
1046                            self._required_shares,
1047                            self._total_shares,
1048                            self._segment_size,
1049                            self._data_length)
1050
1051
1052     def put_signature(self, signature):
1053         """
1054         I queue a write vector for the signature of the MDMF share.
1055
1056         I require that the root hash and share hash chain have been put
1057         to the grid before I will write the signature to the grid.
1058         """
1059         if "signature" not in self._offsets:
1060             raise LayoutInvalid("You must put the share hash chain "
1061         # It does not make sense to put a signature without first
1062         # putting the root hash and the salt hash (since otherwise
1063         # the signature would be incomplete), so we don't allow that.
1064                        "before putting the signature")
1065         if not self._root_hash:
1066             raise LayoutInvalid("You must complete the signed prefix "
1067                                 "before computing a signature")
1068         # If we put the signature after we put the verification key, we
1069         # could end up running into the verification key, and will
1070         # probably screw up the offsets as well. So we don't allow that.
1071         if "verification_key_end" in self._offsets:
1072             raise LayoutInvalid("You can't put the signature after the "
1073                                 "verification key")
1074         # The method that writes the verification key defines the EOF
1075         # offset before writing the verification key, so look for that.
1076         self._offsets['verification_key'] = self._offsets['signature'] +\
1077             len(signature)
1078         self._writevs.append(tuple([self._offsets['signature'], signature]))
1079
1080
1081     def put_verification_key(self, verification_key):
1082         """
1083         I queue a write vector for the verification key.
1084
1085         I require that the signature have been written to the storage
1086         server before I allow the verification key to be written to the
1087         remote server.
1088         """
1089         if "verification_key" not in self._offsets:
1090             raise LayoutInvalid("You must put the signature before you "
1091                                 "can put the verification key")
1092
1093         self._offsets['verification_key_end'] = \
1094             self._offsets['verification_key'] + len(verification_key)
1095         assert self._offsets['verification_key_end'] <= self._offsets['share_data']
1096         self._writevs.append(tuple([self._offsets['verification_key'],
1097                             verification_key]))
1098
1099
1100     def _get_offsets_tuple(self):
1101         return tuple([(key, value) for key, value in self._offsets.items()])
1102
1103
1104     def get_verinfo(self):
1105         return (self._seqnum,
1106                 self._root_hash,
1107                 None,
1108                 self._segment_size,
1109                 self._data_length,
1110                 self._required_shares,
1111                 self._total_shares,
1112                 self.get_signable(),
1113                 self._get_offsets_tuple())
1114
1115
1116     def finish_publishing(self):
1117         """
1118         I add a write vector for the offsets table, and then cause all
1119         of the write vectors that I've dealt with so far to be published
1120         to the remote server, ending the write process.
1121         """
1122         if "verification_key_end" not in self._offsets:
1123             raise LayoutInvalid("You must put the verification key before "
1124                                 "you can publish the offsets")
1125         offsets_offset = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
1126         offsets = struct.pack(MDMFOFFSETS,
1127                               self._offsets['enc_privkey'],
1128                               self._offsets['share_hash_chain'],
1129                               self._offsets['signature'],
1130                               self._offsets['verification_key'],
1131                               self._offsets['verification_key_end'],
1132                               self._offsets['share_data'],
1133                               self._offsets['block_hash_tree'],
1134                               self._offsets['EOF'])
1135         self._writevs.append(tuple([offsets_offset, offsets]))
1136         encoding_parameters_offset = struct.calcsize(MDMFCHECKSTRING)
1137         params = struct.pack(">BBQQ",
1138                              self._required_shares,
1139                              self._total_shares,
1140                              self._segment_size,
1141                              self._data_length)
1142         self._writevs.append(tuple([encoding_parameters_offset, params]))
1143         return self._write(self._writevs)
1144
1145
    def _write(self, datavs, on_failure=None, on_success=None):
        """
        I write the data vectors in datavs to the remote slot.

        I send a single slot_testv_and_readv_and_writev request for my
        share, guarded by my current test vectors, and return the
        resulting Deferred. Its result (or Failure) is passed through
        unchanged.

        on_failure and on_success are optional zero-argument callables
        that are invoked once the outcome is known. On the first write
        (while self._written is false) any on_success that was passed in
        is replaced by an internal callback. datavs is modified in place
        on that first write.
        """
        tw_vectors = {}
        if not self._testvs:
            # No checkstring was set, so fall back to the one-byte test
            # against "" that set_checkstring describes as the way to
            # express "the share must be empty" to the storage server.
            self._testvs = []
            self._testvs.append(tuple([0, 1, "eq", ""]))
        if not self._written:
            # Write a new checkstring to the share when we write it, so
            # that we have something to check later.
            new_checkstring = self.get_checkstring()
            # This appends to the caller's list.
            datavs.append((0, new_checkstring))
            def _first_write():
                # Only runs if the write succeeded: from now on the
                # share is expected to hold the checkstring just written.
                self._written = True
                self._testvs = [(0, len(new_checkstring), "eq", new_checkstring)]
            # NOTE(review): this discards an on_success supplied by the
            # caller -- confirm that no caller relies on it.
            on_success = _first_write
        tw_vectors[self.shnum] = (self._testvs, datavs, None)
        d = self._rref.callRemote("slot_testv_and_readv_and_writev",
                                  self._storage_index,
                                  self._secrets,
                                  tw_vectors,
                                  self._readv)
        def _result(results):
            # A Failure, or a false first element in the results, counts
            # as an unsuccessful write.
            if isinstance(results, failure.Failure) or not results[0]:
                # Do nothing; the write was unsuccessful.
                if on_failure: on_failure()
            else:
                if on_success: on_success()
            # Hand the result (or Failure) on to whoever is waiting.
            return results
        d.addBoth(_result)
        return d
1176
def _handle_bad_struct(f):
    # Errback: a struct.unpack error means the server didn't give us
    # enough data, so the share is bad. Report it as a BadShareError
    # carrying the original message. f.trap lets only struct.error
    # failures reach the raise below.
    f.trap(struct.error)
    message = f.value.args[0]
    raise BadShareError(message)
1182
1183 class MDMFSlotReadProxy:
1184     """
1185     I read from a mutable slot filled with data written in the MDMF data
1186     format (which is described above).
1187
1188     I can be initialized with some amount of data, which I will use (if
1189     it is valid) to eliminate some of the need to fetch it from servers.
1190     """
1191     def __init__(self,
1192                  rref,
1193                  storage_index,
1194                  shnum,
1195                  data="",
1196                  data_is_everything=False):
1197         # Start the initialization process.
1198         self._rref = rref
1199         self._storage_index = storage_index
1200         self.shnum = shnum
1201
1202         # Before doing anything, the reader is probably going to want to
1203         # verify that the signature is correct. To do that, they'll need
1204         # the verification key, and the signature. To get those, we'll
1205         # need the offset table. So fetch the offset table on the
1206         # assumption that that will be the first thing that a reader is
1207         # going to do.
1208
1209         # The fact that these encoding parameters are None tells us
1210         # that we haven't yet fetched them from the remote share, so we
1211         # should. We could just not set them, but the checks will be
1212         # easier to read if we don't have to use hasattr.
1213         self._version_number = None
1214         self._sequence_number = None
1215         self._root_hash = None
1216         # Filled in if we're dealing with an SDMF file. Unused
1217         # otherwise.
1218         self._salt = None
1219         self._required_shares = None
1220         self._total_shares = None
1221         self._segment_size = None
1222         self._data_length = None
1223         self._offsets = None
1224
1225         # If the user has chosen to initialize us with some data, we'll
1226         # try to satisfy subsequent data requests with that data before
1227         # asking the storage server for it.
1228         self._data = data
1229
1230         # If the provided data is known to be complete, then we know there's
1231         # nothing to be gained by querying the server, so we should just
1232         # partially satisfy requests with what we have.
1233         self._data_is_everything = data_is_everything
1234
1235         # The way callers interact with cache in the filenode returns
1236         # None if there isn't any cached data, but the way we index the
1237         # cached data requires a string, so convert None to "".
1238         if self._data == None:
1239             self._data = ""
1240
1241
1242     def _maybe_fetch_offsets_and_header(self, force_remote=False):
1243         """
1244         I fetch the offset table and the header from the remote slot if
1245         I don't already have them. If I do have them, I do nothing and
1246         return an empty Deferred.
1247         """
1248         if self._offsets:
1249             return defer.succeed(None)
1250         # At this point, we may be either SDMF or MDMF. Fetching 107
1251         # bytes will be enough to get header and offsets for both SDMF and
1252         # MDMF, though we'll be left with 4 more bytes than we
1253         # need if this ends up being MDMF. This is probably less
1254         # expensive than the cost of a second roundtrip.
1255         readvs = [(0, 123)]
1256         d = self._read(readvs, force_remote)
1257         d.addCallback(self._process_encoding_parameters)
1258         d.addCallback(self._process_offsets)
1259         d.addErrback(_handle_bad_struct)
1260         return d
1261
1262
1263     def _process_encoding_parameters(self, encoding_parameters):
1264         if self.shnum not in encoding_parameters:
1265             raise BadShareError("no data for shnum %d" % self.shnum)
1266         encoding_parameters = encoding_parameters[self.shnum][0]
1267         # The first byte is the version number. It will tell us what
1268         # to do next.
1269         (verno,) = struct.unpack(">B", encoding_parameters[:1])
1270         if verno == MDMF_VERSION:
1271             read_size = MDMFHEADERWITHOUTOFFSETSSIZE
1272             (verno,
1273              seqnum,
1274              root_hash,
1275              k,
1276              n,
1277              segsize,
1278              datalen) = struct.unpack(MDMFHEADERWITHOUTOFFSETS,
1279                                       encoding_parameters[:read_size])
1280             if segsize == 0 and datalen == 0:
1281                 # Empty file, no segments.
1282                 self._num_segments = 0
1283             else:
1284                 self._num_segments = mathutil.div_ceil(datalen, segsize)
1285
1286         elif verno == SDMF_VERSION:
1287             read_size = SIGNED_PREFIX_LENGTH
1288             (verno,
1289              seqnum,
1290              root_hash,
1291              salt,
1292              k,
1293              n,
1294              segsize,
1295              datalen) = struct.unpack(">BQ32s16s BBQQ",
1296                                 encoding_parameters[:SIGNED_PREFIX_LENGTH])
1297             self._salt = salt
1298             if segsize == 0 and datalen == 0:
1299                 # empty file
1300                 self._num_segments = 0
1301             else:
1302                 # non-empty SDMF files have one segment.
1303                 self._num_segments = 1
1304         else:
1305             raise UnknownVersionError("You asked me to read mutable file "
1306                                       "version %d, but I only understand "
1307                                       "%d and %d" % (verno, SDMF_VERSION,
1308                                                      MDMF_VERSION))
1309
1310         self._version_number = verno
1311         self._sequence_number = seqnum
1312         self._root_hash = root_hash
1313         self._required_shares = k
1314         self._total_shares = n
1315         self._segment_size = segsize
1316         self._data_length = datalen
1317
1318         self._block_size = self._segment_size / self._required_shares
1319         # We can upload empty files, and need to account for this fact
1320         # so as to avoid zero-division and zero-modulo errors.
1321         if datalen > 0:
1322             tail_size = self._data_length % self._segment_size
1323         else:
1324             tail_size = 0
1325         if not tail_size:
1326             self._tail_block_size = self._block_size
1327         else:
1328             self._tail_block_size = mathutil.next_multiple(tail_size,
1329                                                     self._required_shares)
1330             self._tail_block_size /= self._required_shares
1331
1332         return encoding_parameters
1333
1334
1335     def _process_offsets(self, offsets):
1336         if self._version_number == 0:
1337             read_size = OFFSETS_LENGTH
1338             read_offset = SIGNED_PREFIX_LENGTH
1339             end = read_size + read_offset
1340             (signature,
1341              share_hash_chain,
1342              block_hash_tree,
1343              share_data,
1344              enc_privkey,
1345              EOF) = struct.unpack(">LLLLQQ",
1346                                   offsets[read_offset:end])
1347             self._offsets = {}
1348             self._offsets['signature'] = signature
1349             self._offsets['share_data'] = share_data
1350             self._offsets['block_hash_tree'] = block_hash_tree
1351             self._offsets['share_hash_chain'] = share_hash_chain
1352             self._offsets['enc_privkey'] = enc_privkey
1353             self._offsets['EOF'] = EOF
1354
1355         elif self._version_number == 1:
1356             read_offset = MDMFHEADERWITHOUTOFFSETSSIZE
1357             read_length = MDMFOFFSETS_LENGTH
1358             end = read_offset + read_length
1359             (encprivkey,
1360              sharehashes,
1361              signature,
1362              verification_key,
1363              verification_key_end,
1364              sharedata,
1365              blockhashes,
1366              eof) = struct.unpack(MDMFOFFSETS,
1367                                   offsets[read_offset:end])
1368             self._offsets = {}
1369             self._offsets['enc_privkey'] = encprivkey
1370             self._offsets['block_hash_tree'] = blockhashes
1371             self._offsets['share_hash_chain'] = sharehashes
1372             self._offsets['signature'] = signature
1373             self._offsets['verification_key'] = verification_key
1374             self._offsets['verification_key_end']= \
1375                 verification_key_end
1376             self._offsets['EOF'] = eof
1377             self._offsets['share_data'] = sharedata
1378
1379
1380     def get_block_and_salt(self, segnum):
1381         """
1382         I return (block, salt), where block is the block data and
1383         salt is the salt used to encrypt that segment.
1384         """
1385         d = self._maybe_fetch_offsets_and_header()
1386         def _then(ignored):
1387             base_share_offset = self._offsets['share_data']
1388
1389             if segnum + 1 > self._num_segments:
1390                 raise LayoutInvalid("Not a valid segment number")
1391
1392             if self._version_number == 0:
1393                 share_offset = base_share_offset + self._block_size * segnum
1394             else:
1395                 share_offset = base_share_offset + (self._block_size + \
1396                                                     SALT_SIZE) * segnum
1397             if segnum + 1 == self._num_segments:
1398                 data = self._tail_block_size
1399             else:
1400                 data = self._block_size
1401
1402             if self._version_number == 1:
1403                 data += SALT_SIZE
1404
1405             readvs = [(share_offset, data)]
1406             return readvs
1407         d.addCallback(_then)
1408         d.addCallback(lambda readvs: self._read(readvs))
1409         def _process_results(results):
1410             if self.shnum not in results:
1411                 raise BadShareError("no data for shnum %d" % self.shnum)
1412             if self._version_number == 0:
1413                 # We only read the share data, but we know the salt from
1414                 # when we fetched the header
1415                 data = results[self.shnum]
1416                 if not data:
1417                     data = ""
1418                 else:
1419                     if len(data) != 1:
1420                         raise BadShareError("got %d vectors, not 1" % len(data))
1421                     data = data[0]
1422                 salt = self._salt
1423             else:
1424                 data = results[self.shnum]
1425                 if not data:
1426                     salt = data = ""
1427                 else:
1428                     salt_and_data = results[self.shnum][0]
1429                     salt = salt_and_data[:SALT_SIZE]
1430                     data = salt_and_data[SALT_SIZE:]
1431             return data, salt
1432         d.addCallback(_process_results)
1433         return d
1434
1435
1436     def get_blockhashes(self, needed=None, force_remote=False):
1437         """
1438         I return the block hash tree
1439
1440         I take an optional argument, needed, which is a set of indices
1441         correspond to hashes that I should fetch. If this argument is
1442         missing, I will fetch the entire block hash tree; otherwise, I
1443         may attempt to fetch fewer hashes, based on what needed says
1444         that I should do. Note that I may fetch as many hashes as I
1445         want, so long as the set of hashes that I do fetch is a superset
1446         of the ones that I am asked for, so callers should be prepared
1447         to tolerate additional hashes.
1448         """
1449         # TODO: Return only the parts of the block hash tree necessary
1450         # to validate the blocknum provided?
1451         # This is a good idea, but it is hard to implement correctly. It
1452         # is bad to fetch any one block hash more than once, so we
1453         # probably just want to fetch the whole thing at once and then
1454         # serve it.
1455         if needed == set([]):
1456             return defer.succeed([])
1457         d = self._maybe_fetch_offsets_and_header()
1458         def _then(ignored):
1459             blockhashes_offset = self._offsets['block_hash_tree']
1460             if self._version_number == 1:
1461                 blockhashes_length = self._offsets['EOF'] - blockhashes_offset
1462             else:
1463                 blockhashes_length = self._offsets['share_data'] - blockhashes_offset
1464             readvs = [(blockhashes_offset, blockhashes_length)]
1465             return readvs
1466         d.addCallback(_then)
1467         d.addCallback(lambda readvs:
1468             self._read(readvs, force_remote=force_remote))
1469         def _build_block_hash_tree(results):
1470             if self.shnum not in results:
1471                 raise BadShareError("no data for shnum %d" % self.shnum)
1472
1473             rawhashes = results[self.shnum][0]
1474             results = [rawhashes[i:i+HASH_SIZE]
1475                        for i in range(0, len(rawhashes), HASH_SIZE)]
1476             return results
1477         d.addCallback(_build_block_hash_tree)
1478         return d
1479
1480
1481     def get_sharehashes(self, needed=None, force_remote=False):
1482         """
1483         I return the part of the share hash chain placed to validate
1484         this share.
1485
1486         I take an optional argument, needed. Needed is a set of indices
1487         that correspond to the hashes that I should fetch. If needed is
1488         not present, I will fetch and return the entire share hash
1489         chain. Otherwise, I may fetch and return any part of the share
1490         hash chain that is a superset of the part that I am asked to
1491         fetch. Callers should be prepared to deal with more hashes than
1492         they've asked for.
1493         """
1494         if needed == set([]):
1495             return defer.succeed([])
1496         d = self._maybe_fetch_offsets_and_header()
1497
1498         def _make_readvs(ignored):
1499             sharehashes_offset = self._offsets['share_hash_chain']
1500             if self._version_number == 0:
1501                 sharehashes_length = self._offsets['block_hash_tree'] - sharehashes_offset
1502             else:
1503                 sharehashes_length = self._offsets['signature'] - sharehashes_offset
1504             readvs = [(sharehashes_offset, sharehashes_length)]
1505             return readvs
1506         d.addCallback(_make_readvs)
1507         d.addCallback(lambda readvs:
1508             self._read(readvs, force_remote=force_remote))
1509         def _build_share_hash_chain(results):
1510             if self.shnum not in results:
1511                 raise BadShareError("no data for shnum %d" % self.shnum)
1512
1513             sharehashes = results[self.shnum][0]
1514             results = [sharehashes[i:i+(HASH_SIZE + 2)]
1515                        for i in range(0, len(sharehashes), HASH_SIZE + 2)]
1516             results = dict([struct.unpack(">H32s", data)
1517                             for data in results])
1518             return results
1519         d.addCallback(_build_share_hash_chain)
1520         d.addErrback(_handle_bad_struct)
1521         return d
1522
1523
1524     def get_encprivkey(self):
1525         """
1526         I return the encrypted private key.
1527         """
1528         d = self._maybe_fetch_offsets_and_header()
1529
1530         def _make_readvs(ignored):
1531             privkey_offset = self._offsets['enc_privkey']
1532             if self._version_number == 0:
1533                 privkey_length = self._offsets['EOF'] - privkey_offset
1534             else:
1535                 privkey_length = self._offsets['share_hash_chain'] - privkey_offset
1536             readvs = [(privkey_offset, privkey_length)]
1537             return readvs
1538         d.addCallback(_make_readvs)
1539         d.addCallback(lambda readvs: self._read(readvs))
1540         def _process_results(results):
1541             if self.shnum not in results:
1542                 raise BadShareError("no data for shnum %d" % self.shnum)
1543             privkey = results[self.shnum][0]
1544             return privkey
1545         d.addCallback(_process_results)
1546         return d
1547
1548
1549     def get_signature(self):
1550         """
1551         I return the signature of my share.
1552         """
1553         d = self._maybe_fetch_offsets_and_header()
1554
1555         def _make_readvs(ignored):
1556             signature_offset = self._offsets['signature']
1557             if self._version_number == 1:
1558                 signature_length = self._offsets['verification_key'] - signature_offset
1559             else:
1560                 signature_length = self._offsets['share_hash_chain'] - signature_offset
1561             readvs = [(signature_offset, signature_length)]
1562             return readvs
1563         d.addCallback(_make_readvs)
1564         d.addCallback(lambda readvs: self._read(readvs))
1565         def _process_results(results):
1566             if self.shnum not in results:
1567                 raise BadShareError("no data for shnum %d" % self.shnum)
1568             signature = results[self.shnum][0]
1569             return signature
1570         d.addCallback(_process_results)
1571         return d
1572
1573
1574     def get_verification_key(self):
1575         """
1576         I return the verification key.
1577         """
1578         d = self._maybe_fetch_offsets_and_header()
1579
1580         def _make_readvs(ignored):
1581             if self._version_number == 1:
1582                 vk_offset = self._offsets['verification_key']
1583                 vk_length = self._offsets['verification_key_end'] - vk_offset
1584             else:
1585                 vk_offset = struct.calcsize(">BQ32s16sBBQQLLLLQQ")
1586                 vk_length = self._offsets['signature'] - vk_offset
1587             readvs = [(vk_offset, vk_length)]
1588             return readvs
1589         d.addCallback(_make_readvs)
1590         d.addCallback(lambda readvs: self._read(readvs))
1591         def _process_results(results):
1592             if self.shnum not in results:
1593                 raise BadShareError("no data for shnum %d" % self.shnum)
1594             verification_key = results[self.shnum][0]
1595             return verification_key
1596         d.addCallback(_process_results)
1597         return d
1598
1599
1600     def get_encoding_parameters(self):
1601         """
1602         I return (k, n, segsize, datalen)
1603         """
1604         d = self._maybe_fetch_offsets_and_header()
1605         d.addCallback(lambda ignored:
1606             (self._required_shares,
1607              self._total_shares,
1608              self._segment_size,
1609              self._data_length))
1610         return d
1611
1612
1613     def get_seqnum(self):
1614         """
1615         I return the sequence number for this share.
1616         """
1617         d = self._maybe_fetch_offsets_and_header()
1618         d.addCallback(lambda ignored:
1619             self._sequence_number)
1620         return d
1621
1622
1623     def get_root_hash(self):
1624         """
1625         I return the root of the block hash tree
1626         """
1627         d = self._maybe_fetch_offsets_and_header()
1628         d.addCallback(lambda ignored: self._root_hash)
1629         return d
1630
1631
1632     def get_checkstring(self):
1633         """
1634         I return the packed representation of the following:
1635
1636             - version number
1637             - sequence number
1638             - root hash
1639             - salt hash
1640
1641         which my users use as a checkstring to detect other writers.
1642         """
1643         d = self._maybe_fetch_offsets_and_header()
1644         def _build_checkstring(ignored):
1645             if self._salt:
1646                 checkstring = struct.pack(PREFIX,
1647                                           self._version_number,
1648                                           self._sequence_number,
1649                                           self._root_hash,
1650                                           self._salt)
1651             else:
1652                 checkstring = struct.pack(MDMFCHECKSTRING,
1653                                           self._version_number,
1654                                           self._sequence_number,
1655                                           self._root_hash)
1656
1657             return checkstring
1658         d.addCallback(_build_checkstring)
1659         return d
1660
1661
1662     def get_prefix(self, force_remote):
1663         d = self._maybe_fetch_offsets_and_header(force_remote)
1664         d.addCallback(lambda ignored:
1665             self._build_prefix())
1666         return d
1667
1668
1669     def _build_prefix(self):
1670         # The prefix is another name for the part of the remote share
1671         # that gets signed. It consists of everything up to and
1672         # including the datalength, packed by struct.
1673         if self._version_number == SDMF_VERSION:
1674             return struct.pack(SIGNED_PREFIX,
1675                            self._version_number,
1676                            self._sequence_number,
1677                            self._root_hash,
1678                            self._salt,
1679                            self._required_shares,
1680                            self._total_shares,
1681                            self._segment_size,
1682                            self._data_length)
1683
1684         else:
1685             return struct.pack(MDMFSIGNABLEHEADER,
1686                            self._version_number,
1687                            self._sequence_number,
1688                            self._root_hash,
1689                            self._required_shares,
1690                            self._total_shares,
1691                            self._segment_size,
1692                            self._data_length)
1693
1694
1695     def _get_offsets_tuple(self):
1696         # The offsets tuple is another component of the version
1697         # information tuple. It is basically our offsets dictionary,
1698         # itemized and in a tuple.
1699         return self._offsets.copy()
1700
1701
1702     def get_verinfo(self):
1703         """
1704         I return my verinfo tuple. This is used by the ServermapUpdater
1705         to keep track of versions of mutable files.
1706
1707         The verinfo tuple for MDMF files contains:
1708             - seqnum
1709             - root hash
1710             - a blank (nothing)
1711             - segsize
1712             - datalen
1713             - k
1714             - n
1715             - prefix (the thing that you sign)
1716             - a tuple of offsets
1717
1718         We include the nonce in MDMF to simplify processing of version
1719         information tuples.
1720
1721         The verinfo tuple for SDMF files is the same, but contains a
1722         16-byte IV instead of a hash of salts.
1723         """
1724         d = self._maybe_fetch_offsets_and_header()
1725         def _build_verinfo(ignored):
1726             if self._version_number == SDMF_VERSION:
1727                 salt_to_use = self._salt
1728             else:
1729                 salt_to_use = None
1730             return (self._sequence_number,
1731                     self._root_hash,
1732                     salt_to_use,
1733                     self._segment_size,
1734                     self._data_length,
1735                     self._required_shares,
1736                     self._total_shares,
1737                     self._build_prefix(),
1738                     self._get_offsets_tuple())
1739         d.addCallback(_build_verinfo)
1740         return d
1741
1742
1743     def _read(self, readvs, force_remote=False):
1744         unsatisfiable = filter(lambda x: x[0] + x[1] > len(self._data), readvs)
1745         # TODO: It's entirely possible to tweak this so that it just
1746         # fulfills the requests that it can, and not demand that all
1747         # requests are satisfiable before running it.
1748
1749         if not unsatisfiable or self._data_is_everything:
1750             results = [self._data[offset:offset+length]
1751                        for (offset, length) in readvs]
1752             results = {self.shnum: results}
1753             return defer.succeed(results)
1754         else:
1755             return self._rref.callRemote("slot_readv",
1756                                          self._storage_index,
1757                                          [self.shnum],
1758                                          readvs)
1759
1760
1761     def is_sdmf(self):
1762         """I tell my caller whether or not my remote file is SDMF or MDMF
1763         """
1764         d = self._maybe_fetch_offsets_and_header()
1765         d.addCallback(lambda ignored:
1766             self._version_number == 0)
1767         return d
1768
1769
class LayoutInvalid(BadShareError):
    """
    This isn't a valid MDMF mutable file.

    I am a kind of BadShareError. Among other things, I am raised by
    get_block_and_salt when it is asked for a segment number that the
    share does not have.
    """