1
2 import struct
3 from allmydata.mutable.common import NeedMoreDataError, UnknownVersionError
4 from allmydata.interfaces import HASH_SIZE, SALT_SIZE, SDMF_VERSION, \
5                                  MDMF_VERSION, IMutableSlotWriter
6 from allmydata.util import mathutil, observer
7 from twisted.python import failure
8 from twisted.internet import defer
9 from zope.interface import implements
10
11
12 # These strings describe the format of the packed structs they help process
13 # Here's what they mean:
14 #
15 #  PREFIX:
16 #    >: Big-endian byte order; the most significant byte is first (leftmost).
#    B: The version information; an 8 bit version identifier. Stored as
#       an unsigned char. This is 0 for SDMF shares; MDMF shares use 1.
20 #    Q: The sequence number; this is sort of like a revision history for
21 #       mutable files; they start at 1 and increase as they are changed after
22 #       being uploaded. Stored as an unsigned long long, which is 8 bytes in
23 #       length.
24 #  32s: The root hash of the share hash tree. We use sha-256d, so we use 32 
25 #       characters = 32 bytes to store the value.
26 #  16s: The salt for the readkey. This is a 16-byte random value, stored as
27 #       16 characters.
28 #
29 #  SIGNED_PREFIX additions, things that are covered by the signature:
30 #    B: The "k" encoding parameter. We store this as an 8-bit character, 
31 #       which is convenient because our erasure coding scheme cannot 
32 #       encode if you ask for more than 255 pieces.
33 #    B: The "N" encoding parameter. Stored as an 8-bit character for the 
34 #       same reasons as above.
35 #    Q: The segment size of the uploaded file. This will essentially be the
36 #       length of the file in SDMF. An unsigned long long, so we can store 
37 #       files of quite large size.
#    Q: The data length of the uploaded file. Modulo padding, this will be
#       the same as the segment size field. Like the segment size field, it
#       is an unsigned long long and can be quite large.
41 #
42 #   HEADER additions:
#     L: The offset of the signature. An unsigned long.
44 #     L: The offset of the share hash chain. An unsigned long.
45 #     L: The offset of the block hash tree. An unsigned long.
46 #     L: The offset of the share data. An unsigned long.
47 #     Q: The offset of the encrypted private key. An unsigned long long, to
48 #        account for the possibility of a lot of share data.
49 #     Q: The offset of the EOF. An unsigned long long, to account for the
50 #        possibility of a lot of share data.
51
52 #  After all of these, we have the following:
#    - The verification key: Occupies the space between the end of the header
#      and the start of the signature (i.e. data[HEADER_LENGTH:o['signature']]).
55 #    - The signature, which goes from the signature offset to the share hash
56 #      chain offset.
57 #    - The share hash chain, which goes from the share hash chain offset to
58 #      the block hash tree offset.
59 #    - The share data, which goes from the share data offset to the encrypted
60 #      private key offset.
#    - The encrypted private key, which goes from the encrypted private key
#      offset to the end of the file.
62
#  The block hash tree in this encoding has only one leaf (SDMF files have a
#  single segment), so the offset of the share data will be 32 bytes more
#  than the offset of the block hash tree. Given this, we may need to check
#  to see how many bytes a reasonably sized block hash tree will take up.
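#
#  For reference, the byte layout implied by the HEADER format string below
#  (a sketch derived from struct.calcsize; compare with the MDMF table
#  further down in this file):
#
#  offset:  size:  name:
#  0        1      version number
#  1        8      sequence number
#  9        32     share tree root hash
#  41       16     salt (readkey IV)
#  57       1      "k" encoding parameter
#  58       1      "N" encoding parameter
#  59       8      segment size
#  67       8      data length
#  75       4      offset of the signature
#  79       4      offset of the share hash chain
#  83       4      offset of the block hash tree
#  87       4      offset of the share data
#  91       8      offset of the encrypted private key
#  99       8      offset of EOF
#  107      var    verification key, followed by the regions listed above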
67
68 PREFIX = ">BQ32s16s" # each version has a different prefix
69 SIGNED_PREFIX = ">BQ32s16s BBQQ" # this is covered by the signature
70 SIGNED_PREFIX_LENGTH = struct.calcsize(SIGNED_PREFIX)
71 HEADER = ">BQ32s16s BBQQ LLLLQQ" # includes offsets
72 HEADER_LENGTH = struct.calcsize(HEADER)
73 OFFSETS = ">LLLLQQ"
74 OFFSETS_LENGTH = struct.calcsize(OFFSETS)
75
76 # These are still used for some tests.
77 def unpack_header(data):
78     o = {}
79     (version,
80      seqnum,
81      root_hash,
82      IV,
83      k, N, segsize, datalen,
84      o['signature'],
85      o['share_hash_chain'],
86      o['block_hash_tree'],
87      o['share_data'],
88      o['enc_privkey'],
89      o['EOF']) = struct.unpack(HEADER, data[:HEADER_LENGTH])
90     return (version, seqnum, root_hash, IV, k, N, segsize, datalen, o)
91
92 def unpack_share(data):
93     assert len(data) >= HEADER_LENGTH
94     o = {}
95     (version,
96      seqnum,
97      root_hash,
98      IV,
99      k, N, segsize, datalen,
100      o['signature'],
101      o['share_hash_chain'],
102      o['block_hash_tree'],
103      o['share_data'],
104      o['enc_privkey'],
105      o['EOF']) = struct.unpack(HEADER, data[:HEADER_LENGTH])
106
107     if version != 0:
108         raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version)
109
110     if len(data) < o['EOF']:
111         raise NeedMoreDataError(o['EOF'],
112                                 o['enc_privkey'], o['EOF']-o['enc_privkey'])
113
114     pubkey = data[HEADER_LENGTH:o['signature']]
115     signature = data[o['signature']:o['share_hash_chain']]
116     share_hash_chain_s = data[o['share_hash_chain']:o['block_hash_tree']]
117     share_hash_format = ">H32s"
118     hsize = struct.calcsize(share_hash_format)
119     assert len(share_hash_chain_s) % hsize == 0, len(share_hash_chain_s)
120     share_hash_chain = []
121     for i in range(0, len(share_hash_chain_s), hsize):
122         chunk = share_hash_chain_s[i:i+hsize]
123         (hid, h) = struct.unpack(share_hash_format, chunk)
124         share_hash_chain.append( (hid, h) )
125     share_hash_chain = dict(share_hash_chain)
126     block_hash_tree_s = data[o['block_hash_tree']:o['share_data']]
127     assert len(block_hash_tree_s) % 32 == 0, len(block_hash_tree_s)
128     block_hash_tree = []
129     for i in range(0, len(block_hash_tree_s), 32):
130         block_hash_tree.append(block_hash_tree_s[i:i+32])
131
132     share_data = data[o['share_data']:o['enc_privkey']]
133     enc_privkey = data[o['enc_privkey']:o['EOF']]
134
135     return (seqnum, root_hash, IV, k, N, segsize, datalen,
136             pubkey, signature, share_hash_chain, block_hash_tree,
137             share_data, enc_privkey)
138
139 def unpack_checkstring(checkstring):
140     cs_len = struct.calcsize(PREFIX)
141     version, seqnum, root_hash, IV = struct.unpack(PREFIX, checkstring[:cs_len])
142     if version != 0: # TODO: just ignore the share
143         raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version)
144     return (seqnum, root_hash, IV)
145
146
147 def pack_offsets(verification_key_length, signature_length,
148                  share_hash_chain_length, block_hash_tree_length,
149                  share_data_length, encprivkey_length):
150     post_offset = HEADER_LENGTH
151     offsets = {}
152     o1 = offsets['signature'] = post_offset + verification_key_length
153     o2 = offsets['share_hash_chain'] = o1 + signature_length
154     o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length
155     o4 = offsets['share_data'] = o3 + block_hash_tree_length
156     o5 = offsets['enc_privkey'] = o4 + share_data_length
157     offsets['EOF'] = o5 + encprivkey_length
158
159     return struct.pack(">LLLLQQ",
160                        offsets['signature'],
161                        offsets['share_hash_chain'],
162                        offsets['block_hash_tree'],
163                        offsets['share_data'],
164                        offsets['enc_privkey'],
165                        offsets['EOF'])
166
167 def pack_share(prefix, verification_key, signature,
168                share_hash_chain, block_hash_tree,
169                share_data, encprivkey):
170     share_hash_chain_s = "".join([struct.pack(">H32s", i, share_hash_chain[i])
171                                   for i in sorted(share_hash_chain.keys())])
172     for h in block_hash_tree:
173         assert len(h) == 32
174     block_hash_tree_s = "".join(block_hash_tree)
175
176     offsets = pack_offsets(len(verification_key),
177                            len(signature),
178                            len(share_hash_chain_s),
179                            len(block_hash_tree_s),
180                            len(share_data),
181                            len(encprivkey))
182     final_share = "".join([prefix,
183                            offsets,
184                            verification_key,
185                            signature,
186                            share_hash_chain_s,
187                            block_hash_tree_s,
188                            share_data,
189                            encprivkey])
190     return final_share
191
192 def pack_prefix(seqnum, root_hash, IV,
193                 required_shares, total_shares,
194                 segment_size, data_length):
195     prefix = struct.pack(SIGNED_PREFIX,
196                          0, # version,
197                          seqnum,
198                          root_hash,
199                          IV,
200                          required_shares,
201                          total_shares,
202                          segment_size,
203                          data_length,
204                          )
205     return prefix
206
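# A rough sketch of how the module-level SDMF helpers above fit together
# (illustrative only; the variable names here are made up):
#
#   prefix = pack_prefix(seqnum, root_hash, IV, k, N, segsize, datalen)
#   share = pack_share(prefix, verification_key, signature,
#                      share_hash_chain, block_hash_tree,
#                      share_data, encprivkey)
#   (seqnum, root_hash, IV, k, N, segsize, datalen,
#    pubkey, signature, share_hash_chain, block_hash_tree,
#    share_data, enc_privkey) = unpack_share(share)
#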
207
class SDMFSlotWriteProxy:
    """
    I represent a remote write slot for an SDMF mutable file. I build a
    share in memory, and then write it in one piece to the remote
    server. This mimics how SDMF shares were built before MDMF (and the
    new MDMF uploader), but provides that functionality in a way that
    allows the MDMF uploader to be built without much special-casing for
    file format, which makes the uploader code more readable.
    """
    implements(IMutableSlotWriter)
218     def __init__(self,
219                  shnum,
220                  rref, # a remote reference to a storage server
221                  storage_index,
222                  secrets, # (write_enabler, renew_secret, cancel_secret)
223                  seqnum, # the sequence number of the mutable file
224                  required_shares,
225                  total_shares,
226                  segment_size,
227                  data_length): # the length of the original file
228         self.shnum = shnum
229         self._rref = rref
230         self._storage_index = storage_index
231         self._secrets = secrets
232         self._seqnum = seqnum
233         self._required_shares = required_shares
234         self._total_shares = total_shares
235         self._segment_size = segment_size
236         self._data_length = data_length
237
        # This is an SDMF file, so it should have only one segment;
        # modulo padding of the data length, the segment size and the
        # data length should be the same.
241         expected_segment_size = mathutil.next_multiple(data_length,
242                                                        self._required_shares)
243         assert expected_segment_size == segment_size
244
245         self._block_size = self._segment_size / self._required_shares
246
247         # This is meant to mimic how SDMF files were built before MDMF
248         # entered the picture: we generate each share in its entirety,
249         # then push it off to the storage server in one write. When
250         # callers call set_*, they are just populating this dict.
251         # finish_publishing will stitch these pieces together into a
252         # coherent share, and then write the coherent share to the
253         # storage server.
254         self._share_pieces = {}
255
256         # This tells the write logic what checkstring to use when
257         # writing remote shares.
258         self._testvs = []
259
260         self._readvs = [(0, struct.calcsize(PREFIX))]
261
262
263     def set_checkstring(self, checkstring_or_seqnum,
264                               root_hash=None,
265                               salt=None):
266         """
267         Set the checkstring that I will pass to the remote server when
268         writing.
269
            @param checkstring_or_seqnum: A packed checkstring to use,
                   or a sequence number. I will treat this as a packed
                   checkstring unless root_hash and salt are also given,
                   in which case I will build the checkstring from them.

        Note that implementations can differ in which semantics they
        wish to support for set_checkstring -- they can, for example,
        build the checkstring themselves from its constituents, or
        accept one that has already been packed.
277         """
278         if root_hash and salt:
279             checkstring = struct.pack(PREFIX,
280                                       0,
281                                       checkstring_or_seqnum,
282                                       root_hash,
283                                       salt)
284         else:
285             checkstring = checkstring_or_seqnum
286         self._testvs = [(0, len(checkstring), "eq", checkstring)]
287
288
289     def get_checkstring(self):
290         """
291         Get the checkstring that I think currently exists on the remote
292         server.
293         """
294         if self._testvs:
295             return self._testvs[0][3]
296         return ""
297
298
299     def put_block(self, data, segnum, salt):
300         """
301         Add a block and salt to the share.
302         """
303         # SDMF files have only one segment
304         assert segnum == 0
305         assert len(data) == self._block_size
306         assert len(salt) == SALT_SIZE
307
308         self._share_pieces['sharedata'] = data
309         self._share_pieces['salt'] = salt
310
311         # TODO: Figure out something intelligent to return.
312         return defer.succeed(None)
313
314
315     def put_encprivkey(self, encprivkey):
316         """
317         Add the encrypted private key to the share.
318         """
319         self._share_pieces['encprivkey'] = encprivkey
320
321         return defer.succeed(None)
322
323
324     def put_blockhashes(self, blockhashes):
325         """
326         Add the block hash tree to the share.
327         """
328         assert isinstance(blockhashes, list)
329         for h in blockhashes:
330             assert len(h) == HASH_SIZE
331
332         # serialize the blockhashes, then set them.
333         blockhashes_s = "".join(blockhashes)
334         self._share_pieces['block_hash_tree'] = blockhashes_s
335
336         return defer.succeed(None)
337
338
339     def put_sharehashes(self, sharehashes):
340         """
341         Add the share hash chain to the share.
342         """
343         assert isinstance(sharehashes, dict)
344         for h in sharehashes.itervalues():
345             assert len(h) == HASH_SIZE
346
347         # serialize the sharehashes, then set them.
348         sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i])
349                                  for i in sorted(sharehashes.keys())])
350         self._share_pieces['share_hash_chain'] = sharehashes_s
351
352         return defer.succeed(None)
353
354
355     def put_root_hash(self, root_hash):
356         """
357         Add the root hash to the share.
358         """
359         assert len(root_hash) == HASH_SIZE
360
361         self._share_pieces['root_hash'] = root_hash
362
363         return defer.succeed(None)
364
365
366     def put_salt(self, salt):
367         """
368         Add a salt to an empty SDMF file.
369         """
370         assert len(salt) == SALT_SIZE
371
372         self._share_pieces['salt'] = salt
373         self._share_pieces['sharedata'] = ""
374
375
376     def get_signable(self):
377         """
378         Return the part of the share that needs to be signed.
379
380         SDMF writers need to sign the packed representation of the
381         first eight fields of the remote share, that is:
382             - version number (0)
383             - sequence number
384             - root of the share hash tree
385             - salt
386             - k
387             - n
388             - segsize
389             - datalen
390
391         This method is responsible for returning that to callers.
392         """
393         return struct.pack(SIGNED_PREFIX,
394                            0,
395                            self._seqnum,
396                            self._share_pieces['root_hash'],
397                            self._share_pieces['salt'],
398                            self._required_shares,
399                            self._total_shares,
400                            self._segment_size,
401                            self._data_length)
402
403
404     def put_signature(self, signature):
405         """
406         Add the signature to the share.
407         """
408         self._share_pieces['signature'] = signature
409
410         return defer.succeed(None)
411
412
413     def put_verification_key(self, verification_key):
414         """
415         Add the verification key to the share.
416         """
417         self._share_pieces['verification_key'] = verification_key
418
419         return defer.succeed(None)
420
421
422     def get_verinfo(self):
423         """
424         I return my verinfo tuple. This is used by the ServermapUpdater
425         to keep track of versions of mutable files.
426
        The verinfo tuple contains:
            - seqnum
            - root hash
            - the salt (for SDMF this is the literal 16-byte IV; the MDMF
              format carries a value derived from all of the per-segment
              salts in this position instead of a literal salt)
            - segsize
            - datalen
            - k
            - N
            - prefix (the thing that you sign)
            - a tuple of offsets
443         """
444         return (self._seqnum,
445                 self._share_pieces['root_hash'],
446                 self._share_pieces['salt'],
447                 self._segment_size,
448                 self._data_length,
449                 self._required_shares,
450                 self._total_shares,
451                 self.get_signable(),
452                 self._get_offsets_tuple())
453
454     def _get_offsets_dict(self):
455         post_offset = HEADER_LENGTH
456         offsets = {}
457
458         verification_key_length = len(self._share_pieces['verification_key'])
459         o1 = offsets['signature'] = post_offset + verification_key_length
460
461         signature_length = len(self._share_pieces['signature'])
462         o2 = offsets['share_hash_chain'] = o1 + signature_length
463
464         share_hash_chain_length = len(self._share_pieces['share_hash_chain'])
465         o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length
466
467         block_hash_tree_length = len(self._share_pieces['block_hash_tree'])
468         o4 = offsets['share_data'] = o3 + block_hash_tree_length
469
470         share_data_length = len(self._share_pieces['sharedata'])
471         o5 = offsets['enc_privkey'] = o4 + share_data_length
472
473         encprivkey_length = len(self._share_pieces['encprivkey'])
474         offsets['EOF'] = o5 + encprivkey_length
475         return offsets
476
477
478     def _get_offsets_tuple(self):
479         offsets = self._get_offsets_dict()
480         return tuple([(key, value) for key, value in offsets.items()])
481
482
483     def _pack_offsets(self):
484         offsets = self._get_offsets_dict()
485         return struct.pack(">LLLLQQ",
486                            offsets['signature'],
487                            offsets['share_hash_chain'],
488                            offsets['block_hash_tree'],
489                            offsets['share_data'],
490                            offsets['enc_privkey'],
491                            offsets['EOF'])
492
493
494     def finish_publishing(self):
495         """
496         Do anything necessary to finish writing the share to a remote
497         server. I require that no further publishing needs to take place
498         after this method has been called.
499         """
500         for k in ["sharedata", "encprivkey", "signature", "verification_key",
501                   "share_hash_chain", "block_hash_tree"]:
502             assert k in self._share_pieces, (self.shnum, k, self._share_pieces.keys())
503         # This is the only method that actually writes something to the
504         # remote server.
505         # First, we need to pack the share into data that we can write
506         # to the remote server in one write.
507         offsets = self._pack_offsets()
508         prefix = self.get_signable()
509         final_share = "".join([prefix,
510                                offsets,
511                                self._share_pieces['verification_key'],
512                                self._share_pieces['signature'],
513                                self._share_pieces['share_hash_chain'],
514                                self._share_pieces['block_hash_tree'],
515                                self._share_pieces['sharedata'],
516                                self._share_pieces['encprivkey']])
517
518         # Our only data vector is going to be writing the final share,
        # in its entirety.
520         datavs = [(0, final_share)]
521
522         if not self._testvs:
523             # Our caller has not provided us with another checkstring
524             # yet, so we assume that we are writing a new share, and set
525             # a test vector that will allow a new share to be written.
526             self._testvs = []
527             self._testvs.append(tuple([0, 1, "eq", ""]))
528
529         tw_vectors = {}
530         tw_vectors[self.shnum] = (self._testvs, datavs, None)
531         return self._rref.callRemote("slot_testv_and_readv_and_writev",
532                                      self._storage_index,
533                                      self._secrets,
534                                      tw_vectors,
535                                      # TODO is it useful to read something?
536                                      self._readvs)
537
538
539 MDMFHEADER = ">BQ32sBBQQ QQQQQQQQ"
540 MDMFHEADERWITHOUTOFFSETS = ">BQ32sBBQQ"
541 MDMFHEADERSIZE = struct.calcsize(MDMFHEADER)
542 MDMFHEADERWITHOUTOFFSETSSIZE = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
543 MDMFCHECKSTRING = ">BQ32s"
544 MDMFSIGNABLEHEADER = ">BQ32sBBQQ"
545 MDMFOFFSETS = ">QQQQQQQQ"
546 MDMFOFFSETS_LENGTH = struct.calcsize(MDMFOFFSETS)
547
548 PRIVATE_KEY_SIZE = 1220
549 SIGNATURE_SIZE = 260
550 VERIFICATION_KEY_SIZE = 292
551 # We know we won't have more than 256 shares, and we know that we won't need
# to store more than log2(256) hash-chain nodes to validate, so that's our
553 # bound. Each node requires 2 bytes of node-number plus 32 bytes of hash.
554 SHARE_HASH_CHAIN_SIZE = (2+HASH_SIZE)*mathutil.log_ceil(256, 2)
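# With HASH_SIZE == 32, that bound works out to (2 + 32) * 8 == 272 bytes.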
555
class MDMFSlotWriteProxy:
    """
    I represent a remote write slot for an MDMF mutable file.

    I abstract away from my caller the details of block and salt
    management, and the implementation of the on-disk format for MDMF
    shares.
    """
    implements(IMutableSlotWriter)

566     # Expected layout, MDMF:
567     # offset:     size:       name:
568     #-- signed part --
569     # 0           1           version number (01)
570     # 1           8           sequence number
571     # 9           32          share tree root hash
572     # 41          1           The "k" encoding parameter
573     # 42          1           The "N" encoding parameter
574     # 43          8           The segment size of the uploaded file
575     # 51          8           The data length of the original plaintext
576     #-- end signed part --
577     # 59          8           The offset of the encrypted private key
578     # 67          8           The offset of the share hash chain
579     # 75          8           The offset of the signature
580     # 83          8           The offset of the verification key
581     # 91          8           The offset of the end of the v. key.
582     # 99          8           The offset of the share data
583     # 107         8           The offset of the block hash tree
584     # 115         8           The offset of EOF
585     # 123         var         encrypted private key
586     # var         var         share hash chain
587     # var         var         signature
588     # var         var         verification key
589     # var         large       share data
590     # var         var         block hash tree
591     #
592     # We order the fields that way to make smart downloaders -- downloaders
    # which preemptively read a big part of the share -- possible.
594     #
    # The checkstring is the first three fields -- the version number,
    # sequence number, and root hash. This is consistent in meaning with
    # what we have for SDMF files, except that instead of including the
    # literal salt, the root hash commits (through the block hash trees)
    # to a value derived from all of the per-segment salts.
600     # 
601     # The salt is stored before the block for each segment. The block
602     # hash tree is computed over the combination of block and salt for
603     # each segment. In this way, we get integrity checking for both
604     # block and salt with the current block hash tree arrangement.
605     # 
606     # The ordering of the offsets is different to reflect the dependencies
607     # that we'll run into with an MDMF file. The expected write flow is
608     # something like this:
609     #
610     #   0: Initialize with the sequence number, encoding parameters and
611     #      data length. From this, we can deduce the number of segments,
    #      and where they should go. We can also figure out where the
613     #      encrypted private key should go, because we can figure out how
614     #      big the share data will be.
615     # 
616     #   1: Encrypt, encode, and upload the file in chunks. Do something
617     #      like 
618     #
619     #       put_block(data, segnum, salt)
620     #
621     #      to write a block and a salt to the disk. We can do both of
622     #      these operations now because we have enough of the offsets to
623     #      know where to put them.
624     # 
625     #   2: Put the encrypted private key. Use:
626     #
627     #        put_encprivkey(encprivkey)
628     #
629     #      Now that we know the length of the private key, we can fill
630     #      in the offset for the block hash tree.
631     #
632     #   3: We're now in a position to upload the block hash tree for
633     #      a share. Put that using something like:
634     #       
635     #        put_blockhashes(block_hash_tree)
636     #
637     #      Note that block_hash_tree is a list of hashes -- we'll take
638     #      care of the details of serializing that appropriately. When
639     #      we get the block hash tree, we are also in a position to
640     #      calculate the offset for the share hash chain, and fill that
641     #      into the offsets table.
642     #
643     #   4: We're now in a position to upload the share hash chain for
644     #      a share. Do that with something like:
645     #      
646     #        put_sharehashes(share_hash_chain) 
647     #
648     #      share_hash_chain should be a dictionary mapping shnums to 
649     #      32-byte hashes -- the wrapper handles serialization.
650     #      We'll know where to put the signature at this point, also.
651     #      The root of this tree will be put explicitly in the next
652     #      step.
653     # 
654     #   5: Before putting the signature, we must first put the
655     #      root_hash. Do this with:
656     # 
657     #        put_root_hash(root_hash).
658     #      
659     #      In terms of knowing where to put this value, it was always
660     #      possible to place it, but it makes sense semantically to
661     #      place it after the share hash tree, so that's why you do it
662     #      in this order.
663     #
664     #   6: With the root hash put, we can now sign the header. Use:
665     #
666     #        get_signable()
667     #
668     #      to get the part of the header that you want to sign, and use:
669     #       
670     #        put_signature(signature)
671     #
672     #      to write your signature to the remote server.
673     #
    #   7: Add the verification key, and finish. Do:
    #
    #        put_verification_key(key)
    #
    #      and
    #
    #        finish_publishing()
681     #
682     # Checkstring management:
683     # 
684     # To write to a mutable slot, we have to provide test vectors to ensure
685     # that we are writing to the same data that we think we are. These
686     # vectors allow us to detect uncoordinated writes; that is, writes
687     # where both we and some other shareholder are writing to the
688     # mutable slot, and to report those back to the parts of the program
689     # doing the writing. 
690     #
691     # With SDMF, this was easy -- all of the share data was written in
692     # one go, so it was easy to detect uncoordinated writes, and we only
693     # had to do it once. With MDMF, not all of the file is written at
694     # once.
695     #
696     # If a share is new, we write out as much of the header as we can
697     # before writing out anything else. This gives other writers a
698     # canary that they can use to detect uncoordinated writes, and, if
    # they do the same thing, gives us the same canary. We then update
700     # the share. We won't be able to write out two fields of the header
701     # -- the share tree hash and the salt hash -- until we finish
702     # writing out the share. We only require the writer to provide the
703     # initial checkstring, and keep track of what it should be after
704     # updates ourselves.
705     #
706     # If we haven't written anything yet, then on the first write (which
707     # will probably be a block + salt of a share), we'll also write out
708     # the header. On subsequent passes, we'll expect to see the header.
709     # This changes in two places:
710     #
711     #   - When we write out the salt hash
712     #   - When we write out the root of the share hash tree
713     #
714     # since these values will change the header. It is possible that we 
715     # can just make those be written in one operation to minimize
716     # disruption.
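    #
    # Putting the steps above together, a typical caller's interaction with
    # this proxy looks roughly like the following sketch (illustrative only;
    # 'blocks', 'salts', and sign() stand in for whatever the caller uses):
    #
    #   writer = MDMFSlotWriteProxy(shnum, rref, storage_index, secrets,
    #                               seqnum, k, N, segment_size, data_length)
    #   for segnum, (block, salt) in enumerate(zip(blocks, salts)):
    #       writer.put_block(block, segnum, salt)
    #   writer.put_encprivkey(encprivkey)
    #   writer.put_blockhashes(block_hash_tree)   # list of hashes
    #   writer.put_sharehashes(share_hash_chain)  # dict of (shnum, hash)
    #   writer.put_root_hash(root_hash)
    #   signature = sign(writer.get_signable())
    #   writer.put_signature(signature)
    #   writer.put_verification_key(verification_key)
    #   d = writer.finish_publishing()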
717     def __init__(self,
718                  shnum,
719                  rref, # a remote reference to a storage server
720                  storage_index,
721                  secrets, # (write_enabler, renew_secret, cancel_secret)
722                  seqnum, # the sequence number of the mutable file
723                  required_shares,
724                  total_shares,
725                  segment_size,
726                  data_length): # the length of the original file
727         self.shnum = shnum
728         self._rref = rref
729         self._storage_index = storage_index
730         self._seqnum = seqnum
731         self._required_shares = required_shares
732         assert self.shnum >= 0 and self.shnum < total_shares
733         self._total_shares = total_shares
734         # We build up the offset table as we write things. It is the
735         # last thing we write to the remote server. 
736         self._offsets = {}
737         self._testvs = []
738         # This is a list of write vectors that will be sent to our
739         # remote server once we are directed to write things there.
740         self._writevs = []
741         self._secrets = secrets
742         # The segment size needs to be a multiple of the k parameter --
743         # any padding should have been carried out by the publisher
744         # already.
745         assert segment_size % required_shares == 0
746         self._segment_size = segment_size
747         self._data_length = data_length
748
749         # These are set later -- we define them here so that we can
750         # check for their existence easily
751
752         # This is the root of the share hash tree -- the Merkle tree
753         # over the roots of the block hash trees computed for shares in
754         # this upload.
755         self._root_hash = None
756
757         # We haven't yet written anything to the remote bucket. By
758         # setting this, we tell the _write method as much. The write
759         # method will then know that it also needs to add a write vector
760         # for the checkstring (or what we have of it) to the first write
761         # request. We'll then record that value for future use.  If
762         # we're expecting something to be there already, we need to call
763         # set_checkstring before we write anything to tell the first
764         # write about that.
765         self._written = False
766
767         # When writing data to the storage servers, we get a read vector
768         # for free. We'll read the checkstring, which will help us
769         # figure out what's gone wrong if a write fails.
770         self._readv = [(0, struct.calcsize(MDMFCHECKSTRING))]
771
        # We calculate the number of segments because it tells us how
        # large the share data region will be, and also because it
        # provides a useful amount of bounds checking.
775         self._num_segments = mathutil.div_ceil(self._data_length,
776                                                self._segment_size)
777         self._block_size = self._segment_size / self._required_shares
778         # We also calculate the share size, to help us with block
779         # constraints later.
780         tail_size = self._data_length % self._segment_size
781         if not tail_size:
782             self._tail_block_size = self._block_size
783         else:
784             self._tail_block_size = mathutil.next_multiple(tail_size,
785                                                            self._required_shares)
786             self._tail_block_size /= self._required_shares
787
        # We know where the encrypted private key starts: right after the
        # end of the header (which is defined as the signable part plus
        # the offsets table). We can also calculate the total size of the
        # share data from what we now know.
792         self._actual_block_size = self._block_size + SALT_SIZE
793         data_size = self._actual_block_size * (self._num_segments - 1)
794         data_size += self._tail_block_size
795         data_size += SALT_SIZE
796         self._offsets['enc_privkey'] = MDMFHEADERSIZE
797
798         # We don't define offsets for these because we want them to be
799         # tightly packed -- this allows us to ignore the responsibility
800         # of padding individual values, and of removing that padding
801         # later. So nonconstant_start is where we start writing
802         # nonconstant data.
803         nonconstant_start = self._offsets['enc_privkey']
804         nonconstant_start += PRIVATE_KEY_SIZE
805         nonconstant_start += SIGNATURE_SIZE
806         nonconstant_start += VERIFICATION_KEY_SIZE
807         nonconstant_start += SHARE_HASH_CHAIN_SIZE
808
809         self._offsets['share_data'] = nonconstant_start
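        # (With the constants above and HASH_SIZE == 32, nonconstant_start
        # works out to 123 + 1220 + 260 + 292 + 272 == 2167, so share data
        # always starts at byte 2167 of the share.)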
810
811         # Finally, we know how big the share data will be, so we can
812         # figure out where the block hash tree needs to go.
813         # XXX: But this will go away if Zooko wants to make it so that
814         # you don't need to know the size of the file before you start
815         # uploading it.
816         self._offsets['block_hash_tree'] = self._offsets['share_data'] + \
817                     data_size
818
        # Done. We can now start writing.
820
821
822     def set_checkstring(self,
823                         seqnum_or_checkstring,
824                         root_hash=None,
825                         salt=None):
826         """
        Set the checkstring for the given shnum.
828
829         This can be invoked in one of two ways.
830
831         With one argument, I assume that you are giving me a literal
832         checkstring -- e.g., the output of get_checkstring. I will then
833         set that checkstring as it is. This form is used by unit tests.
834
835         With two arguments, I assume that you are giving me a sequence
836         number and root hash to make a checkstring from. In that case, I
837         will build a checkstring and set it for you. This form is used
838         by the publisher.
839
840         By default, I assume that I am writing new shares to the grid.
        If you don't explicitly set your own checkstring, I will use
842         one that requires that the remote share not exist. You will want
843         to use this method if you are updating a share in-place;
844         otherwise, writes will fail.
845         """
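        # For example (illustrative):
        #   proxy.set_checkstring(seqnum, root_hash)
        # is equivalent to:
        #   proxy.set_checkstring(struct.pack(MDMFCHECKSTRING, 1, seqnum,
        #                                     root_hash))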
846         # You're allowed to overwrite checkstrings with this method;
847         # I assume that users know what they are doing when they call
848         # it.
849         if root_hash:
850             checkstring = struct.pack(MDMFCHECKSTRING,
851                                       1,
852                                       seqnum_or_checkstring,
853                                       root_hash)
854         else:
855             checkstring = seqnum_or_checkstring
856
857         if checkstring == "":
            # We special-case the empty checkstring: it means that we
            # expect the share not to exist on the storage server, and a
            # zero-length "eq" test vector can't express that. Leaving
            # self._testvs empty lets _write install its default
            # (0, 1, "eq", "") test vector, which has that meaning.
862             self._testvs = []
863         else:
864             self._testvs = []
865             self._testvs.append((0, len(checkstring), "eq", checkstring))
866
867
868     def __repr__(self):
869         return "MDMFSlotWriteProxy for share %d" % self.shnum
870
871
872     def get_checkstring(self):
873         """
        I return a representation of what the checkstring for this share
        will look like on the server.
876
877         I am mostly used for tests.
878         """
879         if self._root_hash:
880             roothash = self._root_hash
881         else:
882             roothash = "\x00" * 32
883         return struct.pack(MDMFCHECKSTRING,
884                            1,
885                            self._seqnum,
886                            roothash)
887
888
889     def put_block(self, data, segnum, salt):
890         """
891         I queue a write vector for the data, salt, and segment number
892         provided to me. I return None, as I do not actually cause
893         anything to be written yet.
894         """
895         if segnum >= self._num_segments:
896             raise LayoutInvalid("I won't overwrite the block hash tree")
897         if len(salt) != SALT_SIZE:
            raise LayoutInvalid("I was given a salt of size %d, but "
                                "I wanted a salt of size %d" %
                                (len(salt), SALT_SIZE))
900         if segnum + 1 == self._num_segments:
901             if len(data) != self._tail_block_size:
902                 raise LayoutInvalid("I was given the wrong size block to write")
903         elif len(data) != self._block_size:
904             raise LayoutInvalid("I was given the wrong size block to write")
905
        # We want to write at the share data offset plus
        # segnum * (SALT_SIZE + block_size).
907         offset = self._offsets['share_data'] + \
908             (self._actual_block_size * segnum)
909         data = salt + data
910
911         self._writevs.append(tuple([offset, data]))
912
913
914     def put_encprivkey(self, encprivkey):
915         """
916         I queue a write vector for the encrypted private key provided to
917         me.
918         """
919         assert self._offsets
920         assert self._offsets['enc_privkey']
        # You shouldn't re-write the encprivkey after the share hash
        # chain has been written, since that could cause the private key
        # to run into it. put_sharehashes writes the signature offset
        # when it runs, so that's a good indicator of whether or not the
        # share hash chain has been written.
927         if "signature" in self._offsets:
928             raise LayoutInvalid("You can't put the encrypted private key "
929                                 "after putting the share hash chain")
930
931         self._offsets['share_hash_chain'] = self._offsets['enc_privkey'] + \
932                 len(encprivkey)
933
934         self._writevs.append(tuple([self._offsets['enc_privkey'], encprivkey]))
935
936
937     def put_blockhashes(self, blockhashes):
938         """
939         I queue a write vector to put the block hash tree in blockhashes
940         onto the remote server.
941
942         The encrypted private key must be queued before the block hash
943         tree, since we need to know how large it is to know where the
944         block hash tree should go. The block hash tree must be put
945         before the share hash chain, since its size determines the
946         offset of the share hash chain.
947         """
948         assert self._offsets
949         assert "block_hash_tree" in self._offsets
950
951         assert isinstance(blockhashes, list)
952
953         blockhashes_s = "".join(blockhashes)
954         self._offsets['EOF'] = self._offsets['block_hash_tree'] + len(blockhashes_s)
955
956         self._writevs.append(tuple([self._offsets['block_hash_tree'],
957                                   blockhashes_s]))
958
959
960     def put_sharehashes(self, sharehashes):
961         """
962         I queue a write vector to put the share hash chain in my
963         argument onto the remote server.
964
965         The block hash tree must be queued before the share hash chain,
966         since we need to know where the block hash tree ends before we
967         can know where the share hash chain starts. The share hash chain
968         must be put before the signature, since the length of the packed
969         share hash chain determines the offset of the signature. Also,
970         semantically, you must know what the root of the block hash tree
971         is before you can generate a valid signature.
972         """
973         assert isinstance(sharehashes, dict)
974         assert self._offsets
975         if "share_hash_chain" not in self._offsets:
976             raise LayoutInvalid("You must put the block hash tree before "
977                                 "putting the share hash chain")
978
979         # The signature comes after the share hash chain. If the
980         # signature has already been written, we must not write another
981         # share hash chain. The signature writes the verification key
982         # offset when it gets sent to the remote server, so we look for
983         # that.
984         if "verification_key" in self._offsets:
985             raise LayoutInvalid("You must write the share hash chain "
986                                 "before you write the signature")
987         sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i])
988                                   for i in sorted(sharehashes.keys())])
989         self._offsets['signature'] = self._offsets['share_hash_chain'] + \
990             len(sharehashes_s)
991         self._writevs.append(tuple([self._offsets['share_hash_chain'],
992                             sharehashes_s]))
993
994
995     def put_root_hash(self, roothash):
996         """
997         Put the root hash (the root of the share hash tree) in the
998         remote slot.
999         """
1000         # It does not make sense to be able to put the root 
1001         # hash without first putting the share hashes, since you need
1002         # the share hashes to generate the root hash.
1003         #
1004         # Signature is defined by the routine that places the share hash
1005         # chain, so it's a good thing to look for in finding out whether
1006         # or not the share hash chain exists on the remote server.
1007         if len(roothash) != HASH_SIZE:
1008             raise LayoutInvalid("hashes and salts must be exactly %d bytes"
1009                                  % HASH_SIZE)
1010         self._root_hash = roothash
        # To write this value, we update the checkstring on the remote
        # server, which includes it.
1013         checkstring = self.get_checkstring()
1014         self._writevs.append(tuple([0, checkstring]))
1015         # This write, if successful, changes the checkstring, so we need
1016         # to update our internal checkstring to be consistent with the
1017         # one on the server.
1018
1019
1020     def get_signable(self):
1021         """
1022         Get the first seven fields of the mutable file; the parts that
1023         are signed.
1024         """
1025         if not self._root_hash:
1026             raise LayoutInvalid("You need to set the root hash "
1027                                 "before getting something to "
1028                                 "sign")
1029         return struct.pack(MDMFSIGNABLEHEADER,
1030                            1,
1031                            self._seqnum,
1032                            self._root_hash,
1033                            self._required_shares,
1034                            self._total_shares,
1035                            self._segment_size,
1036                            self._data_length)
1037
1038
1039     def put_signature(self, signature):
1040         """
1041         I queue a write vector for the signature of the MDMF share.
1042
1043         I require that the root hash and share hash chain have been put
1044         to the grid before I will write the signature to the grid.
1045         """
1046         if "signature" not in self._offsets:
1047             raise LayoutInvalid("You must put the share hash chain "
1048         # It does not make sense to put a signature without first
1049         # putting the root hash and the salt hash (since otherwise
1050         # the signature would be incomplete), so we don't allow that.
1051                        "before putting the signature")
1052         if not self._root_hash:
1053             raise LayoutInvalid("You must complete the signed prefix "
1054                                 "before computing a signature")
1055         # If we put the signature after we put the verification key, we
1056         # could end up running into the verification key, and will
1057         # probably screw up the offsets as well. So we don't allow that.
1058         if "verification_key_end" in self._offsets:
1059             raise LayoutInvalid("You can't put the signature after the "
1060                                 "verification key")
        # put_verification_key defines the verification_key_end offset
        # when it runs, which is what we checked for above.
1063         self._offsets['verification_key'] = self._offsets['signature'] +\
1064             len(signature)
1065         self._writevs.append(tuple([self._offsets['signature'], signature]))
1066
1067
1068     def put_verification_key(self, verification_key):
1069         """
1070         I queue a write vector for the verification key.
1071
1072         I require that the signature have been written to the storage
1073         server before I allow the verification key to be written to the
1074         remote server.
1075         """
1076         if "verification_key" not in self._offsets:
1077             raise LayoutInvalid("You must put the signature before you "
1078                                 "can put the verification key")
1079
1080         self._offsets['verification_key_end'] = \
1081             self._offsets['verification_key'] + len(verification_key)
1082         assert self._offsets['verification_key_end'] <= self._offsets['share_data']
1083         self._writevs.append(tuple([self._offsets['verification_key'],
1084                             verification_key]))
1085
1086
1087     def _get_offsets_tuple(self):
1088         return tuple([(key, value) for key, value in self._offsets.items()])
1089
1090
1091     def get_verinfo(self):
1092         return (self._seqnum,
1093                 self._root_hash,
1094                 self._required_shares,
1095                 self._total_shares,
1096                 self._segment_size,
1097                 self._data_length,
1098                 self.get_signable(),
1099                 self._get_offsets_tuple())
1100
1101
1102     def finish_publishing(self):
1103         """
1104         I add a write vector for the offsets table, and then cause all
1105         of the write vectors that I've dealt with so far to be published
1106         to the remote server, ending the write process.
1107         """
1108         if "verification_key_end" not in self._offsets:
1109             raise LayoutInvalid("You must put the verification key before "
1110                                 "you can publish the offsets")
1111         offsets_offset = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
1112         offsets = struct.pack(MDMFOFFSETS,
1113                               self._offsets['enc_privkey'],
1114                               self._offsets['share_hash_chain'],
1115                               self._offsets['signature'],
1116                               self._offsets['verification_key'],
1117                               self._offsets['verification_key_end'],
1118                               self._offsets['share_data'],
1119                               self._offsets['block_hash_tree'],
1120                               self._offsets['EOF'])
1121         self._writevs.append(tuple([offsets_offset, offsets]))
1122         encoding_parameters_offset = struct.calcsize(MDMFCHECKSTRING)
1123         params = struct.pack(">BBQQ",
1124                              self._required_shares,
1125                              self._total_shares,
1126                              self._segment_size,
1127                              self._data_length)
1128         self._writevs.append(tuple([encoding_parameters_offset, params]))
1129         return self._write(self._writevs)
1130
1131
1132     def _write(self, datavs, on_failure=None, on_success=None):
1133         """I write the data vectors in datavs to the remote slot."""
1134         tw_vectors = {}
1135         if not self._testvs:
1136             self._testvs = []
1137             self._testvs.append(tuple([0, 1, "eq", ""]))
1138         if not self._written:
1139             # Write a new checkstring to the share when we write it, so
1140             # that we have something to check later.
1141             new_checkstring = self.get_checkstring()
1142             datavs.append((0, new_checkstring))
1143             def _first_write():
1144                 self._written = True
1145                 self._testvs = [(0, len(new_checkstring), "eq", new_checkstring)]
1146             on_success = _first_write
1147         tw_vectors[self.shnum] = (self._testvs, datavs, None)
1148         d = self._rref.callRemote("slot_testv_and_readv_and_writev",
1149                                   self._storage_index,
1150                                   self._secrets,
1151                                   tw_vectors,
1152                                   self._readv)
1153         def _result(results):
1154             if isinstance(results, failure.Failure) or not results[0]:
1155                 # Do nothing; the write was unsuccessful.
1156                 if on_failure: on_failure()
1157             else:
1158                 if on_success: on_success()
1159             return results
1160         d.addCallback(_result)
1161         return d
1162
1163
1164 class MDMFSlotReadProxy:
1165     """
1166     I read from a mutable slot filled with data written in the MDMF data
1167     format (which is described above).
1168
1169     I can be initialized with some amount of data, which I will use (if
1170     it is valid) to eliminate some of the need to fetch it from servers.
1171     """
1172     def __init__(self,
1173                  rref,
1174                  storage_index,
1175                  shnum,
1176                  data=""):
1177         # Start the initialization process.
1178         self._rref = rref
1179         self._storage_index = storage_index
1180         self.shnum = shnum
1181
1182         # Before doing anything, the reader is probably going to want to
1183         # verify that the signature is correct. To do that, they'll need
1184         # the verification key, and the signature. To get those, we'll
1185         # need the offset table. So fetch the offset table on the
1186         # assumption that that will be the first thing that a reader is
1187         # going to do.
1188
1189         # The fact that these encoding parameters are None tells us
1190         # that we haven't yet fetched them from the remote share, so we
1191         # should. We could just not set them, but the checks will be
1192         # easier to read if we don't have to use hasattr.
1193         self._version_number = None
1194         self._sequence_number = None
1195         self._root_hash = None
1196         # Filled in if we're dealing with an SDMF file. Unused
1197         # otherwise.
1198         self._salt = None
1199         self._required_shares = None
1200         self._total_shares = None
1201         self._segment_size = None
1202         self._data_length = None
1203         self._offsets = None
1204
        # If the user has chosen to initialize us with some data, we'll
        # try to satisfy subsequent data requests with that data before
        # asking the storage server for it.
        self._data = data
        # The cache in the filenode hands us None when there isn't any
        # cached data, but the way we index the cached data requires a
        # string, so convert None to "".
        if self._data is None:
            self._data = ""
1214
1215         self._queue_observers = observer.ObserverList()
1216         self._queue_errbacks = observer.ObserverList()
1217         self._readvs = []
1218
1219
1220     def _maybe_fetch_offsets_and_header(self, force_remote=False):
1221         """
1222         I fetch the offset table and the header from the remote slot if
1223         I don't already have them. If I do have them, I do nothing and
1224         return an empty Deferred.
1225         """
1226         if self._offsets:
1227             return defer.succeed(None)
        # At this point, we may be either SDMF or MDMF. Fetching 123
        # bytes will be enough to get the header and offsets for both
        # SDMF and MDMF, though we'll be left with 16 more bytes than we
        # need if this ends up being SDMF. This is probably less
        # expensive than the cost of a second roundtrip.
1233         readvs = [(0, 123)]
1234         d = self._read(readvs, force_remote)
1235         d.addCallback(self._process_encoding_parameters)
1236         d.addCallback(self._process_offsets)
1237         return d
1238
1239
1240     def _process_encoding_parameters(self, encoding_parameters):
1241         assert self.shnum in encoding_parameters
1242         encoding_parameters = encoding_parameters[self.shnum][0]
1243         # The first byte is the version number. It will tell us what
1244         # to do next.
1245         (verno,) = struct.unpack(">B", encoding_parameters[:1])
1246         if verno == MDMF_VERSION:
1247             read_size = MDMFHEADERWITHOUTOFFSETSSIZE
1248             (verno,
1249              seqnum,
1250              root_hash,
1251              k,
1252              n,
1253              segsize,
1254              datalen) = struct.unpack(MDMFHEADERWITHOUTOFFSETS,
1255                                       encoding_parameters[:read_size])
1256             if segsize == 0 and datalen == 0:
1257                 # Empty file, no segments.
1258                 self._num_segments = 0
1259             else:
1260                 self._num_segments = mathutil.div_ceil(datalen, segsize)
1261
1262         elif verno == SDMF_VERSION:
1263             read_size = SIGNED_PREFIX_LENGTH
1264             (verno,
1265              seqnum,
1266              root_hash,
1267              salt,
1268              k,
1269              n,
1270              segsize,
1271              datalen) = struct.unpack(">BQ32s16s BBQQ",
1272                                 encoding_parameters[:SIGNED_PREFIX_LENGTH])
1273             self._salt = salt
1274             if segsize == 0 and datalen == 0:
1275                 # empty file
1276                 self._num_segments = 0
1277             else:
1278                 # non-empty SDMF files have one segment.
1279                 self._num_segments = 1
1280         else:
1281             raise UnknownVersionError("You asked me to read mutable file "
1282                                       "version %d, but I only understand "
1283                                       "%d and %d" % (verno, SDMF_VERSION,
1284                                                      MDMF_VERSION))
1285
1286         self._version_number = verno
1287         self._sequence_number = seqnum
1288         self._root_hash = root_hash
1289         self._required_shares = k
1290         self._total_shares = n
1291         self._segment_size = segsize
1292         self._data_length = datalen
1293
1294         self._block_size = self._segment_size / self._required_shares
1295         # We can upload empty files, and need to account for this fact
1296         # so as to avoid zero-division and zero-modulo errors.
1297         if datalen > 0:
1298             tail_size = self._data_length % self._segment_size
1299         else:
1300             tail_size = 0
1301         if not tail_size:
1302             self._tail_block_size = self._block_size
1303         else:
1304             self._tail_block_size = mathutil.next_multiple(tail_size,
1305                                                     self._required_shares)
1306             self._tail_block_size /= self._required_shares
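        # A small worked example (illustrative numbers only): with k = 3,
        # segsize = 9 and datalen = 20 we get block_size = 9 / 3 = 3,
        # tail_size = 20 % 9 = 2, and, since mathutil.next_multiple(2, 3)
        # is 3, a tail block size of 3 / 3 = 1. We round up here,
        # presumably because the tail segment is padded to a multiple of
        # k before erasure coding.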
1307
1308         return encoding_parameters
1309
1310
1311     def _process_offsets(self, offsets):
1312         if self._version_number == 0:
1313             read_size = OFFSETS_LENGTH
1314             read_offset = SIGNED_PREFIX_LENGTH
1315             end = read_size + read_offset
1316             (signature,
1317              share_hash_chain,
1318              block_hash_tree,
1319              share_data,
1320              enc_privkey,
1321              EOF) = struct.unpack(">LLLLQQ",
1322                                   offsets[read_offset:end])
1323             self._offsets = {}
1324             self._offsets['signature'] = signature
1325             self._offsets['share_data'] = share_data
1326             self._offsets['block_hash_tree'] = block_hash_tree
1327             self._offsets['share_hash_chain'] = share_hash_chain
1328             self._offsets['enc_privkey'] = enc_privkey
1329             self._offsets['EOF'] = EOF
1330
1331         elif self._version_number == 1:
1332             read_offset = MDMFHEADERWITHOUTOFFSETSSIZE
1333             read_length = MDMFOFFSETS_LENGTH
1334             end = read_offset + read_length
1335             (encprivkey,
1336              sharehashes,
1337              signature,
1338              verification_key,
1339              verification_key_end,
1340              sharedata,
1341              blockhashes,
1342              eof) = struct.unpack(MDMFOFFSETS,
1343                                   offsets[read_offset:end])
1344             self._offsets = {}
1345             self._offsets['enc_privkey'] = encprivkey
1346             self._offsets['block_hash_tree'] = blockhashes
1347             self._offsets['share_hash_chain'] = sharehashes
1348             self._offsets['signature'] = signature
1349             self._offsets['verification_key'] = verification_key
1350             self._offsets['verification_key_end'] = \
1351                 verification_key_end
1352             self._offsets['EOF'] = eof
1353             self._offsets['share_data'] = sharedata
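
        # Note that neither layout stores field lengths explicitly: the
        # getters below recover the length of each region by subtracting
        # its offset from the offset of the region that follows it (or
        # from EOF), so the regions are assumed to be contiguous.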
1354
1355
1356     def get_block_and_salt(self, segnum, queue=False):
1357         """
1358         I return (block, salt), where block is the block data and
1359         salt is the salt used to encrypt that segment.
1360         """
1361         d = self._maybe_fetch_offsets_and_header()
1362         def _then(ignored):
1363             base_share_offset = self._offsets['share_data']
1364
1365             if segnum + 1 > self._num_segments:
1366                 raise LayoutInvalid("Not a valid segment number")
1367
1368             if self._version_number == 0:
1369                 share_offset = base_share_offset + self._block_size * segnum
1370             else:
1371                 share_offset = base_share_offset + (self._block_size + \
1372                                                     SALT_SIZE) * segnum
1373             if segnum + 1 == self._num_segments:
1374                 data = self._tail_block_size
1375             else:
1376                 data = self._block_size
1377
1378             if self._version_number == 1:
1379                 data += SALT_SIZE
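            # For example (MDMF, illustrative numbers only): with
            # block_size = 3 and SALT_SIZE = 16, segment 2 starts at
            # base_share_offset + 2 * (3 + 16) and we read 3 + 16 = 19
            # bytes (tail_block_size + 16 for the final segment), because
            # MDMF stores each block's salt immediately in front of the
            # block itself.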
1380
1381             readvs = [(share_offset, data)]
1382             return readvs
1383         d.addCallback(_then)
1384         d.addCallback(lambda readvs:
1385             self._read(readvs, queue=queue))
1386         def _process_results(results):
1387             assert self.shnum in results
1388             if self._version_number == 0:
1389                 # We only read the share data, but we know the salt from
1390                 # when we fetched the header
1391                 data = results[self.shnum]
1392                 if not data:
1393                     data = ""
1394                 else:
1395                     assert len(data) == 1
1396                     data = data[0]
1397                 salt = self._salt
1398             else:
1399                 data = results[self.shnum]
1400                 if not data:
1401                     salt = data = ""
1402                 else:
1403                     salt_and_data = results[self.shnum][0]
1404                     salt = salt_and_data[:SALT_SIZE]
1405                     data = salt_and_data[SALT_SIZE:]
1406             return data, salt
1407         d.addCallback(_process_results)
1408         return d
1409
1410
1411     def get_blockhashes(self, needed=None, queue=False, force_remote=False):
1412         """
1413         I return the block hash tree
1414
1415         I take an optional argument, needed, which is a set of indices
1416         corresponding to hashes that I should fetch. If this argument is
1417         missing, I will fetch the entire block hash tree; otherwise, I
1418         may attempt to fetch fewer hashes, based on what needed says
1419         that I should do. Note that I may fetch as many hashes as I
1420         want, so long as the set of hashes that I do fetch is a superset
1421         of the ones that I am asked for, so callers should be prepared
1422         to tolerate additional hashes.
1423         """
1424         # TODO: Return only the parts of the block hash tree necessary
1425         # to validate the blocknum provided?
1426         # This is a good idea, but it is hard to implement correctly. It
1427         # is bad to fetch any one block hash more than once, so we
1428         # probably just want to fetch the whole thing at once and then
1429         # serve it.
1430         if needed == set([]):
1431             return defer.succeed([])
1432         d = self._maybe_fetch_offsets_and_header()
1433         def _then(ignored):
1434             blockhashes_offset = self._offsets['block_hash_tree']
1435             if self._version_number == 1:
1436                 blockhashes_length = self._offsets['EOF'] - blockhashes_offset
1437             else:
1438                 blockhashes_length = self._offsets['share_data'] - blockhashes_offset
1439             readvs = [(blockhashes_offset, blockhashes_length)]
1440             return readvs
1441         d.addCallback(_then)
1442         d.addCallback(lambda readvs:
1443             self._read(readvs, queue=queue, force_remote=force_remote))
1444         def _build_block_hash_tree(results):
1445             assert self.shnum in results
1446
1447             rawhashes = results[self.shnum][0]
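            # The raw bytes are just the block hash tree's nodes
            # concatenated end to end, each HASH_SIZE (32) bytes long; a
            # 96-byte string, for instance, would split into the three
            # hashes rawhashes[0:32], rawhashes[32:64] and rawhashes[64:96].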
1448             results = [rawhashes[i:i+HASH_SIZE]
1449                        for i in range(0, len(rawhashes), HASH_SIZE)]
1450             return results
1451         d.addCallback(_build_block_hash_tree)
1452         return d
1453
1454
1455     def get_sharehashes(self, needed=None, queue=False, force_remote=False):
1456         """
1457         I return the part of the share hash chain needed to validate
1458         this share.
1459
1460         I take an optional argument, needed. Needed is a set of indices
1461         that correspond to the hashes that I should fetch. If needed is
1462         not present, I will fetch and return the entire share hash
1463         chain. Otherwise, I may fetch and return any part of the share
1464         hash chain that is a superset of the part that I am asked to
1465         fetch. Callers should be prepared to deal with more hashes than
1466         they've asked for.
1467         """
1468         if needed == set([]):
1469             return defer.succeed([])
1470         d = self._maybe_fetch_offsets_and_header()
1471
1472         def _make_readvs(ignored):
1473             sharehashes_offset = self._offsets['share_hash_chain']
1474             if self._version_number == 0:
1475                 sharehashes_length = self._offsets['block_hash_tree'] - sharehashes_offset
1476             else:
1477                 sharehashes_length = self._offsets['signature'] - sharehashes_offset
1478             readvs = [(sharehashes_offset, sharehashes_length)]
1479             return readvs
1480         d.addCallback(_make_readvs)
1481         d.addCallback(lambda readvs:
1482             self._read(readvs, queue=queue, force_remote=force_remote))
1483         def _build_share_hash_chain(results):
1484             assert self.shnum in results
1485
1486             sharehashes = results[self.shnum][0]
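            # The share hash chain is stored as a sequence of fixed-size
            # entries, each a 2-byte node number followed by a 32-byte
            # hash (">H32s"); so, for example, an entry of "\x00\x05"
            # plus a 32-byte hash becomes the dict item {5: <hash>}.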
1487             results = [sharehashes[i:i+(HASH_SIZE + 2)]
1488                        for i in range(0, len(sharehashes), HASH_SIZE + 2)]
1489             results = dict([struct.unpack(">H32s", data)
1490                             for data in results])
1491             return results
1492         d.addCallback(_build_share_hash_chain)
1493         return d
1494
1495
1496     def get_encprivkey(self, queue=False):
1497         """
1498         I return the encrypted private key.
1499         """
1500         d = self._maybe_fetch_offsets_and_header()
1501
1502         def _make_readvs(ignored):
1503             privkey_offset = self._offsets['enc_privkey']
1504             if self._version_number == 0:
1505                 privkey_length = self._offsets['EOF'] - privkey_offset
1506             else:
1507                 privkey_length = self._offsets['share_hash_chain'] - privkey_offset
1508             readvs = [(privkey_offset, privkey_length)]
1509             return readvs
1510         d.addCallback(_make_readvs)
1511         d.addCallback(lambda readvs:
1512             self._read(readvs, queue=queue))
1513         def _process_results(results):
1514             assert self.shnum in results
1515             privkey = results[self.shnum][0]
1516             return privkey
1517         d.addCallback(_process_results)
1518         return d
1519
1520
1521     def get_signature(self, queue=False):
1522         """
1523         I return the signature of my share.
1524         """
1525         d = self._maybe_fetch_offsets_and_header()
1526
1527         def _make_readvs(ignored):
1528             signature_offset = self._offsets['signature']
1529             if self._version_number == 1:
1530                 signature_length = self._offsets['verification_key'] - signature_offset
1531             else:
1532                 signature_length = self._offsets['share_hash_chain'] - signature_offset
1533             readvs = [(signature_offset, signature_length)]
1534             return readvs
1535         d.addCallback(_make_readvs)
1536         d.addCallback(lambda readvs:
1537             self._read(readvs, queue=queue))
1538         def _process_results(results):
1539             assert self.shnum in results
1540             signature = results[self.shnum][0]
1541             return signature
1542         d.addCallback(_process_results)
1543         return d
1544
1545
1546     def get_verification_key(self, queue=False):
1547         """
1548         I return the verification key.
1549         """
1550         d = self._maybe_fetch_offsets_and_header()
1551
1552         def _make_readvs(ignored):
1553             if self._version_number == 1:
1554                 vk_offset = self._offsets['verification_key']
1555                 vk_length = self._offsets['verification_key_end'] - vk_offset
1556             else:
1557                 vk_offset = struct.calcsize(">BQ32s16sBBQQLLLLQQ")
1558                 vk_length = self._offsets['signature'] - vk_offset
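                # (struct.calcsize(">BQ32s16sBBQQLLLLQQ") is the size of
                # the full SDMF header -- 107 bytes -- since in SDMF the
                # verification key occupies the space between the end of
                # the header and the start of the signature.)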
1559             readvs = [(vk_offset, vk_length)]
1560             return readvs
1561         d.addCallback(_make_readvs)
1562         d.addCallback(lambda readvs:
1563             self._read(readvs, queue=queue))
1564         def _process_results(results):
1565             assert self.shnum in results
1566             verification_key = results[self.shnum][0]
1567             return verification_key
1568         d.addCallback(_process_results)
1569         return d
1570
1571
1572     def get_encoding_parameters(self):
1573         """
1574         I return (k, n, segsize, datalen)
1575         """
1576         d = self._maybe_fetch_offsets_and_header()
1577         d.addCallback(lambda ignored:
1578             (self._required_shares,
1579              self._total_shares,
1580              self._segment_size,
1581              self._data_length))
1582         return d
1583
1584
1585     def get_seqnum(self):
1586         """
1587         I return the sequence number for this share.
1588         """
1589         d = self._maybe_fetch_offsets_and_header()
1590         d.addCallback(lambda ignored:
1591             self._sequence_number)
1592         return d
1593
1594
1595     def get_root_hash(self):
1596         """
1597         I return the root of the block hash tree
1598         """
1599         d = self._maybe_fetch_offsets_and_header()
1600         d.addCallback(lambda ignored: self._root_hash)
1601         return d
1602
1603
1604     def get_checkstring(self):
1605         """
1606         I return the packed representation of the following:
1607
1608             - version number
1609             - sequence number
1610             - root hash
1611             - salt (SDMF only)
1612
1613         which my users use as a checkstring to detect other writers.
1614         """
1615         d = self._maybe_fetch_offsets_and_header()
1616         def _build_checkstring(ignored):
1617             if self._salt:
1618                 checkstring = struct.pack(PREFIX,
1619                                           self._version_number,
1620                                           self._sequence_number,
1621                                           self._root_hash,
1622                                           self._salt)
1623             else:
1624                 checkstring = struct.pack(MDMFCHECKSTRING,
1625                                           self._version_number,
1626                                           self._sequence_number,
1627                                           self._root_hash)
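
            # (The SDMF checkstring additionally packs the 16-byte salt
            # after the root hash; the MDMF checkstring stops at the root
            # hash, since MDMF salts are stored per-segment instead.)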
1628
1629             return checkstring
1630         d.addCallback(_build_checkstring)
1631         return d
1632
1633
1634     def get_prefix(self, force_remote):
1635         d = self._maybe_fetch_offsets_and_header(force_remote)
1636         d.addCallback(lambda ignored:
1637             self._build_prefix())
1638         return d
1639
1640
1641     def _build_prefix(self):
1642         # The prefix is another name for the part of the remote share
1643         # that gets signed. It consists of everything up to and
1644         # including the datalength, packed by struct.
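        # (Note: the MDMF signable header is the SDMF signed prefix minus
        # the 16-byte salt field, since MDMF has no whole-file salt --
        # each segment carries its own salt next to its block instead.)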
1645         if self._version_number == SDMF_VERSION:
1646             return struct.pack(SIGNED_PREFIX,
1647                            self._version_number,
1648                            self._sequence_number,
1649                            self._root_hash,
1650                            self._salt,
1651                            self._required_shares,
1652                            self._total_shares,
1653                            self._segment_size,
1654                            self._data_length)
1655
1656         else:
1657             return struct.pack(MDMFSIGNABLEHEADER,
1658                            self._version_number,
1659                            self._sequence_number,
1660                            self._root_hash,
1661                            self._required_shares,
1662                            self._total_shares,
1663                            self._segment_size,
1664                            self._data_length)
1665
1666
1667     def _get_offsets_tuple(self):
1668         # The offsets tuple is another component of the version
1669         # information tuple. Despite the name, it is currently just a
1670         # copy of our offsets dictionary.
1671         return self._offsets.copy()
1672
1673
1674     def get_verinfo(self):
1675         """
1676         I return my verinfo tuple. This is used by the ServermapUpdater
1677         to keep track of versions of mutable files.
1678
1679         The verinfo tuple for MDMF files contains:
1680             - seqnum
1681             - root hash
1682             - a blank (MDMF has no whole-file salt)
1683             - segsize
1684             - datalen
1685             - k
1686             - n
1687             - prefix (the thing that you sign)
1688             - a tuple of offsets
1689
1690         We include the blank entry so that MDMF and SDMF version
1691         information tuples have the same shape.
1692
1693         The verinfo tuple for SDMF files is the same, but contains the
1694         file's 16-byte salt (IV) in place of the blank.
1695         """
1696         d = self._maybe_fetch_offsets_and_header()
1697         def _build_verinfo(ignored):
1698             if self._version_number == SDMF_VERSION:
1699                 salt_to_use = self._salt
1700             else:
1701                 salt_to_use = None
1702             return (self._sequence_number,
1703                     self._root_hash,
1704                     salt_to_use,
1705                     self._segment_size,
1706                     self._data_length,
1707                     self._required_shares,
1708                     self._total_shares,
1709                     self._build_prefix(),
1710                     self._get_offsets_tuple())
1711         d.addCallback(_build_verinfo)
1712         return d
1713
1714
1715     def flush(self):
1716         """
1717         I flush my queue of read vectors.
1718         """
1719         d = self._read(self._readvs)
1720         def _then(results):
1721             self._readvs = []
1722             if isinstance(results, failure.Failure):
1723                 self._queue_errbacks.notify(results)
1724             else:
1725                 self._queue_observers.notify(results)
1726             self._queue_observers = observer.ObserverList()
1727             self._queue_errbacks = observer.ObserverList()
1728         d.addBoth(_then)
1729
1730
1731     def _read(self, readvs, force_remote=False, queue=False):
1732         unsatisfiable = filter(lambda x: x[0] + x[1] > len(self._data), readvs)
1733         # TODO: It's entirely possible to tweak this so that it just
1734         # fulfills the requests that it can, and not demand that all
1735         # requests are satisfiable before running it.
1736         if not unsatisfiable and not force_remote:
1737             results = [self._data[offset:offset+length]
1738                        for (offset, length) in readvs]
1739             results = {self.shnum: results}
1740             return defer.succeed(results)
1741         else:
1742             if queue:
1743                 start = len(self._readvs)
1744                 self._readvs += readvs
1745                 end = len(self._readvs)
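                # Queued reads are batched: we remember where this call's
                # read vectors start and end in self._readvs, and when
                # flush() eventually runs the combined read, _get_results
                # slices this call's share of the results back out.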
1746                 def _get_results(results, start, end):
1747                     if self.shnum not in results:
1748                         return {self.shnum: [""]}
1749                     return {self.shnum: results[self.shnum][start:end]}
1750                 d = defer.Deferred()
1751                 d.addCallback(_get_results, start, end)
1752                 self._queue_observers.subscribe(d.callback)
1753                 self._queue_errbacks.subscribe(d.errback)
1754                 return d
1755             return self._rref.callRemote("slot_readv",
1756                                          self._storage_index,
1757                                          [self.shnum],
1758                                          readvs)
1759
1760
1761     def is_sdmf(self):
1762         """I tell my caller whether or not my remote file is SDMF or MDMF
1763         """
1764         d = self._maybe_fetch_offsets_and_header()
1765         d.addCallback(lambda ignored:
1766             self._version_number == 0)
1767         return d
1768
1769
1770 class LayoutInvalid(Exception):
1771     """
1772     This isn't a valid MDMF or SDMF mutable file layout
1773     """