1
2 import struct
3 from allmydata.mutable.common import NeedMoreDataError, UnknownVersionError
4 from allmydata.interfaces import HASH_SIZE, SALT_SIZE, SDMF_VERSION, \
5                                  MDMF_VERSION, IMutableSlotWriter
6 from allmydata.util import mathutil, observer
7 from twisted.python import failure
8 from twisted.internet import defer
9 from zope.interface import implements
10
11
12 # These strings describe the format of the packed structs they help process
13 # Here's what they mean:
14 #
15 #  PREFIX:
16 #    >: Big-endian byte order; the most significant byte is first (leftmost).
17 #    B: The version information; an 8-bit version identifier, stored as
18 #       an unsigned char. This is currently 0 for SDMF shares; the MDMF
19 #       modifications turn it into 1.
20 #    Q: The sequence number; this is sort of like a revision history for
21 #       mutable files; they start at 1 and increase as they are changed after
22 #       being uploaded. Stored as an unsigned long long, which is 8 bytes in
23 #       length.
24 #  32s: The root hash of the share hash tree. We use sha-256d, so we use 32 
25 #       characters = 32 bytes to store the value.
26 #  16s: The salt for the readkey. This is a 16-byte random value, stored as
27 #       16 characters.
28 #
29 #  SIGNED_PREFIX additions, things that are covered by the signature:
30 #    B: The "k" encoding parameter. We store this as an 8-bit character, 
31 #       which is convenient because our erasure coding scheme cannot 
32 #       encode if you ask for more than 255 pieces.
33 #    B: The "N" encoding parameter. Stored as an 8-bit character for the 
34 #       same reasons as above.
35 #    Q: The segment size of the uploaded file. This will essentially be the
36 #       length of the file in SDMF. An unsigned long long, so we can store 
37 #       files of quite large size.
38 #    Q: The data length of the uploaded file. Modulo padding, this will be
39 #       the same as the segment size field. Like the segment size field, it
40 #       is an unsigned long long and can be quite large.
41 #
42 #   HEADER additions:
43 #     L: The offset of the signature. An unsigned long.
44 #     L: The offset of the share hash chain. An unsigned long.
45 #     L: The offset of the block hash tree. An unsigned long.
46 #     L: The offset of the share data. An unsigned long.
47 #     Q: The offset of the encrypted private key. An unsigned long long, to
48 #        account for the possibility of a lot of share data.
49 #     Q: The offset of the EOF. An unsigned long long, to account for the
50 #        possibility of a lot of share data.
51
52 #  After all of these, we have the following:
53 #    - The verification key: Occupies the space between the end of the header
54 #      and the start of the signature (i.e.: data[HEADER_LENGTH:o['signature']]).
55 #    - The signature, which goes from the signature offset to the share hash
56 #      chain offset.
57 #    - The share hash chain, which goes from the share hash chain offset to
58 #      the block hash tree offset.
59 #    - The share data, which goes from the share data offset to the encrypted
60 #      private key offset.
61 #    - The encrypted private key, which runs from its offset to the end of the file.
62
63 #  The block hash tree in this encoding has only one leaf (SDMF files have
64 #  a single segment), so the offset of the share data will be 32 bytes more
65 #  than the offset of the block hash tree. Given this, we may need to check
66 #  to see how many bytes a reasonably sized block hash tree will take up.
67
68 PREFIX = ">BQ32s16s" # each version has a different prefix
69 SIGNED_PREFIX = ">BQ32s16s BBQQ" # this is covered by the signature
70 SIGNED_PREFIX_LENGTH = struct.calcsize(SIGNED_PREFIX)
71 HEADER = ">BQ32s16s BBQQ LLLLQQ" # includes offsets
72 HEADER_LENGTH = struct.calcsize(HEADER)
73 OFFSETS = ">LLLLQQ"
74 OFFSETS_LENGTH = struct.calcsize(OFFSETS)
75
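# A quick sanity check of the SDMF sizes implied by the format strings above
# (illustrative arithmetic only, derived from the struct formats themselves):
#
#   struct.calcsize(SIGNED_PREFIX)  ==  1+8+32+16 + 1+1+8+8  ==  75
#   struct.calcsize(OFFSETS)        ==  4*4 + 2*8            ==  32
#   struct.calcsize(HEADER)         ==  75 + 32              ==  107
#
# so a complete SDMF header (signed prefix plus offset table) is 107 bytes.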
76 # These are still used for some tests.
77 def unpack_header(data):
78     o = {}
79     (version,
80      seqnum,
81      root_hash,
82      IV,
83      k, N, segsize, datalen,
84      o['signature'],
85      o['share_hash_chain'],
86      o['block_hash_tree'],
87      o['share_data'],
88      o['enc_privkey'],
89      o['EOF']) = struct.unpack(HEADER, data[:HEADER_LENGTH])
90     return (version, seqnum, root_hash, IV, k, N, segsize, datalen, o)
91
92 def unpack_share(data):
93     assert len(data) >= HEADER_LENGTH
94     o = {}
95     (version,
96      seqnum,
97      root_hash,
98      IV,
99      k, N, segsize, datalen,
100      o['signature'],
101      o['share_hash_chain'],
102      o['block_hash_tree'],
103      o['share_data'],
104      o['enc_privkey'],
105      o['EOF']) = struct.unpack(HEADER, data[:HEADER_LENGTH])
106
107     if version != 0:
108         raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version)
109
110     if len(data) < o['EOF']:
111         raise NeedMoreDataError(o['EOF'],
112                                 o['enc_privkey'], o['EOF']-o['enc_privkey'])
113
114     pubkey = data[HEADER_LENGTH:o['signature']]
115     signature = data[o['signature']:o['share_hash_chain']]
116     share_hash_chain_s = data[o['share_hash_chain']:o['block_hash_tree']]
117     share_hash_format = ">H32s"
118     hsize = struct.calcsize(share_hash_format)
119     assert len(share_hash_chain_s) % hsize == 0, len(share_hash_chain_s)
120     share_hash_chain = []
121     for i in range(0, len(share_hash_chain_s), hsize):
122         chunk = share_hash_chain_s[i:i+hsize]
123         (hid, h) = struct.unpack(share_hash_format, chunk)
124         share_hash_chain.append( (hid, h) )
125     share_hash_chain = dict(share_hash_chain)
126     block_hash_tree_s = data[o['block_hash_tree']:o['share_data']]
127     assert len(block_hash_tree_s) % 32 == 0, len(block_hash_tree_s)
128     block_hash_tree = []
129     for i in range(0, len(block_hash_tree_s), 32):
130         block_hash_tree.append(block_hash_tree_s[i:i+32])
131
132     share_data = data[o['share_data']:o['enc_privkey']]
133     enc_privkey = data[o['enc_privkey']:o['EOF']]
134
135     return (seqnum, root_hash, IV, k, N, segsize, datalen,
136             pubkey, signature, share_hash_chain, block_hash_tree,
137             share_data, enc_privkey)
138
139 def unpack_checkstring(checkstring):
140     cs_len = struct.calcsize(PREFIX)
141     version, seqnum, root_hash, IV = struct.unpack(PREFIX, checkstring[:cs_len])
142     if version != 0: # TODO: just ignore the share
143         raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version)
144     return (seqnum, root_hash, IV)
145
146
147 def pack_offsets(verification_key_length, signature_length,
148                  share_hash_chain_length, block_hash_tree_length,
149                  share_data_length, encprivkey_length):
150     post_offset = HEADER_LENGTH
151     offsets = {}
152     o1 = offsets['signature'] = post_offset + verification_key_length
153     o2 = offsets['share_hash_chain'] = o1 + signature_length
154     o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length
155     o4 = offsets['share_data'] = o3 + block_hash_tree_length
156     o5 = offsets['enc_privkey'] = o4 + share_data_length
157     offsets['EOF'] = o5 + encprivkey_length
158
159     return struct.pack(">LLLLQQ",
160                        offsets['signature'],
161                        offsets['share_hash_chain'],
162                        offsets['block_hash_tree'],
163                        offsets['share_data'],
164                        offsets['enc_privkey'],
165                        offsets['EOF'])
166
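# For example (illustrative lengths only, not taken from a real share):
#
#   pack_offsets(292, 260, 34, 32, 1000, 1220)
#
# places the signature at 107+292 = 399, the share hash chain at 659, the
# block hash tree at 693, the share data at 725, the encrypted private key
# at 1725, and EOF at 2945, and returns those six values packed as ">LLLLQQ".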
167 def pack_share(prefix, verification_key, signature,
168                share_hash_chain, block_hash_tree,
169                share_data, encprivkey):
170     share_hash_chain_s = "".join([struct.pack(">H32s", i, share_hash_chain[i])
171                                   for i in sorted(share_hash_chain.keys())])
172     for h in block_hash_tree:
173         assert len(h) == 32
174     block_hash_tree_s = "".join(block_hash_tree)
175
176     offsets = pack_offsets(len(verification_key),
177                            len(signature),
178                            len(share_hash_chain_s),
179                            len(block_hash_tree_s),
180                            len(share_data),
181                            len(encprivkey))
182     final_share = "".join([prefix,
183                            offsets,
184                            verification_key,
185                            signature,
186                            share_hash_chain_s,
187                            block_hash_tree_s,
188                            share_data,
189                            encprivkey])
190     return final_share
191
192 def pack_prefix(seqnum, root_hash, IV,
193                 required_shares, total_shares,
194                 segment_size, data_length):
195     prefix = struct.pack(SIGNED_PREFIX,
196                          0, # version,
197                          seqnum,
198                          root_hash,
199                          IV,
200                          required_shares,
201                          total_shares,
202                          segment_size,
203                          data_length,
204                          )
205     return prefix
206
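# An illustrative round trip through the helpers above (a sketch only; the
# values are made up, and verification_key, signature, share_data and
# encprivkey stand for arbitrary byte strings):
#
#   prefix = pack_prefix(seqnum=1, root_hash='\x00'*32, IV='\x01'*16,
#                        required_shares=3, total_shares=10,
#                        segment_size=36, data_length=36)
#   share = pack_share(prefix, verification_key, signature,
#                      {0: '\x11'*32},   # share hash chain: shnum -> hash
#                      ['\x22'*32],      # block hash tree: list of hashes
#                      share_data, encprivkey)
#   (seqnum, root_hash, IV, k, N, segsize, datalen, pubkey, signature,
#    share_hash_chain, block_hash_tree, share_data,
#    encprivkey) = unpack_share(share)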
207
208 class SDMFSlotWriteProxy:
209     implements(IMutableSlotWriter)
210     """
211     I represent a remote write slot for an SDMF mutable file. I build a
212     share in memory, and then write it in one piece to the remote
213     server. This mimics how SDMF shares were built before MDMF (and the
214     new MDMF uploader), but provides that functionality in a way that
215     allows the MDMF uploader to be built without much special-casing for
216     file format, which makes the uploader code more readable.
217     """
218     def __init__(self,
219                  shnum,
220                  rref, # a remote reference to a storage server
221                  storage_index,
222                  secrets, # (write_enabler, renew_secret, cancel_secret)
223                  seqnum, # the sequence number of the mutable file
224                  required_shares,
225                  total_shares,
226                  segment_size,
227                  data_length): # the length of the original file
228         self.shnum = shnum
229         self._rref = rref
230         self._storage_index = storage_index
231         self._secrets = secrets
232         self._seqnum = seqnum
233         self._required_shares = required_shares
234         self._total_shares = total_shares
235         self._segment_size = segment_size
236         self._data_length = data_length
237
238         # This is an SDMF file, so it should have only one segment, so, 
239         # modulo padding of the data length, the segment size and the
240         # data length should be the same.
241         expected_segment_size = mathutil.next_multiple(data_length,
242                                                        self._required_shares)
243         assert expected_segment_size == segment_size
244
245         self._block_size = self._segment_size / self._required_shares
246
247         # This is meant to mimic how SDMF files were built before MDMF
248         # entered the picture: we generate each share in its entirety,
249         # then push it off to the storage server in one write. When
250         # callers call set_*, they are just populating this dict.
251         # finish_publishing will stitch these pieces together into a
252         # coherent share, and then write the coherent share to the
253         # storage server.
254         self._share_pieces = {}
255
256         # This tells the write logic what checkstring to use when
257         # writing remote shares.
258         self._testvs = []
259
260         self._readvs = [(0, struct.calcsize(PREFIX))]
261
262
263     def set_checkstring(self, checkstring_or_seqnum,
264                               root_hash=None,
265                               salt=None):
266         """
267         Set the checkstring that I will pass to the remote server when
268         writing.
269
270             @param checkstring_or_seqnum: A packed checkstring to use, or,
271                    if root_hash and salt are given, a seqnum to build one from.
272
273         Note that implementations can differ in which semantics they
274         wish to support for set_checkstring -- they can, for example,
275         build the checkstring themselves from its constituents, or
276         some other thing.
277         """
278         if root_hash and salt:
279             checkstring = struct.pack(PREFIX,
280                                       0,
281                                       checkstring_or_seqnum,
282                                       root_hash,
283                                       salt)
284         else:
285             checkstring = checkstring_or_seqnum
286         self._testvs = [(0, len(checkstring), "eq", checkstring)]
287
288
289     def get_checkstring(self):
290         """
291         Get the checkstring that I think currently exists on the remote
292         server.
293         """
294         if self._testvs:
295             return self._testvs[0][3]
296         return ""
297
298
299     def put_block(self, data, segnum, salt):
300         """
301         Add a block and salt to the share.
302         """
303         # SDMF files have only one segment
304         assert segnum == 0
305         assert len(data) == self._block_size
306         assert len(salt) == SALT_SIZE
307
308         self._share_pieces['sharedata'] = data
309         self._share_pieces['salt'] = salt
310
311         # TODO: Figure out something intelligent to return.
312         return defer.succeed(None)
313
314
315     def put_encprivkey(self, encprivkey):
316         """
317         Add the encrypted private key to the share.
318         """
319         self._share_pieces['encprivkey'] = encprivkey
320
321         return defer.succeed(None)
322
323
324     def put_blockhashes(self, blockhashes):
325         """
326         Add the block hash tree to the share.
327         """
328         assert isinstance(blockhashes, list)
329         for h in blockhashes:
330             assert len(h) == HASH_SIZE
331
332         # serialize the blockhashes, then set them.
333         blockhashes_s = "".join(blockhashes)
334         self._share_pieces['block_hash_tree'] = blockhashes_s
335
336         return defer.succeed(None)
337
338
339     def put_sharehashes(self, sharehashes):
340         """
341         Add the share hash chain to the share.
342         """
343         assert isinstance(sharehashes, dict)
344         for h in sharehashes.itervalues():
345             assert len(h) == HASH_SIZE
346
347         # serialize the sharehashes, then set them.
348         sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i])
349                                  for i in sorted(sharehashes.keys())])
350         self._share_pieces['share_hash_chain'] = sharehashes_s
351
352         return defer.succeed(None)
353
354
355     def put_root_hash(self, root_hash):
356         """
357         Add the root hash to the share.
358         """
359         assert len(root_hash) == HASH_SIZE
360
361         self._share_pieces['root_hash'] = root_hash
362
363         return defer.succeed(None)
364
365
366     def put_salt(self, salt):
367         """
368         Add a salt to an empty SDMF file.
369         """
370         assert len(salt) == SALT_SIZE
371
372         self._share_pieces['salt'] = salt
373         self._share_pieces['sharedata'] = ""
374
375
376     def get_signable(self):
377         """
378         Return the part of the share that needs to be signed.
379
380         SDMF writers need to sign the packed representation of the
381         first eight fields of the remote share, that is:
382             - version number (0)
383             - sequence number
384             - root of the share hash tree
385             - salt
386             - k
387             - n
388             - segsize
389             - datalen
390
391         This method is responsible for returning that to callers.
392         """
393         return struct.pack(SIGNED_PREFIX,
394                            0,
395                            self._seqnum,
396                            self._share_pieces['root_hash'],
397                            self._share_pieces['salt'],
398                            self._required_shares,
399                            self._total_shares,
400                            self._segment_size,
401                            self._data_length)
402
403
404     def put_signature(self, signature):
405         """
406         Add the signature to the share.
407         """
408         self._share_pieces['signature'] = signature
409
410         return defer.succeed(None)
411
412
413     def put_verification_key(self, verification_key):
414         """
415         Add the verification key to the share.
416         """
417         self._share_pieces['verification_key'] = verification_key
418
419         return defer.succeed(None)
420
421
422     def get_verinfo(self):
423         """
424         I return my verinfo tuple. This is used by the ServermapUpdater
425         to keep track of versions of mutable files.
426
427         The verinfo tuple for MDMF files contains:
428             - seqnum
429             - root hash
430             - a blank (nothing)
431             - segsize
432             - datalen
433             - k
434             - n
435             - prefix (the thing that you sign)
436             - a tuple of offsets
437
438         We include the nonce in MDMF to simplify processing of version
439         information tuples.
440
441         The verinfo tuple for SDMF files is the same, but contains a
442         16-byte IV instead of a hash of salts.
443         """
444         return (self._seqnum,
445                 self._share_pieces['root_hash'],
446                 self._share_pieces['salt'],
447                 self._segment_size,
448                 self._data_length,
449                 self._required_shares,
450                 self._total_shares,
451                 self.get_signable(),
452                 self._get_offsets_tuple())
453
454     def _get_offsets_dict(self):
455         post_offset = HEADER_LENGTH
456         offsets = {}
457
458         verification_key_length = len(self._share_pieces['verification_key'])
459         o1 = offsets['signature'] = post_offset + verification_key_length
460
461         signature_length = len(self._share_pieces['signature'])
462         o2 = offsets['share_hash_chain'] = o1 + signature_length
463
464         share_hash_chain_length = len(self._share_pieces['share_hash_chain'])
465         o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length
466
467         block_hash_tree_length = len(self._share_pieces['block_hash_tree'])
468         o4 = offsets['share_data'] = o3 + block_hash_tree_length
469
470         share_data_length = len(self._share_pieces['sharedata'])
471         o5 = offsets['enc_privkey'] = o4 + share_data_length
472
473         encprivkey_length = len(self._share_pieces['encprivkey'])
474         offsets['EOF'] = o5 + encprivkey_length
475         return offsets
476
477
478     def _get_offsets_tuple(self):
479         offsets = self._get_offsets_dict()
480         return tuple([(key, value) for key, value in offsets.items()])
481
482
483     def _pack_offsets(self):
484         offsets = self._get_offsets_dict()
485         return struct.pack(">LLLLQQ",
486                            offsets['signature'],
487                            offsets['share_hash_chain'],
488                            offsets['block_hash_tree'],
489                            offsets['share_data'],
490                            offsets['enc_privkey'],
491                            offsets['EOF'])
492
493
494     def finish_publishing(self):
495         """
496         Do anything necessary to finish writing the share to a remote
497         server. I require that no further publishing needs to take place
498         after this method has been called.
499         """
500         for k in ["sharedata", "encprivkey", "signature", "verification_key",
501                   "share_hash_chain", "block_hash_tree"]:
502             assert k in self._share_pieces, (k, self._share_pieces.keys())
503         # This is the only method that actually writes something to the
504         # remote server.
505         # First, we need to pack the share into data that we can write
506         # to the remote server in one write.
507         offsets = self._pack_offsets()
508         prefix = self.get_signable()
509         final_share = "".join([prefix,
510                                offsets,
511                                self._share_pieces['verification_key'],
512                                self._share_pieces['signature'],
513                                self._share_pieces['share_hash_chain'],
514                                self._share_pieces['block_hash_tree'],
515                                self._share_pieces['sharedata'],
516                                self._share_pieces['encprivkey']])
517
518         # Our only data vector is going to be writing the final share,
519         # in its entirety.
520         datavs = [(0, final_share)]
521
522         if not self._testvs:
523             # Our caller has not provided us with another checkstring
524             # yet, so we assume that we are writing a new share, and set
525             # a test vector that will allow a new share to be written.
526             self._testvs = []
527             self._testvs.append(tuple([0, 1, "eq", ""]))
528
529         tw_vectors = {}
530         tw_vectors[self.shnum] = (self._testvs, datavs, None)
531         return self._rref.callRemote("slot_testv_and_readv_and_writev",
532                                      self._storage_index,
533                                      self._secrets,
534                                      tw_vectors,
535                                      # TODO is it useful to read something?
536                                      self._readvs)
537
538
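# A sketch of how a caller might drive SDMFSlotWriteProxy (illustrative only;
# rref, secrets, salt, the share pieces and sign() are placeholders, and
# error handling is omitted):
#
#   w = SDMFSlotWriteProxy(shnum, rref, storage_index, secrets,
#                          seqnum, k, N, segment_size, data_length)
#   w.put_block(block, 0, salt)            # SDMF has exactly one segment
#   w.put_encprivkey(encprivkey)
#   w.put_blockhashes(block_hash_tree)     # list of 32-byte hashes
#   w.put_sharehashes(share_hash_chain)    # dict: shnum -> 32-byte hash
#   w.put_root_hash(root_hash)
#   signature = sign(w.get_signable())     # sign() stands in for the signer
#   w.put_signature(signature)
#   w.put_verification_key(verification_key)
#   d = w.finish_publishing()              # one write to the storage server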
539 MDMFHEADER = ">BQ32sBBQQ QQQQQQQQ"
540 MDMFHEADERWITHOUTOFFSETS = ">BQ32sBBQQ"
541 MDMFHEADERSIZE = struct.calcsize(MDMFHEADER)
542 MDMFHEADERWITHOUTOFFSETSSIZE = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
543 MDMFCHECKSTRING = ">BQ32s"
544 MDMFSIGNABLEHEADER = ">BQ32sBBQQ"
545 MDMFOFFSETS = ">QQQQQQQQ"
546 MDMFOFFSETS_LENGTH = struct.calcsize(MDMFOFFSETS)
547
548 PRIVATE_KEY_SIZE = 1220
549 SIGNATURE_SIZE = 260
550 VERIFICATION_KEY_SIZE = 292
551 # We know we won't have more than 256 shares, and we know that we won't need
552 # to store more than log2(256) = 8 hash-chain nodes to validate, so that's our
553 # bound. Each node requires 2 bytes of node-number plus 32 bytes of hash.
554 SHARE_HASH_CHAIN_SIZE = (2+HASH_SIZE)*mathutil.log_ceil(256, 2)
555
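# With the fixed sizes above, the constant-size region at the front of an
# MDMF share works out as follows (arithmetic derived from the constants in
# this file):
#
#   SHARE_HASH_CHAIN_SIZE == (2 + 32) * log_ceil(256, 2) == 34 * 8 == 272
#   123 (header) + 1220 (private key) + 260 (signature)
#     + 292 (verification key) + 272 (share hash chain)  == 2167
#
# so share data always begins at offset 2167 in shares written by
# MDMFSlotWriteProxy below.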
556 class MDMFSlotWriteProxy:
557     implements(IMutableSlotWriter)
558
559     """
560     I represent a remote write slot for an MDMF mutable file.
561
562     I abstract away from my caller the details of block and salt
563     management, and the implementation of the on-disk format for MDMF
564     shares.
565     """
566     # Expected layout, MDMF:
567     # offset:     size:       name:
568     #-- signed part --
569     # 0           1           version number (01)
570     # 1           8           sequence number 
571     # 9           32          share tree root hash
572     # 41          1           The "k" encoding parameter
573     # 42          1           The "N" encoding parameter
574     # 43          8           The segment size of the uploaded file
575     # 51          8           The data length of the original plaintext
576     #-- end signed part --
577     # 59          8           The offset of the encrypted private key
578     # 67          8           The offset of the share hash chain
579     # 75          8           The offset of the signature
580     # 83          8           The offset of the verification key
581     # 91          8           The offset of the end of the v. key
582     # 99          8           The offset of the share data
583     # 107         8           The offset of the block hash tree
584     # 115         8           The offset of EOF
585     # 
586     # followed by the encrypted private key, share hash chain, signature,
587     # verification key, share data, and block hash tree. We order the
588     # fields that way to make smart downloaders -- downloaders which
589     # preemptively read a big part of the share -- possible.
590     #
591     # The checkstring is the first three fields -- the version number,
592     # the sequence number, and the root hash. This is consistent in
593     # meaning with what we have for SDMF files, except that now, instead
594     # of using the literal salt, we use a value derived from all of the
595     # salts -- the share hash root.
596     # 
597     # The salt is stored before the block for each segment. The block
598     # hash tree is computed over the combination of block and salt for
599     # each segment. In this way, we get integrity checking for both
600     # block and salt with the current block hash tree arrangement.
601     # 
602     # The ordering of the offsets is different to reflect the dependencies
603     # that we'll run into with an MDMF file. The expected write flow is
604     # something like this:
605     #
606     #   0: Initialize with the sequence number, encoding parameters and
607     #      data length. From this, we can deduce the number of segments,
608     #      and where they should go.. We can also figure out where the
609     #      encrypted private key should go, because we can figure out how
610     #      big the share data will be.
611     # 
612     #   1: Encrypt, encode, and upload the file in chunks. Do something
613     #      like 
614     #
615     #       put_block(data, segnum, salt)
616     #
617     #      to write a block and a salt to the disk. We can do both of
618     #      these operations now because we have enough of the offsets to
619     #      know where to put them.
620     # 
621     #   2: Put the encrypted private key. Use:
622     #
623     #        put_encprivkey(encprivkey)
624     #
625     #      Now that we know the length of the private key, we can fill
626     #      in the offset for the block hash tree.
627     #
628     #   3: We're now in a position to upload the block hash tree for
629     #      a share. Put that using something like:
630     #       
631     #        put_blockhashes(block_hash_tree)
632     #
633     #      Note that block_hash_tree is a list of hashes -- we'll take
634     #      care of the details of serializing that appropriately. When
635     #      we get the block hash tree, we are also in a position to
636     #      calculate the offset for the share hash chain, and fill that
637     #      into the offsets table.
638     #
639     #   4: We're now in a position to upload the share hash chain for
640     #      a share. Do that with something like:
641     #      
642     #        put_sharehashes(share_hash_chain) 
643     #
644     #      share_hash_chain should be a dictionary mapping shnums to 
645     #      32-byte hashes -- the wrapper handles serialization.
646     #      We'll know where to put the signature at this point, also.
647     #      The root of this tree will be put explicitly in the next
648     #      step.
649     # 
650     #   5: Before putting the signature, we must first put the
651     #      root_hash. Do this with:
652     # 
653     #        put_root_hash(root_hash).
654     #      
655     #      In terms of knowing where to put this value, it was always
656     #      possible to place it, but it makes sense semantically to
657     #      place it after the share hash tree, so that's why you do it
658     #      in this order.
659     #
660     #   6: With the root hash put, we can now sign the header. Use:
661     #
662     #        get_signable()
663     #
664     #      to get the part of the header that you want to sign, and use:
665     #       
666     #        put_signature(signature)
667     #
668     #      to write your signature to the remote server.
669     #
670     #   7: Add the verification key, and finish. Do:
671     #
672     #        put_verification_key(key)
673     #
674     #      and
675     #
676     #        finish_publishing()   (a condensed sketch of this flow appears below)
677     #
678     # Checkstring management:
679     # 
680     # To write to a mutable slot, we have to provide test vectors to ensure
681     # that we are writing to the same data that we think we are. These
682     # vectors allow us to detect uncoordinated writes; that is, writes
683     # where both we and some other shareholder are writing to the
684     # mutable slot, and to report those back to the parts of the program
685     # doing the writing. 
686     #
687     # With SDMF, this was easy -- all of the share data was written in
688     # one go, so it was easy to detect uncoordinated writes, and we only
689     # had to do it once. With MDMF, not all of the file is written at
690     # once.
691     #
692     # If a share is new, we write out as much of the header as we can
693     # before writing out anything else. This gives other writers a
694     # canary that they can use to detect uncoordinated writes, and, if
695     # they do the same thing, gives us the same canary. We then update
696     # the share. We won't be able to write out two fields of the header
697     # -- the share tree hash and the salt hash -- until we finish
698     # writing out the share. We only require the writer to provide the
699     # initial checkstring, and keep track of what it should be after
700     # updates ourselves.
701     #
702     # If we haven't written anything yet, then on the first write (which
703     # will probably be a block + salt of a share), we'll also write out
704     # the header. On subsequent passes, we'll expect to see the header.
705     # This changes in two places:
706     #
707     #   - When we write out the salt hash
708     #   - When we write out the root of the share hash tree
709     #
710     # since these values will change the header. It is possible that we 
711     # can just make those be written in one operation to minimize
712     # disruption.
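    # A condensed sketch of the write flow described above (illustrative
    # only; rref, secrets, the share pieces and sign() are placeholders):
    #
    #   w = MDMFSlotWriteProxy(shnum, rref, storage_index, secrets,
    #                          seqnum, k, N, segment_size, data_length)
    #   for segnum in range(num_segments):
    #       w.put_block(blocks[segnum], segnum, salts[segnum])
    #   w.put_encprivkey(encprivkey)
    #   w.put_blockhashes(block_hash_tree)     # list of 32-byte hashes
    #   w.put_sharehashes(share_hash_chain)    # dict: shnum -> 32-byte hash
    #   w.put_root_hash(root_hash)
    #   signature = sign(w.get_signable())     # sign() stands in for the signer
    #   w.put_signature(signature)
    #   w.put_verification_key(verification_key)
    #   d = w.finish_publishing()              # flushes all queued write vectors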
713     def __init__(self,
714                  shnum,
715                  rref, # a remote reference to a storage server
716                  storage_index,
717                  secrets, # (write_enabler, renew_secret, cancel_secret)
718                  seqnum, # the sequence number of the mutable file
719                  required_shares,
720                  total_shares,
721                  segment_size,
722                  data_length): # the length of the original file
723         self.shnum = shnum
724         self._rref = rref
725         self._storage_index = storage_index
726         self._seqnum = seqnum
727         self._required_shares = required_shares
728         assert self.shnum >= 0 and self.shnum < total_shares
729         self._total_shares = total_shares
730         # We build up the offset table as we write things. It is the
731         # last thing we write to the remote server. 
732         self._offsets = {}
733         self._testvs = []
734         # This is a list of write vectors that will be sent to our
735         # remote server once we are directed to write things there.
736         self._writevs = []
737         self._secrets = secrets
738         # The segment size needs to be a multiple of the k parameter --
739         # any padding should have been carried out by the publisher
740         # already.
741         assert segment_size % required_shares == 0
742         self._segment_size = segment_size
743         self._data_length = data_length
744
745         # These are set later -- we define them here so that we can
746         # check for their existence easily
747
748         # This is the root of the share hash tree -- the Merkle tree
749         # over the roots of the block hash trees computed for shares in
750         # this upload.
751         self._root_hash = None
752
753         # We haven't yet written anything to the remote bucket. By
754         # setting this, we tell the _write method as much. The write
755         # method will then know that it also needs to add a write vector
756         # for the checkstring (or what we have of it) to the first write
757         # request. We'll then record that value for future use.  If
758         # we're expecting something to be there already, we need to call
759         # set_checkstring before we write anything to tell the first
760         # write about that.
761         self._written = False
762
763         # When writing data to the storage servers, we get a read vector
764         # for free. We'll read the checkstring, which will help us
765         # figure out what's gone wrong if a write fails.
766         self._readv = [(0, struct.calcsize(MDMFCHECKSTRING))]
767
768         # We calculate the number of segments because it tells us
769         # where the salt part of the file ends/share segment begins,
770         # and also because it provides a useful amount of bounds checking.
771         self._num_segments = mathutil.div_ceil(self._data_length,
772                                                self._segment_size)
773         self._block_size = self._segment_size / self._required_shares
774         # We also calculate the share size, to help us with block
775         # constraints later.
776         tail_size = self._data_length % self._segment_size
777         if not tail_size:
778             self._tail_block_size = self._block_size
779         else:
780             self._tail_block_size = mathutil.next_multiple(tail_size,
781                                                            self._required_shares)
782             self._tail_block_size /= self._required_shares
783
784         # The encrypted private key goes right after the end of the header
785         # (which is defined as the signable part + the offsets table). We
786         # can also calculate where the share data begins from what we now
787         # know.
788         self._actual_block_size = self._block_size + SALT_SIZE
789         data_size = self._actual_block_size * (self._num_segments - 1)
790         data_size += self._tail_block_size
791         data_size += SALT_SIZE
792         self._offsets['enc_privkey'] = MDMFHEADERSIZE
793
794         # We don't define offsets for these because we want them to be
795         # tightly packed -- this allows us to ignore the responsibility
796         # of padding individual values, and of removing that padding
797         # later. So nonconstant_start is where we start writing
798         # nonconstant data.
799         nonconstant_start = self._offsets['enc_privkey']
800         nonconstant_start += PRIVATE_KEY_SIZE
801         nonconstant_start += SIGNATURE_SIZE
802         nonconstant_start += VERIFICATION_KEY_SIZE
803         nonconstant_start += SHARE_HASH_CHAIN_SIZE
804
805         self._offsets['share_data'] = nonconstant_start
806
807         # Finally, we know how big the share data will be, so we can
808         # figure out where the block hash tree needs to go.
809         # XXX: But this will go away if Zooko wants to make it so that
810         # you don't need to know the size of the file before you start
811         # uploading it.
812         self._offsets['block_hash_tree'] = self._offsets['share_data'] + \
813                     data_size
814
815         # Done. We can now start writing.
816
817
818     def set_checkstring(self,
819                         seqnum_or_checkstring,
820                         root_hash=None,
821                         salt=None):
822         """
823         Set the checkstring for the given shnum.
824
825         This can be invoked in one of two ways.
826
827         With one argument, I assume that you are giving me a literal
828         checkstring -- e.g., the output of get_checkstring. I will then
829         set that checkstring as it is. This form is used by unit tests.
830
831         With two arguments, I assume that you are giving me a sequence
832         number and root hash to make a checkstring from. In that case, I
833         will build a checkstring and set it for you. This form is used
834         by the publisher.
835
836         By default, I assume that I am writing new shares to the grid.
837         If you don't explicitly set your own checkstring, I will use
838         one that requires that the remote share not exist. You will want
839         to use this method if you are updating a share in-place;
840         otherwise, writes will fail.
841         """
842         # You're allowed to overwrite checkstrings with this method;
843         # I assume that users know what they are doing when they call
844         # it.
845         if root_hash:
846             checkstring = struct.pack(MDMFCHECKSTRING,
847                                       1,
848                                       seqnum_or_checkstring,
849                                       root_hash)
850         else:
851             checkstring = seqnum_or_checkstring
852
853         if checkstring == "":
854             # An empty checkstring means "I expect an empty (new) share".
855             # We cannot express that as a zero-length test vector, so we
856             # leave self._testvs empty here; _write will supply the
857             # (0, 1, "eq", "") test vector that the storage server expects.
858             self._testvs = []
859         else:
860             self._testvs = []
861             self._testvs.append((0, len(checkstring), "eq", checkstring))
862
863
864     def __repr__(self):
865         return "MDMFSlotWriteProxy for share %d" % self.shnum
866
867
868     def get_checkstring(self):
869         """
870         I return a representation of what the checkstring for this
871         share on the server will look like.
872
873         I am mostly used for tests.
874         """
875         if self._root_hash:
876             roothash = self._root_hash
877         else:
878             roothash = "\x00" * 32
879         return struct.pack(MDMFCHECKSTRING,
880                            1,
881                            self._seqnum,
882                            roothash)
883
884
885     def put_block(self, data, segnum, salt):
886         """
887         I queue a write vector for the data, salt, and segment number
888         provided to me. I return None, as I do not actually cause
889         anything to be written yet.
890         """
891         if segnum >= self._num_segments:
892             raise LayoutInvalid("I won't overwrite the block hash tree")
893         if len(salt) != SALT_SIZE:
894             raise LayoutInvalid("I was given a salt of size %d, but "
895                                 "I wanted a salt of size %d" % (len(salt), SALT_SIZE))
896         if segnum + 1 == self._num_segments:
897             if len(data) != self._tail_block_size:
898                 raise LayoutInvalid("I was given the wrong size block to write")
899         elif len(data) != self._block_size:
900             raise LayoutInvalid("I was given the wrong size block to write")
901
902         # We write at the share data offset plus segnum * (block size + salt size).
903         offset = self._offsets['share_data'] + \
904             (self._actual_block_size * segnum)
905         data = salt + data
906
907         self._writevs.append(tuple([offset, data]))
908
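    # Worked example for put_block above (illustrative numbers only): with
    # required_shares=3 and segment_size=393216, block_size is 131072, each
    # on-disk block occupies 131072 + 16 (salt) == 131088 bytes, and segment
    # 2 is therefore queued at offset share_data + 2*131088 == share_data + 262176.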
909
910     def put_encprivkey(self, encprivkey):
911         """
912         I queue a write vector for the encrypted private key provided to
913         me.
914         """
915         assert self._offsets
916         assert self._offsets['enc_privkey']
917         # You shouldn't re-write the encprivkey after the share hash
918         # chain is written, since that could cause the private key to run
919         # into the share hash chain. put_sharehashes records the offset of
920         # the signature when it writes the share hash chain, so the
921         # presence of that offset is a good indicator of whether or not
922         # the share hash chain has been written.
923         if "signature" in self._offsets:
924             raise LayoutInvalid("You can't put the encrypted private key "
925                                 "after putting the share hash chain")
926
927         self._offsets['share_hash_chain'] = self._offsets['enc_privkey'] + \
928                 len(encprivkey)
929
930         self._writevs.append(tuple([self._offsets['enc_privkey'], encprivkey]))
931
932
933     def put_blockhashes(self, blockhashes):
934         """
935         I queue a write vector to put the block hash tree in blockhashes
936         onto the remote server.
937
938         The encrypted private key must be queued before the block hash
939         tree, since we need to know how large it is to know where the
940         block hash tree should go. The block hash tree must be put
941         before the share hash chain, since its size determines the
942         offset of the share hash chain.
943         """
944         assert self._offsets
945         assert "block_hash_tree" in self._offsets
946
947         assert isinstance(blockhashes, list)
948
949         blockhashes_s = "".join(blockhashes)
950         self._offsets['EOF'] = self._offsets['block_hash_tree'] + len(blockhashes_s)
951
952         self._writevs.append(tuple([self._offsets['block_hash_tree'],
953                                   blockhashes_s]))
954
955
956     def put_sharehashes(self, sharehashes):
957         """
958         I queue a write vector to put the share hash chain in my
959         argument onto the remote server.
960
961         The block hash tree must be queued before the share hash chain,
962         since we need to know where the block hash tree ends before we
963         can know where the share hash chain starts. The share hash chain
964         must be put before the signature, since the length of the packed
965         share hash chain determines the offset of the signature. Also,
966         semantically, you must know what the root of the block hash tree
967         is before you can generate a valid signature.
968         """
969         assert isinstance(sharehashes, dict)
970         assert self._offsets
971         if "share_hash_chain" not in self._offsets:
972             raise LayoutInvalid("You must put the block hash tree before "
973                                 "putting the share hash chain")
974
975         # The signature comes after the share hash chain. If the
976         # signature has already been written, we must not write another
977         # share hash chain. The signature writes the verification key
978         # offset when it gets sent to the remote server, so we look for
979         # that.
980         if "verification_key" in self._offsets:
981             raise LayoutInvalid("You must write the share hash chain "
982                                 "before you write the signature")
983         sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i])
984                                   for i in sorted(sharehashes.keys())])
985         self._offsets['signature'] = self._offsets['share_hash_chain'] + \
986             len(sharehashes_s)
987         self._writevs.append(tuple([self._offsets['share_hash_chain'],
988                             sharehashes_s]))
989
990
991     def put_root_hash(self, roothash):
992         """
993         Put the root hash (the root of the share hash tree) in the
994         remote slot.
995         """
996         # It does not make sense to be able to put the root 
997         # hash without first putting the share hashes, since you need
998         # the share hashes to generate the root hash.
999         #
1000         # Signature is defined by the routine that places the share hash
1001         # chain, so it's a good thing to look for in finding out whether
1002         # or not the share hash chain exists on the remote server.
1003         if len(roothash) != HASH_SIZE:
1004             raise LayoutInvalid("hashes and salts must be exactly %d bytes"
1005                                  % HASH_SIZE)
1006         self._root_hash = roothash
1007         # To write both of these values, we update the checkstring on
1008         # the remote server, which includes them
1009         checkstring = self.get_checkstring()
1010         self._writevs.append(tuple([0, checkstring]))
1011         # This write, if successful, changes the checkstring, so we need
1012         # to update our internal checkstring to be consistent with the
1013         # one on the server.
1014
1015
1016     def get_signable(self):
1017         """
1018         Get the first seven fields of the mutable file; the parts that
1019         are signed.
1020         """
1021         if not self._root_hash:
1022             raise LayoutInvalid("You need to set the root hash "
1023                                 "before getting something to "
1024                                 "sign")
1025         return struct.pack(MDMFSIGNABLEHEADER,
1026                            1,
1027                            self._seqnum,
1028                            self._root_hash,
1029                            self._required_shares,
1030                            self._total_shares,
1031                            self._segment_size,
1032                            self._data_length)
1033
1034
1035     def put_signature(self, signature):
1036         """
1037         I queue a write vector for the signature of the MDMF share.
1038
1039         I require that the root hash and share hash chain have been put
1040         to the grid before I will write the signature to the grid.
1041         """
1042         # It does not make sense to put a signature without first
1043         # putting the root hash and the salt hash (since otherwise
1044         # the signature would be incomplete), so we don't allow that.
1045         if "signature" not in self._offsets:
1046             raise LayoutInvalid("You must put the share hash chain "
1047                                 "before putting the signature")
1048         if not self._root_hash:
1049             raise LayoutInvalid("You must complete the signed prefix "
1050                                 "before computing a signature")
1051         # If we put the signature after we put the verification key, we
1052         # could end up running into the verification key, and will
1053         # probably screw up the offsets as well. So we don't allow that.
1054         if "verification_key_end" in self._offsets:
1055             raise LayoutInvalid("You can't put the signature after the "
1056                                 "verification key")
1057         # The method that writes the verification key defines the EOF
1058         # offset before writing the verification key, so look for that.
1059         self._offsets['verification_key'] = self._offsets['signature'] +\
1060             len(signature)
1061         self._writevs.append(tuple([self._offsets['signature'], signature]))
1062
1063
1064     def put_verification_key(self, verification_key):
1065         """
1066         I queue a write vector for the verification key.
1067
1068         I require that the signature have been written to the storage
1069         server before I allow the verification key to be written to the
1070         remote server.
1071         """
1072         if "verification_key" not in self._offsets:
1073             raise LayoutInvalid("You must put the signature before you "
1074                                 "can put the verification key")
1075
1076         self._offsets['verification_key_end'] = \
1077             self._offsets['verification_key'] + len(verification_key)
1078         assert self._offsets['verification_key_end'] <= self._offsets['share_data']
1079         self._writevs.append(tuple([self._offsets['verification_key'],
1080                             verification_key]))
1081
1082
1083     def _get_offsets_tuple(self):
1084         return tuple([(key, value) for key, value in self._offsets.items()])
1085
1086
1087     def get_verinfo(self):
1088         return (self._seqnum,
1089                 self._root_hash,
1090                 self._required_shares,
1091                 self._total_shares,
1092                 self._segment_size,
1093                 self._data_length,
1094                 self.get_signable(),
1095                 self._get_offsets_tuple())
1096
1097
1098     def finish_publishing(self):
1099         """
1100         I add a write vector for the offsets table, and then cause all
1101         of the write vectors that I've dealt with so far to be published
1102         to the remote server, ending the write process.
1103         """
1104         if "verification_key_end" not in self._offsets:
1105             raise LayoutInvalid("You must put the verification key before "
1106                                 "you can publish the offsets")
1107         offsets_offset = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
1108         offsets = struct.pack(MDMFOFFSETS,
1109                               self._offsets['enc_privkey'],
1110                               self._offsets['share_hash_chain'],
1111                               self._offsets['signature'],
1112                               self._offsets['verification_key'],
1113                               self._offsets['verification_key_end'],
1114                               self._offsets['share_data'],
1115                               self._offsets['block_hash_tree'],
1116                               self._offsets['EOF'])
1117         self._writevs.append(tuple([offsets_offset, offsets]))
1118         encoding_parameters_offset = struct.calcsize(MDMFCHECKSTRING)
1119         params = struct.pack(">BBQQ",
1120                              self._required_shares,
1121                              self._total_shares,
1122                              self._segment_size,
1123                              self._data_length)
1124         self._writevs.append(tuple([encoding_parameters_offset, params]))
1125         return self._write(self._writevs)
1126
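    # For reference, the two fixed write positions used by finish_publishing
    # above follow directly from the format strings at the top of this file:
    #
    #   struct.calcsize(MDMFCHECKSTRING)          == 1+8+32      == 41
    #   struct.calcsize(MDMFHEADERWITHOUTOFFSETS) == 41+1+1+8+8  == 59
    #
    # i.e. the encoding parameters are rewritten at byte 41 and the offset
    # table at byte 59, immediately after the checkstring and the signable
    # header respectively.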
1127
1128     def _write(self, datavs, on_failure=None, on_success=None):
1129         """I write the data vectors in datavs to the remote slot."""
1130         tw_vectors = {}
1131         if not self._testvs:
1132             self._testvs = []
1133             self._testvs.append(tuple([0, 1, "eq", ""]))
1134         if not self._written:
1135             # Write a new checkstring to the share when we write it, so
1136             # that we have something to check later.
1137             new_checkstring = self.get_checkstring()
1138             datavs.append((0, new_checkstring))
1139             def _first_write():
1140                 self._written = True
1141                 self._testvs = [(0, len(new_checkstring), "eq", new_checkstring)]
1142             on_success = _first_write
1143         tw_vectors[self.shnum] = (self._testvs, datavs, None)
1144         d = self._rref.callRemote("slot_testv_and_readv_and_writev",
1145                                   self._storage_index,
1146                                   self._secrets,
1147                                   tw_vectors,
1148                                   self._readv)
1149         def _result(results):
1150             if isinstance(results, failure.Failure) or not results[0]:
1151                 # Do nothing; the write was unsuccessful.
1152                 if on_failure: on_failure()
1153             else:
1154                 if on_success: on_success()
1155             return results
1156         d.addCallback(_result)
1157         return d
1158
1159
1160 class MDMFSlotReadProxy:
1161     """
1162     I read from a mutable slot filled with data written in the MDMF data
1163     format (which is described above).
1164
1165     I can be initialized with some amount of data, which I will use (if
1166     it is valid) to eliminate some of the need to fetch it from servers.
1167     """
1168     def __init__(self,
1169                  rref,
1170                  storage_index,
1171                  shnum,
1172                  data=""):
1173         # Start the initialization process.
1174         self._rref = rref
1175         self._storage_index = storage_index
1176         self.shnum = shnum
1177
1178         # Before doing anything, the reader is probably going to want to
1179         # verify that the signature is correct. To do that, they'll need
1180         # the verification key, and the signature. To get those, we'll
1181         # need the offset table. So fetch the offset table on the
1182         # assumption that that will be the first thing that a reader is
1183         # going to do.
1184
1185         # The fact that these encoding parameters are None tells us
1186         # that we haven't yet fetched them from the remote share, so we
1187         # should. We could just not set them, but the checks will be
1188         # easier to read if we don't have to use hasattr.
1189         self._version_number = None
1190         self._sequence_number = None
1191         self._root_hash = None
1192         # Filled in if we're dealing with an SDMF file. Unused
1193         # otherwise.
1194         self._salt = None
1195         self._required_shares = None
1196         self._total_shares = None
1197         self._segment_size = None
1198         self._data_length = None
1199         self._offsets = None
1200
1201         # If the user has chosen to initialize us with some data, we'll
1202         # try to satisfy subsequent data requests with that data before
1203         # asking the storage server for it.
1204         self._data = data
1205         # The way callers interact with cache in the filenode returns
1206         # None if there isn't any cached data, but the way we index the
1207         # cached data requires a string, so convert None to "".
1208         if self._data == None:
1209             self._data = ""
1210
1211         self._queue_observers = observer.ObserverList()
1212         self._queue_errbacks = observer.ObserverList()
1213         self._readvs = []
1214
1215
1216     def _maybe_fetch_offsets_and_header(self, force_remote=False):
1217         """
1218         I fetch the offset table and the header from the remote slot if
1219         I don't already have them. If I do have them, I do nothing and
1220         return an empty Deferred.
1221         """
1222         if self._offsets:
1223             return defer.succeed(None)
1224         # At this point, we may be either SDMF or MDMF. Fetching 123
1225         # bytes is enough to get the header and offsets for both SDMF
1226         # (107 bytes) and MDMF (123 bytes), though we'll be left with 16
1227         # more bytes than we need if this ends up being SDMF. This is
1228         # probably less expensive than the cost of a second roundtrip.
1229         readvs = [(0, 123)]
1230         d = self._read(readvs, force_remote)
1231         d.addCallback(self._process_encoding_parameters)
1232         d.addCallback(self._process_offsets)
1233         return d
1234
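    # The 123-byte figure used above breaks down as follows (arithmetic
    # derived from the format strings at the top of this file):
    #
    #   SDMF: SIGNED_PREFIX (75 bytes) + OFFSETS (32 bytes)  == 107 bytes
    #   MDMF: MDMFHEADER                                     == 123 bytes
    #
    # so a single 123-byte read always covers the header and the offset
    # table, with 16 spare bytes in the SDMF case.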
1235
1236     def _process_encoding_parameters(self, encoding_parameters):
1237         assert self.shnum in encoding_parameters
1238         encoding_parameters = encoding_parameters[self.shnum][0]
1239         # The first byte is the version number. It will tell us what
1240         # to do next.
1241         (verno,) = struct.unpack(">B", encoding_parameters[:1])
1242         if verno == MDMF_VERSION:
1243             read_size = MDMFHEADERWITHOUTOFFSETSSIZE
1244             (verno,
1245              seqnum,
1246              root_hash,
1247              k,
1248              n,
1249              segsize,
1250              datalen) = struct.unpack(MDMFHEADERWITHOUTOFFSETS,
1251                                       encoding_parameters[:read_size])
1252             if segsize == 0 and datalen == 0:
1253                 # Empty file, no segments.
1254                 self._num_segments = 0
1255             else:
1256                 self._num_segments = mathutil.div_ceil(datalen, segsize)
1257
1258         elif verno == SDMF_VERSION:
1259             read_size = SIGNED_PREFIX_LENGTH
1260             (verno,
1261              seqnum,
1262              root_hash,
1263              salt,
1264              k,
1265              n,
1266              segsize,
1267              datalen) = struct.unpack(">BQ32s16s BBQQ",
1268                                 encoding_parameters[:SIGNED_PREFIX_LENGTH])
1269             self._salt = salt
1270             if segsize == 0 and datalen == 0:
1271                 # empty file
1272                 self._num_segments = 0
1273             else:
1274                 # non-empty SDMF files have one segment.
1275                 self._num_segments = 1
1276         else:
1277             raise UnknownVersionError("You asked me to read mutable file "
1278                                       "version %d, but I only understand "
1279                                       "%d and %d" % (verno, SDMF_VERSION,
1280                                                      MDMF_VERSION))
1281
1282         self._version_number = verno
1283         self._sequence_number = seqnum
1284         self._root_hash = root_hash
1285         self._required_shares = k
1286         self._total_shares = n
1287         self._segment_size = segsize
1288         self._data_length = datalen
1289
1290         self._block_size = self._segment_size / self._required_shares
1291         # We can upload empty files, and need to account for this fact
1292         # so as to avoid zero-division and zero-modulo errors.
1293         if datalen > 0:
1294             tail_size = self._data_length % self._segment_size
1295         else:
1296             tail_size = 0
1297         if not tail_size:
1298             self._tail_block_size = self._block_size
1299         else:
1300             self._tail_block_size = mathutil.next_multiple(tail_size,
1301                                                     self._required_shares)
1302             self._tail_block_size /= self._required_shares
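        # Illustrative arithmetic with hypothetical numbers: for k = 3,
        # segsize = 12 and datalen = 28, block_size is 12 / 3 = 4,
        # tail_size is 28 % 12 = 4, and tail_block_size is
        # next_multiple(4, 3) / 3 = 6 / 3 = 2.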
1303
1304         return encoding_parameters
1305
1306
1307     def _process_offsets(self, offsets):
1308         if self._version_number == 0:
1309             read_size = OFFSETS_LENGTH
1310             read_offset = SIGNED_PREFIX_LENGTH
1311             end = read_size + read_offset
1312             (signature,
1313              share_hash_chain,
1314              block_hash_tree,
1315              share_data,
1316              enc_privkey,
1317              EOF) = struct.unpack(">LLLLQQ",
1318                                   offsets[read_offset:end])
1319             self._offsets = {}
1320             self._offsets['signature'] = signature
1321             self._offsets['share_data'] = share_data
1322             self._offsets['block_hash_tree'] = block_hash_tree
1323             self._offsets['share_hash_chain'] = share_hash_chain
1324             self._offsets['enc_privkey'] = enc_privkey
1325             self._offsets['EOF'] = EOF
1326
1327         elif self._version_number == 1:
1328             read_offset = MDMFHEADERWITHOUTOFFSETSSIZE
1329             read_length = MDMFOFFSETS_LENGTH
1330             end = read_offset + read_length
1331             (encprivkey,
1332              sharehashes,
1333              signature,
1334              verification_key,
1335              verification_key_end,
1336              sharedata,
1337              blockhashes,
1338              eof) = struct.unpack(MDMFOFFSETS,
1339                                   offsets[read_offset:end])
1340             self._offsets = {}
1341             self._offsets['enc_privkey'] = encprivkey
1342             self._offsets['block_hash_tree'] = blockhashes
1343             self._offsets['share_hash_chain'] = sharehashes
1344             self._offsets['signature'] = signature
1345             self._offsets['verification_key'] = verification_key
1346             self._offsets['verification_key_end'] = \
1347                 verification_key_end
1348             self._offsets['EOF'] = eof
1349             self._offsets['share_data'] = sharedata
1350
1351
1352     def get_block_and_salt(self, segnum, queue=False):
1353         """
1354         I return (block, salt), where block is the block data and
1355         salt is the salt used to encrypt that segment.
1356         """
1357         d = self._maybe_fetch_offsets_and_header()
1358         def _then(ignored):
1359             base_share_offset = self._offsets['share_data']
1360
1361             if segnum + 1 > self._num_segments:
1362                 raise LayoutInvalid("Not a valid segment number")
1363
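            # In MDMF (version 1) each block is stored with its own
            # 16-byte salt immediately in front of it, so blocks are
            # spaced block_size + SALT_SIZE apart; in SDMF (version 0)
            # the single salt lives in the header and blocks are packed
            # back to back.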
1364             if self._version_number == 0:
1365                 share_offset = base_share_offset + self._block_size * segnum
1366             else:
1367                 share_offset = base_share_offset + (self._block_size + \
1368                                                     SALT_SIZE) * segnum
1369             if segnum + 1 == self._num_segments:
1370                 data = self._tail_block_size
1371             else:
1372                 data = self._block_size
1373
1374             if self._version_number == 1:
1375                 data += SALT_SIZE
1376
1377             readvs = [(share_offset, data)]
1378             return readvs
1379         d.addCallback(_then)
1380         d.addCallback(lambda readvs:
1381             self._read(readvs, queue=queue))
1382         def _process_results(results):
1383             assert self.shnum in results
1384             if self._version_number == 0:
1385                 # We only read the share data, but we know the salt from
1386                 # when we fetched the header
1387                 data = results[self.shnum]
1388                 if not data:
1389                     data = ""
1390                 else:
1391                     assert len(data) == 1
1392                     data = data[0]
1393                 salt = self._salt
1394             else:
1395                 data = results[self.shnum]
1396                 if not data:
1397                     salt = data = ""
1398                 else:
1399                     salt_and_data = results[self.shnum][0]
1400                     salt = salt_and_data[:SALT_SIZE]
1401                     data = salt_and_data[SALT_SIZE:]
1402             return data, salt
1403         d.addCallback(_process_results)
1404         return d
1405
1406
1407     def get_blockhashes(self, needed=None, queue=False, force_remote=False):
1408         """
1409         I return the block hash tree
1410
1411         I take an optional argument, needed, which is a set of indices
1412         corresponding to hashes that I should fetch. If this argument is
1413         missing, I will fetch the entire block hash tree; otherwise, I
1414         may attempt to fetch fewer hashes, based on what needed says
1415         that I should do. Note that I may fetch as many hashes as I
1416         want, so long as the set of hashes that I do fetch is a superset
1417         of the ones that I am asked for, so callers should be prepared
1418         to tolerate additional hashes.
1419         """
1420         # TODO: Return only the parts of the block hash tree necessary
1421         # to validate the blocknum provided?
1422         # This is a good idea, but it is hard to implement correctly. It
1423         # is bad to fetch any one block hash more than once, so we
1424         # probably just want to fetch the whole thing at once and then
1425         # serve it.
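        # A minimal usage sketch; the 'reader' instance and the callback
        # are hypothetical, not part of this module:
        #
        #   d = reader.get_blockhashes()
        #   def _got(hashes):
        #       # hashes is a list of HASH_SIZE-byte strings sliced out
        #       # of the stored block hash tree
        #       return hashes
        #   d.addCallback(_got)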
1426         if needed == set([]):
1427             return defer.succeed([])
1428         d = self._maybe_fetch_offsets_and_header()
1429         def _then(ignored):
1430             blockhashes_offset = self._offsets['block_hash_tree']
1431             if self._version_number == 1:
1432                 blockhashes_length = self._offsets['EOF'] - blockhashes_offset
1433             else:
1434                 blockhashes_length = self._offsets['share_data'] - blockhashes_offset
1435             readvs = [(blockhashes_offset, blockhashes_length)]
1436             return readvs
1437         d.addCallback(_then)
1438         d.addCallback(lambda readvs:
1439             self._read(readvs, queue=queue, force_remote=force_remote))
1440         def _build_block_hash_tree(results):
1441             assert self.shnum in results
1442
1443             rawhashes = results[self.shnum][0]
1444             results = [rawhashes[i:i+HASH_SIZE]
1445                        for i in range(0, len(rawhashes), HASH_SIZE)]
1446             return results
1447         d.addCallback(_build_block_hash_tree)
1448         return d
1449
1450
1451     def get_sharehashes(self, needed=None, queue=False, force_remote=False):
1452         """
1453         I return the part of the share hash chain needed to validate
1454         this share.
1455
1456         I take an optional argument, needed. Needed is a set of indices
1457         that correspond to the hashes that I should fetch. If needed is
1458         not present, I will fetch and return the entire share hash
1459         chain. Otherwise, I may fetch and return any part of the share
1460         hash chain that is a superset of the part that I am asked to
1461         fetch. Callers should be prepared to deal with more hashes than
1462         they've asked for.
1463         """
1464         if needed == set([]):
1465             return defer.succeed([])
1466         d = self._maybe_fetch_offsets_and_header()
1467
1468         def _make_readvs(ignored):
1469             sharehashes_offset = self._offsets['share_hash_chain']
1470             if self._version_number == 0:
1471                 sharehashes_length = self._offsets['block_hash_tree'] - sharehashes_offset
1472             else:
1473                 sharehashes_length = self._offsets['signature'] - sharehashes_offset
1474             readvs = [(sharehashes_offset, sharehashes_length)]
1475             return readvs
1476         d.addCallback(_make_readvs)
1477         d.addCallback(lambda readvs:
1478             self._read(readvs, queue=queue, force_remote=force_remote))
1479         def _build_share_hash_chain(results):
1480             assert self.shnum in results
1481
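            # Each share hash chain entry is a 2-byte share number
            # followed by a HASH_SIZE-byte hash (struct format ">H32s");
            # split the raw string on that boundary and return a dict
            # mapping share number -> hash.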
1482             sharehashes = results[self.shnum][0]
1483             results = [sharehashes[i:i+(HASH_SIZE + 2)]
1484                        for i in range(0, len(sharehashes), HASH_SIZE + 2)]
1485             results = dict([struct.unpack(">H32s", data)
1486                             for data in results])
1487             return results
1488         d.addCallback(_build_share_hash_chain)
1489         return d
1490
1491
1492     def get_encprivkey(self, queue=False):
1493         """
1494         I return the encrypted private key.
1495         """
1496         d = self._maybe_fetch_offsets_and_header()
1497
1498         def _make_readvs(ignored):
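            # In SDMF the encrypted private key is the last field in the
            # share, so it runs to EOF; in MDMF it is followed by the
            # share hash chain.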
1499             privkey_offset = self._offsets['enc_privkey']
1500             if self._version_number == 0:
1501                 privkey_length = self._offsets['EOF'] - privkey_offset
1502             else:
1503                 privkey_length = self._offsets['share_hash_chain'] - privkey_offset
1504             readvs = [(privkey_offset, privkey_length)]
1505             return readvs
1506         d.addCallback(_make_readvs)
1507         d.addCallback(lambda readvs:
1508             self._read(readvs, queue=queue))
1509         def _process_results(results):
1510             assert self.shnum in results
1511             privkey = results[self.shnum][0]
1512             return privkey
1513         d.addCallback(_process_results)
1514         return d
1515
1516
1517     def get_signature(self, queue=False):
1518         """
1519         I return the signature of my share.
1520         """
1521         d = self._maybe_fetch_offsets_and_header()
1522
1523         def _make_readvs(ignored):
1524             signature_offset = self._offsets['signature']
1525             if self._version_number == 1:
1526                 signature_length = self._offsets['verification_key'] - signature_offset
1527             else:
1528                 signature_length = self._offsets['share_hash_chain'] - signature_offset
1529             readvs = [(signature_offset, signature_length)]
1530             return readvs
1531         d.addCallback(_make_readvs)
1532         d.addCallback(lambda readvs:
1533             self._read(readvs, queue=queue))
1534         def _process_results(results):
1535             assert self.shnum in results
1536             signature = results[self.shnum][0]
1537             return signature
1538         d.addCallback(_process_results)
1539         return d
1540
1541
1542     def get_verification_key(self, queue=False):
1543         """
1544         I return the verification key.
1545         """
1546         d = self._maybe_fetch_offsets_and_header()
1547
1548         def _make_readvs(ignored):
1549             if self._version_number == 1:
1550                 vk_offset = self._offsets['verification_key']
1551                 vk_length = self._offsets['verification_key_end'] - vk_offset
1552             else:
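                # For SDMF the verification key starts right after the
                # fixed-size header (signed prefix plus offset table), so
                # its offset is just the packed header length.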
1553                 vk_offset = struct.calcsize(">BQ32s16sBBQQLLLLQQ")
1554                 vk_length = self._offsets['signature'] - vk_offset
1555             readvs = [(vk_offset, vk_length)]
1556             return readvs
1557         d.addCallback(_make_readvs)
1558         d.addCallback(lambda readvs:
1559             self._read(readvs, queue=queue))
1560         def _process_results(results):
1561             assert self.shnum in results
1562             verification_key = results[self.shnum][0]
1563             return verification_key
1564         d.addCallback(_process_results)
1565         return d
1566
1567
1568     def get_encoding_parameters(self):
1569         """
1570         I return (k, n, segsize, datalen)
1571         """
1572         d = self._maybe_fetch_offsets_and_header()
1573         d.addCallback(lambda ignored:
1574             (self._required_shares,
1575              self._total_shares,
1576              self._segment_size,
1577              self._data_length))
1578         return d
1579
1580
1581     def get_seqnum(self):
1582         """
1583         I return the sequence number for this share.
1584         """
1585         d = self._maybe_fetch_offsets_and_header()
1586         d.addCallback(lambda ignored:
1587             self._sequence_number)
1588         return d
1589
1590
1591     def get_root_hash(self):
1592         """
1593         I return the root of the block hash tree
1594         """
1595         d = self._maybe_fetch_offsets_and_header()
1596         d.addCallback(lambda ignored: self._root_hash)
1597         return d
1598
1599
1600     def get_checkstring(self):
1601         """
1602         I return the packed representation of the following:
1603
1604             - version number
1605             - sequence number
1606             - root hash
1607             - salt (SDMF only)
1608
1609         which my users use as a checkstring to detect other writers.
1610         """
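        # Rough sketch of the two layouts, per the format constants
        # defined earlier in this module: the SDMF checkstring packs
        # (version, seqnum, root hash, salt) with PREFIX, while the MDMF
        # checkstring omits the salt.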
1611         d = self._maybe_fetch_offsets_and_header()
1612         def _build_checkstring(ignored):
1613             if self._salt:
1614                 checkstring = struct.pack(PREFIX,
1615                                           self._version_number,
1616                                           self._sequence_number,
1617                                           self._root_hash,
1618                                           self._salt)
1619             else:
1620                 checkstring = struct.pack(MDMFCHECKSTRING,
1621                                           self._version_number,
1622                                           self._sequence_number,
1623                                           self._root_hash)
1624
1625             return checkstring
1626         d.addCallback(_build_checkstring)
1627         return d
1628
1629
1630     def get_prefix(self, force_remote):
1631         d = self._maybe_fetch_offsets_and_header(force_remote)
1632         d.addCallback(lambda ignored:
1633             self._build_prefix())
1634         return d
1635
1636
1637     def _build_prefix(self):
1638         # The prefix is another name for the part of the remote share
1639         # that gets signed. It consists of everything up to and
1640         # including the datalength, packed by struct.
1641         if self._version_number == SDMF_VERSION:
1642             return struct.pack(SIGNED_PREFIX,
1643                            self._version_number,
1644                            self._sequence_number,
1645                            self._root_hash,
1646                            self._salt,
1647                            self._required_shares,
1648                            self._total_shares,
1649                            self._segment_size,
1650                            self._data_length)
1651
1652         else:
1653             return struct.pack(MDMFSIGNABLEHEADER,
1654                            self._version_number,
1655                            self._sequence_number,
1656                            self._root_hash,
1657                            self._required_shares,
1658                            self._total_shares,
1659                            self._segment_size,
1660                            self._data_length)
1661
1662
1663     def _get_offsets_tuple(self):
1664         # The offsets tuple is another component of the version
1665         # information tuple. Despite the name, we return a copy of our
1666         # offsets dictionary rather than an itemized tuple.
1667         return self._offsets.copy()
1668
1669
1670     def get_verinfo(self):
1671         """
1672         I return my verinfo tuple. This is used by the ServermapUpdater
1673         to keep track of versions of mutable files.
1674
1675         The verinfo tuple for MDMF files contains:
1676             - seqnum
1677             - root hash
1678             - a placeholder for the salt (None for MDMF)
1679             - segsize
1680             - datalen
1681             - k
1682             - n
1683             - prefix (the thing that you sign)
1684             - a tuple of offsets
1685
1686         We include the placeholder in MDMF so that version information
1687         tuples have the same shape for both formats.
1688
1689         The verinfo tuple for SDMF files is the same, but carries the
1690         16-byte salt (IV) in place of the blank.
1691         """
1692         d = self._maybe_fetch_offsets_and_header()
1693         def _build_verinfo(ignored):
1694             if self._version_number == SDMF_VERSION:
1695                 salt_to_use = self._salt
1696             else:
1697                 salt_to_use = None
1698             return (self._sequence_number,
1699                     self._root_hash,
1700                     salt_to_use,
1701                     self._segment_size,
1702                     self._data_length,
1703                     self._required_shares,
1704                     self._total_shares,
1705                     self._build_prefix(),
1706                     self._get_offsets_tuple())
1707         d.addCallback(_build_verinfo)
1708         return d
1709
1710
1711     def flush(self):
1712         """
1713         I flush my queue of read vectors.
1714         """
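        # Everything queued via _read(..., queue=True) is sent to the
        # server in a single slot_readv call; the Deferreds subscribed to
        # the queue observers are then fired with the batched results (or
        # with the failure).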
1715         d = self._read(self._readvs)
1716         def _then(results):
1717             self._readvs = []
1718             if isinstance(results, failure.Failure):
1719                 self._queue_errbacks.notify(results)
1720             else:
1721                 self._queue_observers.notify(results)
1722             self._queue_observers = observer.ObserverList()
1723             self._queue_errbacks = observer.ObserverList()
1724         d.addBoth(_then)
1725
1726
1727     def _read(self, readvs, force_remote=False, queue=False):
1728         unsatisfiable = filter(lambda x: x[0] + x[1] > len(self._data), readvs)
1729         # TODO: It's entirely possible to tweak this so that it just
1730         # fulfills the requests that it can, and not demand that all
1731         # requests are satisfiable before running it.
1732         if not unsatisfiable and not force_remote:
1733             results = [self._data[offset:offset+length]
1734                        for (offset, length) in readvs]
1735             results = {self.shnum: results}
1736             return defer.succeed(results)
1737         else:
1738             if queue:
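                # Remember which slice of the eventually-flushed batch
                # belongs to this call, so that the Deferred we hand back
                # fires with only this caller's results.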
1739                 start = len(self._readvs)
1740                 self._readvs += readvs
1741                 end = len(self._readvs)
1742                 def _get_results(results, start, end):
1743                     if self.shnum not in results:
1744                         return {self.shnum: [""]}
1745                     return {self.shnum: results[self.shnum][start:end]}
1746                 d = defer.Deferred()
1747                 d.addCallback(_get_results, start, end)
1748                 self._queue_observers.subscribe(d.callback)
1749                 self._queue_errbacks.subscribe(d.errback)
1750                 return d
1751             return self._rref.callRemote("slot_readv",
1752                                          self._storage_index,
1753                                          [self.shnum],
1754                                          readvs)
1755
1756
1757     def is_sdmf(self):
1758         """I tell my caller whether my remote file is SDMF (True) or MDMF (False)
1759         """
1760         d = self._maybe_fetch_offsets_and_header()
1761         d.addCallback(lambda ignored:
1762             self._version_number == 0)
1763         return d
1764
1765
1766 class LayoutInvalid(Exception):
1767     """
1768     This isn't a valid MDMF mutable file
1769     """