]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/mutable/layout.py
mutable/layout.py: make unpack_sdmf_checkstring and unpack_mdmf_checkstring more...
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / mutable / layout.py
1
2 import struct
3 from allmydata.mutable.common import NeedMoreDataError, UnknownVersionError
4 from allmydata.interfaces import HASH_SIZE, SALT_SIZE, SDMF_VERSION, \
5                                  MDMF_VERSION, IMutableSlotWriter
6 from allmydata.util import mathutil
7 from twisted.python import failure
8 from twisted.internet import defer
9 from zope.interface import implements
10
11
12 # These strings describe the format of the packed structs they help process
13 # Here's what they mean:
14 #
15 #  PREFIX:
16 #    >: Big-endian byte order; the most significant byte is first (leftmost).
17 #    B: The version information; an 8 bit version identifier. Stored as
18 #       an unsigned char. This is currently 00 00 00 00; our modifications
19 #       will turn it into 00 00 00 01.
20 #    Q: The sequence number; this is sort of like a revision history for
21 #       mutable files; they start at 1 and increase as they are changed after
22 #       being uploaded. Stored as an unsigned long long, which is 8 bytes in
23 #       length.
24 #  32s: The root hash of the share hash tree. We use sha-256d, so we use 32 
25 #       characters = 32 bytes to store the value.
26 #  16s: The salt for the readkey. This is a 16-byte random value, stored as
27 #       16 characters.
28 #
29 #  SIGNED_PREFIX additions, things that are covered by the signature:
30 #    B: The "k" encoding parameter. We store this as an 8-bit character, 
31 #       which is convenient because our erasure coding scheme cannot 
32 #       encode if you ask for more than 255 pieces.
33 #    B: The "N" encoding parameter. Stored as an 8-bit character for the 
34 #       same reasons as above.
35 #    Q: The segment size of the uploaded file. This will essentially be the
36 #       length of the file in SDMF. An unsigned long long, so we can store 
37 #       files of quite large size.
38 #    Q: The data length of the uploaded file. Modulo padding, this will be
39 #       the same of the data length field. Like the data length field, it is
40 #       an unsigned long long and can be quite large.
41 #
42 #   HEADER additions:
43 #     L: The offset of the signature of this. An unsigned long.
44 #     L: The offset of the share hash chain. An unsigned long.
45 #     L: The offset of the block hash tree. An unsigned long.
46 #     L: The offset of the share data. An unsigned long.
47 #     Q: The offset of the encrypted private key. An unsigned long long, to
48 #        account for the possibility of a lot of share data.
49 #     Q: The offset of the EOF. An unsigned long long, to account for the
50 #        possibility of a lot of share data.
51
52 #  After all of these, we have the following:
53 #    - The verification key: Occupies the space between the end of the header
54 #      and the start of the signature (i.e.: data[HEADER_LENGTH:o['signature']].
55 #    - The signature, which goes from the signature offset to the share hash
56 #      chain offset.
57 #    - The share hash chain, which goes from the share hash chain offset to
58 #      the block hash tree offset.
59 #    - The share data, which goes from the share data offset to the encrypted
60 #      private key offset.
61 #    - The encrypted private key offset, which goes until the end of the file.
62
63 #  The block hash tree in this encoding has only one share, so the offset of
64 #  the share data will be 32 bits more than the offset of the block hash tree.
65 #  Given this, we may need to check to see how many bytes a reasonably sized
66 #  block hash tree will take up.
67
68 PREFIX = ">BQ32s16s" # each version has a different prefix
69 SIGNED_PREFIX = ">BQ32s16s BBQQ" # this is covered by the signature
70 SIGNED_PREFIX_LENGTH = struct.calcsize(SIGNED_PREFIX)
71 HEADER = ">BQ32s16s BBQQ LLLLQQ" # includes offsets
72 HEADER_LENGTH = struct.calcsize(HEADER)
73 OFFSETS = ">LLLLQQ"
74 OFFSETS_LENGTH = struct.calcsize(OFFSETS)
75
76 # These are still used for some tests.
77 def unpack_header(data):
78     o = {}
79     (version,
80      seqnum,
81      root_hash,
82      IV,
83      k, N, segsize, datalen,
84      o['signature'],
85      o['share_hash_chain'],
86      o['block_hash_tree'],
87      o['share_data'],
88      o['enc_privkey'],
89      o['EOF']) = struct.unpack(HEADER, data[:HEADER_LENGTH])
90     return (version, seqnum, root_hash, IV, k, N, segsize, datalen, o)
91
92 def unpack_share(data):
93     assert len(data) >= HEADER_LENGTH
94     o = {}
95     (version,
96      seqnum,
97      root_hash,
98      IV,
99      k, N, segsize, datalen,
100      o['signature'],
101      o['share_hash_chain'],
102      o['block_hash_tree'],
103      o['share_data'],
104      o['enc_privkey'],
105      o['EOF']) = struct.unpack(HEADER, data[:HEADER_LENGTH])
106
107     if version != 0:
108         raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version)
109
110     if len(data) < o['EOF']:
111         raise NeedMoreDataError(o['EOF'],
112                                 o['enc_privkey'], o['EOF']-o['enc_privkey'])
113
114     pubkey = data[HEADER_LENGTH:o['signature']]
115     signature = data[o['signature']:o['share_hash_chain']]
116     share_hash_chain_s = data[o['share_hash_chain']:o['block_hash_tree']]
117     share_hash_format = ">H32s"
118     hsize = struct.calcsize(share_hash_format)
119     assert len(share_hash_chain_s) % hsize == 0, len(share_hash_chain_s)
120     share_hash_chain = []
121     for i in range(0, len(share_hash_chain_s), hsize):
122         chunk = share_hash_chain_s[i:i+hsize]
123         (hid, h) = struct.unpack(share_hash_format, chunk)
124         share_hash_chain.append( (hid, h) )
125     share_hash_chain = dict(share_hash_chain)
126     block_hash_tree_s = data[o['block_hash_tree']:o['share_data']]
127     assert len(block_hash_tree_s) % 32 == 0, len(block_hash_tree_s)
128     block_hash_tree = []
129     for i in range(0, len(block_hash_tree_s), 32):
130         block_hash_tree.append(block_hash_tree_s[i:i+32])
131
132     share_data = data[o['share_data']:o['enc_privkey']]
133     enc_privkey = data[o['enc_privkey']:o['EOF']]
134
135     return (seqnum, root_hash, IV, k, N, segsize, datalen,
136             pubkey, signature, share_hash_chain, block_hash_tree,
137             share_data, enc_privkey)
138
139 def get_version_from_checkstring(checkstring):
140     (t, ) = struct.unpack(">B", checkstring[:1])
141     return t
142
143 def unpack_sdmf_checkstring(checkstring):
144     cs_len = struct.calcsize(PREFIX)
145     version, seqnum, root_hash, IV = struct.unpack(PREFIX, checkstring[:cs_len])
146     assert version == SDMF_VERSION, version
147     return (seqnum, root_hash, IV)
148
149 def unpack_mdmf_checkstring(checkstring):
150     cs_len = struct.calcsize(MDMFCHECKSTRING)
151     version, seqnum, root_hash = struct.unpack(MDMFCHECKSTRING, checkstring[:cs_len])
152     assert version == MDMF_VERSION, version
153     return (seqnum, root_hash)
154
155 def pack_offsets(verification_key_length, signature_length,
156                  share_hash_chain_length, block_hash_tree_length,
157                  share_data_length, encprivkey_length):
158     post_offset = HEADER_LENGTH
159     offsets = {}
160     o1 = offsets['signature'] = post_offset + verification_key_length
161     o2 = offsets['share_hash_chain'] = o1 + signature_length
162     o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length
163     o4 = offsets['share_data'] = o3 + block_hash_tree_length
164     o5 = offsets['enc_privkey'] = o4 + share_data_length
165     offsets['EOF'] = o5 + encprivkey_length
166
167     return struct.pack(">LLLLQQ",
168                        offsets['signature'],
169                        offsets['share_hash_chain'],
170                        offsets['block_hash_tree'],
171                        offsets['share_data'],
172                        offsets['enc_privkey'],
173                        offsets['EOF'])
174
175 def pack_share(prefix, verification_key, signature,
176                share_hash_chain, block_hash_tree,
177                share_data, encprivkey):
178     share_hash_chain_s = "".join([struct.pack(">H32s", i, share_hash_chain[i])
179                                   for i in sorted(share_hash_chain.keys())])
180     for h in block_hash_tree:
181         assert len(h) == 32
182     block_hash_tree_s = "".join(block_hash_tree)
183
184     offsets = pack_offsets(len(verification_key),
185                            len(signature),
186                            len(share_hash_chain_s),
187                            len(block_hash_tree_s),
188                            len(share_data),
189                            len(encprivkey))
190     final_share = "".join([prefix,
191                            offsets,
192                            verification_key,
193                            signature,
194                            share_hash_chain_s,
195                            block_hash_tree_s,
196                            share_data,
197                            encprivkey])
198     return final_share
199
200 def pack_prefix(seqnum, root_hash, IV,
201                 required_shares, total_shares,
202                 segment_size, data_length):
203     prefix = struct.pack(SIGNED_PREFIX,
204                          0, # version,
205                          seqnum,
206                          root_hash,
207                          IV,
208                          required_shares,
209                          total_shares,
210                          segment_size,
211                          data_length,
212                          )
213     return prefix
214
215
216 class SDMFSlotWriteProxy:
217     implements(IMutableSlotWriter)
218     """
219     I represent a remote write slot for an SDMF mutable file. I build a
220     share in memory, and then write it in one piece to the remote
221     server. This mimics how SDMF shares were built before MDMF (and the
222     new MDMF uploader), but provides that functionality in a way that
223     allows the MDMF uploader to be built without much special-casing for
224     file format, which makes the uploader code more readable.
225     """
226     def __init__(self,
227                  shnum,
228                  rref, # a remote reference to a storage server
229                  storage_index,
230                  secrets, # (write_enabler, renew_secret, cancel_secret)
231                  seqnum, # the sequence number of the mutable file
232                  required_shares,
233                  total_shares,
234                  segment_size,
235                  data_length): # the length of the original file
236         self.shnum = shnum
237         self._rref = rref
238         self._storage_index = storage_index
239         self._secrets = secrets
240         self._seqnum = seqnum
241         self._required_shares = required_shares
242         self._total_shares = total_shares
243         self._segment_size = segment_size
244         self._data_length = data_length
245
246         # This is an SDMF file, so it should have only one segment, so, 
247         # modulo padding of the data length, the segment size and the
248         # data length should be the same.
249         expected_segment_size = mathutil.next_multiple(data_length,
250                                                        self._required_shares)
251         assert expected_segment_size == segment_size
252
253         self._block_size = self._segment_size / self._required_shares
254
255         # This is meant to mimic how SDMF files were built before MDMF
256         # entered the picture: we generate each share in its entirety,
257         # then push it off to the storage server in one write. When
258         # callers call set_*, they are just populating this dict.
259         # finish_publishing will stitch these pieces together into a
260         # coherent share, and then write the coherent share to the
261         # storage server.
262         self._share_pieces = {}
263
264         # This tells the write logic what checkstring to use when
265         # writing remote shares.
266         self._testvs = []
267
268         self._readvs = [(0, struct.calcsize(PREFIX))]
269
270
271     def set_checkstring(self, checkstring_or_seqnum,
272                               root_hash=None,
273                               salt=None):
274         """
275         Set the checkstring that I will pass to the remote server when
276         writing.
277
278             @param checkstring_or_seqnum: A packed checkstring to use,
279                    or a sequence number. I will treat this as a checkstr
280
281         Note that implementations can differ in which semantics they
282         wish to support for set_checkstring -- they can, for example,
283         build the checkstring themselves from its constituents, or
284         some other thing.
285         """
286         if root_hash and salt:
287             checkstring = struct.pack(PREFIX,
288                                       0,
289                                       checkstring_or_seqnum,
290                                       root_hash,
291                                       salt)
292         else:
293             checkstring = checkstring_or_seqnum
294         self._testvs = [(0, len(checkstring), "eq", checkstring)]
295
296
297     def get_checkstring(self):
298         """
299         Get the checkstring that I think currently exists on the remote
300         server.
301         """
302         if self._testvs:
303             return self._testvs[0][3]
304         return ""
305
306
307     def put_block(self, data, segnum, salt):
308         """
309         Add a block and salt to the share.
310         """
311         # SDMF files have only one segment
312         assert segnum == 0
313         assert len(data) == self._block_size
314         assert len(salt) == SALT_SIZE
315
316         self._share_pieces['sharedata'] = data
317         self._share_pieces['salt'] = salt
318
319         # TODO: Figure out something intelligent to return.
320         return defer.succeed(None)
321
322
323     def put_encprivkey(self, encprivkey):
324         """
325         Add the encrypted private key to the share.
326         """
327         self._share_pieces['encprivkey'] = encprivkey
328
329         return defer.succeed(None)
330
331
332     def put_blockhashes(self, blockhashes):
333         """
334         Add the block hash tree to the share.
335         """
336         assert isinstance(blockhashes, list)
337         for h in blockhashes:
338             assert len(h) == HASH_SIZE
339
340         # serialize the blockhashes, then set them.
341         blockhashes_s = "".join(blockhashes)
342         self._share_pieces['block_hash_tree'] = blockhashes_s
343
344         return defer.succeed(None)
345
346
347     def put_sharehashes(self, sharehashes):
348         """
349         Add the share hash chain to the share.
350         """
351         assert isinstance(sharehashes, dict)
352         for h in sharehashes.itervalues():
353             assert len(h) == HASH_SIZE
354
355         # serialize the sharehashes, then set them.
356         sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i])
357                                  for i in sorted(sharehashes.keys())])
358         self._share_pieces['share_hash_chain'] = sharehashes_s
359
360         return defer.succeed(None)
361
362
363     def put_root_hash(self, root_hash):
364         """
365         Add the root hash to the share.
366         """
367         assert len(root_hash) == HASH_SIZE
368
369         self._share_pieces['root_hash'] = root_hash
370
371         return defer.succeed(None)
372
373
374     def put_salt(self, salt):
375         """
376         Add a salt to an empty SDMF file.
377         """
378         assert len(salt) == SALT_SIZE
379
380         self._share_pieces['salt'] = salt
381         self._share_pieces['sharedata'] = ""
382
383
384     def get_signable(self):
385         """
386         Return the part of the share that needs to be signed.
387
388         SDMF writers need to sign the packed representation of the
389         first eight fields of the remote share, that is:
390             - version number (0)
391             - sequence number
392             - root of the share hash tree
393             - salt
394             - k
395             - n
396             - segsize
397             - datalen
398
399         This method is responsible for returning that to callers.
400         """
401         return struct.pack(SIGNED_PREFIX,
402                            0,
403                            self._seqnum,
404                            self._share_pieces['root_hash'],
405                            self._share_pieces['salt'],
406                            self._required_shares,
407                            self._total_shares,
408                            self._segment_size,
409                            self._data_length)
410
411
412     def put_signature(self, signature):
413         """
414         Add the signature to the share.
415         """
416         self._share_pieces['signature'] = signature
417
418         return defer.succeed(None)
419
420
421     def put_verification_key(self, verification_key):
422         """
423         Add the verification key to the share.
424         """
425         self._share_pieces['verification_key'] = verification_key
426
427         return defer.succeed(None)
428
429
430     def get_verinfo(self):
431         """
432         I return my verinfo tuple. This is used by the ServermapUpdater
433         to keep track of versions of mutable files.
434
435         The verinfo tuple for MDMF files contains:
436             - seqnum
437             - root hash
438             - a blank (nothing)
439             - segsize
440             - datalen
441             - k
442             - n
443             - prefix (the thing that you sign)
444             - a tuple of offsets
445
446         We include the nonce in MDMF to simplify processing of version
447         information tuples.
448
449         The verinfo tuple for SDMF files is the same, but contains a
450         16-byte IV instead of a hash of salts.
451         """
452         return (self._seqnum,
453                 self._share_pieces['root_hash'],
454                 self._share_pieces['salt'],
455                 self._segment_size,
456                 self._data_length,
457                 self._required_shares,
458                 self._total_shares,
459                 self.get_signable(),
460                 self._get_offsets_tuple())
461
462     def _get_offsets_dict(self):
463         post_offset = HEADER_LENGTH
464         offsets = {}
465
466         verification_key_length = len(self._share_pieces['verification_key'])
467         o1 = offsets['signature'] = post_offset + verification_key_length
468
469         signature_length = len(self._share_pieces['signature'])
470         o2 = offsets['share_hash_chain'] = o1 + signature_length
471
472         share_hash_chain_length = len(self._share_pieces['share_hash_chain'])
473         o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length
474
475         block_hash_tree_length = len(self._share_pieces['block_hash_tree'])
476         o4 = offsets['share_data'] = o3 + block_hash_tree_length
477
478         share_data_length = len(self._share_pieces['sharedata'])
479         o5 = offsets['enc_privkey'] = o4 + share_data_length
480
481         encprivkey_length = len(self._share_pieces['encprivkey'])
482         offsets['EOF'] = o5 + encprivkey_length
483         return offsets
484
485
486     def _get_offsets_tuple(self):
487         offsets = self._get_offsets_dict()
488         return tuple([(key, value) for key, value in offsets.items()])
489
490
491     def _pack_offsets(self):
492         offsets = self._get_offsets_dict()
493         return struct.pack(">LLLLQQ",
494                            offsets['signature'],
495                            offsets['share_hash_chain'],
496                            offsets['block_hash_tree'],
497                            offsets['share_data'],
498                            offsets['enc_privkey'],
499                            offsets['EOF'])
500
501
502     def finish_publishing(self):
503         """
504         Do anything necessary to finish writing the share to a remote
505         server. I require that no further publishing needs to take place
506         after this method has been called.
507         """
508         for k in ["sharedata", "encprivkey", "signature", "verification_key",
509                   "share_hash_chain", "block_hash_tree"]:
510             assert k in self._share_pieces, (self.shnum, k, self._share_pieces.keys())
511         # This is the only method that actually writes something to the
512         # remote server.
513         # First, we need to pack the share into data that we can write
514         # to the remote server in one write.
515         offsets = self._pack_offsets()
516         prefix = self.get_signable()
517         final_share = "".join([prefix,
518                                offsets,
519                                self._share_pieces['verification_key'],
520                                self._share_pieces['signature'],
521                                self._share_pieces['share_hash_chain'],
522                                self._share_pieces['block_hash_tree'],
523                                self._share_pieces['sharedata'],
524                                self._share_pieces['encprivkey']])
525
526         # Our only data vector is going to be writing the final share,
527         # in its entirely.
528         datavs = [(0, final_share)]
529
530         if not self._testvs:
531             # Our caller has not provided us with another checkstring
532             # yet, so we assume that we are writing a new share, and set
533             # a test vector that will allow a new share to be written.
534             self._testvs = []
535             self._testvs.append(tuple([0, 1, "eq", ""]))
536
537         tw_vectors = {}
538         tw_vectors[self.shnum] = (self._testvs, datavs, None)
539         return self._rref.callRemote("slot_testv_and_readv_and_writev",
540                                      self._storage_index,
541                                      self._secrets,
542                                      tw_vectors,
543                                      # TODO is it useful to read something?
544                                      self._readvs)
545
546
547 MDMFHEADER = ">BQ32sBBQQ QQQQQQQQ"
548 MDMFHEADERWITHOUTOFFSETS = ">BQ32sBBQQ"
549 MDMFHEADERSIZE = struct.calcsize(MDMFHEADER)
550 MDMFHEADERWITHOUTOFFSETSSIZE = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
551 MDMFCHECKSTRING = ">BQ32s"
552 MDMFSIGNABLEHEADER = ">BQ32sBBQQ"
553 MDMFOFFSETS = ">QQQQQQQQ"
554 MDMFOFFSETS_LENGTH = struct.calcsize(MDMFOFFSETS)
555
556 PRIVATE_KEY_SIZE = 1220
557 SIGNATURE_SIZE = 260
558 VERIFICATION_KEY_SIZE = 292
559 # We know we won't have more than 256 shares, and we know that we won't need
560 # to store more than ln2(256) hash-chain nodes to validate, so that's our
561 # bound. Each node requires 2 bytes of node-number plus 32 bytes of hash.
562 SHARE_HASH_CHAIN_SIZE = (2+HASH_SIZE)*mathutil.log_ceil(256, 2)
563
564 class MDMFSlotWriteProxy:
565     implements(IMutableSlotWriter)
566
567     """
568     I represent a remote write slot for an MDMF mutable file.
569
570     I abstract away from my caller the details of block and salt
571     management, and the implementation of the on-disk format for MDMF
572     shares.
573     """
574     # Expected layout, MDMF:
575     # offset:     size:       name:
576     #-- signed part --
577     # 0           1           version number (01)
578     # 1           8           sequence number
579     # 9           32          share tree root hash
580     # 41          1           The "k" encoding parameter
581     # 42          1           The "N" encoding parameter
582     # 43          8           The segment size of the uploaded file
583     # 51          8           The data length of the original plaintext
584     #-- end signed part --
585     # 59          8           The offset of the encrypted private key
586     # 67          8           The offset of the share hash chain
587     # 75          8           The offset of the signature
588     # 83          8           The offset of the verification key
589     # 91          8           The offset of the end of the v. key.
590     # 99          8           The offset of the share data
591     # 107         8           The offset of the block hash tree
592     # 115         8           The offset of EOF
593     # 123         var         encrypted private key
594     # var         var         share hash chain
595     # var         var         signature
596     # var         var         verification key
597     # var         large       share data
598     # var         var         block hash tree
599     #
600     # We order the fields that way to make smart downloaders -- downloaders
601     # which prempetively read a big part of the share -- possible.
602     #
603     # The checkstring is the first three fields -- the version number,
604     # sequence number, root hash and root salt hash. This is consistent
605     # in meaning to what we have with SDMF files, except now instead of
606     # using the literal salt, we use a value derived from all of the
607     # salts -- the share hash root.
608     # 
609     # The salt is stored before the block for each segment. The block
610     # hash tree is computed over the combination of block and salt for
611     # each segment. In this way, we get integrity checking for both
612     # block and salt with the current block hash tree arrangement.
613     # 
614     # The ordering of the offsets is different to reflect the dependencies
615     # that we'll run into with an MDMF file. The expected write flow is
616     # something like this:
617     #
618     #   0: Initialize with the sequence number, encoding parameters and
619     #      data length. From this, we can deduce the number of segments,
620     #      and where they should go.. We can also figure out where the
621     #      encrypted private key should go, because we can figure out how
622     #      big the share data will be.
623     # 
624     #   1: Encrypt, encode, and upload the file in chunks. Do something
625     #      like 
626     #
627     #       put_block(data, segnum, salt)
628     #
629     #      to write a block and a salt to the disk. We can do both of
630     #      these operations now because we have enough of the offsets to
631     #      know where to put them.
632     # 
633     #   2: Put the encrypted private key. Use:
634     #
635     #        put_encprivkey(encprivkey)
636     #
637     #      Now that we know the length of the private key, we can fill
638     #      in the offset for the block hash tree.
639     #
640     #   3: We're now in a position to upload the block hash tree for
641     #      a share. Put that using something like:
642     #       
643     #        put_blockhashes(block_hash_tree)
644     #
645     #      Note that block_hash_tree is a list of hashes -- we'll take
646     #      care of the details of serializing that appropriately. When
647     #      we get the block hash tree, we are also in a position to
648     #      calculate the offset for the share hash chain, and fill that
649     #      into the offsets table.
650     #
651     #   4: We're now in a position to upload the share hash chain for
652     #      a share. Do that with something like:
653     #      
654     #        put_sharehashes(share_hash_chain) 
655     #
656     #      share_hash_chain should be a dictionary mapping shnums to 
657     #      32-byte hashes -- the wrapper handles serialization.
658     #      We'll know where to put the signature at this point, also.
659     #      The root of this tree will be put explicitly in the next
660     #      step.
661     # 
662     #   5: Before putting the signature, we must first put the
663     #      root_hash. Do this with:
664     # 
665     #        put_root_hash(root_hash).
666     #      
667     #      In terms of knowing where to put this value, it was always
668     #      possible to place it, but it makes sense semantically to
669     #      place it after the share hash tree, so that's why you do it
670     #      in this order.
671     #
672     #   6: With the root hash put, we can now sign the header. Use:
673     #
674     #        get_signable()
675     #
676     #      to get the part of the header that you want to sign, and use:
677     #       
678     #        put_signature(signature)
679     #
680     #      to write your signature to the remote server.
681     #
682     #   6: Add the verification key, and finish. Do:
683     #
684     #        put_verification_key(key) 
685     #
686     #      and 
687     #
688     #        finish_publish()
689     #
690     # Checkstring management:
691     # 
692     # To write to a mutable slot, we have to provide test vectors to ensure
693     # that we are writing to the same data that we think we are. These
694     # vectors allow us to detect uncoordinated writes; that is, writes
695     # where both we and some other shareholder are writing to the
696     # mutable slot, and to report those back to the parts of the program
697     # doing the writing. 
698     #
699     # With SDMF, this was easy -- all of the share data was written in
700     # one go, so it was easy to detect uncoordinated writes, and we only
701     # had to do it once. With MDMF, not all of the file is written at
702     # once.
703     #
704     # If a share is new, we write out as much of the header as we can
705     # before writing out anything else. This gives other writers a
706     # canary that they can use to detect uncoordinated writes, and, if
707     # they do the same thing, gives us the same canary. We them update
708     # the share. We won't be able to write out two fields of the header
709     # -- the share tree hash and the salt hash -- until we finish
710     # writing out the share. We only require the writer to provide the
711     # initial checkstring, and keep track of what it should be after
712     # updates ourselves.
713     #
714     # If we haven't written anything yet, then on the first write (which
715     # will probably be a block + salt of a share), we'll also write out
716     # the header. On subsequent passes, we'll expect to see the header.
717     # This changes in two places:
718     #
719     #   - When we write out the salt hash
720     #   - When we write out the root of the share hash tree
721     #
722     # since these values will change the header. It is possible that we 
723     # can just make those be written in one operation to minimize
724     # disruption.
725     def __init__(self,
726                  shnum,
727                  rref, # a remote reference to a storage server
728                  storage_index,
729                  secrets, # (write_enabler, renew_secret, cancel_secret)
730                  seqnum, # the sequence number of the mutable file
731                  required_shares,
732                  total_shares,
733                  segment_size,
734                  data_length): # the length of the original file
735         self.shnum = shnum
736         self._rref = rref
737         self._storage_index = storage_index
738         self._seqnum = seqnum
739         self._required_shares = required_shares
740         assert self.shnum >= 0 and self.shnum < total_shares
741         self._total_shares = total_shares
742         # We build up the offset table as we write things. It is the
743         # last thing we write to the remote server. 
744         self._offsets = {}
745         self._testvs = []
746         # This is a list of write vectors that will be sent to our
747         # remote server once we are directed to write things there.
748         self._writevs = []
749         self._secrets = secrets
750         # The segment size needs to be a multiple of the k parameter --
751         # any padding should have been carried out by the publisher
752         # already.
753         assert segment_size % required_shares == 0
754         self._segment_size = segment_size
755         self._data_length = data_length
756
757         # These are set later -- we define them here so that we can
758         # check for their existence easily
759
760         # This is the root of the share hash tree -- the Merkle tree
761         # over the roots of the block hash trees computed for shares in
762         # this upload.
763         self._root_hash = None
764
765         # We haven't yet written anything to the remote bucket. By
766         # setting this, we tell the _write method as much. The write
767         # method will then know that it also needs to add a write vector
768         # for the checkstring (or what we have of it) to the first write
769         # request. We'll then record that value for future use.  If
770         # we're expecting something to be there already, we need to call
771         # set_checkstring before we write anything to tell the first
772         # write about that.
773         self._written = False
774
775         # When writing data to the storage servers, we get a read vector
776         # for free. We'll read the checkstring, which will help us
777         # figure out what's gone wrong if a write fails.
778         self._readv = [(0, struct.calcsize(MDMFCHECKSTRING))]
779
780         # We calculate the number of segments because it tells us
781         # where the salt part of the file ends/share segment begins,
782         # and also because it provides a useful amount of bounds checking.
783         self._num_segments = mathutil.div_ceil(self._data_length,
784                                                self._segment_size)
785         self._block_size = self._segment_size / self._required_shares
786         # We also calculate the share size, to help us with block
787         # constraints later.
788         tail_size = self._data_length % self._segment_size
789         if not tail_size:
790             self._tail_block_size = self._block_size
791         else:
792             self._tail_block_size = mathutil.next_multiple(tail_size,
793                                                            self._required_shares)
794             self._tail_block_size /= self._required_shares
795
796         # We already know where the sharedata starts; right after the end
797         # of the header (which is defined as the signable part + the offsets)
798         # We can also calculate where the encrypted private key begins
799         # from what we know know.
800         self._actual_block_size = self._block_size + SALT_SIZE
801         data_size = self._actual_block_size * (self._num_segments - 1)
802         data_size += self._tail_block_size
803         data_size += SALT_SIZE
804         self._offsets['enc_privkey'] = MDMFHEADERSIZE
805
806         # We don't define offsets for these because we want them to be
807         # tightly packed -- this allows us to ignore the responsibility
808         # of padding individual values, and of removing that padding
809         # later. So nonconstant_start is where we start writing
810         # nonconstant data.
811         nonconstant_start = self._offsets['enc_privkey']
812         nonconstant_start += PRIVATE_KEY_SIZE
813         nonconstant_start += SIGNATURE_SIZE
814         nonconstant_start += VERIFICATION_KEY_SIZE
815         nonconstant_start += SHARE_HASH_CHAIN_SIZE
816
817         self._offsets['share_data'] = nonconstant_start
818
819         # Finally, we know how big the share data will be, so we can
820         # figure out where the block hash tree needs to go.
821         # XXX: But this will go away if Zooko wants to make it so that
822         # you don't need to know the size of the file before you start
823         # uploading it.
824         self._offsets['block_hash_tree'] = self._offsets['share_data'] + \
825                     data_size
826
827         # Done. We can snow start writing.
828
829
830     def set_checkstring(self,
831                         seqnum_or_checkstring,
832                         root_hash=None,
833                         salt=None):
834         """
835         Set checkstring checkstring for the given shnum.
836
837         This can be invoked in one of two ways.
838
839         With one argument, I assume that you are giving me a literal
840         checkstring -- e.g., the output of get_checkstring. I will then
841         set that checkstring as it is. This form is used by unit tests.
842
843         With two arguments, I assume that you are giving me a sequence
844         number and root hash to make a checkstring from. In that case, I
845         will build a checkstring and set it for you. This form is used
846         by the publisher.
847
848         By default, I assume that I am writing new shares to the grid.
849         If you don't explcitly set your own checkstring, I will use
850         one that requires that the remote share not exist. You will want
851         to use this method if you are updating a share in-place;
852         otherwise, writes will fail.
853         """
854         # You're allowed to overwrite checkstrings with this method;
855         # I assume that users know what they are doing when they call
856         # it.
857         if root_hash:
858             checkstring = struct.pack(MDMFCHECKSTRING,
859                                       1,
860                                       seqnum_or_checkstring,
861                                       root_hash)
862         else:
863             checkstring = seqnum_or_checkstring
864
865         if checkstring == "":
866             # We special-case this, since len("") = 0, but we need
867             # length of 1 for the case of an empty share to work on the
868             # storage server, which is what a checkstring that is the
869             # empty string means.
870             self._testvs = []
871         else:
872             self._testvs = []
873             self._testvs.append((0, len(checkstring), "eq", checkstring))
874
875
876     def __repr__(self):
877         return "MDMFSlotWriteProxy for share %d" % self.shnum
878
879
880     def get_checkstring(self):
881         """
882         Given a share number, I return a representation of what the
883         checkstring for that share on the server will look like.
884
885         I am mostly used for tests.
886         """
887         if self._root_hash:
888             roothash = self._root_hash
889         else:
890             roothash = "\x00" * 32
891         return struct.pack(MDMFCHECKSTRING,
892                            1,
893                            self._seqnum,
894                            roothash)
895
896
897     def put_block(self, data, segnum, salt):
898         """
899         I queue a write vector for the data, salt, and segment number
900         provided to me. I return None, as I do not actually cause
901         anything to be written yet.
902         """
903         if segnum >= self._num_segments:
904             raise LayoutInvalid("I won't overwrite the block hash tree")
905         if len(salt) != SALT_SIZE:
906             raise LayoutInvalid("I was given a salt of size %d, but "
907                                 "I wanted a salt of size %d")
908         if segnum + 1 == self._num_segments:
909             if len(data) != self._tail_block_size:
910                 raise LayoutInvalid("I was given the wrong size block to write")
911         elif len(data) != self._block_size:
912             raise LayoutInvalid("I was given the wrong size block to write")
913
914         # We want to write at len(MDMFHEADER) + segnum * block_size.
915         offset = self._offsets['share_data'] + \
916             (self._actual_block_size * segnum)
917         data = salt + data
918
919         self._writevs.append(tuple([offset, data]))
920
921
922     def put_encprivkey(self, encprivkey):
923         """
924         I queue a write vector for the encrypted private key provided to
925         me.
926         """
927         assert self._offsets
928         assert self._offsets['enc_privkey']
929         # You shouldn't re-write the encprivkey after the block hash
930         # tree is written, since that could cause the private key to run
931         # into the block hash tree. Before it writes the block hash
932         # tree, the block hash tree writing method writes the offset of
933         # the share hash chain. So that's a good indicator of whether or
934         # not the block hash tree has been written.
935         if "signature" in self._offsets:
936             raise LayoutInvalid("You can't put the encrypted private key "
937                                 "after putting the share hash chain")
938
939         self._offsets['share_hash_chain'] = self._offsets['enc_privkey'] + \
940                 len(encprivkey)
941
942         self._writevs.append(tuple([self._offsets['enc_privkey'], encprivkey]))
943
944
945     def put_blockhashes(self, blockhashes):
946         """
947         I queue a write vector to put the block hash tree in blockhashes
948         onto the remote server.
949
950         The encrypted private key must be queued before the block hash
951         tree, since we need to know how large it is to know where the
952         block hash tree should go. The block hash tree must be put
953         before the share hash chain, since its size determines the
954         offset of the share hash chain.
955         """
956         assert self._offsets
957         assert "block_hash_tree" in self._offsets
958
959         assert isinstance(blockhashes, list)
960
961         blockhashes_s = "".join(blockhashes)
962         self._offsets['EOF'] = self._offsets['block_hash_tree'] + len(blockhashes_s)
963
964         self._writevs.append(tuple([self._offsets['block_hash_tree'],
965                                   blockhashes_s]))
966
967
968     def put_sharehashes(self, sharehashes):
969         """
970         I queue a write vector to put the share hash chain in my
971         argument onto the remote server.
972
973         The block hash tree must be queued before the share hash chain,
974         since we need to know where the block hash tree ends before we
975         can know where the share hash chain starts. The share hash chain
976         must be put before the signature, since the length of the packed
977         share hash chain determines the offset of the signature. Also,
978         semantically, you must know what the root of the block hash tree
979         is before you can generate a valid signature.
980         """
981         assert isinstance(sharehashes, dict)
982         assert self._offsets
983         if "share_hash_chain" not in self._offsets:
984             raise LayoutInvalid("You must put the block hash tree before "
985                                 "putting the share hash chain")
986
987         # The signature comes after the share hash chain. If the
988         # signature has already been written, we must not write another
989         # share hash chain. The signature writes the verification key
990         # offset when it gets sent to the remote server, so we look for
991         # that.
992         if "verification_key" in self._offsets:
993             raise LayoutInvalid("You must write the share hash chain "
994                                 "before you write the signature")
995         sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i])
996                                   for i in sorted(sharehashes.keys())])
997         self._offsets['signature'] = self._offsets['share_hash_chain'] + \
998             len(sharehashes_s)
999         self._writevs.append(tuple([self._offsets['share_hash_chain'],
1000                             sharehashes_s]))
1001
1002
1003     def put_root_hash(self, roothash):
1004         """
1005         Put the root hash (the root of the share hash tree) in the
1006         remote slot.
1007         """
1008         # It does not make sense to be able to put the root 
1009         # hash without first putting the share hashes, since you need
1010         # the share hashes to generate the root hash.
1011         #
1012         # Signature is defined by the routine that places the share hash
1013         # chain, so it's a good thing to look for in finding out whether
1014         # or not the share hash chain exists on the remote server.
1015         if len(roothash) != HASH_SIZE:
1016             raise LayoutInvalid("hashes and salts must be exactly %d bytes"
1017                                  % HASH_SIZE)
1018         self._root_hash = roothash
1019         # To write both of these values, we update the checkstring on
1020         # the remote server, which includes them
1021         checkstring = self.get_checkstring()
1022         self._writevs.append(tuple([0, checkstring]))
1023         # This write, if successful, changes the checkstring, so we need
1024         # to update our internal checkstring to be consistent with the
1025         # one on the server.
1026
1027
1028     def get_signable(self):
1029         """
1030         Get the first seven fields of the mutable file; the parts that
1031         are signed.
1032         """
1033         if not self._root_hash:
1034             raise LayoutInvalid("You need to set the root hash "
1035                                 "before getting something to "
1036                                 "sign")
1037         return struct.pack(MDMFSIGNABLEHEADER,
1038                            1,
1039                            self._seqnum,
1040                            self._root_hash,
1041                            self._required_shares,
1042                            self._total_shares,
1043                            self._segment_size,
1044                            self._data_length)
1045
1046
1047     def put_signature(self, signature):
1048         """
1049         I queue a write vector for the signature of the MDMF share.
1050
1051         I require that the root hash and share hash chain have been put
1052         to the grid before I will write the signature to the grid.
1053         """
1054         if "signature" not in self._offsets:
1055             raise LayoutInvalid("You must put the share hash chain "
1056         # It does not make sense to put a signature without first
1057         # putting the root hash and the salt hash (since otherwise
1058         # the signature would be incomplete), so we don't allow that.
1059                        "before putting the signature")
1060         if not self._root_hash:
1061             raise LayoutInvalid("You must complete the signed prefix "
1062                                 "before computing a signature")
1063         # If we put the signature after we put the verification key, we
1064         # could end up running into the verification key, and will
1065         # probably screw up the offsets as well. So we don't allow that.
1066         if "verification_key_end" in self._offsets:
1067             raise LayoutInvalid("You can't put the signature after the "
1068                                 "verification key")
1069         # The method that writes the verification key defines the EOF
1070         # offset before writing the verification key, so look for that.
1071         self._offsets['verification_key'] = self._offsets['signature'] +\
1072             len(signature)
1073         self._writevs.append(tuple([self._offsets['signature'], signature]))
1074
1075
1076     def put_verification_key(self, verification_key):
1077         """
1078         I queue a write vector for the verification key.
1079
1080         I require that the signature have been written to the storage
1081         server before I allow the verification key to be written to the
1082         remote server.
1083         """
1084         if "verification_key" not in self._offsets:
1085             raise LayoutInvalid("You must put the signature before you "
1086                                 "can put the verification key")
1087
1088         self._offsets['verification_key_end'] = \
1089             self._offsets['verification_key'] + len(verification_key)
1090         assert self._offsets['verification_key_end'] <= self._offsets['share_data']
1091         self._writevs.append(tuple([self._offsets['verification_key'],
1092                             verification_key]))
1093
1094
1095     def _get_offsets_tuple(self):
1096         return tuple([(key, value) for key, value in self._offsets.items()])
1097
1098
1099     def get_verinfo(self):
1100         return (self._seqnum,
1101                 self._root_hash,
1102                 self._required_shares,
1103                 self._total_shares,
1104                 self._segment_size,
1105                 self._data_length,
1106                 self.get_signable(),
1107                 self._get_offsets_tuple())
1108
1109
1110     def finish_publishing(self):
1111         """
1112         I add a write vector for the offsets table, and then cause all
1113         of the write vectors that I've dealt with so far to be published
1114         to the remote server, ending the write process.
1115         """
1116         if "verification_key_end" not in self._offsets:
1117             raise LayoutInvalid("You must put the verification key before "
1118                                 "you can publish the offsets")
1119         offsets_offset = struct.calcsize(MDMFHEADERWITHOUTOFFSETS)
1120         offsets = struct.pack(MDMFOFFSETS,
1121                               self._offsets['enc_privkey'],
1122                               self._offsets['share_hash_chain'],
1123                               self._offsets['signature'],
1124                               self._offsets['verification_key'],
1125                               self._offsets['verification_key_end'],
1126                               self._offsets['share_data'],
1127                               self._offsets['block_hash_tree'],
1128                               self._offsets['EOF'])
1129         self._writevs.append(tuple([offsets_offset, offsets]))
1130         encoding_parameters_offset = struct.calcsize(MDMFCHECKSTRING)
1131         params = struct.pack(">BBQQ",
1132                              self._required_shares,
1133                              self._total_shares,
1134                              self._segment_size,
1135                              self._data_length)
1136         self._writevs.append(tuple([encoding_parameters_offset, params]))
1137         return self._write(self._writevs)
1138
1139
1140     def _write(self, datavs, on_failure=None, on_success=None):
1141         """I write the data vectors in datavs to the remote slot."""
1142         tw_vectors = {}
1143         if not self._testvs:
1144             self._testvs = []
1145             self._testvs.append(tuple([0, 1, "eq", ""]))
1146         if not self._written:
1147             # Write a new checkstring to the share when we write it, so
1148             # that we have something to check later.
1149             new_checkstring = self.get_checkstring()
1150             datavs.append((0, new_checkstring))
1151             def _first_write():
1152                 self._written = True
1153                 self._testvs = [(0, len(new_checkstring), "eq", new_checkstring)]
1154             on_success = _first_write
1155         tw_vectors[self.shnum] = (self._testvs, datavs, None)
1156         d = self._rref.callRemote("slot_testv_and_readv_and_writev",
1157                                   self._storage_index,
1158                                   self._secrets,
1159                                   tw_vectors,
1160                                   self._readv)
1161         def _result(results):
1162             if isinstance(results, failure.Failure) or not results[0]:
1163                 # Do nothing; the write was unsuccessful.
1164                 if on_failure: on_failure()
1165             else:
1166                 if on_success: on_success()
1167             return results
1168         d.addCallback(_result)
1169         return d
1170
1171
1172 class MDMFSlotReadProxy:
1173     """
1174     I read from a mutable slot filled with data written in the MDMF data
1175     format (which is described above).
1176
1177     I can be initialized with some amount of data, which I will use (if
1178     it is valid) to eliminate some of the need to fetch it from servers.
1179     """
1180     def __init__(self,
1181                  rref,
1182                  storage_index,
1183                  shnum,
1184                  data=""):
1185         # Start the initialization process.
1186         self._rref = rref
1187         self._storage_index = storage_index
1188         self.shnum = shnum
1189
1190         # Before doing anything, the reader is probably going to want to
1191         # verify that the signature is correct. To do that, they'll need
1192         # the verification key, and the signature. To get those, we'll
1193         # need the offset table. So fetch the offset table on the
1194         # assumption that that will be the first thing that a reader is
1195         # going to do.
1196
1197         # The fact that these encoding parameters are None tells us
1198         # that we haven't yet fetched them from the remote share, so we
1199         # should. We could just not set them, but the checks will be
1200         # easier to read if we don't have to use hasattr.
1201         self._version_number = None
1202         self._sequence_number = None
1203         self._root_hash = None
1204         # Filled in if we're dealing with an SDMF file. Unused
1205         # otherwise.
1206         self._salt = None
1207         self._required_shares = None
1208         self._total_shares = None
1209         self._segment_size = None
1210         self._data_length = None
1211         self._offsets = None
1212
1213         # If the user has chosen to initialize us with some data, we'll
1214         # try to satisfy subsequent data requests with that data before
1215         # asking the storage server for it. If 
1216         self._data = data
1217         # The way callers interact with cache in the filenode returns
1218         # None if there isn't any cached data, but the way we index the
1219         # cached data requires a string, so convert None to "".
1220         if self._data == None:
1221             self._data = ""
1222
1223
1224     def _maybe_fetch_offsets_and_header(self, force_remote=False):
1225         """
1226         I fetch the offset table and the header from the remote slot if
1227         I don't already have them. If I do have them, I do nothing and
1228         return an empty Deferred.
1229         """
1230         if self._offsets:
1231             return defer.succeed(None)
1232         # At this point, we may be either SDMF or MDMF. Fetching 107 
1233         # bytes will be enough to get header and offsets for both SDMF and
1234         # MDMF, though we'll be left with 4 more bytes than we
1235         # need if this ends up being MDMF. This is probably less
1236         # expensive than the cost of a second roundtrip.
1237         readvs = [(0, 123)]
1238         d = self._read(readvs, force_remote)
1239         d.addCallback(self._process_encoding_parameters)
1240         d.addCallback(self._process_offsets)
1241         return d
1242
1243
1244     def _process_encoding_parameters(self, encoding_parameters):
1245         assert self.shnum in encoding_parameters
1246         encoding_parameters = encoding_parameters[self.shnum][0]
1247         # The first byte is the version number. It will tell us what
1248         # to do next.
1249         (verno,) = struct.unpack(">B", encoding_parameters[:1])
1250         if verno == MDMF_VERSION:
1251             read_size = MDMFHEADERWITHOUTOFFSETSSIZE
1252             (verno,
1253              seqnum,
1254              root_hash,
1255              k,
1256              n,
1257              segsize,
1258              datalen) = struct.unpack(MDMFHEADERWITHOUTOFFSETS,
1259                                       encoding_parameters[:read_size])
1260             if segsize == 0 and datalen == 0:
1261                 # Empty file, no segments.
1262                 self._num_segments = 0
1263             else:
1264                 self._num_segments = mathutil.div_ceil(datalen, segsize)
1265
1266         elif verno == SDMF_VERSION:
1267             read_size = SIGNED_PREFIX_LENGTH
1268             (verno,
1269              seqnum,
1270              root_hash,
1271              salt,
1272              k,
1273              n,
1274              segsize,
1275              datalen) = struct.unpack(">BQ32s16s BBQQ",
1276                                 encoding_parameters[:SIGNED_PREFIX_LENGTH])
1277             self._salt = salt
1278             if segsize == 0 and datalen == 0:
1279                 # empty file
1280                 self._num_segments = 0
1281             else:
1282                 # non-empty SDMF files have one segment.
1283                 self._num_segments = 1
1284         else:
1285             raise UnknownVersionError("You asked me to read mutable file "
1286                                       "version %d, but I only understand "
1287                                       "%d and %d" % (verno, SDMF_VERSION,
1288                                                      MDMF_VERSION))
1289
1290         self._version_number = verno
1291         self._sequence_number = seqnum
1292         self._root_hash = root_hash
1293         self._required_shares = k
1294         self._total_shares = n
1295         self._segment_size = segsize
1296         self._data_length = datalen
1297
1298         self._block_size = self._segment_size / self._required_shares
1299         # We can upload empty files, and need to account for this fact
1300         # so as to avoid zero-division and zero-modulo errors.
1301         if datalen > 0:
1302             tail_size = self._data_length % self._segment_size
1303         else:
1304             tail_size = 0
1305         if not tail_size:
1306             self._tail_block_size = self._block_size
1307         else:
1308             self._tail_block_size = mathutil.next_multiple(tail_size,
1309                                                     self._required_shares)
1310             self._tail_block_size /= self._required_shares
1311
1312         return encoding_parameters
1313
1314
1315     def _process_offsets(self, offsets):
1316         if self._version_number == 0:
1317             read_size = OFFSETS_LENGTH
1318             read_offset = SIGNED_PREFIX_LENGTH
1319             end = read_size + read_offset
1320             (signature,
1321              share_hash_chain,
1322              block_hash_tree,
1323              share_data,
1324              enc_privkey,
1325              EOF) = struct.unpack(">LLLLQQ",
1326                                   offsets[read_offset:end])
1327             self._offsets = {}
1328             self._offsets['signature'] = signature
1329             self._offsets['share_data'] = share_data
1330             self._offsets['block_hash_tree'] = block_hash_tree
1331             self._offsets['share_hash_chain'] = share_hash_chain
1332             self._offsets['enc_privkey'] = enc_privkey
1333             self._offsets['EOF'] = EOF
1334
1335         elif self._version_number == 1:
1336             read_offset = MDMFHEADERWITHOUTOFFSETSSIZE
1337             read_length = MDMFOFFSETS_LENGTH
1338             end = read_offset + read_length
1339             (encprivkey,
1340              sharehashes,
1341              signature,
1342              verification_key,
1343              verification_key_end,
1344              sharedata,
1345              blockhashes,
1346              eof) = struct.unpack(MDMFOFFSETS,
1347                                   offsets[read_offset:end])
1348             self._offsets = {}
1349             self._offsets['enc_privkey'] = encprivkey
1350             self._offsets['block_hash_tree'] = blockhashes
1351             self._offsets['share_hash_chain'] = sharehashes
1352             self._offsets['signature'] = signature
1353             self._offsets['verification_key'] = verification_key
1354             self._offsets['verification_key_end']= \
1355                 verification_key_end
1356             self._offsets['EOF'] = eof
1357             self._offsets['share_data'] = sharedata
1358
1359
1360     def get_block_and_salt(self, segnum):
1361         """
1362         I return (block, salt), where block is the block data and
1363         salt is the salt used to encrypt that segment.
1364         """
1365         d = self._maybe_fetch_offsets_and_header()
1366         def _then(ignored):
1367             base_share_offset = self._offsets['share_data']
1368
1369             if segnum + 1 > self._num_segments:
1370                 raise LayoutInvalid("Not a valid segment number")
1371
1372             if self._version_number == 0:
1373                 share_offset = base_share_offset + self._block_size * segnum
1374             else:
1375                 share_offset = base_share_offset + (self._block_size + \
1376                                                     SALT_SIZE) * segnum
1377             if segnum + 1 == self._num_segments:
1378                 data = self._tail_block_size
1379             else:
1380                 data = self._block_size
1381
1382             if self._version_number == 1:
1383                 data += SALT_SIZE
1384
1385             readvs = [(share_offset, data)]
1386             return readvs
1387         d.addCallback(_then)
1388         d.addCallback(lambda readvs: self._read(readvs))
1389         def _process_results(results):
1390             assert self.shnum in results
1391             if self._version_number == 0:
1392                 # We only read the share data, but we know the salt from
1393                 # when we fetched the header
1394                 data = results[self.shnum]
1395                 if not data:
1396                     data = ""
1397                 else:
1398                     assert len(data) == 1
1399                     data = data[0]
1400                 salt = self._salt
1401             else:
1402                 data = results[self.shnum]
1403                 if not data:
1404                     salt = data = ""
1405                 else:
1406                     salt_and_data = results[self.shnum][0]
1407                     salt = salt_and_data[:SALT_SIZE]
1408                     data = salt_and_data[SALT_SIZE:]
1409             return data, salt
1410         d.addCallback(_process_results)
1411         return d
1412
1413
1414     def get_blockhashes(self, needed=None, force_remote=False):
1415         """
1416         I return the block hash tree
1417
1418         I take an optional argument, needed, which is a set of indices
1419         correspond to hashes that I should fetch. If this argument is
1420         missing, I will fetch the entire block hash tree; otherwise, I
1421         may attempt to fetch fewer hashes, based on what needed says
1422         that I should do. Note that I may fetch as many hashes as I
1423         want, so long as the set of hashes that I do fetch is a superset
1424         of the ones that I am asked for, so callers should be prepared
1425         to tolerate additional hashes.
1426         """
1427         # TODO: Return only the parts of the block hash tree necessary
1428         # to validate the blocknum provided?
1429         # This is a good idea, but it is hard to implement correctly. It
1430         # is bad to fetch any one block hash more than once, so we
1431         # probably just want to fetch the whole thing at once and then
1432         # serve it.
1433         if needed == set([]):
1434             return defer.succeed([])
1435         d = self._maybe_fetch_offsets_and_header()
1436         def _then(ignored):
1437             blockhashes_offset = self._offsets['block_hash_tree']
1438             if self._version_number == 1:
1439                 blockhashes_length = self._offsets['EOF'] - blockhashes_offset
1440             else:
1441                 blockhashes_length = self._offsets['share_data'] - blockhashes_offset
1442             readvs = [(blockhashes_offset, blockhashes_length)]
1443             return readvs
1444         d.addCallback(_then)
1445         d.addCallback(lambda readvs:
1446             self._read(readvs, force_remote=force_remote))
1447         def _build_block_hash_tree(results):
1448             assert self.shnum in results
1449
1450             rawhashes = results[self.shnum][0]
1451             results = [rawhashes[i:i+HASH_SIZE]
1452                        for i in range(0, len(rawhashes), HASH_SIZE)]
1453             return results
1454         d.addCallback(_build_block_hash_tree)
1455         return d
1456
1457
1458     def get_sharehashes(self, needed=None, force_remote=False):
1459         """
1460         I return the part of the share hash chain placed to validate
1461         this share.
1462
1463         I take an optional argument, needed. Needed is a set of indices
1464         that correspond to the hashes that I should fetch. If needed is
1465         not present, I will fetch and return the entire share hash
1466         chain. Otherwise, I may fetch and return any part of the share
1467         hash chain that is a superset of the part that I am asked to
1468         fetch. Callers should be prepared to deal with more hashes than
1469         they've asked for.
1470         """
1471         if needed == set([]):
1472             return defer.succeed([])
1473         d = self._maybe_fetch_offsets_and_header()
1474
1475         def _make_readvs(ignored):
1476             sharehashes_offset = self._offsets['share_hash_chain']
1477             if self._version_number == 0:
1478                 sharehashes_length = self._offsets['block_hash_tree'] - sharehashes_offset
1479             else:
1480                 sharehashes_length = self._offsets['signature'] - sharehashes_offset
1481             readvs = [(sharehashes_offset, sharehashes_length)]
1482             return readvs
1483         d.addCallback(_make_readvs)
1484         d.addCallback(lambda readvs:
1485             self._read(readvs, force_remote=force_remote))
1486         def _build_share_hash_chain(results):
1487             assert self.shnum in results
1488
1489             sharehashes = results[self.shnum][0]
1490             results = [sharehashes[i:i+(HASH_SIZE + 2)]
1491                        for i in range(0, len(sharehashes), HASH_SIZE + 2)]
1492             results = dict([struct.unpack(">H32s", data)
1493                             for data in results])
1494             return results
1495         d.addCallback(_build_share_hash_chain)
1496         return d
1497
1498
1499     def get_encprivkey(self):
1500         """
1501         I return the encrypted private key.
1502         """
1503         d = self._maybe_fetch_offsets_and_header()
1504
1505         def _make_readvs(ignored):
1506             privkey_offset = self._offsets['enc_privkey']
1507             if self._version_number == 0:
1508                 privkey_length = self._offsets['EOF'] - privkey_offset
1509             else:
1510                 privkey_length = self._offsets['share_hash_chain'] - privkey_offset
1511             readvs = [(privkey_offset, privkey_length)]
1512             return readvs
1513         d.addCallback(_make_readvs)
1514         d.addCallback(lambda readvs: self._read(readvs))
1515         def _process_results(results):
1516             assert self.shnum in results
1517             privkey = results[self.shnum][0]
1518             return privkey
1519         d.addCallback(_process_results)
1520         return d
1521
1522
1523     def get_signature(self):
1524         """
1525         I return the signature of my share.
1526         """
1527         d = self._maybe_fetch_offsets_and_header()
1528
1529         def _make_readvs(ignored):
1530             signature_offset = self._offsets['signature']
1531             if self._version_number == 1:
1532                 signature_length = self._offsets['verification_key'] - signature_offset
1533             else:
1534                 signature_length = self._offsets['share_hash_chain'] - signature_offset
1535             readvs = [(signature_offset, signature_length)]
1536             return readvs
1537         d.addCallback(_make_readvs)
1538         d.addCallback(lambda readvs: self._read(readvs))
1539         def _process_results(results):
1540             assert self.shnum in results
1541             signature = results[self.shnum][0]
1542             return signature
1543         d.addCallback(_process_results)
1544         return d
1545
1546
1547     def get_verification_key(self):
1548         """
1549         I return the verification key.
1550         """
1551         d = self._maybe_fetch_offsets_and_header()
1552
1553         def _make_readvs(ignored):
1554             if self._version_number == 1:
1555                 vk_offset = self._offsets['verification_key']
1556                 vk_length = self._offsets['verification_key_end'] - vk_offset
1557             else:
1558                 vk_offset = struct.calcsize(">BQ32s16sBBQQLLLLQQ")
1559                 vk_length = self._offsets['signature'] - vk_offset
1560             readvs = [(vk_offset, vk_length)]
1561             return readvs
1562         d.addCallback(_make_readvs)
1563         d.addCallback(lambda readvs: self._read(readvs))
1564         def _process_results(results):
1565             assert self.shnum in results
1566             verification_key = results[self.shnum][0]
1567             return verification_key
1568         d.addCallback(_process_results)
1569         return d
1570
1571
1572     def get_encoding_parameters(self):
1573         """
1574         I return (k, n, segsize, datalen)
1575         """
1576         d = self._maybe_fetch_offsets_and_header()
1577         d.addCallback(lambda ignored:
1578             (self._required_shares,
1579              self._total_shares,
1580              self._segment_size,
1581              self._data_length))
1582         return d
1583
1584
1585     def get_seqnum(self):
1586         """
1587         I return the sequence number for this share.
1588         """
1589         d = self._maybe_fetch_offsets_and_header()
1590         d.addCallback(lambda ignored:
1591             self._sequence_number)
1592         return d
1593
1594
1595     def get_root_hash(self):
1596         """
1597         I return the root of the block hash tree
1598         """
1599         d = self._maybe_fetch_offsets_and_header()
1600         d.addCallback(lambda ignored: self._root_hash)
1601         return d
1602
1603
1604     def get_checkstring(self):
1605         """
1606         I return the packed representation of the following:
1607
1608             - version number
1609             - sequence number
1610             - root hash
1611             - salt hash
1612
1613         which my users use as a checkstring to detect other writers.
1614         """
1615         d = self._maybe_fetch_offsets_and_header()
1616         def _build_checkstring(ignored):
1617             if self._salt:
1618                 checkstring = struct.pack(PREFIX,
1619                                           self._version_number,
1620                                           self._sequence_number,
1621                                           self._root_hash,
1622                                           self._salt)
1623             else:
1624                 checkstring = struct.pack(MDMFCHECKSTRING,
1625                                           self._version_number,
1626                                           self._sequence_number,
1627                                           self._root_hash)
1628
1629             return checkstring
1630         d.addCallback(_build_checkstring)
1631         return d
1632
1633
1634     def get_prefix(self, force_remote):
1635         d = self._maybe_fetch_offsets_and_header(force_remote)
1636         d.addCallback(lambda ignored:
1637             self._build_prefix())
1638         return d
1639
1640
1641     def _build_prefix(self):
1642         # The prefix is another name for the part of the remote share
1643         # that gets signed. It consists of everything up to and
1644         # including the datalength, packed by struct.
1645         if self._version_number == SDMF_VERSION:
1646             return struct.pack(SIGNED_PREFIX,
1647                            self._version_number,
1648                            self._sequence_number,
1649                            self._root_hash,
1650                            self._salt,
1651                            self._required_shares,
1652                            self._total_shares,
1653                            self._segment_size,
1654                            self._data_length)
1655
1656         else:
1657             return struct.pack(MDMFSIGNABLEHEADER,
1658                            self._version_number,
1659                            self._sequence_number,
1660                            self._root_hash,
1661                            self._required_shares,
1662                            self._total_shares,
1663                            self._segment_size,
1664                            self._data_length)
1665
1666
1667     def _get_offsets_tuple(self):
1668         # The offsets tuple is another component of the version
1669         # information tuple. It is basically our offsets dictionary,
1670         # itemized and in a tuple.
1671         return self._offsets.copy()
1672
1673
1674     def get_verinfo(self):
1675         """
1676         I return my verinfo tuple. This is used by the ServermapUpdater
1677         to keep track of versions of mutable files.
1678
1679         The verinfo tuple for MDMF files contains:
1680             - seqnum
1681             - root hash
1682             - a blank (nothing)
1683             - segsize
1684             - datalen
1685             - k
1686             - n
1687             - prefix (the thing that you sign)
1688             - a tuple of offsets
1689
1690         We include the nonce in MDMF to simplify processing of version
1691         information tuples.
1692
1693         The verinfo tuple for SDMF files is the same, but contains a
1694         16-byte IV instead of a hash of salts.
1695         """
1696         d = self._maybe_fetch_offsets_and_header()
1697         def _build_verinfo(ignored):
1698             if self._version_number == SDMF_VERSION:
1699                 salt_to_use = self._salt
1700             else:
1701                 salt_to_use = None
1702             return (self._sequence_number,
1703                     self._root_hash,
1704                     salt_to_use,
1705                     self._segment_size,
1706                     self._data_length,
1707                     self._required_shares,
1708                     self._total_shares,
1709                     self._build_prefix(),
1710                     self._get_offsets_tuple())
1711         d.addCallback(_build_verinfo)
1712         return d
1713
1714
1715     def _read(self, readvs, force_remote=False):
1716         unsatisfiable = filter(lambda x: x[0] + x[1] > len(self._data), readvs)
1717         # TODO: It's entirely possible to tweak this so that it just
1718         # fulfills the requests that it can, and not demand that all
1719         # requests are satisfiable before running it.
1720         if not unsatisfiable and not force_remote:
1721             results = [self._data[offset:offset+length]
1722                        for (offset, length) in readvs]
1723             results = {self.shnum: results}
1724             return defer.succeed(results)
1725         else:
1726             return self._rref.callRemote("slot_readv",
1727                                          self._storage_index,
1728                                          [self.shnum],
1729                                          readvs)
1730
1731
1732     def is_sdmf(self):
1733         """I tell my caller whether or not my remote file is SDMF or MDMF
1734         """
1735         d = self._maybe_fetch_offsets_and_header()
1736         d.addCallback(lambda ignored:
1737             self._version_number == 0)
1738         return d
1739
1740
1741 class LayoutInvalid(Exception):
1742     """
1743     This isn't a valid MDMF mutable file
1744     """