X-Git-Url: https://git.rkrishnan.org/?a=blobdiff_plain;f=src%2Fallmydata%2Fmutable%2Flayout.py;h=40fd9331badcfdd988ed014a06b1ed9109f22958;hb=278ee0db76cd39682a0d0469ad9d6946828ce05f;hp=ee89128cc4d30208cc274158f995b3c2b1693a40;hpb=d8ba8c2eb5d4b1b5de448bc9cebe2c81cde4247d;p=tahoe-lafs%2Ftahoe-lafs.git diff --git a/src/allmydata/mutable/layout.py b/src/allmydata/mutable/layout.py index ee89128c..40fd9331 100644 --- a/src/allmydata/mutable/layout.py +++ b/src/allmydata/mutable/layout.py @@ -1,13 +1,80 @@ import struct -from common import NeedMoreDataError, UnknownVersionError +from allmydata.mutable.common import NeedMoreDataError, UnknownVersionError, \ + BadShareError +from allmydata.interfaces import HASH_SIZE, SALT_SIZE, SDMF_VERSION, \ + MDMF_VERSION, IMutableSlotWriter +from allmydata.util import mathutil +from twisted.python import failure +from twisted.internet import defer +from zope.interface import implements -PREFIX = ">BQ32s16s" # each version has a different prefix + +# These strings describe the format of the packed structs they help process. +# Here's what they mean: +# +# PREFIX: +# >: Big-endian byte order; the most significant byte is first (leftmost). +# B: The container version information; stored as an unsigned 8-bit integer. +# This is currently either SDMF_VERSION or MDMF_VERSION. +# Q: The sequence number; this is sort of like a revision history for +# mutable files; they start at 1 and increase as they are changed after +# being uploaded. Stored as an unsigned 64-bit integer. +# 32s: The root hash of the share hash tree. We use sha-256d, so we use 32 +# bytes to store the value. +# 16s: The salt for the readkey. This is a 16-byte random value. +# +# SIGNED_PREFIX additions, things that are covered by the signature: +# B: The "k" encoding parameter. We store this as an unsigned 8-bit +# integer, since our erasure coding scheme cannot encode to more than +# 255 pieces. +# B: The "N" encoding parameter. Stored as an unsigned 8-bit integer for +# the same reason as above. +# Q: The segment size of the uploaded file. This is an unsigned 64-bit +# integer, to allow handling large segments and files. For SDMF the +# segment size is the data length plus padding; for MDMF it can be +# smaller. +# Q: The data length of the uploaded file. Like the segment size field, +# it is an unsigned 64-bit integer. +# +# HEADER additions: +# L: The offset of the signature. An unsigned 32-bit integer. +# L: The offset of the share hash chain. An unsigned 32-bit integer. +# L: The offset of the block hash tree. An unsigned 32-bit integer. +# L: The offset of the share data. An unsigned 32-bit integer. +# Q: The offset of the encrypted private key. An unsigned 64-bit integer, +# to account for the possibility of a lot of share data. +# Q: The offset of the EOF. An unsigned 64-bit integer, to account for +# the possibility of a lot of share data. +# +# After all of these, we have the following: +# - The verification key: Occupies the space between the end of the header +# and the start of the signature (i.e.: data[HEADER_LENGTH:o['signature']]. +# - The signature, which goes from the signature offset to the share hash +# chain offset. +# - The share hash chain, which goes from the share hash chain offset to +# the block hash tree offset. +# - The share data, which goes from the share data offset to the encrypted +# private key offset. +# - The encrypted private key offset, which goes until the end of the file. +# +# The block hash tree in this encoding has only one share, so the offset of +# the share data will be 32 bits more than the offset of the block hash tree. +# Given this, we may need to check to see how many bytes a reasonably sized +# block hash tree will take up. + +PREFIX = ">BQ32s16s" # each version may have a different prefix SIGNED_PREFIX = ">BQ32s16s BBQQ" # this is covered by the signature SIGNED_PREFIX_LENGTH = struct.calcsize(SIGNED_PREFIX) HEADER = ">BQ32s16s BBQQ LLLLQQ" # includes offsets HEADER_LENGTH = struct.calcsize(HEADER) +OFFSETS = ">LLLLQQ" +OFFSETS_LENGTH = struct.calcsize(OFFSETS) + +MAX_MUTABLE_SHARE_SIZE = 69105*1000*1000*1000*1000 # 69105 TB, kind of arbitrary + +# These are still used for some tests of SDMF files. def unpack_header(data): o = {} (version, @@ -23,30 +90,6 @@ def unpack_header(data): o['EOF']) = struct.unpack(HEADER, data[:HEADER_LENGTH]) return (version, seqnum, root_hash, IV, k, N, segsize, datalen, o) -def unpack_prefix_and_signature(data): - assert len(data) >= HEADER_LENGTH, len(data) - prefix = data[:SIGNED_PREFIX_LENGTH] - - (version, - seqnum, - root_hash, - IV, - k, N, segsize, datalen, - o) = unpack_header(data) - - if version != 0: - raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version) - - if len(data) < o['share_hash_chain']: - raise NeedMoreDataError(o['share_hash_chain'], - o['enc_privkey'], o['EOF']-o['enc_privkey']) - - pubkey_s = data[HEADER_LENGTH:o['signature']] - signature = data[o['signature']:o['share_hash_chain']] - - return (seqnum, root_hash, IV, k, N, segsize, datalen, - pubkey_s, signature, prefix) - def unpack_share(data): assert len(data) >= HEADER_LENGTH o = {} @@ -74,7 +117,9 @@ def unpack_share(data): share_hash_chain_s = data[o['share_hash_chain']:o['block_hash_tree']] share_hash_format = ">H32s" hsize = struct.calcsize(share_hash_format) - assert len(share_hash_chain_s) % hsize == 0, len(share_hash_chain_s) + if len(share_hash_chain_s) % hsize != 0: + raise BadShareError("hash chain is %d bytes, not multiple of %d" + % (len(share_hash_chain_s), hsize)) share_hash_chain = [] for i in range(0, len(share_hash_chain_s), hsize): chunk = share_hash_chain_s[i:i+hsize] @@ -82,7 +127,9 @@ def unpack_share(data): share_hash_chain.append( (hid, h) ) share_hash_chain = dict(share_hash_chain) block_hash_tree_s = data[o['block_hash_tree']:o['share_data']] - assert len(block_hash_tree_s) % 32 == 0, len(block_hash_tree_s) + if len(block_hash_tree_s) % 32 != 0: + raise BadShareError("block_hash_tree is %d bytes, not multiple of %d" + % (len(block_hash_tree_s), 32)) block_hash_tree = [] for i in range(0, len(block_hash_tree_s), 32): block_hash_tree.append(block_hash_tree_s[i:i+32]) @@ -94,67 +141,21 @@ def unpack_share(data): pubkey, signature, share_hash_chain, block_hash_tree, share_data, enc_privkey) -def unpack_share_data(verinfo, hash_and_data): - (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, o_t) = verinfo - - # hash_and_data starts with the share_hash_chain, so figure out what the - # offsets really are - o = dict(o_t) - o_share_hash_chain = 0 - o_block_hash_tree = o['block_hash_tree'] - o['share_hash_chain'] - o_share_data = o['share_data'] - o['share_hash_chain'] - o_enc_privkey = o['enc_privkey'] - o['share_hash_chain'] - - share_hash_chain_s = hash_and_data[o_share_hash_chain:o_block_hash_tree] - share_hash_format = ">H32s" - hsize = struct.calcsize(share_hash_format) - assert len(share_hash_chain_s) % hsize == 0, len(share_hash_chain_s) - share_hash_chain = [] - for i in range(0, len(share_hash_chain_s), hsize): - chunk = share_hash_chain_s[i:i+hsize] - (hid, h) = struct.unpack(share_hash_format, chunk) - share_hash_chain.append( (hid, h) ) - share_hash_chain = dict(share_hash_chain) - block_hash_tree_s = hash_and_data[o_block_hash_tree:o_share_data] - assert len(block_hash_tree_s) % 32 == 0, len(block_hash_tree_s) - block_hash_tree = [] - for i in range(0, len(block_hash_tree_s), 32): - block_hash_tree.append(block_hash_tree_s[i:i+32]) - - share_data = hash_and_data[o_share_data:o_enc_privkey] +def get_version_from_checkstring(checkstring): + (t, ) = struct.unpack(">B", checkstring[:1]) + return t - return (share_hash_chain, block_hash_tree, share_data) - - -def pack_checkstring(seqnum, root_hash, IV): - return struct.pack(PREFIX, - 0, # version, - seqnum, - root_hash, - IV) - -def unpack_checkstring(checkstring): +def unpack_sdmf_checkstring(checkstring): cs_len = struct.calcsize(PREFIX) version, seqnum, root_hash, IV = struct.unpack(PREFIX, checkstring[:cs_len]) - if version != 0: # TODO: just ignore the share - raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version) + assert version == SDMF_VERSION, version return (seqnum, root_hash, IV) -def pack_prefix(seqnum, root_hash, IV, - required_shares, total_shares, - segment_size, data_length): - prefix = struct.pack(SIGNED_PREFIX, - 0, # version, - seqnum, - root_hash, - IV, - - required_shares, - total_shares, - segment_size, - data_length, - ) - return prefix +def unpack_mdmf_checkstring(checkstring): + cs_len = struct.calcsize(MDMFCHECKSTRING) + version, seqnum, root_hash = struct.unpack(MDMFCHECKSTRING, checkstring[:cs_len]) + assert version == MDMF_VERSION, version + return (seqnum, root_hash) def pack_offsets(verification_key_length, signature_length, share_hash_chain_length, block_hash_tree_length, @@ -166,7 +167,7 @@ def pack_offsets(verification_key_length, signature_length, o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length o4 = offsets['share_data'] = o3 + block_hash_tree_length o5 = offsets['enc_privkey'] = o4 + share_data_length - o6 = offsets['EOF'] = o5 + encprivkey_length + offsets['EOF'] = o5 + encprivkey_length return struct.pack(">LLLLQQ", offsets['signature'], @@ -201,3 +202,1572 @@ def pack_share(prefix, verification_key, signature, encprivkey]) return final_share +def pack_prefix(seqnum, root_hash, IV, + required_shares, total_shares, + segment_size, data_length): + prefix = struct.pack(SIGNED_PREFIX, + 0, # version, + seqnum, + root_hash, + IV, + required_shares, + total_shares, + segment_size, + data_length, + ) + return prefix + + +class SDMFSlotWriteProxy: + implements(IMutableSlotWriter) + """ + I represent a remote write slot for an SDMF mutable file. I build a + share in memory, and then write it in one piece to the remote + server. This mimics how SDMF shares were built before MDMF (and the + new MDMF uploader), but provides that functionality in a way that + allows the MDMF uploader to be built without much special-casing for + file format, which makes the uploader code more readable. + """ + def __init__(self, + shnum, + rref, # a remote reference to a storage server + storage_index, + secrets, # (write_enabler, renew_secret, cancel_secret) + seqnum, # the sequence number of the mutable file + required_shares, + total_shares, + segment_size, + data_length): # the length of the original file + self.shnum = shnum + self._rref = rref + self._storage_index = storage_index + self._secrets = secrets + self._seqnum = seqnum + self._required_shares = required_shares + self._total_shares = total_shares + self._segment_size = segment_size + self._data_length = data_length + + # This is an SDMF file, so it should have only one segment, so, + # modulo padding of the data length, the segment size and the + # data length should be the same. + expected_segment_size = mathutil.next_multiple(data_length, + self._required_shares) + assert expected_segment_size == segment_size + + self._block_size = self._segment_size / self._required_shares + + # This is meant to mimic how SDMF files were built before MDMF + # entered the picture: we generate each share in its entirety, + # then push it off to the storage server in one write. When + # callers call set_*, they are just populating this dict. + # finish_publishing will stitch these pieces together into a + # coherent share, and then write the coherent share to the + # storage server. + self._share_pieces = {} + + # This tells the write logic what checkstring to use when + # writing remote shares. + self._testvs = [] + + self._readvs = [(0, struct.calcsize(PREFIX))] + + + def set_checkstring(self, checkstring_or_seqnum, + root_hash=None, + salt=None): + """ + Set the checkstring that I will pass to the remote server when + writing. + + @param checkstring_or_seqnum: A packed checkstring to use, + or a sequence number. I will treat this as a checkstr + + Note that implementations can differ in which semantics they + wish to support for set_checkstring -- they can, for example, + build the checkstring themselves from its constituents, or + some other thing. + """ + if root_hash and salt: + checkstring = struct.pack(PREFIX, + 0, + checkstring_or_seqnum, + root_hash, + salt) + else: + checkstring = checkstring_or_seqnum + self._testvs = [(0, len(checkstring), "eq", checkstring)] + + + def get_checkstring(self): + """ + Get the checkstring that I think currently exists on the remote + server. + """ + if self._testvs: + return self._testvs[0][3] + return "" + + + def put_block(self, data, segnum, salt): + """ + Add a block and salt to the share. + """ + # SDMF files have only one segment + assert segnum == 0 + assert len(data) == self._block_size + assert len(salt) == SALT_SIZE + + self._share_pieces['sharedata'] = data + self._share_pieces['salt'] = salt + + # TODO: Figure out something intelligent to return. + return defer.succeed(None) + + + def put_encprivkey(self, encprivkey): + """ + Add the encrypted private key to the share. + """ + self._share_pieces['encprivkey'] = encprivkey + + return defer.succeed(None) + + + def put_blockhashes(self, blockhashes): + """ + Add the block hash tree to the share. + """ + assert isinstance(blockhashes, list) + for h in blockhashes: + assert len(h) == HASH_SIZE + + # serialize the blockhashes, then set them. + blockhashes_s = "".join(blockhashes) + self._share_pieces['block_hash_tree'] = blockhashes_s + + return defer.succeed(None) + + + def put_sharehashes(self, sharehashes): + """ + Add the share hash chain to the share. + """ + assert isinstance(sharehashes, dict) + for h in sharehashes.itervalues(): + assert len(h) == HASH_SIZE + + # serialize the sharehashes, then set them. + sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i]) + for i in sorted(sharehashes.keys())]) + self._share_pieces['share_hash_chain'] = sharehashes_s + + return defer.succeed(None) + + + def put_root_hash(self, root_hash): + """ + Add the root hash to the share. + """ + assert len(root_hash) == HASH_SIZE + + self._share_pieces['root_hash'] = root_hash + + return defer.succeed(None) + + + def put_salt(self, salt): + """ + Add a salt to an empty SDMF file. + """ + assert len(salt) == SALT_SIZE + + self._share_pieces['salt'] = salt + self._share_pieces['sharedata'] = "" + + + def get_signable(self): + """ + Return the part of the share that needs to be signed. + + SDMF writers need to sign the packed representation of the + first eight fields of the remote share, that is: + - version number (0) + - sequence number + - root of the share hash tree + - salt + - k + - n + - segsize + - datalen + + This method is responsible for returning that to callers. + """ + return struct.pack(SIGNED_PREFIX, + 0, + self._seqnum, + self._share_pieces['root_hash'], + self._share_pieces['salt'], + self._required_shares, + self._total_shares, + self._segment_size, + self._data_length) + + + def put_signature(self, signature): + """ + Add the signature to the share. + """ + self._share_pieces['signature'] = signature + + return defer.succeed(None) + + + def put_verification_key(self, verification_key): + """ + Add the verification key to the share. + """ + self._share_pieces['verification_key'] = verification_key + + return defer.succeed(None) + + + def get_verinfo(self): + """ + I return my verinfo tuple. This is used by the ServermapUpdater + to keep track of versions of mutable files. + + The verinfo tuple for MDMF files contains: + - seqnum + - root hash + - a blank (nothing) + - segsize + - datalen + - k + - n + - prefix (the thing that you sign) + - a tuple of offsets + + We include the nonce in MDMF to simplify processing of version + information tuples. + + The verinfo tuple for SDMF files is the same, but contains a + 16-byte IV instead of a hash of salts. + """ + return (self._seqnum, + self._share_pieces['root_hash'], + self._share_pieces['salt'], + self._segment_size, + self._data_length, + self._required_shares, + self._total_shares, + self.get_signable(), + self._get_offsets_tuple()) + + def _get_offsets_dict(self): + post_offset = HEADER_LENGTH + offsets = {} + + verification_key_length = len(self._share_pieces['verification_key']) + o1 = offsets['signature'] = post_offset + verification_key_length + + signature_length = len(self._share_pieces['signature']) + o2 = offsets['share_hash_chain'] = o1 + signature_length + + share_hash_chain_length = len(self._share_pieces['share_hash_chain']) + o3 = offsets['block_hash_tree'] = o2 + share_hash_chain_length + + block_hash_tree_length = len(self._share_pieces['block_hash_tree']) + o4 = offsets['share_data'] = o3 + block_hash_tree_length + + share_data_length = len(self._share_pieces['sharedata']) + o5 = offsets['enc_privkey'] = o4 + share_data_length + + encprivkey_length = len(self._share_pieces['encprivkey']) + offsets['EOF'] = o5 + encprivkey_length + return offsets + + + def _get_offsets_tuple(self): + offsets = self._get_offsets_dict() + return tuple([(key, value) for key, value in offsets.items()]) + + + def _pack_offsets(self): + offsets = self._get_offsets_dict() + return struct.pack(">LLLLQQ", + offsets['signature'], + offsets['share_hash_chain'], + offsets['block_hash_tree'], + offsets['share_data'], + offsets['enc_privkey'], + offsets['EOF']) + + + def finish_publishing(self): + """ + Do anything necessary to finish writing the share to a remote + server. I require that no further publishing needs to take place + after this method has been called. + """ + for k in ["sharedata", "encprivkey", "signature", "verification_key", + "share_hash_chain", "block_hash_tree"]: + assert k in self._share_pieces, (self.shnum, k, self._share_pieces.keys()) + # This is the only method that actually writes something to the + # remote server. + # First, we need to pack the share into data that we can write + # to the remote server in one write. + offsets = self._pack_offsets() + prefix = self.get_signable() + final_share = "".join([prefix, + offsets, + self._share_pieces['verification_key'], + self._share_pieces['signature'], + self._share_pieces['share_hash_chain'], + self._share_pieces['block_hash_tree'], + self._share_pieces['sharedata'], + self._share_pieces['encprivkey']]) + + # Our only data vector is going to be writing the final share, + # in its entirely. + datavs = [(0, final_share)] + + if not self._testvs: + # Our caller has not provided us with another checkstring + # yet, so we assume that we are writing a new share, and set + # a test vector that will allow a new share to be written. + self._testvs = [] + self._testvs.append(tuple([0, 1, "eq", ""])) + + tw_vectors = {} + tw_vectors[self.shnum] = (self._testvs, datavs, None) + return self._rref.callRemote("slot_testv_and_readv_and_writev", + self._storage_index, + self._secrets, + tw_vectors, + # TODO is it useful to read something? + self._readvs) + + +MDMFHEADER = ">BQ32sBBQQ QQQQQQQQ" +MDMFHEADERWITHOUTOFFSETS = ">BQ32sBBQQ" +MDMFHEADERSIZE = struct.calcsize(MDMFHEADER) +MDMFHEADERWITHOUTOFFSETSSIZE = struct.calcsize(MDMFHEADERWITHOUTOFFSETS) +MDMFCHECKSTRING = ">BQ32s" +MDMFSIGNABLEHEADER = ">BQ32sBBQQ" +MDMFOFFSETS = ">QQQQQQQQ" +MDMFOFFSETS_LENGTH = struct.calcsize(MDMFOFFSETS) + +PRIVATE_KEY_SIZE = 1220 +SIGNATURE_SIZE = 260 +VERIFICATION_KEY_SIZE = 292 +# We know we won't have more than 256 shares, and we know that we won't need +# to store more than ln2(256) hash-chain nodes to validate, so that's our +# bound. Each node requires 2 bytes of node-number plus 32 bytes of hash. +SHARE_HASH_CHAIN_SIZE = (2+HASH_SIZE)*mathutil.log_ceil(256, 2) + +class MDMFSlotWriteProxy: + implements(IMutableSlotWriter) + + """ + I represent a remote write slot for an MDMF mutable file. + + I abstract away from my caller the details of block and salt + management, and the implementation of the on-disk format for MDMF + shares. + """ + # Expected layout, MDMF: + # offset: size: name: + #-- signed part -- + # 0 1 version number (01) + # 1 8 sequence number + # 9 32 share tree root hash + # 41 1 The "k" encoding parameter + # 42 1 The "N" encoding parameter + # 43 8 The segment size of the uploaded file + # 51 8 The data length of the original plaintext + #-- end signed part -- + # 59 8 The offset of the encrypted private key + # 67 8 The offset of the share hash chain + # 75 8 The offset of the signature + # 83 8 The offset of the verification key + # 91 8 The offset of the end of the v. key. + # 99 8 The offset of the share data + # 107 8 The offset of the block hash tree + # 115 8 The offset of EOF + # 123 var encrypted private key + # var var share hash chain + # var var signature + # var var verification key + # var large share data + # var var block hash tree + # + # We order the fields that way to make smart downloaders -- downloaders + # which prempetively read a big part of the share -- possible. + # + # The checkstring is the first three fields -- the version number, + # sequence number, root hash and root salt hash. This is consistent + # in meaning to what we have with SDMF files, except now instead of + # using the literal salt, we use a value derived from all of the + # salts -- the share hash root. + # + # The salt is stored before the block for each segment. The block + # hash tree is computed over the combination of block and salt for + # each segment. In this way, we get integrity checking for both + # block and salt with the current block hash tree arrangement. + # + # The ordering of the offsets is different to reflect the dependencies + # that we'll run into with an MDMF file. The expected write flow is + # something like this: + # + # 0: Initialize with the sequence number, encoding parameters and + # data length. From this, we can deduce the number of segments, + # and where they should go.. We can also figure out where the + # encrypted private key should go, because we can figure out how + # big the share data will be. + # + # 1: Encrypt, encode, and upload the file in chunks. Do something + # like + # + # put_block(data, segnum, salt) + # + # to write a block and a salt to the disk. We can do both of + # these operations now because we have enough of the offsets to + # know where to put them. + # + # 2: Put the encrypted private key. Use: + # + # put_encprivkey(encprivkey) + # + # Now that we know the length of the private key, we can fill + # in the offset for the block hash tree. + # + # 3: We're now in a position to upload the block hash tree for + # a share. Put that using something like: + # + # put_blockhashes(block_hash_tree) + # + # Note that block_hash_tree is a list of hashes -- we'll take + # care of the details of serializing that appropriately. When + # we get the block hash tree, we are also in a position to + # calculate the offset for the share hash chain, and fill that + # into the offsets table. + # + # 4: We're now in a position to upload the share hash chain for + # a share. Do that with something like: + # + # put_sharehashes(share_hash_chain) + # + # share_hash_chain should be a dictionary mapping shnums to + # 32-byte hashes -- the wrapper handles serialization. + # We'll know where to put the signature at this point, also. + # The root of this tree will be put explicitly in the next + # step. + # + # 5: Before putting the signature, we must first put the + # root_hash. Do this with: + # + # put_root_hash(root_hash). + # + # In terms of knowing where to put this value, it was always + # possible to place it, but it makes sense semantically to + # place it after the share hash tree, so that's why you do it + # in this order. + # + # 6: With the root hash put, we can now sign the header. Use: + # + # get_signable() + # + # to get the part of the header that you want to sign, and use: + # + # put_signature(signature) + # + # to write your signature to the remote server. + # + # 6: Add the verification key, and finish. Do: + # + # put_verification_key(key) + # + # and + # + # finish_publish() + # + # Checkstring management: + # + # To write to a mutable slot, we have to provide test vectors to ensure + # that we are writing to the same data that we think we are. These + # vectors allow us to detect uncoordinated writes; that is, writes + # where both we and some other shareholder are writing to the + # mutable slot, and to report those back to the parts of the program + # doing the writing. + # + # With SDMF, this was easy -- all of the share data was written in + # one go, so it was easy to detect uncoordinated writes, and we only + # had to do it once. With MDMF, not all of the file is written at + # once. + # + # If a share is new, we write out as much of the header as we can + # before writing out anything else. This gives other writers a + # canary that they can use to detect uncoordinated writes, and, if + # they do the same thing, gives us the same canary. We them update + # the share. We won't be able to write out two fields of the header + # -- the share tree hash and the salt hash -- until we finish + # writing out the share. We only require the writer to provide the + # initial checkstring, and keep track of what it should be after + # updates ourselves. + # + # If we haven't written anything yet, then on the first write (which + # will probably be a block + salt of a share), we'll also write out + # the header. On subsequent passes, we'll expect to see the header. + # This changes in two places: + # + # - When we write out the salt hash + # - When we write out the root of the share hash tree + # + # since these values will change the header. It is possible that we + # can just make those be written in one operation to minimize + # disruption. + def __init__(self, + shnum, + rref, # a remote reference to a storage server + storage_index, + secrets, # (write_enabler, renew_secret, cancel_secret) + seqnum, # the sequence number of the mutable file + required_shares, + total_shares, + segment_size, + data_length): # the length of the original file + self.shnum = shnum + self._rref = rref + self._storage_index = storage_index + self._seqnum = seqnum + self._required_shares = required_shares + assert self.shnum >= 0 and self.shnum < total_shares + self._total_shares = total_shares + # We build up the offset table as we write things. It is the + # last thing we write to the remote server. + self._offsets = {} + self._testvs = [] + # This is a list of write vectors that will be sent to our + # remote server once we are directed to write things there. + self._writevs = [] + self._secrets = secrets + # The segment size needs to be a multiple of the k parameter -- + # any padding should have been carried out by the publisher + # already. + assert segment_size % required_shares == 0 + self._segment_size = segment_size + self._data_length = data_length + + # These are set later -- we define them here so that we can + # check for their existence easily + + # This is the root of the share hash tree -- the Merkle tree + # over the roots of the block hash trees computed for shares in + # this upload. + self._root_hash = None + + # We haven't yet written anything to the remote bucket. By + # setting this, we tell the _write method as much. The write + # method will then know that it also needs to add a write vector + # for the checkstring (or what we have of it) to the first write + # request. We'll then record that value for future use. If + # we're expecting something to be there already, we need to call + # set_checkstring before we write anything to tell the first + # write about that. + self._written = False + + # When writing data to the storage servers, we get a read vector + # for free. We'll read the checkstring, which will help us + # figure out what's gone wrong if a write fails. + self._readv = [(0, struct.calcsize(MDMFCHECKSTRING))] + + # We calculate the number of segments because it tells us + # where the salt part of the file ends/share segment begins, + # and also because it provides a useful amount of bounds checking. + self._num_segments = mathutil.div_ceil(self._data_length, + self._segment_size) + self._block_size = self._segment_size / self._required_shares + # We also calculate the share size, to help us with block + # constraints later. + tail_size = self._data_length % self._segment_size + if not tail_size: + self._tail_block_size = self._block_size + else: + self._tail_block_size = mathutil.next_multiple(tail_size, + self._required_shares) + self._tail_block_size /= self._required_shares + + # We already know where the sharedata starts; right after the end + # of the header (which is defined as the signable part + the offsets) + # We can also calculate where the encrypted private key begins + # from what we know know. + self._actual_block_size = self._block_size + SALT_SIZE + data_size = self._actual_block_size * (self._num_segments - 1) + data_size += self._tail_block_size + data_size += SALT_SIZE + self._offsets['enc_privkey'] = MDMFHEADERSIZE + + # We don't define offsets for these because we want them to be + # tightly packed -- this allows us to ignore the responsibility + # of padding individual values, and of removing that padding + # later. So nonconstant_start is where we start writing + # nonconstant data. + nonconstant_start = self._offsets['enc_privkey'] + nonconstant_start += PRIVATE_KEY_SIZE + nonconstant_start += SIGNATURE_SIZE + nonconstant_start += VERIFICATION_KEY_SIZE + nonconstant_start += SHARE_HASH_CHAIN_SIZE + + self._offsets['share_data'] = nonconstant_start + + # Finally, we know how big the share data will be, so we can + # figure out where the block hash tree needs to go. + # XXX: But this will go away if Zooko wants to make it so that + # you don't need to know the size of the file before you start + # uploading it. + self._offsets['block_hash_tree'] = self._offsets['share_data'] + \ + data_size + + # Done. We can snow start writing. + + + def set_checkstring(self, + seqnum_or_checkstring, + root_hash=None, + salt=None): + """ + Set checkstring checkstring for the given shnum. + + This can be invoked in one of two ways. + + With one argument, I assume that you are giving me a literal + checkstring -- e.g., the output of get_checkstring. I will then + set that checkstring as it is. This form is used by unit tests. + + With two arguments, I assume that you are giving me a sequence + number and root hash to make a checkstring from. In that case, I + will build a checkstring and set it for you. This form is used + by the publisher. + + By default, I assume that I am writing new shares to the grid. + If you don't explcitly set your own checkstring, I will use + one that requires that the remote share not exist. You will want + to use this method if you are updating a share in-place; + otherwise, writes will fail. + """ + # You're allowed to overwrite checkstrings with this method; + # I assume that users know what they are doing when they call + # it. + if root_hash: + checkstring = struct.pack(MDMFCHECKSTRING, + 1, + seqnum_or_checkstring, + root_hash) + else: + checkstring = seqnum_or_checkstring + + if checkstring == "": + # We special-case this, since len("") = 0, but we need + # length of 1 for the case of an empty share to work on the + # storage server, which is what a checkstring that is the + # empty string means. + self._testvs = [] + else: + self._testvs = [] + self._testvs.append((0, len(checkstring), "eq", checkstring)) + + + def __repr__(self): + return "MDMFSlotWriteProxy for share %d" % self.shnum + + + def get_checkstring(self): + """ + Given a share number, I return a representation of what the + checkstring for that share on the server will look like. + + I am mostly used for tests. + """ + if self._root_hash: + roothash = self._root_hash + else: + roothash = "\x00" * 32 + return struct.pack(MDMFCHECKSTRING, + 1, + self._seqnum, + roothash) + + + def put_block(self, data, segnum, salt): + """ + I queue a write vector for the data, salt, and segment number + provided to me. I return None, as I do not actually cause + anything to be written yet. + """ + if segnum >= self._num_segments: + raise LayoutInvalid("I won't overwrite the block hash tree") + if len(salt) != SALT_SIZE: + raise LayoutInvalid("I was given a salt of size %d, but " + "I wanted a salt of size %d") + if segnum + 1 == self._num_segments: + if len(data) != self._tail_block_size: + raise LayoutInvalid("I was given the wrong size block to write") + elif len(data) != self._block_size: + raise LayoutInvalid("I was given the wrong size block to write") + + # We want to write at len(MDMFHEADER) + segnum * block_size. + offset = self._offsets['share_data'] + \ + (self._actual_block_size * segnum) + data = salt + data + + self._writevs.append(tuple([offset, data])) + + + def put_encprivkey(self, encprivkey): + """ + I queue a write vector for the encrypted private key provided to + me. + """ + assert self._offsets + assert self._offsets['enc_privkey'] + # You shouldn't re-write the encprivkey after the block hash + # tree is written, since that could cause the private key to run + # into the block hash tree. Before it writes the block hash + # tree, the block hash tree writing method writes the offset of + # the share hash chain. So that's a good indicator of whether or + # not the block hash tree has been written. + if "signature" in self._offsets: + raise LayoutInvalid("You can't put the encrypted private key " + "after putting the share hash chain") + + self._offsets['share_hash_chain'] = self._offsets['enc_privkey'] + \ + len(encprivkey) + + self._writevs.append(tuple([self._offsets['enc_privkey'], encprivkey])) + + + def put_blockhashes(self, blockhashes): + """ + I queue a write vector to put the block hash tree in blockhashes + onto the remote server. + + The encrypted private key must be queued before the block hash + tree, since we need to know how large it is to know where the + block hash tree should go. The block hash tree must be put + before the share hash chain, since its size determines the + offset of the share hash chain. + """ + assert self._offsets + assert "block_hash_tree" in self._offsets + + assert isinstance(blockhashes, list) + + blockhashes_s = "".join(blockhashes) + self._offsets['EOF'] = self._offsets['block_hash_tree'] + len(blockhashes_s) + + self._writevs.append(tuple([self._offsets['block_hash_tree'], + blockhashes_s])) + + + def put_sharehashes(self, sharehashes): + """ + I queue a write vector to put the share hash chain in my + argument onto the remote server. + + The block hash tree must be queued before the share hash chain, + since we need to know where the block hash tree ends before we + can know where the share hash chain starts. The share hash chain + must be put before the signature, since the length of the packed + share hash chain determines the offset of the signature. Also, + semantically, you must know what the root of the block hash tree + is before you can generate a valid signature. + """ + assert isinstance(sharehashes, dict) + assert self._offsets + if "share_hash_chain" not in self._offsets: + raise LayoutInvalid("You must put the block hash tree before " + "putting the share hash chain") + + # The signature comes after the share hash chain. If the + # signature has already been written, we must not write another + # share hash chain. The signature writes the verification key + # offset when it gets sent to the remote server, so we look for + # that. + if "verification_key" in self._offsets: + raise LayoutInvalid("You must write the share hash chain " + "before you write the signature") + sharehashes_s = "".join([struct.pack(">H32s", i, sharehashes[i]) + for i in sorted(sharehashes.keys())]) + self._offsets['signature'] = self._offsets['share_hash_chain'] + \ + len(sharehashes_s) + self._writevs.append(tuple([self._offsets['share_hash_chain'], + sharehashes_s])) + + + def put_root_hash(self, roothash): + """ + Put the root hash (the root of the share hash tree) in the + remote slot. + """ + # It does not make sense to be able to put the root + # hash without first putting the share hashes, since you need + # the share hashes to generate the root hash. + # + # Signature is defined by the routine that places the share hash + # chain, so it's a good thing to look for in finding out whether + # or not the share hash chain exists on the remote server. + if len(roothash) != HASH_SIZE: + raise LayoutInvalid("hashes and salts must be exactly %d bytes" + % HASH_SIZE) + self._root_hash = roothash + # To write both of these values, we update the checkstring on + # the remote server, which includes them + checkstring = self.get_checkstring() + self._writevs.append(tuple([0, checkstring])) + # This write, if successful, changes the checkstring, so we need + # to update our internal checkstring to be consistent with the + # one on the server. + + + def get_signable(self): + """ + Get the first seven fields of the mutable file; the parts that + are signed. + """ + if not self._root_hash: + raise LayoutInvalid("You need to set the root hash " + "before getting something to " + "sign") + return struct.pack(MDMFSIGNABLEHEADER, + 1, + self._seqnum, + self._root_hash, + self._required_shares, + self._total_shares, + self._segment_size, + self._data_length) + + + def put_signature(self, signature): + """ + I queue a write vector for the signature of the MDMF share. + + I require that the root hash and share hash chain have been put + to the grid before I will write the signature to the grid. + """ + if "signature" not in self._offsets: + raise LayoutInvalid("You must put the share hash chain " + # It does not make sense to put a signature without first + # putting the root hash and the salt hash (since otherwise + # the signature would be incomplete), so we don't allow that. + "before putting the signature") + if not self._root_hash: + raise LayoutInvalid("You must complete the signed prefix " + "before computing a signature") + # If we put the signature after we put the verification key, we + # could end up running into the verification key, and will + # probably screw up the offsets as well. So we don't allow that. + if "verification_key_end" in self._offsets: + raise LayoutInvalid("You can't put the signature after the " + "verification key") + # The method that writes the verification key defines the EOF + # offset before writing the verification key, so look for that. + self._offsets['verification_key'] = self._offsets['signature'] +\ + len(signature) + self._writevs.append(tuple([self._offsets['signature'], signature])) + + + def put_verification_key(self, verification_key): + """ + I queue a write vector for the verification key. + + I require that the signature have been written to the storage + server before I allow the verification key to be written to the + remote server. + """ + if "verification_key" not in self._offsets: + raise LayoutInvalid("You must put the signature before you " + "can put the verification key") + + self._offsets['verification_key_end'] = \ + self._offsets['verification_key'] + len(verification_key) + assert self._offsets['verification_key_end'] <= self._offsets['share_data'] + self._writevs.append(tuple([self._offsets['verification_key'], + verification_key])) + + + def _get_offsets_tuple(self): + return tuple([(key, value) for key, value in self._offsets.items()]) + + + def get_verinfo(self): + return (self._seqnum, + self._root_hash, + None, + self._segment_size, + self._data_length, + self._required_shares, + self._total_shares, + self.get_signable(), + self._get_offsets_tuple()) + + + def finish_publishing(self): + """ + I add a write vector for the offsets table, and then cause all + of the write vectors that I've dealt with so far to be published + to the remote server, ending the write process. + """ + if "verification_key_end" not in self._offsets: + raise LayoutInvalid("You must put the verification key before " + "you can publish the offsets") + offsets_offset = struct.calcsize(MDMFHEADERWITHOUTOFFSETS) + offsets = struct.pack(MDMFOFFSETS, + self._offsets['enc_privkey'], + self._offsets['share_hash_chain'], + self._offsets['signature'], + self._offsets['verification_key'], + self._offsets['verification_key_end'], + self._offsets['share_data'], + self._offsets['block_hash_tree'], + self._offsets['EOF']) + self._writevs.append(tuple([offsets_offset, offsets])) + encoding_parameters_offset = struct.calcsize(MDMFCHECKSTRING) + params = struct.pack(">BBQQ", + self._required_shares, + self._total_shares, + self._segment_size, + self._data_length) + self._writevs.append(tuple([encoding_parameters_offset, params])) + return self._write(self._writevs) + + + def _write(self, datavs, on_failure=None, on_success=None): + """I write the data vectors in datavs to the remote slot.""" + tw_vectors = {} + if not self._testvs: + self._testvs = [] + self._testvs.append(tuple([0, 1, "eq", ""])) + if not self._written: + # Write a new checkstring to the share when we write it, so + # that we have something to check later. + new_checkstring = self.get_checkstring() + datavs.append((0, new_checkstring)) + def _first_write(): + self._written = True + self._testvs = [(0, len(new_checkstring), "eq", new_checkstring)] + on_success = _first_write + tw_vectors[self.shnum] = (self._testvs, datavs, None) + d = self._rref.callRemote("slot_testv_and_readv_and_writev", + self._storage_index, + self._secrets, + tw_vectors, + self._readv) + def _result(results): + if isinstance(results, failure.Failure) or not results[0]: + # Do nothing; the write was unsuccessful. + if on_failure: on_failure() + else: + if on_success: on_success() + return results + d.addBoth(_result) + return d + +def _handle_bad_struct(f): + # struct.unpack errors mean the server didn't give us enough data, so + # this share is bad + f.trap(struct.error) + raise BadShareError(f.value.args[0]) + +class MDMFSlotReadProxy: + """ + I read from a mutable slot filled with data written in the MDMF data + format (which is described above). + + I can be initialized with some amount of data, which I will use (if + it is valid) to eliminate some of the need to fetch it from servers. + """ + def __init__(self, + rref, + storage_index, + shnum, + data="", + data_is_everything=False): + # Start the initialization process. + self._rref = rref + self._storage_index = storage_index + self.shnum = shnum + + # Before doing anything, the reader is probably going to want to + # verify that the signature is correct. To do that, they'll need + # the verification key, and the signature. To get those, we'll + # need the offset table. So fetch the offset table on the + # assumption that that will be the first thing that a reader is + # going to do. + + # The fact that these encoding parameters are None tells us + # that we haven't yet fetched them from the remote share, so we + # should. We could just not set them, but the checks will be + # easier to read if we don't have to use hasattr. + self._version_number = None + self._sequence_number = None + self._root_hash = None + # Filled in if we're dealing with an SDMF file. Unused + # otherwise. + self._salt = None + self._required_shares = None + self._total_shares = None + self._segment_size = None + self._data_length = None + self._offsets = None + + # If the user has chosen to initialize us with some data, we'll + # try to satisfy subsequent data requests with that data before + # asking the storage server for it. + self._data = data + + # If the provided data is known to be complete, then we know there's + # nothing to be gained by querying the server, so we should just + # partially satisfy requests with what we have. + self._data_is_everything = data_is_everything + + # The way callers interact with cache in the filenode returns + # None if there isn't any cached data, but the way we index the + # cached data requires a string, so convert None to "". + if self._data == None: + self._data = "" + + + def _maybe_fetch_offsets_and_header(self, force_remote=False): + """ + I fetch the offset table and the header from the remote slot if + I don't already have them. If I do have them, I do nothing and + return an empty Deferred. + """ + if self._offsets: + return defer.succeed(None) + # At this point, we may be either SDMF or MDMF. Fetching 107 + # bytes will be enough to get header and offsets for both SDMF and + # MDMF, though we'll be left with 4 more bytes than we + # need if this ends up being MDMF. This is probably less + # expensive than the cost of a second roundtrip. + readvs = [(0, 123)] + d = self._read(readvs, force_remote) + d.addCallback(self._process_encoding_parameters) + d.addCallback(self._process_offsets) + d.addErrback(_handle_bad_struct) + return d + + + def _process_encoding_parameters(self, encoding_parameters): + if self.shnum not in encoding_parameters: + raise BadShareError("no data for shnum %d" % self.shnum) + encoding_parameters = encoding_parameters[self.shnum][0] + # The first byte is the version number. It will tell us what + # to do next. + (verno,) = struct.unpack(">B", encoding_parameters[:1]) + if verno == MDMF_VERSION: + read_size = MDMFHEADERWITHOUTOFFSETSSIZE + (verno, + seqnum, + root_hash, + k, + n, + segsize, + datalen) = struct.unpack(MDMFHEADERWITHOUTOFFSETS, + encoding_parameters[:read_size]) + if segsize == 0 and datalen == 0: + # Empty file, no segments. + self._num_segments = 0 + else: + self._num_segments = mathutil.div_ceil(datalen, segsize) + + elif verno == SDMF_VERSION: + read_size = SIGNED_PREFIX_LENGTH + (verno, + seqnum, + root_hash, + salt, + k, + n, + segsize, + datalen) = struct.unpack(">BQ32s16s BBQQ", + encoding_parameters[:SIGNED_PREFIX_LENGTH]) + self._salt = salt + if segsize == 0 and datalen == 0: + # empty file + self._num_segments = 0 + else: + # non-empty SDMF files have one segment. + self._num_segments = 1 + else: + raise UnknownVersionError("You asked me to read mutable file " + "version %d, but I only understand " + "%d and %d" % (verno, SDMF_VERSION, + MDMF_VERSION)) + + self._version_number = verno + self._sequence_number = seqnum + self._root_hash = root_hash + self._required_shares = k + self._total_shares = n + self._segment_size = segsize + self._data_length = datalen + + self._block_size = self._segment_size / self._required_shares + # We can upload empty files, and need to account for this fact + # so as to avoid zero-division and zero-modulo errors. + if datalen > 0: + tail_size = self._data_length % self._segment_size + else: + tail_size = 0 + if not tail_size: + self._tail_block_size = self._block_size + else: + self._tail_block_size = mathutil.next_multiple(tail_size, + self._required_shares) + self._tail_block_size /= self._required_shares + + return encoding_parameters + + + def _process_offsets(self, offsets): + if self._version_number == 0: + read_size = OFFSETS_LENGTH + read_offset = SIGNED_PREFIX_LENGTH + end = read_size + read_offset + (signature, + share_hash_chain, + block_hash_tree, + share_data, + enc_privkey, + EOF) = struct.unpack(">LLLLQQ", + offsets[read_offset:end]) + self._offsets = {} + self._offsets['signature'] = signature + self._offsets['share_data'] = share_data + self._offsets['block_hash_tree'] = block_hash_tree + self._offsets['share_hash_chain'] = share_hash_chain + self._offsets['enc_privkey'] = enc_privkey + self._offsets['EOF'] = EOF + + elif self._version_number == 1: + read_offset = MDMFHEADERWITHOUTOFFSETSSIZE + read_length = MDMFOFFSETS_LENGTH + end = read_offset + read_length + (encprivkey, + sharehashes, + signature, + verification_key, + verification_key_end, + sharedata, + blockhashes, + eof) = struct.unpack(MDMFOFFSETS, + offsets[read_offset:end]) + self._offsets = {} + self._offsets['enc_privkey'] = encprivkey + self._offsets['block_hash_tree'] = blockhashes + self._offsets['share_hash_chain'] = sharehashes + self._offsets['signature'] = signature + self._offsets['verification_key'] = verification_key + self._offsets['verification_key_end']= \ + verification_key_end + self._offsets['EOF'] = eof + self._offsets['share_data'] = sharedata + + + def get_block_and_salt(self, segnum): + """ + I return (block, salt), where block is the block data and + salt is the salt used to encrypt that segment. + """ + d = self._maybe_fetch_offsets_and_header() + def _then(ignored): + base_share_offset = self._offsets['share_data'] + + if segnum + 1 > self._num_segments: + raise LayoutInvalid("Not a valid segment number") + + if self._version_number == 0: + share_offset = base_share_offset + self._block_size * segnum + else: + share_offset = base_share_offset + (self._block_size + \ + SALT_SIZE) * segnum + if segnum + 1 == self._num_segments: + data = self._tail_block_size + else: + data = self._block_size + + if self._version_number == 1: + data += SALT_SIZE + + readvs = [(share_offset, data)] + return readvs + d.addCallback(_then) + d.addCallback(lambda readvs: self._read(readvs)) + def _process_results(results): + if self.shnum not in results: + raise BadShareError("no data for shnum %d" % self.shnum) + if self._version_number == 0: + # We only read the share data, but we know the salt from + # when we fetched the header + data = results[self.shnum] + if not data: + data = "" + else: + if len(data) != 1: + raise BadShareError("got %d vectors, not 1" % len(data)) + data = data[0] + salt = self._salt + else: + data = results[self.shnum] + if not data: + salt = data = "" + else: + salt_and_data = results[self.shnum][0] + salt = salt_and_data[:SALT_SIZE] + data = salt_and_data[SALT_SIZE:] + return data, salt + d.addCallback(_process_results) + return d + + + def get_blockhashes(self, needed=None, force_remote=False): + """ + I return the block hash tree + + I take an optional argument, needed, which is a set of indices + correspond to hashes that I should fetch. If this argument is + missing, I will fetch the entire block hash tree; otherwise, I + may attempt to fetch fewer hashes, based on what needed says + that I should do. Note that I may fetch as many hashes as I + want, so long as the set of hashes that I do fetch is a superset + of the ones that I am asked for, so callers should be prepared + to tolerate additional hashes. + """ + # TODO: Return only the parts of the block hash tree necessary + # to validate the blocknum provided? + # This is a good idea, but it is hard to implement correctly. It + # is bad to fetch any one block hash more than once, so we + # probably just want to fetch the whole thing at once and then + # serve it. + if needed == set([]): + return defer.succeed([]) + d = self._maybe_fetch_offsets_and_header() + def _then(ignored): + blockhashes_offset = self._offsets['block_hash_tree'] + if self._version_number == 1: + blockhashes_length = self._offsets['EOF'] - blockhashes_offset + else: + blockhashes_length = self._offsets['share_data'] - blockhashes_offset + readvs = [(blockhashes_offset, blockhashes_length)] + return readvs + d.addCallback(_then) + d.addCallback(lambda readvs: + self._read(readvs, force_remote=force_remote)) + def _build_block_hash_tree(results): + if self.shnum not in results: + raise BadShareError("no data for shnum %d" % self.shnum) + + rawhashes = results[self.shnum][0] + results = [rawhashes[i:i+HASH_SIZE] + for i in range(0, len(rawhashes), HASH_SIZE)] + return results + d.addCallback(_build_block_hash_tree) + return d + + + def get_sharehashes(self, needed=None, force_remote=False): + """ + I return the part of the share hash chain placed to validate + this share. + + I take an optional argument, needed. Needed is a set of indices + that correspond to the hashes that I should fetch. If needed is + not present, I will fetch and return the entire share hash + chain. Otherwise, I may fetch and return any part of the share + hash chain that is a superset of the part that I am asked to + fetch. Callers should be prepared to deal with more hashes than + they've asked for. + """ + if needed == set([]): + return defer.succeed([]) + d = self._maybe_fetch_offsets_and_header() + + def _make_readvs(ignored): + sharehashes_offset = self._offsets['share_hash_chain'] + if self._version_number == 0: + sharehashes_length = self._offsets['block_hash_tree'] - sharehashes_offset + else: + sharehashes_length = self._offsets['signature'] - sharehashes_offset + readvs = [(sharehashes_offset, sharehashes_length)] + return readvs + d.addCallback(_make_readvs) + d.addCallback(lambda readvs: + self._read(readvs, force_remote=force_remote)) + def _build_share_hash_chain(results): + if self.shnum not in results: + raise BadShareError("no data for shnum %d" % self.shnum) + + sharehashes = results[self.shnum][0] + results = [sharehashes[i:i+(HASH_SIZE + 2)] + for i in range(0, len(sharehashes), HASH_SIZE + 2)] + results = dict([struct.unpack(">H32s", data) + for data in results]) + return results + d.addCallback(_build_share_hash_chain) + d.addErrback(_handle_bad_struct) + return d + + + def get_encprivkey(self): + """ + I return the encrypted private key. + """ + d = self._maybe_fetch_offsets_and_header() + + def _make_readvs(ignored): + privkey_offset = self._offsets['enc_privkey'] + if self._version_number == 0: + privkey_length = self._offsets['EOF'] - privkey_offset + else: + privkey_length = self._offsets['share_hash_chain'] - privkey_offset + readvs = [(privkey_offset, privkey_length)] + return readvs + d.addCallback(_make_readvs) + d.addCallback(lambda readvs: self._read(readvs)) + def _process_results(results): + if self.shnum not in results: + raise BadShareError("no data for shnum %d" % self.shnum) + privkey = results[self.shnum][0] + return privkey + d.addCallback(_process_results) + return d + + + def get_signature(self): + """ + I return the signature of my share. + """ + d = self._maybe_fetch_offsets_and_header() + + def _make_readvs(ignored): + signature_offset = self._offsets['signature'] + if self._version_number == 1: + signature_length = self._offsets['verification_key'] - signature_offset + else: + signature_length = self._offsets['share_hash_chain'] - signature_offset + readvs = [(signature_offset, signature_length)] + return readvs + d.addCallback(_make_readvs) + d.addCallback(lambda readvs: self._read(readvs)) + def _process_results(results): + if self.shnum not in results: + raise BadShareError("no data for shnum %d" % self.shnum) + signature = results[self.shnum][0] + return signature + d.addCallback(_process_results) + return d + + + def get_verification_key(self): + """ + I return the verification key. + """ + d = self._maybe_fetch_offsets_and_header() + + def _make_readvs(ignored): + if self._version_number == 1: + vk_offset = self._offsets['verification_key'] + vk_length = self._offsets['verification_key_end'] - vk_offset + else: + vk_offset = struct.calcsize(">BQ32s16sBBQQLLLLQQ") + vk_length = self._offsets['signature'] - vk_offset + readvs = [(vk_offset, vk_length)] + return readvs + d.addCallback(_make_readvs) + d.addCallback(lambda readvs: self._read(readvs)) + def _process_results(results): + if self.shnum not in results: + raise BadShareError("no data for shnum %d" % self.shnum) + verification_key = results[self.shnum][0] + return verification_key + d.addCallback(_process_results) + return d + + + def get_encoding_parameters(self): + """ + I return (k, n, segsize, datalen) + """ + d = self._maybe_fetch_offsets_and_header() + d.addCallback(lambda ignored: + (self._required_shares, + self._total_shares, + self._segment_size, + self._data_length)) + return d + + + def get_seqnum(self): + """ + I return the sequence number for this share. + """ + d = self._maybe_fetch_offsets_and_header() + d.addCallback(lambda ignored: + self._sequence_number) + return d + + + def get_root_hash(self): + """ + I return the root of the block hash tree + """ + d = self._maybe_fetch_offsets_and_header() + d.addCallback(lambda ignored: self._root_hash) + return d + + + def get_checkstring(self): + """ + I return the packed representation of the following: + + - version number + - sequence number + - root hash + - salt hash + + which my users use as a checkstring to detect other writers. + """ + d = self._maybe_fetch_offsets_and_header() + def _build_checkstring(ignored): + if self._salt: + checkstring = struct.pack(PREFIX, + self._version_number, + self._sequence_number, + self._root_hash, + self._salt) + else: + checkstring = struct.pack(MDMFCHECKSTRING, + self._version_number, + self._sequence_number, + self._root_hash) + + return checkstring + d.addCallback(_build_checkstring) + return d + + + def get_prefix(self, force_remote): + d = self._maybe_fetch_offsets_and_header(force_remote) + d.addCallback(lambda ignored: + self._build_prefix()) + return d + + + def _build_prefix(self): + # The prefix is another name for the part of the remote share + # that gets signed. It consists of everything up to and + # including the datalength, packed by struct. + if self._version_number == SDMF_VERSION: + return struct.pack(SIGNED_PREFIX, + self._version_number, + self._sequence_number, + self._root_hash, + self._salt, + self._required_shares, + self._total_shares, + self._segment_size, + self._data_length) + + else: + return struct.pack(MDMFSIGNABLEHEADER, + self._version_number, + self._sequence_number, + self._root_hash, + self._required_shares, + self._total_shares, + self._segment_size, + self._data_length) + + + def _get_offsets_tuple(self): + # The offsets tuple is another component of the version + # information tuple. It is basically our offsets dictionary, + # itemized and in a tuple. + return self._offsets.copy() + + + def get_verinfo(self): + """ + I return my verinfo tuple. This is used by the ServermapUpdater + to keep track of versions of mutable files. + + The verinfo tuple for MDMF files contains: + - seqnum + - root hash + - a blank (nothing) + - segsize + - datalen + - k + - n + - prefix (the thing that you sign) + - a tuple of offsets + + We include the nonce in MDMF to simplify processing of version + information tuples. + + The verinfo tuple for SDMF files is the same, but contains a + 16-byte IV instead of a hash of salts. + """ + d = self._maybe_fetch_offsets_and_header() + def _build_verinfo(ignored): + if self._version_number == SDMF_VERSION: + salt_to_use = self._salt + else: + salt_to_use = None + return (self._sequence_number, + self._root_hash, + salt_to_use, + self._segment_size, + self._data_length, + self._required_shares, + self._total_shares, + self._build_prefix(), + self._get_offsets_tuple()) + d.addCallback(_build_verinfo) + return d + + + def _read(self, readvs, force_remote=False): + unsatisfiable = filter(lambda x: x[0] + x[1] > len(self._data), readvs) + # TODO: It's entirely possible to tweak this so that it just + # fulfills the requests that it can, and not demand that all + # requests are satisfiable before running it. + + if not unsatisfiable or self._data_is_everything: + results = [self._data[offset:offset+length] + for (offset, length) in readvs] + results = {self.shnum: results} + return defer.succeed(results) + else: + return self._rref.callRemote("slot_readv", + self._storage_index, + [self.shnum], + readvs) + + + def is_sdmf(self): + """I tell my caller whether or not my remote file is SDMF or MDMF + """ + d = self._maybe_fetch_offsets_and_header() + d.addCallback(lambda ignored: + self._version_number == 0) + return d + + +class LayoutInvalid(BadShareError): + """ + This isn't a valid MDMF mutable file + """