import struct
-from allmydata.mutable.common import NeedMoreDataError, UnknownVersionError
+from allmydata.mutable.common import NeedMoreDataError, UnknownVersionError, \
+ BadShareError
from allmydata.interfaces import HASH_SIZE, SALT_SIZE, SDMF_VERSION, \
MDMF_VERSION, IMutableSlotWriter
-from allmydata.util import mathutil, observer
+from allmydata.util import mathutil
from twisted.python import failure
from twisted.internet import defer
from zope.interface import implements
-# These strings describe the format of the packed structs they help process
+# These strings describe the format of the packed structs they help process.
# Here's what they mean:
#
# PREFIX:
# >: Big-endian byte order; the most significant byte is first (leftmost).
-# B: The version information; an 8 bit version identifier. Stored as
-# an unsigned char. This is currently 00 00 00 00; our modifications
-# will turn it into 00 00 00 01.
+# B: The container version information; stored as an unsigned 8-bit integer.
+# This is currently either SDMF_VERSION or MDMF_VERSION.
# Q: The sequence number; this is sort of like a revision history for
# mutable files; they start at 1 and increase as they are changed after
-# being uploaded. Stored as an unsigned long long, which is 8 bytes in
-# length.
-# 32s: The root hash of the share hash tree. We use sha-256d, so we use 32
-# characters = 32 bytes to store the value.
-# 16s: The salt for the readkey. This is a 16-byte random value, stored as
-# 16 characters.
+# being uploaded. Stored as an unsigned 64-bit integer.
+# 32s: The root hash of the share hash tree. We use sha-256d, so we use 32
+# bytes to store the value.
+# 16s: The salt for the readkey. This is a 16-byte random value.
#
# SIGNED_PREFIX additions, things that are covered by the signature:
-# B: The "k" encoding parameter. We store this as an 8-bit character,
-# which is convenient because our erasure coding scheme cannot
-# encode if you ask for more than 255 pieces.
-# B: The "N" encoding parameter. Stored as an 8-bit character for the
-# same reasons as above.
-# Q: The segment size of the uploaded file. This will essentially be the
-# length of the file in SDMF. An unsigned long long, so we can store
-# files of quite large size.
-# Q: The data length of the uploaded file. Modulo padding, this will be
-# the same of the data length field. Like the data length field, it is
-# an unsigned long long and can be quite large.
+# B: The "k" encoding parameter. We store this as an unsigned 8-bit
+# integer, since our erasure coding scheme cannot encode to more than
+# 255 pieces.
+# B: The "N" encoding parameter. Stored as an unsigned 8-bit integer for
+# the same reason as above.
+# Q: The segment size of the uploaded file. This is an unsigned 64-bit
+# integer, to allow handling large segments and files. For SDMF the
+# segment size is the data length plus padding; for MDMF it can be
+# smaller.
+# Q: The data length of the uploaded file. Like the segment size field,
+# it is an unsigned 64-bit integer.
#
# HEADER additions:
-# L: The offset of the signature of this. An unsigned long.
-# L: The offset of the share hash chain. An unsigned long.
-# L: The offset of the block hash tree. An unsigned long.
-# L: The offset of the share data. An unsigned long.
-# Q: The offset of the encrypted private key. An unsigned long long, to
-# account for the possibility of a lot of share data.
-# Q: The offset of the EOF. An unsigned long long, to account for the
-# possibility of a lot of share data.
-#
+# L: The offset of the signature. An unsigned 32-bit integer.
+# L: The offset of the share hash chain. An unsigned 32-bit integer.
+# L: The offset of the block hash tree. An unsigned 32-bit integer.
+# L: The offset of the share data. An unsigned 32-bit integer.
+# Q: The offset of the encrypted private key. An unsigned 64-bit integer,
+# to account for the possibility of a lot of share data.
+# Q: The offset of the EOF. An unsigned 64-bit integer, to account for
+# the possibility of a lot of share data.
+#
# After all of these, we have the following:
# - The verification key: Occupies the space between the end of the header
# and the start of the signature (i.e.: data[HEADER_LENGTH:o['signature']].
# - The share data, which goes from the share data offset to the encrypted
# private key offset.
# - The encrypted private key offset, which goes until the end of the file.
-#
+#
# The block hash tree in this encoding has only one share, so the offset of
# the share data will be 32 bits more than the offset of the block hash tree.
# Given this, we may need to check to see how many bytes a reasonably sized
# block hash tree will take up.
-PREFIX = ">BQ32s16s" # each version has a different prefix
+PREFIX = ">BQ32s16s" # each version may have a different prefix
SIGNED_PREFIX = ">BQ32s16s BBQQ" # this is covered by the signature
SIGNED_PREFIX_LENGTH = struct.calcsize(SIGNED_PREFIX)
HEADER = ">BQ32s16s BBQQ LLLLQQ" # includes offsets
OFFSETS = ">LLLLQQ"
OFFSETS_LENGTH = struct.calcsize(OFFSETS)
-# These are still used for some tests.
+MAX_MUTABLE_SHARE_SIZE = 69105*1000*1000*1000*1000 # 69105 TB, kind of arbitrary
+
+
+# These are still used for some tests of SDMF files.
def unpack_header(data):
o = {}
(version,
share_hash_chain_s = data[o['share_hash_chain']:o['block_hash_tree']]
share_hash_format = ">H32s"
hsize = struct.calcsize(share_hash_format)
- assert len(share_hash_chain_s) % hsize == 0, len(share_hash_chain_s)
+ if len(share_hash_chain_s) % hsize != 0:
+ raise BadShareError("hash chain is %d bytes, not multiple of %d"
+ % (len(share_hash_chain_s), hsize))
share_hash_chain = []
for i in range(0, len(share_hash_chain_s), hsize):
chunk = share_hash_chain_s[i:i+hsize]
share_hash_chain.append( (hid, h) )
share_hash_chain = dict(share_hash_chain)
block_hash_tree_s = data[o['block_hash_tree']:o['share_data']]
- assert len(block_hash_tree_s) % 32 == 0, len(block_hash_tree_s)
+ if len(block_hash_tree_s) % 32 != 0:
+ raise BadShareError("block_hash_tree is %d bytes, not multiple of %d"
+ % (len(block_hash_tree_s), 32))
block_hash_tree = []
for i in range(0, len(block_hash_tree_s), 32):
block_hash_tree.append(block_hash_tree_s[i:i+32])
pubkey, signature, share_hash_chain, block_hash_tree,
share_data, enc_privkey)
-def unpack_checkstring(checkstring):
+def get_version_from_checkstring(checkstring):
+ (t, ) = struct.unpack(">B", checkstring[:1])
+ return t
+
+def unpack_sdmf_checkstring(checkstring):
cs_len = struct.calcsize(PREFIX)
version, seqnum, root_hash, IV = struct.unpack(PREFIX, checkstring[:cs_len])
- if version != 0: # TODO: just ignore the share
- raise UnknownVersionError("got mutable share version %d, but I only understand version 0" % version)
+ assert version == SDMF_VERSION, version
return (seqnum, root_hash, IV)
+def unpack_mdmf_checkstring(checkstring):
+ cs_len = struct.calcsize(MDMFCHECKSTRING)
+ version, seqnum, root_hash = struct.unpack(MDMFCHECKSTRING, checkstring[:cs_len])
+ assert version == MDMF_VERSION, version
+ return (seqnum, root_hash)
def pack_offsets(verification_key_length, signature_length,
share_hash_chain_length, block_hash_tree_length,
self._segment_size = segment_size
self._data_length = data_length
- # This is an SDMF file, so it should have only one segment, so,
+ # This is an SDMF file, so it should have only one segment, so,
# modulo padding of the data length, the segment size and the
# data length should be the same.
expected_segment_size = mathutil.next_multiple(data_length,
"""
for k in ["sharedata", "encprivkey", "signature", "verification_key",
"share_hash_chain", "block_hash_tree"]:
- assert k in self._share_pieces, (k, self._share_pieces.keys())
+ assert k in self._share_pieces, (self.shnum, k, self._share_pieces.keys())
# This is the only method that actually writes something to the
# remote server.
# First, we need to pack the share into data that we can write
# offset: size: name:
#-- signed part --
# 0 1 version number (01)
- # 1 8 sequence number
+ # 1 8 sequence number
# 9 32 share tree root hash
# 41 1 The "k" encoding parameter
# 42 1 The "N" encoding parameter
# 51 8 The data length of the original plaintext
#-- end signed part --
# 59 8 The offset of the encrypted private key
- # 67 8 The offset of the signature
- # 75 8 The offset of the verification key
- # 83 8 The offset of the end of the v. key.
- # 92 8 The offset of the share data
- # 100 8 The offset of the block hash tree
- # 108 8 The offset of the share hash chain
- # 116 8 The offset of EOF
- #
- # followed by the encrypted private key, signature, verification
- # key, share hash chain, data, and block hash tree. We order the
- # fields that way to make smart downloaders -- downloaders which
- # prempetively read a big part of the share -- possible.
+ # 67 8 The offset of the share hash chain
+ # 75 8 The offset of the signature
+ # 83 8 The offset of the verification key
+ # 91 8 The offset of the end of the v. key.
+ # 99 8 The offset of the share data
+ # 107 8 The offset of the block hash tree
+ # 115 8 The offset of EOF
+ # 123 var encrypted private key
+ # var var share hash chain
+ # var var signature
+ # var var verification key
+ # var large share data
+ # var var block hash tree
+ #
+    # We order the fields that way to make smart downloaders -- downloaders
+    # which preemptively read a big part of the share -- possible.
#
# The checkstring is the first three fields -- the version number,
# sequence number, root hash and root salt hash. This is consistent
# in meaning to what we have with SDMF files, except now instead of
# using the literal salt, we use a value derived from all of the
# salts -- the share hash root.
- #
+ #
# The salt is stored before the block for each segment. The block
# hash tree is computed over the combination of block and salt for
# each segment. In this way, we get integrity checking for both
# block and salt with the current block hash tree arrangement.
- #
+ #
# The ordering of the offsets is different to reflect the dependencies
# that we'll run into with an MDMF file. The expected write flow is
# something like this:
# and where they should go.. We can also figure out where the
# encrypted private key should go, because we can figure out how
# big the share data will be.
- #
+ #
# 1: Encrypt, encode, and upload the file in chunks. Do something
- # like
+ # like
#
# put_block(data, segnum, salt)
#
# to write a block and a salt to the disk. We can do both of
# these operations now because we have enough of the offsets to
# know where to put them.
- #
+ #
# 2: Put the encrypted private key. Use:
#
# put_encprivkey(encprivkey)
#
# 3: We're now in a position to upload the block hash tree for
# a share. Put that using something like:
- #
+ #
# put_blockhashes(block_hash_tree)
#
# Note that block_hash_tree is a list of hashes -- we'll take
#
# 4: We're now in a position to upload the share hash chain for
# a share. Do that with something like:
- #
- # put_sharehashes(share_hash_chain)
#
- # share_hash_chain should be a dictionary mapping shnums to
+ # put_sharehashes(share_hash_chain)
+ #
+ # share_hash_chain should be a dictionary mapping shnums to
# 32-byte hashes -- the wrapper handles serialization.
# We'll know where to put the signature at this point, also.
# The root of this tree will be put explicitly in the next
# step.
- #
+ #
# 5: Before putting the signature, we must first put the
# root_hash. Do this with:
- #
+ #
# put_root_hash(root_hash).
- #
+ #
# In terms of knowing where to put this value, it was always
# possible to place it, but it makes sense semantically to
# place it after the share hash tree, so that's why you do it
# get_signable()
#
# to get the part of the header that you want to sign, and use:
- #
+ #
# put_signature(signature)
#
# to write your signature to the remote server.
#
# 6: Add the verification key, and finish. Do:
#
- # put_verification_key(key)
+ # put_verification_key(key)
#
- # and
+ # and
#
# finish_publish()
#
# Checkstring management:
- #
+ #
# To write to a mutable slot, we have to provide test vectors to ensure
# that we are writing to the same data that we think we are. These
# vectors allow us to detect uncoordinated writes; that is, writes
# where both we and some other shareholder are writing to the
# mutable slot, and to report those back to the parts of the program
- # doing the writing.
+ # doing the writing.
#
# With SDMF, this was easy -- all of the share data was written in
# one go, so it was easy to detect uncoordinated writes, and we only
# - When we write out the salt hash
# - When we write out the root of the share hash tree
#
- # since these values will change the header. It is possible that we
+ # since these values will change the header. It is possible that we
# can just make those be written in one operation to minimize
# disruption.
def __init__(self,
assert self.shnum >= 0 and self.shnum < total_shares
self._total_shares = total_shares
# We build up the offset table as we write things. It is the
- # last thing we write to the remote server.
+ # last thing we write to the remote server.
self._offsets = {}
self._testvs = []
# This is a list of write vectors that will be sent to our
Put the root hash (the root of the share hash tree) in the
remote slot.
"""
- # It does not make sense to be able to put the root
+ # It does not make sense to be able to put the root
# hash without first putting the share hashes, since you need
# the share hashes to generate the root hash.
#
def get_verinfo(self):
return (self._seqnum,
self._root_hash,
- self._required_shares,
- self._total_shares,
+ None,
self._segment_size,
self._data_length,
+ self._required_shares,
+ self._total_shares,
self.get_signable(),
self._get_offsets_tuple())
else:
if on_success: on_success()
return results
- d.addCallback(_result)
+ d.addBoth(_result)
return d
+def _handle_bad_struct(f):
+ # struct.unpack errors mean the server didn't give us enough data, so
+ # this share is bad
+ f.trap(struct.error)
+ raise BadShareError(f.value.args[0])
class MDMFSlotReadProxy:
"""
rref,
storage_index,
shnum,
- data=""):
+ data="",
+ data_is_everything=False):
# Start the initialization process.
self._rref = rref
self._storage_index = storage_index
# If the user has chosen to initialize us with some data, we'll
# try to satisfy subsequent data requests with that data before
- # asking the storage server for it. If
+ # asking the storage server for it.
self._data = data
+
+ # If the provided data is known to be complete, then we know there's
+ # nothing to be gained by querying the server, so we should just
+ # partially satisfy requests with what we have.
+ self._data_is_everything = data_is_everything
+
# The way callers interact with cache in the filenode returns
# None if there isn't any cached data, but the way we index the
# cached data requires a string, so convert None to "".
if self._data == None:
self._data = ""
- self._queue_observers = observer.ObserverList()
- self._queue_errbacks = observer.ObserverList()
- self._readvs = []
-
def _maybe_fetch_offsets_and_header(self, force_remote=False):
"""
"""
if self._offsets:
return defer.succeed(None)
- # At this point, we may be either SDMF or MDMF. Fetching 107
+ # At this point, we may be either SDMF or MDMF. Fetching 107
# bytes will be enough to get header and offsets for both SDMF and
# MDMF, though we'll be left with 4 more bytes than we
# need if this ends up being MDMF. This is probably less
d = self._read(readvs, force_remote)
d.addCallback(self._process_encoding_parameters)
d.addCallback(self._process_offsets)
+ d.addErrback(_handle_bad_struct)
return d
def _process_encoding_parameters(self, encoding_parameters):
- assert self.shnum in encoding_parameters
+ if self.shnum not in encoding_parameters:
+ raise BadShareError("no data for shnum %d" % self.shnum)
encoding_parameters = encoding_parameters[self.shnum][0]
# The first byte is the version number. It will tell us what
# to do next.
self._offsets['share_data'] = sharedata
- def get_block_and_salt(self, segnum, queue=False):
+ def get_block_and_salt(self, segnum):
"""
I return (block, salt), where block is the block data and
salt is the salt used to encrypt that segment.
readvs = [(share_offset, data)]
return readvs
d.addCallback(_then)
- d.addCallback(lambda readvs:
- self._read(readvs, queue=queue))
+ d.addCallback(lambda readvs: self._read(readvs))
def _process_results(results):
- assert self.shnum in results
+ if self.shnum not in results:
+ raise BadShareError("no data for shnum %d" % self.shnum)
if self._version_number == 0:
# We only read the share data, but we know the salt from
# when we fetched the header
if not data:
data = ""
else:
- assert len(data) == 1
+ if len(data) != 1:
+ raise BadShareError("got %d vectors, not 1" % len(data))
data = data[0]
salt = self._salt
else:
return d
- def get_blockhashes(self, needed=None, queue=False, force_remote=False):
+ def get_blockhashes(self, needed=None, force_remote=False):
"""
I return the block hash tree
return readvs
d.addCallback(_then)
d.addCallback(lambda readvs:
- self._read(readvs, queue=queue, force_remote=force_remote))
+ self._read(readvs, force_remote=force_remote))
def _build_block_hash_tree(results):
- assert self.shnum in results
+ if self.shnum not in results:
+ raise BadShareError("no data for shnum %d" % self.shnum)
rawhashes = results[self.shnum][0]
results = [rawhashes[i:i+HASH_SIZE]
return d
- def get_sharehashes(self, needed=None, queue=False, force_remote=False):
+ def get_sharehashes(self, needed=None, force_remote=False):
"""
I return the part of the share hash chain placed to validate
this share.
return readvs
d.addCallback(_make_readvs)
d.addCallback(lambda readvs:
- self._read(readvs, queue=queue, force_remote=force_remote))
+ self._read(readvs, force_remote=force_remote))
def _build_share_hash_chain(results):
- assert self.shnum in results
+ if self.shnum not in results:
+ raise BadShareError("no data for shnum %d" % self.shnum)
sharehashes = results[self.shnum][0]
results = [sharehashes[i:i+(HASH_SIZE + 2)]
for data in results])
return results
d.addCallback(_build_share_hash_chain)
+ d.addErrback(_handle_bad_struct)
return d
- def get_encprivkey(self, queue=False):
+ def get_encprivkey(self):
"""
I return the encrypted private key.
"""
readvs = [(privkey_offset, privkey_length)]
return readvs
d.addCallback(_make_readvs)
- d.addCallback(lambda readvs:
- self._read(readvs, queue=queue))
+ d.addCallback(lambda readvs: self._read(readvs))
def _process_results(results):
- assert self.shnum in results
+ if self.shnum not in results:
+ raise BadShareError("no data for shnum %d" % self.shnum)
privkey = results[self.shnum][0]
return privkey
d.addCallback(_process_results)
return d
- def get_signature(self, queue=False):
+ def get_signature(self):
"""
I return the signature of my share.
"""
readvs = [(signature_offset, signature_length)]
return readvs
d.addCallback(_make_readvs)
- d.addCallback(lambda readvs:
- self._read(readvs, queue=queue))
+ d.addCallback(lambda readvs: self._read(readvs))
def _process_results(results):
- assert self.shnum in results
+ if self.shnum not in results:
+ raise BadShareError("no data for shnum %d" % self.shnum)
signature = results[self.shnum][0]
return signature
d.addCallback(_process_results)
return d
- def get_verification_key(self, queue=False):
+ def get_verification_key(self):
"""
I return the verification key.
"""
readvs = [(vk_offset, vk_length)]
return readvs
d.addCallback(_make_readvs)
- d.addCallback(lambda readvs:
- self._read(readvs, queue=queue))
+ d.addCallback(lambda readvs: self._read(readvs))
def _process_results(results):
- assert self.shnum in results
+ if self.shnum not in results:
+ raise BadShareError("no data for shnum %d" % self.shnum)
verification_key = results[self.shnum][0]
return verification_key
d.addCallback(_process_results)
return d
- def flush(self):
- """
- I flush my queue of read vectors.
- """
- d = self._read(self._readvs)
- def _then(results):
- self._readvs = []
- if isinstance(results, failure.Failure):
- self._queue_errbacks.notify(results)
- else:
- self._queue_observers.notify(results)
- self._queue_observers = observer.ObserverList()
- self._queue_errbacks = observer.ObserverList()
- d.addBoth(_then)
-
-
- def _read(self, readvs, force_remote=False, queue=False):
+ def _read(self, readvs, force_remote=False):
unsatisfiable = filter(lambda x: x[0] + x[1] > len(self._data), readvs)
# TODO: It's entirely possible to tweak this so that it just
# fulfills the requests that it can, and not demand that all
# requests are satisfiable before running it.
- if not unsatisfiable and not force_remote:
+
+ if not unsatisfiable or self._data_is_everything:
results = [self._data[offset:offset+length]
for (offset, length) in readvs]
results = {self.shnum: results}
return defer.succeed(results)
else:
- if queue:
- start = len(self._readvs)
- self._readvs += readvs
- end = len(self._readvs)
- def _get_results(results, start, end):
- if not self.shnum in results:
- return {self._shnum: [""]}
- return {self.shnum: results[self.shnum][start:end]}
- d = defer.Deferred()
- d.addCallback(_get_results, start, end)
- self._queue_observers.subscribe(d.callback)
- self._queue_errbacks.subscribe(d.errback)
- return d
return self._rref.callRemote("slot_readv",
self._storage_index,
[self.shnum],
return d
-class LayoutInvalid(Exception):
+class LayoutInvalid(BadShareError):
"""
This isn't a valid MDMF mutable file
"""