ChoiceOf, IntegerConstraint, Any, RemoteInterface, Referenceable
HASH_SIZE=32
+SALT_SIZE=16
+
+SDMF_VERSION=0
+MDMF_VERSION=1
Hash = StringConstraint(maxLength=HASH_SIZE,
minLength=HASH_SIZE)# binary format 32-byte SHA256 hash
MAX_BUCKETS = 256 # per peer -- zfec offers at most 256 shares per file
+DEFAULT_MAX_SEGMENT_SIZE = 128*1024
+
ShareData = StringConstraint(None)
URIExtensionData = StringConstraint(1000)
Number = IntegerConstraint(8) # 2**(8*8) == 16EiB ~= 18e18 ~= 18 exabytes
LeaseRenewSecret = Hash # used to protect bucket lease renewal requests
LeaseCancelSecret = Hash # used to protect bucket lease cancellation requests
-KiB = 1024
-DEFAULT_MAX_SEGMENT_SIZE = 128*KiB
-
class RIStubClient(RemoteInterface):
"""Each client publishes a service announcement for a dummy object called
the StubClient. This object doesn't actually offer any services, but the
"""
class IStorageBroker(Interface):
- def get_servers_for_index(peer_selection_index):
+ def get_servers_for_psi(peer_selection_index):
+ """
+ @return: list of IServer instances
"""
- @return: list of (peerid, versioned-rref) tuples
+ def get_connected_servers():
"""
- def get_all_servers():
+ @return: frozenset of connected IServer instances
"""
- @return: frozenset of (peerid, versioned-rref) tuples
+ def get_known_servers():
+ """
+ @return: frozenset of IServer instances
"""
def get_all_serverids():
"""
"""
+class IMutableSlotWriter(Interface):
+ """
+ The interface for a writer around a mutable slot on a remote server.
+ """
+ def set_checkstring(checkstring, *args):
+ """
+ Set the checkstring that I will pass to the remote server when
+ writing.
+
+ @param checkstring: A packed checkstring to use.
+
+ Note that implementations can differ in which semantics they
+ wish to support for set_checkstring -- they can, for example,
+ accept the packed checkstring as given, or build it themselves
+ from its constituent parts (such as the sequence number and root hash).
+ """
+
+ def get_checkstring():
+ """
+ Get the checkstring that I think currently exists on the remote
+ server.
+ """
+
+ def put_block(data, segnum, salt):
+ """
+ Add a block and salt to the share.
+ """
+
+ def put_encprivkey(encprivkey):
+ """
+ Add the encrypted private key to the share.
+ """
+
+ def put_blockhashes(blockhashes=list):
+ """
+ Add the block hash tree to the share.
+ """
+
+ def put_sharehashes(sharehashes=dict):
+ """
+ Add the share hash chain to the share.
+ """
+
+ def get_signable():
+ """
+ Return the part of the share that needs to be signed.
+ """
+
+ def put_signature(signature):
+ """
+ Add the signature to the share.
+ """
+
+ def put_verification_key(verification_key):
+ """
+ Add the verification key to the share.
+ """
+
+ def finish_publishing():
+ """
+ Do anything necessary to finish writing the share to a remote
+ server. I require that no further publishing needs to take place
+ after this method has been called.
+ """
+
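+ # A minimal sketch (not part of the interface) of the order in which a
+ # publisher might drive an IMutableSlotWriter; 'writer', 'blocks', 'salts',
+ # 'sign', and the other names are hypothetical:
+ #
+ #   writer.set_checkstring(checkstring)
+ #   for segnum, (block, salt) in enumerate(zip(blocks, salts)):
+ #       writer.put_block(block, segnum, salt)
+ #   writer.put_encprivkey(encrypted_private_key)
+ #   writer.put_blockhashes(block_hash_tree)
+ #   writer.put_sharehashes(share_hash_chain)
+ #   writer.put_signature(sign(writer.get_signable()))
+ #   writer.put_verification_key(verification_key)
+ #   writer.finish_publishing()   # presumably returns a Deferred that fires
+ #                                # when the share is fully written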
+
class IURI(Interface):
def init_from_string(uri):
"""Accept a string (as created by my to_string() method) and populate
class IMutableFileURI(Interface):
"""I am a URI which represents a mutable filenode."""
+ def get_extension_params():
+ """Return the extension parameters in the URI"""
+
+ def set_extension_params(params):
+ """Set the extension parameters that should be in the URI"""
class IDirectoryURI(Interface):
pass
class MustNotBeUnknownRWError(CapConstraintError):
"""Cannot add an unknown child cap specified in a rw_uri field."""
+
+class IReadable(Interface):
+ """I represent a readable object -- either an immutable file, or a
+ specific version of a mutable file.
+ """
+
+ def is_readonly():
+ """Return True if this reference provides mutable access to the given
+ file or directory (i.e. if you can modify it), or False if not. Note
+ that even if this reference is read-only, someone else may hold a
+ read-write reference to it.
+
+ For an IReadable returned by get_best_readable_version(), this will
+ always return True, but for instances of subinterfaces such as
+ IMutableFileVersion, it may return False."""
+
+ def is_mutable():
+ """Return True if this file or directory is mutable (by *somebody*,
+ not necessarily you), False if it is immutable. Note that a file
+ might be mutable overall, but your reference to it might be
+ read-only. On the other hand, all references to an immutable file
+ will be read-only; there are no read-write references to an immutable
+ file."""
+
+ def get_storage_index():
+ """Return the storage index of the file."""
+
+ def get_size():
+ """Return the length (in bytes) of this readable object."""
+
+ def download_to_data():
+ """Download all of the file contents. I return a Deferred that fires
+ with the contents as a byte string."""
+
+ def read(consumer, offset=0, size=None):
+ """Download a portion (possibly all) of the file's contents, making
+ them available to the given IConsumer. Return a Deferred that fires
+ (with the consumer) when the consumer is unregistered (either because
+ the last byte has been given to it, or because the consumer threw an
+ exception during write(), possibly because it no longer wants to
+ receive data). The portion downloaded will start at 'offset' and
+ contain 'size' bytes (or the remainder of the file if size==None).
+
+ The consumer will be used in non-streaming mode: an IPullProducer
+ will be attached to it.
+
+ The consumer will not receive data right away: several network trips
+ must occur first. The order of events will be::
+
+ consumer.registerProducer(p, streaming)
+ (if streaming == False)::
+ consumer does p.resumeProducing()
+ consumer.write(data)
+ consumer does p.resumeProducing()
+ consumer.write(data).. (repeat until all data is written)
+ consumer.unregisterProducer()
+ deferred.callback(consumer)
+
+ If a download error occurs, or an exception is raised by
+ consumer.registerProducer() or consumer.write(), I will call
+ consumer.unregisterProducer() and then deliver the exception via
+ deferred.errback(). To cancel the download, the consumer should call
+ p.stopProducing(), which will result in an exception being delivered
+ via deferred.errback().
+
+ See src/allmydata/util/consumer.py for an example of a simple
+ download-to-memory consumer.
+ """
+
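+ # A minimal sketch of a download-to-memory consumer that follows the event
+ # order documented above (see src/allmydata/util/consumer.py for the real
+ # thing); the class name here is hypothetical:
+ #
+ #   from zope.interface import implements
+ #   from twisted.internet.interfaces import IConsumer
+ #
+ #   class _MemoryConsumer(object):
+ #       implements(IConsumer)
+ #       def __init__(self):
+ #           self.chunks = []
+ #           self.done = False
+ #       def registerProducer(self, producer, streaming):
+ #           self.producer = producer
+ #           self.streaming = streaming
+ #           if not streaming:
+ #               producer.resumeProducing()   # ask for the first chunk
+ #       def write(self, data):
+ #           self.chunks.append(data)
+ #           if not self.streaming:
+ #               self.producer.resumeProducing()   # ask for the next chunk
+ #       def unregisterProducer(self):
+ #           self.done = True
+ #
+ #   d = readable.read(_MemoryConsumer())
+ #   d.addCallback(lambda c: "".join(c.chunks))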
+
+class IWriteable(Interface):
+ """
+ I define methods that callers can use to update SDMF and MDMF
+ mutable files on a Tahoe-LAFS grid.
+ """
+ # XXX: For the moment, we have only this. It is possible that we
+ # want to move overwrite() and modify() in here too.
+ def update(data, offset):
+ """
+ I write the data from my data argument to the MDMF file,
+ starting at offset. I continue writing data until my data
+ argument is exhausted, appending data to the file as necessary.
+ """
+ # assert IMutableUploadable.providedBy(data)
+ # to append data: offset=node.get_size_of_best_version()
+ # do we want to support compacting MDMF?
+ # for an MDMF file, this can be done with O(data.get_size())
+ # memory. For an SDMF file, any modification takes
+ # O(node.get_size_of_best_version()).
+
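+ # A minimal sketch of appending data via update(); 'version' is assumed to
+ # provide this interface, 'node' to be the mutable filenode it came from,
+ # and 'uploadable' to provide IMutableUploadable (all names hypothetical):
+ #
+ #   d = node.get_size_of_best_version()      # Deferred firing with an int
+ #   d.addCallback(lambda size: version.update(uploadable, size))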
+
+class IMutableFileVersion(IReadable):
+ """I provide access to a particular version of a mutable file. The
+ access is read/write if I was obtained from a filenode derived from
+ a write cap, or read-only if the filenode was derived from a read cap.
+ """
+
+ def get_sequence_number():
+ """Return the sequence number of this version."""
+
+ def get_servermap():
+ """Return the IMutableFileServerMap instance that was used to create
+ this object.
+ """
+
+ def get_writekey():
+ """Return this filenode's writekey, or None if the node does not have
+ write-capability. This may be used to assist with data structures
+ that need to make certain data available only to writers, such as the
+ read-write child caps in dirnodes. The recommended process is to have
+ reader-visible data be submitted to the filenode in the clear (where
+ it will be encrypted by the filenode using the readkey), but encrypt
+ writer-visible data using this writekey.
+ """
+
+ # TODO: Can this be overwrite instead of replace?
+ def replace(new_contents):
+ """Replace the contents of the mutable file, provided that no other
+ node has published (or is attempting to publish, concurrently) a
+ newer version of the file than this one.
+
+ I will avoid modifying any share that is different than the version
+ given by get_sequence_number(). However, if another node is writing
+ to the file at the same time as me, I may manage to update some shares
+ while they update others. If I see any evidence of this, I will signal
+ UncoordinatedWriteError, and the file will be left in an inconsistent
+ state (possibly the version you provided, possibly the old version,
+ possibly somebody else's version, and possibly a mix of shares from
+ all of these).
+
+ The recommended response to UncoordinatedWriteError is to either
+ return it to the caller (since they failed to coordinate their
+ writes), or to attempt some sort of recovery. It may be sufficient to
+ wait a random interval (with exponential backoff) and repeat your
+ operation. If I do not signal UncoordinatedWriteError, then I was
+ able to write the new version without incident.
+
+ I return a Deferred that fires (with a PublishStatus object) when the
+ update has completed.
+ """
+
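+ # A minimal sketch of the "wait and retry with exponential backoff" recovery
+ # suggested below; 'version' is hypothetical, and UncoordinatedWriteError is
+ # assumed to be importable from the mutable-file code:
+ #
+ #   import random
+ #   from twisted.internet import reactor, task
+ #
+ #   def _try_replace(new_contents, attempt=0):
+ #       d = version.replace(new_contents)
+ #       def _retry(f):
+ #           f.trap(UncoordinatedWriteError)
+ #           delay = random.uniform(0, 2 ** attempt)
+ #           return task.deferLater(reactor, delay,
+ #                                  _try_replace, new_contents, attempt + 1)
+ #       d.addErrback(_retry)
+ #       return d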
+ def modify(modifier_cb):
+ """Modify the contents of the file, by downloading this version,
+ applying the modifier function (or bound method), then uploading
+ the new version. This will succeed as long as no other node
+ publishes a version between the download and the upload.
+ I return a Deferred that fires (with a PublishStatus object) when
+ the update is complete.
+
+ The modifier callable will be given three arguments: a string (with
+ the old contents), a 'first_time' boolean, and a servermap. As with
+ download_to_data(), the old contents will be from this version,
+ but the modifier can use the servermap to make other decisions
+ (such as refusing to apply the delta if there are multiple parallel
+ versions, or if there is evidence of a newer unrecoverable version).
+ 'first_time' will be True the first time the modifier is called,
+ and False on any subsequent calls.
+
+ The callable should return a string with the new contents. The
+ callable must be prepared to be called multiple times, and must
+ examine the input string to see if the change that it wants to make
+ is already present in the old version. If it does not need to make
+ any changes, it can either return None, or return its input string.
+
+ If the modifier raises an exception, it will be returned in the
+ errback.
+ """
+
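+ # A minimal sketch of a modifier callable suitable for modify(); it inspects
+ # only the old contents and ignores the other arguments, so it is safe to
+ # call repeatedly as described above (the names are hypothetical):
+ #
+ #   def _ensure_trailing_newline(old_contents, *ignored):
+ #       if old_contents.endswith("\n"):
+ #           return None                   # change already present: no-op
+ #       return old_contents + "\n"        # new contents to publish
+ #
+ #   d = version.modify(_ensure_trailing_newline)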
+
# The hierarchy looks like this:
# IFilesystemNode
# IFileNode
read-only access with others, use get_readonly_uri().
"""
- def get_write_uri(n):
+ def get_write_uri():
"""Return the URI string that can be used by others to get write
access to this node, if it is writeable. If this is a read-only node,
return None."""
def raise_error():
"""Raise any error associated with this node."""
+ # XXX: These may not be appropriate outside the context of an IReadable.
def get_size():
"""Return the length (in bytes) of the data this node represents. For
directory nodes, I return the size of the backing store. I return
class IFileNode(IFilesystemNode):
"""I am a node which represents a file: a sequence of bytes. I am not a
container, like IDirectoryNode."""
+ def get_best_readable_version():
+ """Return a Deferred that fires with an IReadable for the 'best'
+ available version of the file. The IReadable provides only read
+ access, even if this filenode was derived from a write cap.
-class IImmutableFileNode(IFileNode):
- def read(consumer, offset=0, size=None):
- """Download a portion (possibly all) of the file's contents, making
- them available to the given IConsumer. Return a Deferred that fires
- (with the consumer) when the consumer is unregistered (either because
- the last byte has been given to it, or because the consumer threw an
- exception during write(), possibly because it no longer wants to
- receive data). The portion downloaded will start at 'offset' and
- contain 'size' bytes (or the remainder of the file if size==None).
-
- The consumer will be used in non-streaming mode: an IPullProducer
- will be attached to it.
+ For an immutable file, there is only one version. For a mutable
+ file, the 'best' version is the recoverable version with the
+ highest sequence number. If no uncoordinated writes have occurred,
+ and if enough shares are available, then this will be the most
+ recent version that has been uploaded. If no version is recoverable,
+ the Deferred will errback with an UnrecoverableFileError.
+ """
- The consumer will not receive data right away: several network trips
- must occur first. The order of events will be::
+ def download_best_version():
+ """Download the contents of the version that would be returned
+ by get_best_readable_version(). This is equivalent to calling
+ download_to_data() on the IReadable given by that method.
- consumer.registerProducer(p, streaming)
- (if streaming == False)::
- consumer does p.resumeProducing()
- consumer.write(data)
- consumer does p.resumeProducing()
- consumer.write(data).. (repeat until all data is written)
- consumer.unregisterProducer()
- deferred.callback(consumer)
+ I return a Deferred that fires with a byte string when the file
+ has been fully downloaded. To support streaming download, use
+ the 'read' method of IReadable. If no version is recoverable,
+ the Deferred will errback with an UnrecoverableFileError.
+ """
- If a download error occurs, or an exception is raised by
- consumer.registerProducer() or consumer.write(), I will call
- consumer.unregisterProducer() and then deliver the exception via
- deferred.errback(). To cancel the download, the consumer should call
- p.stopProducing(), which will result in an exception being delivered
- via deferred.errback().
+ def get_size_of_best_version():
+ """Find the size of the version that would be returned by
+ get_best_readable_version().
- See src/allmydata/util/consumer.py for an example of a simple
- download-to-memory consumer.
+ I return a Deferred that fires with an integer. If no version
+ is recoverable, the Deferred will errback with an
+ UnrecoverableFileError.
"""
+
+class IImmutableFileNode(IFileNode, IReadable):
+ """I am a node representing an immutable file. Immutable files have
+ only one version"""
+
+
class IMutableFileNode(IFileNode):
"""I provide access to a 'mutable file', which retains its identity
regardless of what contents are put in it.
only be retrieved and updated all-at-once, as a single big string. Future
versions of our mutable files will remove this restriction.
"""
-
- def download_best_version():
- """Download the 'best' available version of the file, meaning one of
- the recoverable versions with the highest sequence number. If no
+ def get_best_mutable_version():
+ """Return a Deferred that fires with an IMutableFileVersion for
+ the 'best' available version of the file. The best version is
+ the recoverable version with the highest sequence number. If no
uncoordinated writes have occurred, and if enough shares are
- available, then this will be the most recent version that has been
- uploaded.
+ available, then this will be the most recent version that has
+ been uploaded.
- I update an internal servermap with MODE_READ, determine which
- version of the file is indicated by
- servermap.best_recoverable_version(), and return a Deferred that
- fires with its contents. If no version is recoverable, the Deferred
- will errback with UnrecoverableFileError.
- """
-
- def get_size_of_best_version():
- """Find the size of the version that would be downloaded with
- download_best_version(), without actually downloading the whole file.
-
- I return a Deferred that fires with an integer.
+ If no version is recoverable, the Deferred will errback with an
+ UnrecoverableFileError.
"""
def overwrite(new_contents):
errback.
"""
-
def get_servermap(mode):
"""Return a Deferred that fires with an IMutableFileServerMap
instance, updated using the given mode.
writer-visible data using this writekey.
"""
+ def get_version():
+ """Returns the mutable file protocol version."""
+
class NotEnoughSharesError(Exception):
"""Download was unable to get enough shares"""
class NoSharesError(Exception):
"""Download was unable to get any shares at all."""
+class DownloadStopped(Exception):
+ pass
+
class UploadUnhappinessError(Exception):
"""Upload was unable to satisfy 'servers_of_happiness'"""
encode(), unless of course it already happens to be an even multiple
of required_shares in length.)
- ALSO: the requirement to break up your data into 'required_shares'
- chunks before calling encode() feels a bit surprising, at least from
- the point of view of a user who doesn't know how FEC works. It feels
- like an implementation detail that has leaked outside the
- abstraction barrier. Can you imagine a use case in which the data to
- be encoded might already be available in pre-segmented chunks, such
- that it is faster or less work to make encode() take a list rather
- than splitting a single string?
-
- ALSO ALSO: I think 'inshares' is a misleading term, since encode()
- is supposed to *produce* shares, so what it *accepts* should be
- something other than shares. Other places in this interface use the
- word 'data' for that-which-is-not-shares.. maybe we should use that
- term?
+ Note: the requirement to break up your data into
+ 'required_shares' chunks of exactly the right length before
+ calling encode() is surprising from the point of view of a user
+ who doesn't know how FEC works. It feels like an
+ implementation detail that has leaked outside the abstraction
+ barrier. Is there a use case in which the data to be encoded
+ might already be available in pre-segmented chunks, such that
+ it is faster or less work to make encode() take a list rather
+ than splitting a single string?
+
+ Yes, there is: suppose you are uploading a file with K=64,
+ N=128, segsize=262,144. Then each in-share will be of size
+ 4096. If you use this .encode() API then your code could first
+ read each successive 4096-byte chunk from the file and store
+ each one in a Python string and store each such Python string
+ in a Python list. Then you could call .encode(), passing that
+ list as "inshares". The encoder would generate the other 64
+ "secondary shares" and return to you a new list containing
+ references to the same 64 Python strings that you passed in
+ (as the primary shares) plus references to the new 64 Python
+ strings.
+
+ (You could even imagine that your code could use readv() so
+ that the operating system can arrange to get all of those
+ bytes copied from the file into the Python list of Python
+ strings as efficiently as possible instead of having a loop
+ written in C or in Python to copy the next part of the file
+ into the next string.)
+
+ On the other hand if you instead use the .encode_proposal()
+ API (above), then your code can first read in all of the
+ 262,144 bytes of the segment from the file into a Python
+ string, then call .encode_proposal() passing the segment data
+ as the "data" argument. The encoder would basically first
+ split the "data" argument into a list of 64 in-shares of 4096
+ bytes each, and then do the same thing that .encode() does. So
+ this would result in a little bit more copying of data and a
+ slightly higher peak ("maximum") memory usage during the
+ process, although it might or might not make a practical
+ difference for our current use cases.
+
+ Note that "inshares" is a strange name for the parameter if
+ you think of the parameter as being just for feeding in data
+ to the codec. It makes more sense if you think of the result
+ of this encoding as being the set of shares from inshares plus
+ an extra set of "secondary shares" (or "check shares"). It is
+ a surprising name! If the API is going to be surprising then
+ the name should be surprising. If we switch to
+ encode_proposal() above then we should also switch to an
+ unsurprising name.
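+
+ For illustration, a sketch of the .encode() path described above
+ (the 'segment_data' and 'encoder' names are hypothetical)::
+
+  k, segsize = 64, 262144
+  chunk_size = segsize // k        # 4096 bytes per in-share
+  inshares = [segment_data[i*chunk_size:(i+1)*chunk_size]
+              for i in range(k)]
+  d = encoder.encode(inshares)     # fires with the encoded shares: the 64
+                                   # primaries passed in plus 64 check shares
+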
'desired_share_ids', if provided, is required to be a sequence of
ints, each of which is required to be >= 0 and < max_shares. If not
called. Whatever it returns will be returned to the invoker of
Downloader.download.
"""
- # The following methods are just because that target might be a
- # repairer.DownUpConnector, and just because the current CHKUpload object
- # expects to find the storage index and encoding parameters in its
- # Uploadable.
- def set_storageindex(storageindex):
- """ Set the storage index. """
- def set_encodingparams(encodingparams):
- """ Set the encoding parameters. """
class IDownloader(Interface):
def download(uri, target):
If the data must be acquired through multiple internal read
operations, returning a list instead of a single string may help to
- reduce string copies.
+ reduce string copies. However, the length of the concatenated strings
+ must equal the amount of data requested, unless EOF is encountered.
+ Long reads, or short reads without EOF, are not allowed. read()
+ should return the same amount of data as a local disk file read, just
+ in a different shape and asynchronously.
'length' will typically be equal to (min(get_size(),1MB)/req_shares),
so a 10kB file means length=3kB, 100kB file means length=30kB,
"""The upload is finished, and whatever filehandle was in use may be
closed."""
+
+class IMutableUploadable(Interface):
+ """
+ I represent content that is due to be uploaded to a mutable filecap.
+ """
+ # This is somewhat simpler than the IUploadable interface above
+ # because mutable files do not need to be concerned with possibly
+ # generating a CHK, nor with per-file keys. It is a subset of the
+ # methods in IUploadable, though, so we could just as well implement
+ # the mutable uploadables as IUploadables that don't happen to use
+ # those methods (with the understanding that the unused methods will
+ # never be called on such objects)
+ def get_size():
+ """
+ Returns a Deferred that fires with the size of the content held
+ by the uploadable.
+ """
+
+ def read(length):
+ """
+ Returns a list of strings which, when concatenated, are the next
+ length bytes of the file, or fewer if there are fewer bytes
+ between the current location and the end of the file.
+ """
+
+ def close():
+ """
+ The process that used the Uploadable is finished using it, so
+ the uploadable may be closed.
+ """
+
class IUploadResults(Interface):
"""I am returned by upload() methods. I contain a number of public
attributes which can be read to determine the results of the upload. Some
class ICheckable(Interface):
def check(monitor, verify=False, add_lease=False):
- """Check upon my health, optionally repairing any problems.
+ """Check up on my health, optionally repairing any problems.
This returns a Deferred that fires with an instance that provides
ICheckResults, or None if the object is non-distributed (i.e. LIT
@return: a dictionary mapping peerid to a float (RTT time in seconds)
"""
- return DictOf(Nodeid, float)
+ return DictOf(str, float)
UploadResults = Any() #DictOf(str, str)
def get_stats():
"""
returns a dictionary containing 'counters' and 'stats', each a
- dictionary with string counter/stat name keys, and numeric values.
+ dictionary with string counter/stat name keys, and numeric or None values.
counters are monotonically increasing measures of work done, and
stats are instantaneous measures (potentially time averaged
internally)
"""
- return DictOf(str, DictOf(str, ChoiceOf(float, int, long)))
+ return DictOf(str, DictOf(str, ChoiceOf(float, int, long, None)))
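+ # For example (illustrative counter/stat names), a provider might return:
+ #   {"counters": {"uploader.files_uploaded": 15},
+ #    "stats": {"node.uptime": 3805.6, "cpu_monitor.1min_avg": 0.02}}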
class RIStatsGatherer(RemoteInterface):
__remote_name__ = "RIStatsGatherer.tahoe.allmydata.com"