2 from zope.interface import Interface
3 from foolscap.schema import StringConstraint, ListOf, TupleOf, SetOf, DictOf, \
5 from foolscap import RemoteInterface, Referenceable
9 Hash = StringConstraint(maxLength=HASH_SIZE,
10 minLength=HASH_SIZE)# binary format 32-byte SHA256 hash
11 Nodeid = StringConstraint(maxLength=20,
12 minLength=20) # binary format 20-byte SHA1 hash
13 FURL = StringConstraint(1000)
14 StorageIndex = StringConstraint(16)
15 URI = StringConstraint(300) # kind of arbitrary
17 MAX_BUCKETS = 200 # per peer
19 # MAX_SEGMENT_SIZE in encode.py is 1 MiB (this constraint allows k = 1)
20 ShareData = StringConstraint(2**20)
21 URIExtensionData = StringConstraint(1000)
22 LeaseRenewSecret = Hash # used to protect bucket lease renewal requests
23 LeaseCancelSecret = Hash # used to protect bucket lease cancellation requests
25 # Announcements are (FURL, service_name, remoteinterface_name,
26 # nickname, my_version, oldest_supported)
27 # the (FURL, service_name, remoteinterface_name) refer to the service being
28 # announced. The (nickname, my_version, oldest_supported) refer to the
29 # client as a whole. The my_version/oldest_supported strings can be parsed
30 # by an allmydata.util.version.Version instance, and then compared. The
31 # first goal is to make sure that nodes are not confused by speaking to an
32 # incompatible peer. The second goal is to enable the development of
33 # backwards-compatibility code.
35 Announcement = TupleOf(FURL, str, str,
38 class RIIntroducerSubscriberClient(RemoteInterface):
39 __remote_name__ = "RIIntroducerSubscriberClient.tahoe.allmydata.com"
41 def announce(announcements=SetOf(Announcement)):
42 """I accept announcements from the publisher."""
45 def set_encoding_parameters(parameters=(int, int, int)):
46 """Advise the client of the recommended k-of-n encoding parameters
47 for this grid. 'parameters' is a tuple of (k, desired, n), where 'n'
48 is the total number of shares that will be created for any given
49 file, while 'k' is the number of shares that must be retrieved to
50 recover that file, and 'desired' is the minimum number of shares that
51 must be placed before the uploader will consider its job a success.
52 n/k is the expansion ratio, while k determines the robustness.
54 Introducers should specify 'n' according to the expected size of the
55 grid (there is no point to producing more shares than there are
56 peers), and k according to the desired reliability-vs-overhead goals.
58 Note that setting k=1 is equivalent to simple replication.
62 # When Foolscap can handle multiple interfaces (Foolscap#17), the
63 # full-powered introducer will implement both RIIntroducerPublisher and
64 # RIIntroducerSubscriberService. Until then, we define
65 # RIIntroducerPublisherAndSubscriberService as a combination of the two, and
66 # make everybody use that.
68 class RIIntroducerPublisher(RemoteInterface):
69 """To publish a service to the world, connect to me and give me your
70 announcement message. I will deliver a copy to all connected subscribers."""
71 __remote_name__ = "RIIntroducerPublisher.tahoe.allmydata.com"
73 def publish(announcement=Announcement):
77 class RIIntroducerSubscriberService(RemoteInterface):
78 __remote_name__ = "RIIntroducerSubscriberService.tahoe.allmydata.com"
80 def subscribe(subscriber=RIIntroducerSubscriberClient, service_name=str):
81 """Give me a subscriber reference, and I will call its announce()
82 method with any announcements that match the desired service name. I
83 will ignore duplicate subscriptions.
87 class RIIntroducerPublisherAndSubscriberService(RemoteInterface):
88 __remote_name__ = "RIIntroducerPublisherAndSubscriberService.tahoe.allmydata.com"
89 def publish(announcement=Announcement):
91 def subscribe(subscriber=RIIntroducerSubscriberClient, service_name=str):
94 class IIntroducerClient(Interface):
95 """I provide service introduction facilities for a node. I help nodes
96 publish their services to the rest of the world, and I help them learn
97 about services available on other nodes."""
99 def publish(furl, service_name, remoteinterface_name):
100 """Once you call this, I will tell the world that the Referenceable
101 available at FURL is available to provide a service named
102 SERVICE_NAME. The precise definition of the service being provided is
103 identified by the Foolscap 'remote interface name' in the last
104 parameter: this is supposed to be a globally-unique string that
105 identifies the RemoteInterface that is implemented."""
107 def subscribe_to(service_name):
108 """Call this if you will eventually want to use services with the
109 given SERVICE_NAME. This will prompt me to subscribe to announcements
110 of those services. You can pick up the announcements later by calling
111 get_all_connections_for() or get_permuted_peers().
114 def get_all_connections():
115 """Return a frozenset of (nodeid, service_name, rref) tuples, one for
116 each active connection we've established to a remote service. This is
117 mostly useful for unit tests that need to wait until a certain number
118 of connections have been made."""
120 def get_all_connectors():
121 """Return a dict that maps from (nodeid, service_name) to a
122 RemoteServiceConnector instance for all services that we are actively
123 trying to connect to. Each RemoteServiceConnector has the following
126 service_name: the type of service provided, like 'storage'
127 announcement_time: when we first heard about this service
128 last_connect_time: when we last established a connection
129 last_loss_time: when we last lost a connection
131 version: the peer's version, from the most recent connection
132 oldest_supported: the peer's oldest supported version, same
134 rref: the RemoteReference, if connected, otherwise None
135 remote_host: the IAddress, if connected, otherwise None
137 This method is intended for monitoring interfaces, such as a web page
138 which describes connecting and connected peers.
141 def get_all_peerids():
142 """Return a frozenset of all peerids to whom we have a connection (to
143 one or more services) established. Mostly useful for unit tests."""
145 def get_all_connections_for(service_name):
146 """Return a frozenset of (nodeid, service_name, rref) tuples, one
147 for each active connection that provides the given SERVICE_NAME."""
149 def get_permuted_peers(service_name, key):
150 """Returns an ordered list of (peerid, rref) tuples, selecting from
151 the connections that provide SERVICE_NAME, using a hash-based
152 permutation keyed by KEY. This randomizes the service list in a
153 repeatable way, to distribute load over many peers.
156 def connected_to_introducer():
157 """Returns a boolean, True if we are currently connected to the
158 introducer, False if not."""
161 class RIBucketWriter(RemoteInterface):
162 def write(offset=int, data=ShareData):
167 If the data that has been written is incomplete or inconsistent then
168 the server will throw the data away, else it will store it for future
174 """Abandon all the data that has been written.
178 class RIBucketReader(RemoteInterface):
179 def read(offset=int, length=int):
182 TestVector = ListOf(TupleOf(int, int, str, str))
183 # elements are (offset, length, operator, specimen)
184 # operator is one of "lt, le, eq, ne, ge, gt"
185 # nop always passes and is used to fetch data while writing.
186 # you should use length==len(specimen) for everything except nop
187 DataVector = ListOf(TupleOf(int, ShareData))
188 # (offset, data). This limits us to 30 writes of 1MiB each per call
189 TestAndWriteVectorsForShares = DictOf(int,
192 ChoiceOf(None, int))) # new_length
193 ReadVector = ListOf(TupleOf(int, int))
194 ReadData = ListOf(ShareData)
195 # returns data[offset:offset+length] for each element of ReadVector
197 class RIStorageServer(RemoteInterface):
198 __remote_name__ = "RIStorageServer.tahoe.allmydata.com"
201 """Return a tuple of (my_version, oldest_supported) strings.
202 Each string can be parsed by an allmydata.util.version.Version
203 instance, and then compared. The first goal is to make sure that
204 nodes are not confused by speaking to an incompatible peer. The
205 second goal is to enable the development of backwards-compatibility
208 This method is likely to change in incompatible ways until we get the
209 whole compatibility scheme nailed down.
211 return TupleOf(str, str)
213 def allocate_buckets(storage_index=StorageIndex,
214 renew_secret=LeaseRenewSecret,
215 cancel_secret=LeaseCancelSecret,
216 sharenums=SetOf(int, maxLength=MAX_BUCKETS),
217 allocated_size=int, canary=Referenceable):
219 @param storage_index: the index of the bucket to be created or
221 @param sharenums: these are the share numbers (probably between 0 and
222 99) that the sender is proposing to store on this
224 @param renew_secret: This is the secret used to protect bucket refresh.
225 This secret is generated by the client and
226 stored for later comparison by the server. Each
227 server is given a different secret.
228 @param cancel_secret: Like renew_secret, but protects bucket decref.
229 @param canary: If the canary is lost before close(), the bucket is
231 @return: tuple of (alreadygot, allocated), where alreadygot is what we
232 already have and allocated is what we hereby agree to accept. New
233 leases are added for shares in both lists.
235 return TupleOf(SetOf(int, maxLength=MAX_BUCKETS),
236 DictOf(int, RIBucketWriter, maxKeys=MAX_BUCKETS))
238 def renew_lease(storage_index=StorageIndex, renew_secret=LeaseRenewSecret):
240 Renew the lease on a given bucket. Some networks will use this, some
244 def cancel_lease(storage_index=StorageIndex,
245 cancel_secret=LeaseCancelSecret):
247 Cancel the lease on a given bucket. If this was the last lease on the
248 bucket, the bucket will be deleted.
251 def get_buckets(storage_index=StorageIndex):
252 return DictOf(int, RIBucketReader, maxKeys=MAX_BUCKETS)
256 def slot_readv(storage_index=StorageIndex,
257 shares=ListOf(int), readv=ReadVector):
258 """Read a vector from the numbered shares associated with the given
259 storage index. An empty shares list means to return data from all
260 known shares. Returns a dictionary with one key per share."""
261 return DictOf(int, ReadData) # shnum -> results
263 def slot_testv_and_readv_and_writev(storage_index=StorageIndex,
264 secrets=TupleOf(Hash, Hash, Hash),
265 tw_vectors=TestAndWriteVectorsForShares,
268 """General-purpose test-and-set operation for mutable slots. Perform
269 a bunch of comparisons against the existing shares. If they all pass,
270 then apply a bunch of write vectors to those shares. Then use the
271 read vectors to extract data from all the shares and return the data.
273 This method is, um, large. The goal is to allow clients to update all
274 the shares associated with a mutable file in a single round trip.
276 @param storage_index: the index of the bucket to be created or
278 @param write_enabler: a secret that is stored along with the slot.
279 Writes are accepted from any caller who can
280 present the matching secret. A different secret
281 should be used for each slot*server pair.
282 @param renew_secret: This is the secret used to protect bucket refresh.
283 This secret is generated by the client and
284 stored for later comparison by the server. Each
285 server is given a different secret.
286 @param cancel_secret: Like renew_secret, but protects bucket decref.
288 The 'secrets' argument is a tuple of (write_enabler, renew_secret,
289 cancel_secret). The first is required to perform any write. The
290 latter two are used when allocating new shares. To simply acquire a
291 new lease on existing shares, use an empty testv and an empty writev.
293 Each share can have a separate test vector (i.e. a list of
294 comparisons to perform). If all vectors for all shares pass, then all
295 writes for all shares are recorded. Each comparison is a 4-tuple of
296 (offset, length, operator, specimen), which effectively does a bool(
297 (read(offset, length)) OPERATOR specimen ) and only performs the
298 write if all these evaluate to True. Basic test-and-set uses 'eq'.
299 Write-if-newer uses a seqnum and (offset, length, 'lt', specimen).
300 Write-if-same-or-newer uses 'le'.
302 Reads from the end of the container are truncated, and missing shares
303 behave like empty ones, so to assert that a share doesn't exist (for
304 use when creating a new share), use (0, 1, 'eq', '').
306 The write vector will be applied to the given share, expanding it if
307 necessary. A write vector applied to a share number that did not
308 exist previously will cause that share to be created.
310 Each write vector is accompanied by a 'new_length' argument. If
311 new_length is not None, use it to set the size of the container. This
312 can be used to pre-allocate space for a series of upcoming writes, or
313 truncate existing data. If the container is growing, new_length will
314 be applied before datav. If the container is shrinking, it will be
317 The read vector is used to extract data from all known shares,
318 *before* any writes have been applied. The same vector is used for
319 all shares. This captures the state that was tested by the test
322 This method returns two values: a boolean and a dict. The boolean is
323 True if the write vectors were applied, False if not. The dict is
324 keyed by share number, and each value contains a list of strings, one
325 for each element of the read vector.
327 If the write_enabler is wrong, this will raise BadWriteEnablerError.
328 To enable share migration, the exception will have the nodeid used
329 for the old write enabler embedded in it, in the following string::
331 The write enabler was recorded by nodeid '%s'.
333 Note that the nodeid here is encoded using the same base32 encoding
334 used by Foolscap and allmydata.util.idlib.nodeid_b2a().
337 return TupleOf(bool, DictOf(int, ReadData))
339 class IStorageBucketWriter(Interface):
340 def put_block(segmentnum=int, data=ShareData):
341 """@param data: For most segments, this data will be 'blocksize'
342 bytes in length. The last segment might be shorter.
343 @return: a Deferred that fires (with None) when the operation completes
346 def put_plaintext_hashes(hashes=ListOf(Hash, maxLength=2**20)):
348 @return: a Deferred that fires (with None) when the operation completes
351 def put_crypttext_hashes(hashes=ListOf(Hash, maxLength=2**20)):
353 @return: a Deferred that fires (with None) when the operation completes
356 def put_block_hashes(blockhashes=ListOf(Hash, maxLength=2**20)):
358 @return: a Deferred that fires (with None) when the operation completes
361 def put_share_hashes(sharehashes=ListOf(TupleOf(int, Hash),
364 @return: a Deferred that fires (with None) when the operation completes
367 def put_uri_extension(data=URIExtensionData):
368 """This block of data contains integrity-checking information (hashes
369 of plaintext, crypttext, and shares), as well as encoding parameters
370 that are necessary to recover the data. This is a serialized dict
371 mapping strings to other strings. The hash of this data is kept in
372 the URI and verified before any of the data is used. All buckets for
373 a given file contain identical copies of this data.
375 The serialization format is specified with the following pseudocode:
376 for k in sorted(dict.keys()):
377 assert re.match(r'^[a-zA-Z_\-]+$', k)
378 write(k + ':' + netstring(dict[k]))
380 @return: a Deferred that fires (with None) when the operation completes
384 """Finish writing and close the bucket. The share is not finalized
385 until this method is called: if the uploading client disconnects
386 before calling close(), the partially-written share will be
389 @return: a Deferred that fires (with None) when the operation completes
392 class IStorageBucketReader(Interface):
394 def get_block(blocknum=int):
395 """Most blocks will be the same size. The last block might be shorter
401 def get_plaintext_hashes():
403 @return: ListOf(Hash, maxLength=2**20)
406 def get_crypttext_hashes():
408 @return: ListOf(Hash, maxLength=2**20)
411 def get_block_hashes():
413 @return: ListOf(Hash, maxLength=2**20)
416 def get_share_hashes():
418 @return: ListOf(TupleOf(int, Hash), maxLength=2**20)
421 def get_uri_extension():
423 @return: URIExtensionData
428 # hm, we need a solution for forward references in schemas
429 from foolscap.schema import Any
431 FileNode_ = Any() # TODO: foolscap needs constraints on copyables
432 DirectoryNode_ = Any() # TODO: same
433 AnyNode_ = ChoiceOf(FileNode_, DirectoryNode_)
436 class IURI(Interface):
437 def init_from_string(uri):
438 """Accept a string (as created by my to_string() method) and populate
439 this instance with its data. I am not normally called directly,
440 please use the module-level uri.from_string() function to convert
441 arbitrary URI strings into IURI-providing instances."""
444 """Return False if this URI can be used to modify the data. Return True
445 if this URI cannot be used to modify the data."""
448 """Return True if the data can be modified by *somebody* (perhaps
449 someone who has a more powerful URI than this one)."""
452 """Return another IURI instance, which represents a read-only form of
453 this one. If is_readonly() is True, this returns self."""
456 """Return an instance that provides IVerifierURI, which can be used
457 to check on the availability of the file or directory, without
458 providing enough capabilities to actually read or modify the
459 contents. This may return None if the file does not need checking or
460 verification (e.g. LIT URIs).
464 """Return a string of printable ASCII characters, suitable for
465 passing into init_from_string."""
467 class IVerifierURI(Interface):
468 def init_from_string(uri):
469 """Accept a string (as created by my to_string() method) and populate
470 this instance with its data. I am not normally called directly,
471 please use the module-level uri.from_string() function to convert
472 arbitrary URI strings into IURI-providing instances."""
475 """Return a string of printable ASCII characters, suitable for
476 passing into init_from_string."""
478 class IDirnodeURI(Interface):
479 """I am a URI which represents a dirnode."""
482 class IFileURI(Interface):
483 """I am a URI which represents a filenode."""
485 """Return the length (in bytes) of the file that I represent."""
487 class IMutableFileURI(Interface):
488 """I am a URI which represents a mutable filenode."""
489 class INewDirectoryURI(Interface):
491 class IReadonlyNewDirectoryURI(Interface):
495 class IFilesystemNode(Interface):
498 Return the URI that can be used by others to get access to this
499 node. If this node is read-only, the URI will only offer read-only
500 access. If this node is read-write, the URI will offer read-write
503 If you have read-write access to a node and wish to share merely
504 read-only access with others, use get_readonly_uri().
507 def get_readonly_uri():
508 """Return the directory URI that can be used by others to get
509 read-only access to this directory node. The result is a read-only
510 URI, regardless of whether this dirnode is read-only or read-write.
512 If you have merely read-only access to this dirnode,
513 get_readonly_uri() will return the same thing as get_uri().
517 """Return an IVerifierURI instance that represents the
518 'verify/refresh capability' for this node. The holder of this
519 capability will be able to renew the lease for this node, protecting
520 it from garbage-collection. They will also be able to ask a server if
521 it holds a share for the file or directory.
525 """Perform a file check. See IChecker.check for details."""
528 """Return True if this reference provides mutable access to the given
529 file or directory (i.e. if you can modify it), or False if not. Note
530 that even if this reference is read-only, someone else may hold a
531 read-write reference to it."""
534 """Return True if this file or directory is mutable (by *somebody*,
535 not necessarily you), False if it is immutable. Note that a file
536 might be mutable overall, but your reference to it might be
537 read-only. On the other hand, all references to an immutable file
538 will be read-only; there are no read-write references to an immutable
542 class IMutableFilesystemNode(IFilesystemNode):
545 class IFileNode(IFilesystemNode):
546 def download(target):
547 """Download the file's contents to a given IDownloadTarget"""
549 def download_to_data():
550 """Download the file's contents. Return a Deferred that fires
551 with those contents."""
554 """Return the length (in bytes) of the data this node represents."""
556 class IMutableFileNode(IFileNode, IMutableFilesystemNode):
557 def download_to_data():
558 """Download the file's contents. Return a Deferred that fires with
559 those contents. If there are multiple retrievable versions in the
560 grid (because you failed to avoid simultaneous writes, see
561 docs/mutable.txt), this will return the first version that it can
562 reconstruct, and will silently ignore the others. In the future, a
563 more advanced API will signal and provide access to the multiple
566 def replace(newdata):
567 """Replace the old contents with the new data. Returns a Deferred
568 that fires (with None) when the operation is complete.
570 If the node detects that there are multiple outstanding versions of
571 the file, this will raise ConsistencyError, and may leave the
572 distributed file in an unusual state (the node will try to ensure
573 that at least one version of the file remains retrievable, but it may
574 or may not be the one you just tried to upload). You should respond
575 to this by downloading the current contents of the file and retrying
576 the replace() operation.
580 """Return this filenode's writekey, or None if the node does not have
581 write-capability. This may be used to assist with data structures
582 that need to make certain data available only to writers, such as the
583 read-write child caps in dirnodes. The recommended process is to have
584 reader-visible data be submitted to the filenode in the clear (where
585 it will be encrypted by the filenode using the readkey), but encrypt
586 writer-visible data using this writekey.
589 class IDirectoryNode(IMutableFilesystemNode):
590 """I represent a name-to-child mapping, holding the tahoe equivalent of a
591 directory. All child names are unicode strings, and all children are some
592 sort of IFilesystemNode (either files or subdirectories).
597 The dirnode ('1') URI returned by this method can be used in
598 set_uri() on a different directory ('2') to 'mount' a reference to
599 this directory ('1') under the other ('2'). This URI is just a
600 string, so it can be passed around through email or other out-of-band
604 def get_readonly_uri():
606 The dirnode ('1') URI returned by this method can be used in
607 set_uri() on a different directory ('2') to 'mount' a reference to
608 this directory ('1') under the other ('2'). This URI is just a
609 string, so it can be passed around through email or other out-of-band
614 """I return a Deferred that fires with a dictionary mapping child
615 name (a unicode string) to (node, metadata_dict) tuples, in which
616 'node' is either an IFileNode or IDirectoryNode, and 'metadata_dict'
617 is a dictionary of metadata."""
620 """I return a Deferred that fires with a boolean, True if there
621 exists a child of the given name, False if not. The child name must
622 be a unicode string."""
625 """I return a Deferred that fires with a specific named child node,
626 either an IFileNode or an IDirectoryNode. The child name must be a
629 def get_metadata_for(name):
630 """I return a Deferred that fires with the metadata dictionary for a
631 specific named child node. This metadata is stored in the *edge*, not
632 in the child, so it is attached to the parent dirnode rather than the
633 child dir-or-file-node. The child name must be a unicode string."""
635 def set_metadata_for(name, metadata):
636 """I replace any existing metadata for the named child with the new
637 metadata. The child name must be a unicode string. This metadata is
638 stored in the *edge*, not in the child, so it is attached to the
639 parent dirnode rather than the child dir-or-file-node. I return a
640 Deferred (that fires with this dirnode) when the operation is
643 def get_child_at_path(path):
644 """Transform a child path into an IDirectoryNode or IFileNode.
646 I perform a recursive series of 'get' operations to find the named
647 descendant node. I return a Deferred that fires with the node, or
648 errbacks with IndexError if the node could not be found.
650 The path can be either a single string (slash-separated) or a list of
651 path-name elements. All elements must be unicode strings.
654 def set_uri(name, child_uri, metadata=None):
655 """I add a child (by URI) at the specific name. I return a Deferred
656 that fires when the operation finishes. I will replace any existing
657 child of the same name. The child name must be a unicode string.
659 The child_uri could be for a file, or for a directory (either
660 read-write or read-only, using a URI that came from get_uri() ).
662 If metadata= is provided, I will use it as the metadata for the named
663 edge. This will replace any existing metadata. If metadata= is left
664 as the default value of None, I will set ['mtime'] to the current
665 time, and I will set ['ctime'] to the current time if there was not
666 already a child by this name present. This roughly matches the
667 ctime/mtime semantics of traditional filesystems.
669 If this directory node is read-only, the Deferred will errback with a
672 def set_children(entries):
673 """Add multiple (name, child_uri) pairs (or (name, child_uri,
674 metadata) triples) to a directory node. Returns a Deferred that fires
675 (with None) when the operation finishes. This is equivalent to
676 calling set_uri() multiple times, but is much more efficient. All
677 child names must be unicode strings.
680 def set_node(name, child, metadata=None):
681 """I add a child at the specific name. I return a Deferred that fires
682 when the operation finishes. This Deferred will fire with the child
683 node that was just added. I will replace any existing child of the
684 same name. The child name must be a unicode string.
686 If metadata= is provided, I will use it as the metadata for the named
687 edge. This will replace any existing metadata. If metadata= is left
688 as the default value of None, I will set ['mtime'] to the current
689 time, and I will set ['ctime'] to the current time if there was not
690 already a child by this name present. This roughly matches the
691 ctime/mtime semantics of traditional filesystems.
693 If this directory node is read-only, the Deferred will errback with a
696 def set_nodes(entries):
697 """Add multiple (name, child_node) pairs (or (name, child_node,
698 metadata) triples) to a directory node. Returns a Deferred that fires
699 (with None) when the operation finishes. This is equivalent to
700 calling set_node() multiple times, but is much more efficient. All
701 child names must be unicode strings."""
704 def add_file(name, uploadable, metadata=None):
705 """I upload a file (using the given IUploadable), then attach the
706 resulting FileNode to the directory at the given name. I set metadata
707 the same way as set_uri and set_node. The child name must be a
710 I return a Deferred that fires (with the IFileNode of the uploaded
711 file) when the operation completes."""
714 """I remove the child at the specific name. I return a Deferred that
715 fires when the operation finishes. The child name must be a unicode
718 def create_empty_directory(name):
719 """I create and attach an empty directory at the given name. The
720 child name must be a unicode string. I return a Deferred that fires
721 when the operation finishes."""
723 def move_child_to(current_child_name, new_parent, new_child_name=None):
724 """I take one of my children and move them to a new parent. The child
725 is referenced by name. On the new parent, the child will live under
726 'new_child_name', which defaults to 'current_child_name'. TODO: what
727 should we do about metadata? I return a Deferred that fires when the
728 operation finishes. The child name must be a unicode string."""
730 def build_manifest():
731 """Return a frozenset of verifier-capability strings for all nodes
732 (directories and files) reachable from this one."""
734 class ICodecEncoder(Interface):
735 def set_params(data_size, required_shares, max_shares):
736 """Set up the parameters of this encoder.
738 This prepares the encoder to perform an operation that converts a
739 single block of data into a number of shares, such that a future
740 ICodecDecoder can use a subset of these shares to recover the
741 original data. This operation is invoked by calling encode(). Once
742 the encoding parameters are set up, the encode operation can be
743 invoked multiple times.
745 set_params() prepares the encoder to accept blocks of input data that
746 are exactly 'data_size' bytes in length. The encoder will be prepared
747 to produce 'max_shares' shares for each encode() operation (although
748 see the 'desired_share_ids' to use less CPU). The encoding math will
749 be chosen such that the decoder can get by with as few as
750 'required_shares' of these shares and still reproduce the original
751 data. For example, set_params(1000, 5, 5) offers no redundancy at
752 all, whereas set_params(1000, 1, 10) provides 10x redundancy.
754 Numerical Restrictions: 'data_size' is required to be an integral
755 multiple of 'required_shares'. In general, the caller should choose
756 required_shares and max_shares based upon their reliability
757 requirements and the number of peers available (the total storage
758 space used is roughly equal to max_shares*data_size/required_shares),
759 then choose data_size to achieve the memory footprint desired (larger
760 data_size means more efficient operation, smaller data_size means
761 smaller memory footprint).
763 In addition, 'max_shares' must be equal to or greater than
764 'required_shares'. Of course, setting them to be equal causes
765 encode() to degenerate into a particularly slow form of the 'split'
768 See encode() for more details about how these parameters are used.
770 set_params() must be called before any other ICodecEncoder methods
774 def get_encoder_type():
775 """Return a short string that describes the type of this encoder.
777 There is required to be a global table of encoder classes. This method
778 returns an index into this table; the value at this index is an
779 encoder class, and this encoder is an instance of that class.
782 def get_serialized_params(): # TODO: maybe, maybe not
783 """Return a string that describes the parameters of this encoder.
785 This string can be passed to the decoder to prepare it for handling
786 the encoded shares we create. It might contain more information than
787 was presented to set_params(), if there is some flexibility of
790 This string is intended to be embedded in the URI, so there are
791 several restrictions on its contents. At the moment I'm thinking that
792 this means it may contain hex digits and hyphens, and nothing else.
793 The idea is that the URI contains something like '%s:%s:%s' %
794 (encoder.get_encoder_type(), encoder.get_serialized_params(),
795 b2a(crypttext_hash)), and this is enough information to construct a
799 def get_block_size():
800 """Return the length of the shares that encode() will produce.
803 def encode_proposal(data, desired_share_ids=None):
806 'data' must be a string (or other buffer object), and len(data) must
807 be equal to the 'data_size' value passed earlier to set_params().
809 This will return a Deferred that will fire with two lists. The first
810 is a list of shares, each of which is a string (or other buffer
811 object) such that len(share) is the same as what get_block_size()
812 returned earlier. The second is a list of shareids, in which each is
813 an integer. The lengths of the two lists will always be equal to each
814 other. The user should take care to keep each share closely
815 associated with its shareid, as one is useless without the other.
817 The length of this output list will normally be the same as the value
818 provided to the 'max_shares' parameter of set_params(). This may be
819 different if 'desired_share_ids' is provided.
821 'desired_share_ids', if provided, is required to be a sequence of
822 ints, each of which is required to be >= 0 and < max_shares. If not
823 provided, encode() will produce 'max_shares' shares, as if
824 'desired_share_ids' were set to range(max_shares). You might use this
825 if you initially thought you were going to use 10 peers, started
826 encoding, and then two of the peers dropped out: you could use
827 desired_share_ids= to skip the work (both memory and CPU) of
828 producing shares for the peers which are no longer available.
def encode(inshares, desired_share_ids=None):
    """Encode some data. This may be called multiple times. Each call is
    independent of the others. (NOTE(review): the end of the preceding
    sentence is missing from the listing -- confirm.)

    inshares is a sequence of length required_shares, containing buffers
    (i.e. strings), where each buffer contains the next contiguous
    non-overlapping segment of the input data. Each buffer is required to
    be the same length, and the sum of the lengths of the buffers is
    required to be exactly the data_size promised by set_params(). (This
    implies that the data has to be padded before being passed to
    encode(), unless of course it already happens to be an even multiple
    of required_shares in length.)

    ALSO: the requirement to break up your data into 'required_shares'
    chunks before calling encode() feels a bit surprising, at least from
    the point of view of a user who doesn't know how FEC works. It feels
    like an implementation detail that has leaked outside the
    abstraction barrier. Can you imagine a use case in which the data to
    be encoded might already be available in pre-segmented chunks, such
    that it is faster or less work to make encode() take a list rather
    than splitting a single string?

    ALSO ALSO: I think 'inshares' is a misleading term, since encode()
    is supposed to *produce* shares, so what it *accepts* should be
    something other than shares. Other places in this interface use the
    word 'data' for that-which-is-not-shares.. maybe we should use that
    term consistently?

    'desired_share_ids', if provided, is required to be a sequence of
    ints, each of which is required to be >= 0 and < max_shares. If not
    provided, encode() will produce 'max_shares' shares, as if
    'desired_share_ids' were set to range(max_shares). You might use this
    if you initially thought you were going to use 10 peers, started
    encoding, and then two of the peers dropped out: you could use
    desired_share_ids= to skip the work (both memory and CPU) of
    producing shares for the peers which are no longer available.

    For each call, encode() will return a Deferred that fires with two
    lists, one containing shares and the other containing the shareids.
    The get_share_size() method can be used to determine the length of
    the share strings returned by encode(). Each shareid is a small
    integer, exactly as passed into 'desired_share_ids' (or
    range(max_shares), if desired_share_ids was not provided).

    The shares and their corresponding shareids are required to be kept
    together during storage and retrieval. Specifically, the share data is
    useless by itself: the decoder needs to be told which share is which
    by providing it with both the shareid and the actual share data.

    This function will allocate an amount of memory roughly equal to::

     (max_shares - required_shares) * get_share_size()

    When combined with the memory that the caller must allocate to
    provide the input data, this leads to a memory footprint roughly
    equal to the size of the resulting encoded shares (i.e. the expansion
    factor times the size of the input segment).
    """

    # Design alternatives that were considered and not adopted:
    #
    #  returning a list of (shareidN,shareN) tuples instead of a pair of
    #  lists (shareids..,shares..). Brian thought the tuples would
    #  encourage users to keep the share and shareid together throughout
    #  later processing, Zooko pointed out that the code to iterate
    #  through two lists is not really more complicated than using a list
    #  of tuples and there's also a performance improvement
    #
    #  having 'data_size' not required to be an integral multiple of
    #  'required_shares'. Doing this would require encode() to perform
    #  padding internally, and we'd prefer to have any padding be done
    #  explicitly by the caller. Yes, it is an abstraction leak, but
    #  hopefully not an onerous one.
class ICodecDecoder(Interface):
    def set_serialized_params(params):
        """Set up the parameters of this decoder, from a string returned by
        encoder.get_serialized_params()."""

    def get_needed_shares():
        """Return the number of shares needed to reconstruct the data.
        set_serialized_params() is required to be called before this."""

    def decode(some_shares, their_shareids):
        """Decode a partial list of shares into data.

        'some_shares' is required to be a sequence of buffers of sharedata, a
        subset of the shares returned by ICodecEncoder.encode(). Each share is
        required to be of the same length. The i'th element of their_shareids
        is required to be the shareid of the i'th buffer in some_shares.

        This returns a Deferred which fires with a sequence of buffers. This
        sequence will contain all of the segments of the original data, in
        order. The sum of the lengths of all of the buffers will be the
        'data_size' value passed into the original ICodecEncoder.set_params()
        call. To get back the single original input block of data, use
        ''.join(output_buffers), or you may wish to simply write them in
        order to an output file.

        Note that some of the elements in the result sequence may be
        references to the elements of the some_shares input sequence. In
        particular, this means that if those share objects are mutable (e.g.
        arrays) and if they are changed, then both the input (the
        'some_shares' parameter) and the output (the value given when the
        deferred is triggered) will change.

        The length of 'some_shares' is required to be exactly the value of
        'required_shares' passed into the original ICodecEncoder.set_params()
        call.
        """
944 class IEncoder(Interface):
945 """I take an object that provides IEncryptedUploadable, which provides
946 encrypted data, and a list of shareholders. I then encode, hash, and
947 deliver shares to those shareholders. I will compute all the necessary
948 Merkle hash trees that are necessary to validate the crypttext that
949 eventually comes back from the shareholders. I provide the URI Extension
950 Block Hash, and the encoding parameters, both of which must be included
953 I do not choose shareholders, that is left to the IUploader. I must be
954 given a dict of RemoteReferences to storage buckets that are ready and
955 willing to receive data.
959 """Specify the number of bytes that will be encoded. This must be
960 performed before get_serialized_params() can be called.
def set_params(params):
    """Override the default encoding parameters. 'params' is a tuple of
    (k,d,n), where 'k' is the number of required shares, 'd' is the
    shares_of_happiness, and 'n' is the total number of shares that will
    be created.

    Encoding parameters can be set in three ways. 1: The Encoder class
    provides defaults (3/7/10). 2: the Encoder can be constructed with
    an 'options' dictionary, in which the
    'needed_and_happy_and_total_shares' key can be a (k,d,n) tuple. 3:
    set_params((k,d,n)) can be called.

    If you intend to use set_params(), you must call it before
    get_share_size or get_param are called.
    """
def set_encrypted_uploadable(u):
    """Provide a source of encrypted upload data. 'u' must implement
    IEncryptedUploadable.

    When this is called, the IEncryptedUploadable will be queried for its
    length and the storage_index that should be used.

    This returns a Deferred that fires with this Encoder instance.

    This must be performed before start() can be called.
    """
991 """Return an encoding parameter, by name.
993 'storage_index': return a string with the (16-byte truncated SHA-256
994 hash) storage index to which these shares should be
997 'share_counts': return a tuple describing how many shares are used:
998 (needed_shares, shares_of_happiness, total_shares)
1000 'num_segments': return an int with the number of segments that
1003 'segment_size': return an int with the size of each segment.
1005 'block_size': return the size of the individual blocks that will
1006 be delivered to a shareholder's put_block() method. By
1007 knowing this, the shareholder will be able to keep all
1008 blocks in a single file and still provide random access
1009 when reading them. # TODO: can we avoid exposing this?
1011 'share_size': an int with the size of the data that will be stored
1012 on each shareholder. This is the aggregate amount of data
1013 that will be sent to the shareholder, summed over all
1014 the put_block() calls I will ever make. It is useful to
1015 determine this size before asking potential
1016 shareholders whether they will grant a lease or not,
1017 since their answers will depend upon how much space we
1018 need. TODO: this might also include some amount of
1019 overhead, like the size of all the hashes. We need to
1020 decide whether this is useful or not.
1022 'serialized_params': a string with a concise description of the
1023 codec name and its parameters. This may be passed
1024 into the IUploadable to let it make sure that
1025 the same file encoded with different parameters
1026 will result in different storage indexes.
1028 Once this is called, set_size() and set_params() may not be called.
def set_shareholders(shareholders):
    """Tell the encoder where to put the encoded shares. 'shareholders'
    must be a dictionary that maps share number (an integer ranging from
    0 to n-1) to an instance that provides IStorageBucketWriter. This
    must be performed before start() can be called."""
1038 """Begin the encode/upload process. This involves reading encrypted
1039 data from the IEncryptedUploadable, encoding it, uploading the shares
1040 to the shareholders, then sending the hash trees.
1042 set_encrypted_uploadable() and set_shareholders() must be called
1043 before this can be invoked.
1045 This returns a Deferred that fires with a tuple of
1046 (uri_extension_hash, needed_shares, total_shares, size) when the
1047 upload process is complete. This information, plus the encryption
1048 key, is sufficient to construct the URI.
1051 class IDecoder(Interface):
1052 """I take a list of shareholders and some setup information, then
1053 download, validate, decode, and decrypt data from them, writing the
1054 results to an output file.
1056 I do not locate the shareholders, that is left to the IDownloader. I must
1057 be given a dict of RemoteReferences to storage buckets that are ready to
1062 """I take a file-like object (providing write and close) to which all
1063 the plaintext data will be written.
1065 TODO: producer/consumer . Maybe write() should return a Deferred that
1066 indicates when it will accept more data? But probably having the
1067 IDecoder be a producer is easier to glue to IConsumer pieces.
def set_shareholders(shareholders):
    """I take a dictionary that maps share identifiers (small integers)
    to RemoteReferences that provide RIBucketReader. This must be called
    before the download is started.

    NOTE(review): the end of the last sentence is missing from the
    listing; 'before the download is started' is presumed -- confirm.
    """
1076 """I start the download. This process involves retrieving data and
1077 hash chains from the shareholders, using the hashes to validate the
1078 data, decoding the shares into segments, decrypting the segments,
1079 then writing the resulting plaintext to the output file.
1081 I return a Deferred that will fire (with self) when the download is
1085 class IDownloadTarget(Interface):
1087 """Called before any calls to write() or close(). If an error
1088 occurs before any data is available, fail() may be called without
1089 a previous call to open().
1091 'size' is the length of the file being downloaded, in bytes."""
1094 """Output some data to the target."""
1096 """Inform the target that there is no more data to be written."""
1098 """fail() is called to indicate that the download has failed. 'why'
1099 is a Failure object indicating what went wrong. No further methods
1100 will be invoked on the IDownloadTarget after fail()."""
def register_canceller(cb):
    """The FileDownloader uses this to register a no-argument function
    that the target can call to cancel the download. Once this canceller
    is invoked, no further calls to write() or close() will be made."""
1106 """When the FileDownloader is done, this finish() function will be
1107 called. Whatever it returns will be returned to the invoker of
1108 Downloader.download.
class IDownloader(Interface):
    def download(uri, target):
        """Perform a CHK download, sending the data to the given target.
        'target' must provide IDownloadTarget.

        Returns a Deferred that fires (with the results of target.finish)
        when the download is finished, or errbacks if something went wrong."""
1119 class IEncryptedUploadable(Interface):
def set_upload_status(upload_status):
    """Provide an IUploadStatus object that should be filled with status
    information. The IEncryptedUploadable is responsible for setting
    key-determination progress ('chk'), size, storage_index, and
    ciphertext-fetch progress. It may delegate some of this
    responsibility to others, in particular to the IUploadable."""
1128 """This behaves just like IUploadable.get_size()."""
def get_all_encoding_parameters():
    """Return a Deferred that fires with a tuple of
    (k,happy,n,segment_size). The segment_size will be used as-is, and
    must match the following constraints: it must be a multiple of k, and
    it shouldn't be unreasonably larger than the file size (if
    segment_size is larger than filesize, the difference must be stored
    as padding).

    NOTE(review): the end of the parenthetical above is missing from the
    listing; 'as padding' is presumed -- confirm.

    This usually passes through to the IUploadable method of the same
    name.

    The encoder strictly obeys the values returned by this method. To
    make an upload use non-default encoding parameters, you must arrange
    to control the values that this method returns.
    """
def get_storage_index():
    """Return a Deferred that fires with a 16-byte storage index.
    """
def read_encrypted(length, hash_only):
    """This behaves just like IUploadable.read(), but returns crypttext
    instead of plaintext. If hash_only is True, then this discards the
    data (and returns an empty list); this improves efficiency when
    resuming an interrupted upload (where we need to compute the
    plaintext hashes, but don't need the redundant encrypted data)."""
def get_plaintext_hashtree_leaves(first, last, num_segments):
    """Get the leaf nodes of a merkle hash tree over the plaintext
    segments, i.e. get the tagged hashes of the given segments. The
    segment size is expected to be generated by the IEncryptedUploadable
    before any plaintext is read or ciphertext produced, so that the
    segment hashes can be generated with only a single pass.

    This returns a Deferred which fires with a sequence of hashes, using:

     tuple(segment_hashes[first:last])

    'num_segments' is used to assert that the number of segments that the
    IEncryptedUploadable handled matches the number of segments that the
    encoder was expecting.

    This method must not be called until the final byte has been read
    from read_encrypted(). Once this method is called, read_encrypted()
    can never be called again.
    """
def get_plaintext_hash():
    """Get the hash of the whole plaintext.

    This returns a Deferred which fires with a tagged SHA-256 hash of the
    whole plaintext, obtained from hashutil.plaintext_hash(data).
    """
1185 """Just like IUploadable.close()."""
1187 class IUploadable(Interface):
def set_upload_status(upload_status):
    """Provide an IUploadStatus object that should be filled with status
    information. The IUploadable is responsible for setting
    key-determination progress ('chk')."""
def set_default_encoding_parameters(params):
    """Set the default encoding parameters, which must be a dict mapping
    strings to ints. The meaningful keys are 'k', 'happy', 'n', and
    'max_segment_size'. These might have an influence on the final
    encoding parameters returned by get_all_encoding_parameters(), if the
    Uploadable doesn't have more specific preferences.

    This call is optional: if it is not used, the Uploadable will use
    some built-in defaults. If used, this method must be called before
    any other IUploadable methods to have any effect.
    """
1206 """Return a Deferred that will fire with the length of the data to be
1207 uploaded, in bytes. This will be called before the data is actually
1208 used, to compute encoding parameters.
def get_all_encoding_parameters():
    """Return a Deferred that fires with a tuple of
    (k,happy,n,segment_size). The segment_size will be used as-is, and
    must match the following constraints: it must be a multiple of k, and
    it shouldn't be unreasonably larger than the file size (if
    segment_size is larger than filesize, the difference must be stored
    as padding).

    NOTE(review): the end of the parenthetical above is missing from the
    listing; 'as padding' is presumed -- confirm.

    The relative values of k and n allow some IUploadables to request
    better redundancy than others (in exchange for consuming more space).

    Larger values of segment_size reduce hash overhead, while smaller
    values reduce memory footprint and cause data to be delivered in
    smaller pieces (which may provide a smoother and more predictable
    download experience).

    The encoder strictly obeys the values returned by this method. To
    make an upload use non-default encoding parameters, you must arrange
    to control the values that this method returns. One way to influence
    them may be to call set_default_encoding_parameters() before calling
    get_all_encoding_parameters().
    """
def get_encryption_key():
    """Return a Deferred that fires with a 16-byte AES key. This key will
    be used to encrypt the data. The key will also be hashed to derive
    the storage index. (NOTE(review): the end of the preceding sentence is
    missing from the listing; 'the storage index' is presumed -- confirm.)

    Uploadables which want to achieve convergence should hash their file
    contents and the serialized_encoding_parameters to form the key
    (which of course requires a full pass over the data). Uploadables can
    use the upload.ConvergentUploadMixin class to achieve this.

    Uploadables which do not care about convergence (or do not wish to
    make multiple passes over the data) can simply return a
    strongly-random 16 byte string.

    get_encryption_key() may be called multiple times: the IUploadable is
    required to return the same value each time.
    """
1255 """Return a Deferred that fires with a list of strings (perhaps with
1256 only a single element) which, when concatenated together, contain the
1257 next 'length' bytes of data. If EOF is near, this may provide fewer
1258 than 'length' bytes. The total number of bytes provided by read()
1259 before it signals EOF must equal the size provided by get_size().
1261 If the data must be acquired through multiple internal read
1262 operations, returning a list instead of a single string may help to
1263 reduce string copies.
1265 'length' will typically be equal to (min(get_size(),1MB)/req_shares),
1266 so a 10kB file means length=3kB, 100kB file means length=30kB,
1267 and >=1MB file means length=300kB.
1269 This method provides for a single full pass through the data. Later
1270 use cases may desire multiple passes or access to only parts of the
1271 data (such as a mutable file making small edits-in-place). This API
1272 will be expanded once those use cases are better understood.
1276 """The upload is finished, and whatever filehandle was in use may be
class IUploadResults(Interface):
    """I am returned by upload() methods. I contain a number of public
    attributes which can be read to determine the results of the upload::

     .uri : the CHK read-cap for the file

    """
class IUploader(Interface):
    def upload(uploadable):
        """Upload the file. 'uploadable' must implement IUploadable. This
        returns a Deferred which fires with an UploadResults instance, from
        which the URI of the file can be obtained as results.uri ."""

    def upload_ssk(write_capability, new_version, uploadable):
        """TODO: how should this work?"""
class IChecker(Interface):
    def check(uri_to_check):
        """Accepts an IVerifierURI, and checks upon the health of its target.

        For now, uri_to_check must be an IVerifierURI. In the future we
        expect to relax that to be anything that can be adapted to
        IVerifierURI (like read-only or read-write dirnode/filenode URIs).

        This returns a Deferred. For dirnodes, this fires with either True or
        False (dirnodes are not distributed, so their health is a boolean).

        For filenodes, this fires with a tuple of (needed_shares,
        total_shares, found_shares, sharemap). The first three are ints. The
        basic health of the file is found_shares / needed_shares: if less
        than 1.0, the file is unrecoverable.

        The sharemap has a key for each sharenum. The value is a list of
        (binary) nodeids who hold that share. If two shares are kept on the
        same nodeid, they will fail as a pair, and overall reliability is
        decreased.

        The IChecker instance remembers the results of the check. By default,
        these results are stashed in RAM (and are forgotten at shutdown). If
        a file named 'checker_results.db' exists in the node's basedir, it is
        used as a sqlite database of results, making them persistent across
        runs. To start using this feature, just 'touch checker_results.db',
        and the node will initialize it properly the next time it is started.
        """

    def verify(uri_to_check):
        """Accepts an IVerifierURI, and verifies the crypttext of the target.

        This is a more-intensive form of checking. For verification, the
        file's crypttext contents are retrieved, and the associated hash
        checks are performed. If a storage server is holding a corrupted
        share, verification will detect the problem, but checking will not.
        This returns a Deferred that fires with True if the crypttext hashes
        look good, and will probably raise an exception if anything goes
        wrong.

        For dirnodes, 'verify' is the same as 'check', so the Deferred will
        fire with True or False.

        Verification currently only uses a minimal subset of peers, so a lot
        of share corruption will not be caught by it. We expect to improve
        this.
        """

    def checker_results_for(uri_to_check):
        """Accepts an IVerifierURI, and returns a list of previously recorded
        checker results. This method performs no checking itself: it merely
        reports the results of checks that have taken place in the past.

        Each element of the list is a two-entry tuple: (when, results).
        The 'when' values are timestamps (float seconds since epoch), and the
        results are as defined in the check() method.

        Note: at the moment, this is specified to return synchronously. We
        might need to back away from this in the future.
        """
class IClient(Interface):
    def upload(uploadable):
        """Upload some data into a CHK, get back the UploadResults for it.
        @param uploadable: something that implements IUploadable
        @return: a Deferred that fires with the UploadResults instance.
                 To get the URI for this file, use results.uri .
        """

    def create_mutable_file(contents=""):
        """Create a new mutable file with contents, get back the URI string.
        @param contents: the initial contents to place in the file.
        @return: a Deferred that fires with the (string) SSK URI for the new
                 file.
        """

    def create_empty_dirnode():
        """Create a new dirnode, empty and unattached.
        @return: a Deferred that fires with the new IDirectoryNode instance.
        """

    def create_node_from_uri(uri):
        """Create a new IFilesystemNode instance from the uri, synchronously.
        @param uri: a string or IURI-providing instance. This could be for a
                    LiteralFileNode, a CHK file node, a mutable file node, or
                    a directory node
        @return: an instance that provides IFilesystemNode (or more usefully one
                 of its subclasses). File-specifying URIs will result in
                 IFileNode or IMutableFileNode -providing instances, like
                 FileNode, LiteralFileNode, or MutableFileNode.
                 Directory-specifying URIs will result in
                 IDirectoryNode-providing instances, like NewDirectoryNode.
        """
1390 class IClientStatus(Interface):
1392 """Return a list of IUploadStatus objects, one for each
1393 upload which is currently running."""
def list_downloads():
    """Return a list of IDownloadStatus objects, one for each
    download which is currently running."""
1398 class IUploadStatus(Interface):
def get_storage_index():
    """Return a string with the (binary) storage index in use on this
    upload. Returns None if the storage index has not yet been
    determined."""
1404 """Return an integer with the number of bytes that will eventually
1405 be uploaded for this file. Returns None if the size is not yet known.
1408 """Return True if this upload is using a Helper, False if not."""
1410 """Return a string describing the current state of the upload
1413 """Returns a tuple of floats, (chk, ciphertext, encode_and_push),
1414 each from 0.0 to 1.0 . 'chk' describes how much progress has been
1415 made towards hashing the file to determine a CHK encryption key: if
1416 non-convergent encryption is in use, this will be trivial, otherwise
1417 the whole file must be hashed. 'ciphertext' describes how much of the
1418 ciphertext has been pushed to the helper, and is '1.0' for non-helper
1419 uploads. 'encode_and_push' describes how much of the encode-and-push
1420 process has finished: for helper uploads this is dependent upon the
1421 helper providing progress reports. It might be reasonable to add all
1422 three numbers and report the sum to the user."""
1424 """Return True if the upload is currently active, False if not."""
1426 class IDownloadStatus(Interface):
def get_storage_index():
    """Return a string with the (binary) storage index in use on this
    download. This may be None if there is no storage index (i.e. LIT
    files)."""
1432 """Return an integer with the number of bytes that will eventually be
1433 retrieved for this file. Returns None if the size is not yet known.
1436 """Return True if this download is using a Helper, False if not."""
1438 """Return a string describing the current state of the download
1441 """Returns a float (from 0.0 to 1.0) describing the amount of the
1442 download that has completed. This value will remain at 0.0 until the
1443 first byte of plaintext is pushed to the download target."""
1445 """Return True if the download is currently active, False if not."""
class NotCapableError(Exception):
    """You have tried to write to a read-only node."""
class BadWriteEnablerError(Exception):
    # NOTE(review): the body of this class is not visible in the listing.
    # Presumably raised when a supplied write-enabler is rejected --
    # confirm against the code that raises it.
    """A write-enabler was rejected (see the review note above)."""
1454 class RIControlClient(RemoteInterface):
1456 def wait_for_client_connections(num_clients=int):
1457 """Do not return until we have connections to at least NUM_CLIENTS
1461 def upload_from_file_to_uri(filename=str):
1462 """Upload a file to the grid. This accepts a filename (which must be
1463 absolute) that points to a file on the node's local disk. The node
1464 will read the contents of this file, upload it to the grid, then
1465 return the URI at which it was uploaded.
1469 def download_from_uri_to_file(uri=URI, filename=str):
1470 """Download a file from the grid, placing it on the node's local disk
1471 at the given filename (which must be absolute[?]). Returns the
1472 absolute filename where the file was written."""
def get_memory_usage():
    """Return a dict that describes the amount of memory currently in use.
    The keys are 'VmPeak', 'VmSize', and 'VmData'. The values are integers,
    measuring memory consumption in bytes."""
    return DictOf(str, int)
def speed_test(count=int, size=int, mutable=Any()):
    """Write 'count' tempfiles to disk, all of the given size. Measure
    how long (in seconds) it takes to upload them all to the servers.
    Then measure how long it takes to download all of them. If 'mutable'
    is 'create', time creation of mutable files. If 'mutable' is
    'upload', then time access to the same mutable file instead of
    creating one. (NOTE(review): the end of the preceding sentence is
    missing from the listing -- confirm.)

    Returns a tuple of (upload_time, download_time).
    """
    return (float, float)
def measure_peer_response_time():
    """Send a short message to each connected peer, and measure the time
    it takes for them to respond to it. This is a rough measure of the
    application-level round trip time.

    @return: a dictionary mapping peerid to a float (RTT time in seconds)
    """

    return DictOf(Nodeid, float)
# Schema constraint used for upload results in the remote interfaces below.
# It currently accepts anything (Any()); the stricter DictOf(str, str) form
# is left commented out.
UploadResults = Any() #DictOf(str, str)
1507 class RIEncryptedUploadable(RemoteInterface):
1508 __remote_name__ = "RIEncryptedUploadable.tahoe.allmydata.com"
def get_all_encoding_parameters():
    # Four-element return constraint; IEncryptedUploadable documents the
    # tuple of the same-named method as (k, happy, n, segment_size).
    return (int, int, int, long)
1516 def read_encrypted(offset=long, length=long):
1519 def get_plaintext_hashtree_leaves(first=int, last=int, num_segments=int):
1522 def get_plaintext_hash():
class RICHKUploadHelper(RemoteInterface):
    # NOTE(review): the remote name says 'RIUploadHelper' while the class is
    # RICHKUploadHelper; presumably kept for wire compatibility -- confirm
    # before changing it.
    __remote_name__ = "RIUploadHelper.tahoe.allmydata.com"

    def upload(reader=RIEncryptedUploadable):
        # 'reader' is constrained to an RIEncryptedUploadable reference; the
        # call returns upload results (see RIHelper.upload_chk for the
        # protocol description).
        return UploadResults
class RIHelper(RemoteInterface):
    __remote_name__ = "RIHelper.tahoe.allmydata.com"

    def upload_chk(si=StorageIndex):
        """See if a file with a given storage index needs uploading. The
        helper will ask the appropriate storage servers to see if the file
        has already been uploaded. If so, the helper will return a set of
        'upload results' that includes whatever hashes are needed to build
        the read-cap, and perhaps a truncated sharemap.

        If the file has not yet been uploaded (or if it was only partially
        uploaded), the helper will return an empty upload-results dictionary
        and also an RICHKUploadHelper object that will take care of the
        upload process. The client should call upload() on this object and
        pass it a reference to an RIEncryptedUploadable object that will
        provide ciphertext. When the upload is finished, the upload() method
        will finish and return the upload results.
        """
        return (UploadResults, ChoiceOf(RICHKUploadHelper, None))
1557 class RIStatsProvider(RemoteInterface):
1558 __remote_name__ = "RIStatsProvider.tahoe.allmydata.com"
1560 Provides access to statistics and monitoring information.
1565 returns a dictionary containing 'counters' and 'stats', each a dictionary
1566 with string counter/stat name keys, and numeric values. counters are
1567 monotonically increasing measures of work done, and stats are instantaneous
1568 measures (potentially time averaged internally)
1570 return DictOf(str, DictOf(str, ChoiceOf(float, int, long)))
1572 class RIStatsGatherer(RemoteInterface):
1573 __remote_name__ = "RIStatsGatherer.tahoe.allmydata.com"
1575 Provides a monitoring service for centralised collection of stats
1578 def provide(provider=RIStatsProvider, nickname=str):
1580 @param provider: a stats collector instance which should be polled
1581 periodically by the gatherer to collect stats.
1582 @param nickname: a name useful to identify the provided client
1587 class IStatsProducer(Interface):
1590 returns a dictionary, with str keys representing the names of stats
1591 to be monitored, and numeric values.