2 from zope.interface import Interface
3 from foolscap.schema import StringConstraint, ListOf, TupleOf, SetOf, DictOf, \
4 ChoiceOf, IntegerConstraint
5 from foolscap import RemoteInterface, Referenceable
9 Hash = StringConstraint(maxLength=HASH_SIZE,
10 minLength=HASH_SIZE) # binary format 32-byte SHA256 hash
11 Nodeid = StringConstraint(maxLength=20,
12 minLength=20) # binary format 20-byte SHA1 hash
13 FURL = StringConstraint(1000)
14 StorageIndex = StringConstraint(16)
15 URI = StringConstraint(300) # kind of arbitrary
17 MAX_BUCKETS = 200 # per peer
19 # MAX_SEGMENT_SIZE in encode.py is 1 MiB (this constraint allows k = 1)
20 ShareData = StringConstraint(2**20)
21 URIExtensionData = StringConstraint(1000)
22 Number = IntegerConstraint(8) # 2**(8*8) == 16EiB ~= 18e18 ~= 18 exabytes
24 ReadSize = int # the 'int' constraint is 2**31 == 2GiB
25 LeaseRenewSecret = Hash # used to protect bucket lease renewal requests
26 LeaseCancelSecret = Hash # used to protect bucket lease cancellation requests
28 # Announcements are (FURL, service_name, remoteinterface_name,
29 # nickname, my_version, oldest_supported)
30 # the (FURL, service_name, remoteinterface_name) refer to the service being
31 # announced. The (nickname, my_version, oldest_supported) refer to the
32 # client as a whole. The my_version/oldest_supported strings can be parsed
33 # by an allmydata.util.version.Version instance, and then compared. The
34 # first goal is to make sure that nodes are not confused by speaking to an
35 # incompatible peer. The second goal is to enable the development of
36 # backwards-compatibility code.
38 Announcement = TupleOf(FURL, str, str,
41 class RIIntroducerSubscriberClient(RemoteInterface):
42 __remote_name__ = "RIIntroducerSubscriberClient.tahoe.allmydata.com"
44 def announce(announcements=SetOf(Announcement)):
45 """I accept announcements from the publisher."""
48 def set_encoding_parameters(parameters=(int, int, int)):
49 """Advise the client of the recommended k-of-n encoding parameters
50 for this grid. 'parameters' is a tuple of (k, desired, n), where 'n'
51 is the total number of shares that will be created for any given
52 file, while 'k' is the number of shares that must be retrieved to
53 recover that file, and 'desired' is the minimum number of shares that
54 must be placed before the uploader will consider its job a success.
55 n/k is the expansion ratio, while k determines the robustness.
57 Introducers should specify 'n' according to the expected size of the
58 grid (there is no point to producing more shares than there are
59 peers), and k according to the desired reliability-vs-overhead goals.
61 Note that setting k=1 is equivalent to simple replication.
65 # When Foolscap can handle multiple interfaces (Foolscap#17), the
66 # full-powered introducer will implement both RIIntroducerPublisher and
67 # RIIntroducerSubscriberService. Until then, we define
68 # RIIntroducerPublisherAndSubscriberService as a combination of the two, and
69 # make everybody use that.
71 class RIIntroducerPublisher(RemoteInterface):
72 """To publish a service to the world, connect to me and give me your
73 announcement message. I will deliver a copy to all connected subscribers."""
74 __remote_name__ = "RIIntroducerPublisher.tahoe.allmydata.com"
76 def publish(announcement=Announcement):
80 class RIIntroducerSubscriberService(RemoteInterface):
81 __remote_name__ = "RIIntroducerSubscriberService.tahoe.allmydata.com"
83 def subscribe(subscriber=RIIntroducerSubscriberClient, service_name=str):
84 """Give me a subscriber reference, and I will call its announce()
85 method with any announcements that match the desired service name. I
86 will ignore duplicate subscriptions.
90 class RIIntroducerPublisherAndSubscriberService(RemoteInterface):
91 __remote_name__ = "RIIntroducerPublisherAndSubscriberService.tahoe.allmydata.com"
92 def publish(announcement=Announcement):
94 def subscribe(subscriber=RIIntroducerSubscriberClient, service_name=str):
97 class IIntroducerClient(Interface):
98 """I provide service introduction facilities for a node. I help nodes
99 publish their services to the rest of the world, and I help them learn
100 about services available on other nodes."""
102 def publish(furl, service_name, remoteinterface_name):
103 """Once you call this, I will tell the world that the Referenceable
104 available at FURL is available to provide a service named
105 SERVICE_NAME. The precise definition of the service being provided is
106 identified by the Foolscap 'remote interface name' in the last
107 parameter: this is supposed to be a globally-unique string that
108 identifies the RemoteInterface that is implemented."""
110 def subscribe_to(service_name):
111 """Call this if you will eventually want to use services with the
112 given SERVICE_NAME. This will prompt me to subscribe to announcements
113 of those services. You can pick up the announcements later by calling
114 get_all_connections_for() or get_permuted_peers().
117 def get_all_connections():
118 """Return a frozenset of (nodeid, service_name, rref) tuples, one for
119 each active connection we've established to a remote service. This is
120 mostly useful for unit tests that need to wait until a certain number
121 of connections have been made."""
123 def get_all_connectors():
124 """Return a dict that maps from (nodeid, service_name) to a
125 RemoteServiceConnector instance for all services that we are actively
126 trying to connect to. Each RemoteServiceConnector has the following
129 service_name: the type of service provided, like 'storage'
130 announcement_time: when we first heard about this service
131 last_connect_time: when we last established a connection
132 last_loss_time: when we last lost a connection
134 version: the peer's version, from the most recent connection
135 oldest_supported: the peer's oldest supported version, same
137 rref: the RemoteReference, if connected, otherwise None
138 remote_host: the IAddress, if connected, otherwise None
140 This method is intended for monitoring interfaces, such as a web page
141 which describes connecting and connected peers.
144 def get_all_peerids():
145 """Return a frozenset of all peerids to whom we have a connection (to
146 one or more services) established. Mostly useful for unit tests."""
148 def get_all_connections_for(service_name):
149 """Return a frozenset of (nodeid, service_name, rref) tuples, one
150 for each active connection that provides the given SERVICE_NAME."""
152 def get_permuted_peers(service_name, key):
153 """Returns an ordered list of (peerid, rref) tuples, selecting from
154 the connections that provide SERVICE_NAME, using a hash-based
155 permutation keyed by KEY. This randomizes the service list in a
156 repeatable way, to distribute load over many peers.
159 def connected_to_introducer():
160 """Returns a boolean, True if we are currently connected to the
161 introducer, False if not."""
163 class RIStubClient(RemoteInterface):
164 """Each client publishes a service announcement for a dummy object called
165 the StubClient. This object doesn't actually offer any services, but the
166 announcement helps the Introducer keep track of which clients are
167 subscribed (so the grid admin can keep track of things like the size of
168 the grid and the client versions in use). This is the (empty)
169 RemoteInterface for the StubClient."""
171 class RIBucketWriter(RemoteInterface):
172 def write(offset=Offset, data=ShareData):
177 If the data that has been written is incomplete or inconsistent then
178 the server will throw the data away, else it will store it for future
184 """Abandon all the data that has been written.
188 class RIBucketReader(RemoteInterface):
189 def read(offset=Offset, length=ReadSize):
190 # ShareData is limited to 1MiB, so we don't need length= to be any
191 # larger than that. Large files must be read in pieces.
194 TestVector = ListOf(TupleOf(Offset, ReadSize, str, str))
195 # elements are (offset, length, operator, specimen)
196 # operator is one of "lt, le, eq, ne, ge, gt"
197 # nop always passes and is used to fetch data while writing.
198 # you should use length==len(specimen) for everything except nop
199 DataVector = ListOf(TupleOf(Offset, ShareData))
200 # (offset, data). This limits us to 30 writes of 1MiB each per call
201 TestAndWriteVectorsForShares = DictOf(int,
204 ChoiceOf(None, Offset), # new_length
206 ReadVector = ListOf(TupleOf(Offset, ReadSize))
207 ReadData = ListOf(ShareData)
208 # returns data[offset:offset+length] for each element of ReadVector
210 class RIStorageServer(RemoteInterface):
211 __remote_name__ = "RIStorageServer.tahoe.allmydata.com"
214 """Return a tuple of (my_version, oldest_supported) strings.
215 Each string can be parsed by an allmydata.util.version.Version
216 instance, and then compared. The first goal is to make sure that
217 nodes are not confused by speaking to an incompatible peer. The
218 second goal is to enable the development of backwards-compatibility
221 This method is likely to change in incompatible ways until we get the
222 whole compatibility scheme nailed down.
224 return TupleOf(str, str)
226 def allocate_buckets(storage_index=StorageIndex,
227 renew_secret=LeaseRenewSecret,
228 cancel_secret=LeaseCancelSecret,
229 sharenums=SetOf(int, maxLength=MAX_BUCKETS),
230 allocated_size=Offset, canary=Referenceable):
232 @param storage_index: the index of the bucket to be created or
234 @param sharenums: these are the share numbers (probably between 0 and
235 99) that the sender is proposing to store on this
237 @param renew_secret: This is the secret used to protect bucket refresh
238 This secret is generated by the client and
239 stored for later comparison by the server. Each
240 server is given a different secret.
241 @param cancel_secret: Like renew_secret, but protects bucket decref.
242 @param canary: If the canary is lost before close(), the bucket is
244 @return: tuple of (alreadygot, allocated), where alreadygot is what we
245 already have and allocated is what we hereby agree to accept. New
246 leases are added for shares in both lists.
248 return TupleOf(SetOf(int, maxLength=MAX_BUCKETS),
249 DictOf(int, RIBucketWriter, maxKeys=MAX_BUCKETS))
251 def renew_lease(storage_index=StorageIndex, renew_secret=LeaseRenewSecret):
253 Renew the lease on a given bucket. Some networks will use this, some
257 def cancel_lease(storage_index=StorageIndex,
258 cancel_secret=LeaseCancelSecret):
260 Cancel the lease on a given bucket. If this was the last lease on the
261 bucket, the bucket will be deleted.
264 def get_buckets(storage_index=StorageIndex):
265 return DictOf(int, RIBucketReader, maxKeys=MAX_BUCKETS)
269 def slot_readv(storage_index=StorageIndex,
270 shares=ListOf(int), readv=ReadVector):
271 """Read a vector from the numbered shares associated with the given
272 storage index. An empty shares list means to return data from all
273 known shares. Returns a dictionary with one key per share."""
274 return DictOf(int, ReadData) # shnum -> results
276 def slot_testv_and_readv_and_writev(storage_index=StorageIndex,
277 secrets=TupleOf(Hash, Hash, Hash),
278 tw_vectors=TestAndWriteVectorsForShares,
281 """General-purpose test-and-set operation for mutable slots. Perform
282 a bunch of comparisons against the existing shares. If they all pass,
283 then apply a bunch of write vectors to those shares. Then use the
284 read vectors to extract data from all the shares and return the data.
286 This method is, um, large. The goal is to allow clients to update all
287 the shares associated with a mutable file in a single round trip.
289 @param storage_index: the index of the bucket to be created or
291 @param write_enabler: a secret that is stored along with the slot.
292 Writes are accepted from any caller who can
293 present the matching secret. A different secret
294 should be used for each slot*server pair.
295 @param renew_secret: This is the secret used to protect bucket refresh
296 This secret is generated by the client and
297 stored for later comparison by the server. Each
298 server is given a different secret.
299 @param cancel_secret: Like renew_secret, but protects bucket decref.
301 The 'secrets' argument is a tuple of (write_enabler, renew_secret,
302 cancel_secret). The first is required to perform any write. The
303 latter two are used when allocating new shares. To simply acquire a
304 new lease on existing shares, use an empty testv and an empty writev.
306 Each share can have a separate test vector (i.e. a list of
307 comparisons to perform). If all vectors for all shares pass, then all
308 writes for all shares are recorded. Each comparison is a 4-tuple of
309 (offset, length, operator, specimen), which effectively does a bool(
310 (read(offset, length)) OPERATOR specimen ) and only performs the
311 write if all these evaluate to True. Basic test-and-set uses 'eq'.
312 Write-if-newer uses a seqnum and (offset, length, 'lt', specimen).
313 Write-if-same-or-newer uses 'le'.
315 Reads from the end of the container are truncated, and missing shares
316 behave like empty ones, so to assert that a share doesn't exist (for
317 use when creating a new share), use (0, 1, 'eq', '').
319 The write vector will be applied to the given share, expanding it if
320 necessary. A write vector applied to a share number that did not
321 exist previously will cause that share to be created.
323 Each write vector is accompanied by a 'new_length' argument. If
324 new_length is not None, use it to set the size of the container. This
325 can be used to pre-allocate space for a series of upcoming writes, or
326 truncate existing data. If the container is growing, new_length will
327 be applied before datav. If the container is shrinking, it will be
330 The read vector is used to extract data from all known shares,
331 *before* any writes have been applied. The same vector is used for
332 all shares. This captures the state that was tested by the test
335 This method returns two values: a boolean and a dict. The boolean is
336 True if the write vectors were applied, False if not. The dict is
337 keyed by share number, and each value contains a list of strings, one
338 for each element of the read vector.
340 If the write_enabler is wrong, this will raise BadWriteEnablerError.
341 To enable share migration, the exception will have the nodeid used
342 for the old write enabler embedded in it, in the following string::
344 The write enabler was recorded by nodeid '%s'.
346 Note that the nodeid here is encoded using the same base32 encoding
347 used by Foolscap and allmydata.util.idlib.nodeid_b2a().
350 return TupleOf(bool, DictOf(int, ReadData))
352 class IStorageBucketWriter(Interface):
353 def put_block(segmentnum=int, data=ShareData):
354 """@param data: For most segments, this data will be 'blocksize'
355 bytes in length. The last segment might be shorter.
356 @return: a Deferred that fires (with None) when the operation completes
359 def put_plaintext_hashes(hashes=ListOf(Hash, maxLength=2**20)):
361 @return: a Deferred that fires (with None) when the operation completes
364 def put_crypttext_hashes(hashes=ListOf(Hash, maxLength=2**20)):
366 @return: a Deferred that fires (with None) when the operation completes
369 def put_block_hashes(blockhashes=ListOf(Hash, maxLength=2**20)):
371 @return: a Deferred that fires (with None) when the operation completes
374 def put_share_hashes(sharehashes=ListOf(TupleOf(int, Hash),
377 @return: a Deferred that fires (with None) when the operation completes
380 def put_uri_extension(data=URIExtensionData):
381 """This block of data contains integrity-checking information (hashes
382 of plaintext, crypttext, and shares), as well as encoding parameters
383 that are necessary to recover the data. This is a serialized dict
384 mapping strings to other strings. The hash of this data is kept in
385 the URI and verified before any of the data is used. All buckets for
386 a given file contain identical copies of this data.
388 The serialization format is specified with the following pseudocode:
389 for k in sorted(dict.keys()):
390 assert re.match(r'^[a-zA-Z_\-]+$', k)
391 write(k + ':' + netstring(dict[k]))
393 @return: a Deferred that fires (with None) when the operation completes
397 """Finish writing and close the bucket. The share is not finalized
398 until this method is called: if the uploading client disconnects
399 before calling close(), the partially-written share will be
402 @return: a Deferred that fires (with None) when the operation completes
405 class IStorageBucketReader(Interface):
407 def get_block(blocknum=int):
408 """Most blocks will be the same size. The last block might be shorter
414 def get_plaintext_hashes():
416 @return: ListOf(Hash, maxLength=2**20)
419 def get_crypttext_hashes():
421 @return: ListOf(Hash, maxLength=2**20)
424 def get_block_hashes():
426 @return: ListOf(Hash, maxLength=2**20)
429 def get_share_hashes():
431 @return: ListOf(TupleOf(int, Hash), maxLength=2**20)
434 def get_uri_extension():
436 @return: URIExtensionData
441 # hm, we need a solution for forward references in schemas
442 from foolscap.schema import Any
444 FileNode_ = Any() # TODO: foolscap needs constraints on copyables
445 DirectoryNode_ = Any() # TODO: same
446 AnyNode_ = ChoiceOf(FileNode_, DirectoryNode_)
449 class IURI(Interface):
450 def init_from_string(uri):
451 """Accept a string (as created by my to_string() method) and populate
452 this instance with its data. I am not normally called directly,
453 please use the module-level uri.from_string() function to convert
454 arbitrary URI strings into IURI-providing instances."""
457 """Return False if this URI can be used to modify the data. Return True
458 if this URI cannot be used to modify the data."""
461 """Return True if the data can be modified by *somebody* (perhaps
462 someone who has a more powerful URI than this one)."""
465 """Return another IURI instance, which represents a read-only form of
466 this one. If is_readonly() is True, this returns self."""
469 """Return an instance that provides IVerifierURI, which can be used
470 to check on the availability of the file or directory, without
471 providing enough capabilities to actually read or modify the
472 contents. This may return None if the file does not need checking or
473 verification (e.g. LIT URIs).
477 """Return a string of printable ASCII characters, suitable for
478 passing into init_from_string."""
480 class IVerifierURI(Interface):
481 def init_from_string(uri):
482 """Accept a string (as created by my to_string() method) and populate
483 this instance with its data. I am not normally called directly,
484 please use the module-level uri.from_string() function to convert
485 arbitrary URI strings into IURI-providing instances."""
488 """Return a string of printable ASCII characters, suitable for
489 passing into init_from_string."""
491 class IDirnodeURI(Interface):
492 """I am a URI which represents a dirnode."""
495 class IFileURI(Interface):
496 """I am a URI which represents a filenode."""
498 """Return the length (in bytes) of the file that I represent."""
500 class IMutableFileURI(Interface):
501 """I am a URI which represents a mutable filenode."""
502 class INewDirectoryURI(Interface):
504 class IReadonlyNewDirectoryURI(Interface):
508 class IFilesystemNode(Interface):
511 Return the URI that can be used by others to get access to this
512 node. If this node is read-only, the URI will only offer read-only
513 access. If this node is read-write, the URI will offer read-write
516 If you have read-write access to a node and wish to share merely
517 read-only access with others, use get_readonly_uri().
520 def get_readonly_uri():
521 """Return the directory URI that can be used by others to get
522 read-only access to this directory node. The result is a read-only
523 URI, regardless of whether this dirnode is read-only or read-write.
525 If you have merely read-only access to this dirnode,
526 get_readonly_uri() will return the same thing as get_uri().
530 """Return an IVerifierURI instance that represents the
531 'verify/refresh capability' for this node. The holder of this
532 capability will be able to renew the lease for this node, protecting
533 it from garbage-collection. They will also be able to ask a server if
534 it holds a share for the file or directory.
538 """Perform a file check. See IChecker.check for details."""
541 """Return True if this reference provides mutable access to the given
542 file or directory (i.e. if you can modify it), or False if not. Note
543 that even if this reference is read-only, someone else may hold a
544 read-write reference to it."""
547 """Return True if this file or directory is mutable (by *somebody*,
548 not necessarily you), False if it is immutable. Note that a file
549 might be mutable overall, but your reference to it might be
550 read-only. On the other hand, all references to an immutable file
551 will be read-only; there are no read-write references to an immutable
555 class IMutableFilesystemNode(IFilesystemNode):
558 class IFileNode(IFilesystemNode):
559 def download(target):
560 """Download the file's contents to a given IDownloadTarget"""
562 def download_to_data():
563 """Download the file's contents. Return a Deferred that fires
564 with those contents."""
567 """Return the length (in bytes) of the data this node represents."""
569 class IMutableFileNode(IFileNode, IMutableFilesystemNode):
570 def download_to_data():
571 """Download the file's contents. Return a Deferred that fires with
572 those contents. If there are multiple retrievable versions in the
573 grid (because you failed to avoid simultaneous writes, see
574 docs/mutable.txt), this will return the first version that it can
575 reconstruct, and will silently ignore the others. In the future, a
576 more advanced API will signal and provide access to the multiple
580 """Attempt to replace the old contents with the new data.
582 download_to_data() must have been called before calling update().
584 Returns a Deferred. If the Deferred fires successfully, the update
585 appeared to succeed. However, another writer (who read before your
586 changes were published) might still clobber your changes: they will
587 discover a problem but you will not. (see ticket #347 for details).
589 If the mutable file has been changed (by some other writer) since the
590 last call to download_to_data(), this will raise
591 UncoordinatedWriteError and the file will be left in an inconsistent
592 state (possibly the version you provided, possibly the old version,
593 possibly somebody else's version, and possibly a mix of shares from
594 all of these). The recommended response to UncoordinatedWriteError is
595 to either return it to the caller (since they failed to coordinate
596 their writes), or to do a new download_to_data() / modify-data /
599 update() is appropriate to use in a read-modify-write sequence, such
600 as a directory modification.
603 def overwrite(newdata):
604 """Attempt to replace the old contents with the new data.
606 Unlike update(), overwrite() does not require a previous call to
607 download_to_data(). It will unconditionally replace the old contents
610 overwrite() is implemented by doing download_to_data() and update()
611 in rapid succession, so there remains a (smaller) possibility of
612 UncoordinatedWriteError. A future version will remove the full
613 download_to_data step, making this faster than update().
615 overwrite() is only appropriate to use when the new contents of the
616 mutable file are completely unrelated to the old ones, and you do not
617 care about other clients' changes to the file.
621 """Return this filenode's writekey, or None if the node does not have
622 write-capability. This may be used to assist with data structures
623 that need to make certain data available only to writers, such as the
624 read-write child caps in dirnodes. The recommended process is to have
625 reader-visible data be submitted to the filenode in the clear (where
626 it will be encrypted by the filenode using the readkey), but encrypt
627 writer-visible data using this writekey.
630 class IDirectoryNode(IMutableFilesystemNode):
631 """I represent a name-to-child mapping, holding the tahoe equivalent of a
632 directory. All child names are unicode strings, and all children are some
633 sort of IFilesystemNode (either files or subdirectories).
638 The dirnode ('1') URI returned by this method can be used in
639 set_uri() on a different directory ('2') to 'mount' a reference to
640 this directory ('1') under the other ('2'). This URI is just a
641 string, so it can be passed around through email or other out-of-band
645 def get_readonly_uri():
647 The dirnode ('1') URI returned by this method can be used in
648 set_uri() on a different directory ('2') to 'mount' a reference to
649 this directory ('1') under the other ('2'). This URI is just a
650 string, so it can be passed around through email or other out-of-band
655 """I return a Deferred that fires with a dictionary mapping child
656 name (a unicode string) to (node, metadata_dict) tuples, in which
657 'node' is either an IFileNode or IDirectoryNode, and 'metadata_dict'
658 is a dictionary of metadata."""
661 """I return a Deferred that fires with a boolean, True if there
662 exists a child of the given name, False if not. The child name must
663 be a unicode string."""
666 """I return a Deferred that fires with a specific named child node,
667 either an IFileNode or an IDirectoryNode. The child name must be a
670 def get_metadata_for(name):
671 """I return a Deferred that fires with the metadata dictionary for a
672 specific named child node. This metadata is stored in the *edge*, not
673 in the child, so it is attached to the parent dirnode rather than the
674 child dir-or-file-node. The child name must be a unicode string."""
676 def set_metadata_for(name, metadata):
677 """I replace any existing metadata for the named child with the new
678 metadata. The child name must be a unicode string. This metadata is
679 stored in the *edge*, not in the child, so it is attached to the
680 parent dirnode rather than the child dir-or-file-node. I return a
681 Deferred (that fires with this dirnode) when the operation is
684 def get_child_at_path(path):
685 """Transform a child path into an IDirectoryNode or IFileNode.
687 I perform a recursive series of 'get' operations to find the named
688 descendant node. I return a Deferred that fires with the node, or
689 errbacks with IndexError if the node could not be found.
691 The path can be either a single string (slash-separated) or a list of
692 path-name elements. All elements must be unicode strings.
695 def set_uri(name, child_uri, metadata=None):
696 """I add a child (by URI) at the specific name. I return a Deferred
697 that fires when the operation finishes. I will replace any existing
698 child of the same name. The child name must be a unicode string.
700 The child_uri could be for a file, or for a directory (either
701 read-write or read-only, using a URI that came from get_uri() ).
703 If metadata= is provided, I will use it as the metadata for the named
704 edge. This will replace any existing metadata. If metadata= is left
705 as the default value of None, I will set ['mtime'] to the current
706 time, and I will set ['ctime'] to the current time if there was not
707 already a child by this name present. This roughly matches the
708 ctime/mtime semantics of traditional filesystems.
710 If this directory node is read-only, the Deferred will errback with a
713 def set_children(entries):
714 """Add multiple (name, child_uri) pairs (or (name, child_uri,
715 metadata) triples) to a directory node. Returns a Deferred that fires
716 (with None) when the operation finishes. This is equivalent to
717 calling set_uri() multiple times, but is much more efficient. All
718 child names must be unicode strings.
721 def set_node(name, child, metadata=None):
722 """I add a child at the specific name. I return a Deferred that fires
723 when the operation finishes. This Deferred will fire with the child
724 node that was just added. I will replace any existing child of the
725 same name. The child name must be a unicode string.
727 If metadata= is provided, I will use it as the metadata for the named
728 edge. This will replace any existing metadata. If metadata= is left
729 as the default value of None, I will set ['mtime'] to the current
730 time, and I will set ['ctime'] to the current time if there was not
731 already a child by this name present. This roughly matches the
732 ctime/mtime semantics of traditional filesystems.
734 If this directory node is read-only, the Deferred will errback with a
737 def set_nodes(entries):
738 """Add multiple (name, child_node) pairs (or (name, child_node,
739 metadata) triples) to a directory node. Returns a Deferred that fires
740 (with None) when the operation finishes. This is equivalent to
741 calling set_node() multiple times, but is much more efficient. All
742 child names must be unicode strings."""
745 def add_file(name, uploadable, metadata=None):
746 """I upload a file (using the given IUploadable), then attach the
747 resulting FileNode to the directory at the given name. I set metadata
748 the same way as set_uri and set_node. The child name must be a
751 I return a Deferred that fires (with the IFileNode of the uploaded
752 file) when the operation completes."""
755 """I remove the child at the specific name. I return a Deferred that
756 fires when the operation finishes. The child name must be a unicode
759 def create_empty_directory(name):
760 """I create and attach an empty directory at the given name. The
761 child name must be a unicode string. I return a Deferred that fires
762 when the operation finishes."""
764 def move_child_to(current_child_name, new_parent, new_child_name=None):
765 """I take one of my children and move them to a new parent. The child
766 is referenced by name. On the new parent, the child will live under
767 'new_child_name', which defaults to 'current_child_name'. TODO: what
768 should we do about metadata? I return a Deferred that fires when the
769 operation finishes. The child name must be a unicode string."""
771 def build_manifest():
772 """Return a frozenset of verifier-capability strings for all nodes
773 (directories and files) reachable from this one."""
# NOTE(review): short lines were missing from the copy of this file that was
# reviewed. The summary line of encode_proposal() and a few docstring
# sentence endings were inferred from context -- confirm against upstream.
class ICodecEncoder(Interface):
    """I turn a block of data into shares, such that an ICodecDecoder can
    recover the data from any 'required_shares' of them."""

    def set_params(data_size, required_shares, max_shares):
        """Set up the parameters of this encoder.

        This prepares the encoder to perform an operation that converts a
        single block of data into a number of shares, such that a future
        ICodecDecoder can use a subset of these shares to recover the
        original data. This operation is invoked by calling encode(). Once
        the encoding parameters are set up, the encode operation can be
        invoked multiple times.

        set_params() prepares the encoder to accept blocks of input data that
        are exactly 'data_size' bytes in length. The encoder will be prepared
        to produce 'max_shares' shares for each encode() operation (although
        see the 'desired_share_ids' argument of encode() to use less CPU).
        The encoding math will be chosen such that the decoder can get by
        with as few as 'required_shares' of these shares and still reproduce
        the original data. For example, set_params(1000, 5, 5) offers no
        redundancy at all, whereas set_params(1000, 1, 10) provides 10x
        redundancy.

        Numerical Restrictions: 'data_size' is required to be an integral
        multiple of 'required_shares'. In general, the caller should choose
        required_shares and max_shares based upon their reliability
        requirements and the number of peers available (the total storage
        space used is roughly equal to max_shares*data_size/required_shares),
        then choose data_size to achieve the memory footprint desired (larger
        data_size means more efficient operation, smaller data_size means
        smaller memory footprint).

        In addition, 'max_shares' must be equal to or greater than
        'required_shares'. Of course, setting them to be equal causes
        encode() to degenerate into a particularly slow form of 'split'.

        See encode() for more details about how these parameters are used.

        set_params() must be called before any other ICodecEncoder methods
        may be invoked.
        """

    def get_encoder_type():
        """Return a short string that describes the type of this encoder.

        There is required to be a global table of encoder classes. This
        method returns an index into this table; the value at this index is
        an encoder class, and this encoder is an instance of that class.
        """

    def get_serialized_params(): # TODO: maybe, maybe not
        """Return a string that describes the parameters of this encoder.

        This string can be passed to the decoder to prepare it for handling
        the encoded shares we create. It might contain more information than
        was presented to set_params(), if there is some flexibility of
        parameter choice.

        This string is intended to be embedded in the URI, so there are
        several restrictions on its contents. At the moment I'm thinking that
        this means it may contain hex digits and hyphens, and nothing else.
        The idea is that the URI contains something like '%s:%s:%s' %
        (encoder.get_encoder_type(), encoder.get_serialized_params(),
        b2a(crypttext_hash)), and this is enough information to construct a
        matching decoder.
        """

    def get_block_size():
        """Return the length of the shares that encode() will produce.
        """

    def encode_proposal(data, desired_share_ids=None):
        """Encode a single block of data into shares.

        'data' must be a string (or other buffer object), and len(data) must
        be equal to the 'data_size' value passed earlier to set_params().

        This will return a Deferred that will fire with two lists. The first
        is a list of shares, each of which is a string (or other buffer
        object) such that len(share) is the same as what get_block_size()
        returned earlier. The second is a list of shareids, in which each is
        an integer. The lengths of the two lists will always be equal to each
        other. The user should take care to keep each share closely
        associated with its shareid, as one is useless without the other.

        The length of this output list will normally be the same as the value
        provided to the 'max_shares' parameter of set_params(). This may be
        different if 'desired_share_ids' is provided.

        'desired_share_ids', if provided, is required to be a sequence of
        ints, each of which is required to be >= 0 and < max_shares. If not
        provided, encode() will produce 'max_shares' shares, as if
        'desired_share_ids' were set to range(max_shares). You might use this
        if you initially thought you were going to use 10 peers, started
        encoding, and then two of the peers dropped out: you could use
        desired_share_ids= to skip the work (both memory and CPU) of
        producing shares for the peers which are no longer available.
        """

    def encode(inshares, desired_share_ids=None):
        """Encode some data. This may be called multiple times. Each call is
        independent.

        inshares is a sequence of length required_shares, containing buffers
        (i.e. strings), where each buffer contains the next contiguous
        non-overlapping segment of the input data. Each buffer is required to
        be the same length, and the sum of the lengths of the buffers is
        required to be exactly the data_size promised by set_params(). (This
        implies that the data has to be padded before being passed to
        encode(), unless of course it already happens to be an even multiple
        of required_shares in length.)

        ALSO: the requirement to break up your data into 'required_shares'
        chunks before calling encode() feels a bit surprising, at least from
        the point of view of a user who doesn't know how FEC works. It feels
        like an implementation detail that has leaked outside the
        abstraction barrier. Can you imagine a use case in which the data to
        be encoded might already be available in pre-segmented chunks, such
        that it is faster or less work to make encode() take a list rather
        than splitting a single string?

        ALSO ALSO: I think 'inshares' is a misleading term, since encode()
        is supposed to *produce* shares, so what it *accepts* should be
        something other than shares. Other places in this interface use the
        word 'data' for that-which-is-not-shares.. maybe we should use that
        term.

        'desired_share_ids', if provided, is required to be a sequence of
        ints, each of which is required to be >= 0 and < max_shares. If not
        provided, encode() will produce 'max_shares' shares, as if
        'desired_share_ids' were set to range(max_shares). You might use this
        if you initially thought you were going to use 10 peers, started
        encoding, and then two of the peers dropped out: you could use
        desired_share_ids= to skip the work (both memory and CPU) of
        producing shares for the peers which are no longer available.

        For each call, encode() will return a Deferred that fires with two
        lists, one containing shares and the other containing the shareids.
        The get_block_size() method can be used to determine the length of
        the share strings returned by encode(). Each shareid is a small
        integer, exactly as passed into 'desired_share_ids' (or
        range(max_shares), if desired_share_ids was not provided).

        The shares and their corresponding shareids are required to be kept
        together during storage and retrieval. Specifically, the share data
        is useless by itself: the decoder needs to be told which share is
        which by providing it with both the shareid and the actual share
        data.

        This function will allocate an amount of memory roughly equal to::

         (max_shares - required_shares) * get_block_size()

        When combined with the memory that the caller must allocate to
        provide the input data, this leads to a memory footprint roughly
        equal to the size of the resulting encoded shares (i.e. the expansion
        factor times the size of the input segment).
        """

    # Design alternatives that were considered but not adopted:
    #
    #  returning a list of (shareidN,shareN) tuples instead of a pair of
    #  lists (shareids..,shares..). Brian thought the tuples would
    #  encourage users to keep the share and shareid together throughout
    #  later processing, Zooko pointed out that the code to iterate
    #  through two lists is not really more complicated than using a list
    #  of tuples and there's also a performance improvement
    #
    #  having 'data_size' not required to be an integral multiple of
    #  'required_shares'. Doing this would require encode() to perform
    #  padding internally, and we'd prefer to have any padding be done
    #  explicitly by the caller. Yes, it is an abstraction leak, but
    #  hopefully not an onerous one.
# NOTE(review): the final sentence of decode()'s docstring was cut short in
# the reviewed copy; its ending was inferred from context -- confirm.
class ICodecDecoder(Interface):
    """I recover a block of data from a subset of the shares produced by an
    ICodecEncoder."""

    def set_serialized_params(params):
        """Set up the parameters of this decoder, from a string returned by
        encoder.get_serialized_params()."""

    def get_needed_shares():
        """Return the number of shares needed to reconstruct the data.
        set_serialized_params() is required to be called before this."""

    def decode(some_shares, their_shareids):
        """Decode a partial list of shares into data.

        'some_shares' is required to be a sequence of buffers of sharedata, a
        subset of the shares returned by ICodecEncoder.encode(). Each share
        is required to be of the same length. The i'th element of
        their_shareids is required to be the shareid of the i'th buffer in
        some_shares.

        This returns a Deferred which fires with a sequence of buffers. This
        sequence will contain all of the segments of the original data, in
        order. The sum of the lengths of all of the buffers will be the
        'data_size' value passed into the original ICodecEncoder.set_params()
        call. To get back the single original input block of data, use
        ''.join(output_buffers), or you may wish to simply write them in
        order to an output file.

        Note that some of the elements in the result sequence may be
        references to the elements of the some_shares input sequence. In
        particular, this means that if those share objects are mutable (e.g.
        arrays) and if they are changed, then both the input (the
        'some_shares' parameter) and the output (the value given when the
        deferred is triggered) will change.

        The length of 'some_shares' is required to be exactly the value of
        'required_shares' passed into the original ICodecEncoder.set_params()
        call.
        """
# NOTE(review): short lines were missing from the copy of this file that was
# reviewed. Method headers tagged 'reconstructed' and a few docstring
# sentence endings were inferred from context -- confirm against upstream.
class IEncoder(Interface):
    """I take an object that provides IEncryptedUploadable, which provides
    encrypted data, and a list of shareholders. I then encode, hash, and
    deliver shares to those shareholders. I will compute all the necessary
    Merkle hash trees that are necessary to validate the crypttext that
    eventually comes back from the shareholders. I provide the URI Extension
    Block Hash, and the encoding parameters, both of which must be included
    in the URI.

    I do not choose shareholders, that is left to the IUploader. I must be
    given a dict of RemoteReferences to storage buckets that are ready and
    willing to receive data.
    """

    def set_size(size):  # reconstructed header; parameter name assumed
        """Specify the number of bytes that will be encoded. This must be
        performed before get_serialized_params() can be called.
        """

    def set_params(params):
        """Override the default encoding parameters. 'params' is a tuple of
        (k,d,n), where 'k' is the number of required shares, 'd' is the
        shares_of_happiness, and 'n' is the total number of shares that will
        be created.

        Encoding parameters can be set in three ways. 1: The Encoder class
        provides defaults (3/7/10). 2: the Encoder can be constructed with
        an 'options' dictionary, in which the
        'needed_and_happy_and_total_shares' key can be a (k,d,n) tuple. 3:
        set_params((k,d,n)) can be called.

        If you intend to use set_params(), you must call it before
        get_share_size or get_param are called.
        """

    def set_encrypted_uploadable(u):
        """Provide a source of encrypted upload data. 'u' must implement
        IEncryptedUploadable.

        When this is called, the IEncryptedUploadable will be queried for its
        length and the storage_index that should be used.

        This returns a Deferred that fires with this Encoder instance.

        This must be performed before start() can be called.
        """

    def get_param(name):
        """Return an encoding parameter, by name.

        'storage_index': return a string with the (16-byte truncated SHA-256
                         hash) storage index to which these shares should be
                         pushed.

        'share_counts': return a tuple describing how many shares are used:
                        (needed_shares, shares_of_happiness, total_shares)

        'num_segments': return an int with the number of segments that
                        will be encoded.

        'segment_size': return an int with the size of each segment.

        'block_size': return the size of the individual blocks that will
                      be delivered to a shareholder's put_block() method. By
                      knowing this, the shareholder will be able to keep all
                      blocks in a single file and still provide random access
                      when reading them. # TODO: can we avoid exposing this?

        'share_size': an int with the size of the data that will be stored
                      on each shareholder. This is aggregate amount of data
                      that will be sent to the shareholder, summed over all
                      the put_block() calls I will ever make. It is useful to
                      determine this size before asking potential
                      shareholders whether they will grant a lease or not,
                      since their answers will depend upon how much space we
                      need. TODO: this might also include some amount of
                      overhead, like the size of all the hashes. We need to
                      decide whether this is useful or not.

        'serialized_params': a string with a concise description of the
                             codec name and its parameters. This may be passed
                             into the IUploadable to let it make sure that
                             the same file encoded with different parameters
                             will result in different storage indexes.

        Once this is called, set_size() and set_params() may not be called.
        """

    def set_shareholders(shareholders):
        """Tell the encoder where to put the encoded shares. 'shareholders'
        must be a dictionary that maps share number (an integer ranging from
        0 to n-1) to an instance that provides IStorageBucketWriter. This
        must be performed before start() can be called."""

    def start():  # reconstructed header
        """Begin the encode/upload process. This involves reading encrypted
        data from the IEncryptedUploadable, encoding it, uploading the shares
        to the shareholders, then sending the hash trees.

        set_encrypted_uploadable() and set_shareholders() must be called
        before this can be invoked.

        This returns a Deferred that fires with a tuple of
        (uri_extension_hash, needed_shares, total_shares, size) when the
        upload process is complete. This information, plus the encryption
        key, is sufficient to construct the URI.
        """
# NOTE(review): short lines were missing from the copy of this file that was
# reviewed. Method headers tagged 'reconstructed' and a few docstring
# sentence endings were inferred from context -- confirm against upstream.
class IDecoder(Interface):
    """I take a list of shareholders and some setup information, then
    download, validate, decode, and decrypt data from them, writing the
    results to an output file.

    I do not locate the shareholders, that is left to the IDownloader. I must
    be given a dict of RemoteReferences to storage buckets that are ready to
    send data.
    """

    def setup(outfile):  # reconstructed header; name and parameter assumed
        """I take a file-like object (providing write and close) to which all
        the plaintext data will be written.

        TODO: producer/consumer . Maybe write() should return a Deferred that
        indicates when it will accept more data? But probably having the
        IDecoder be a producer is easier to glue to IConsumer pieces.
        """

    def set_shareholders(shareholders):
        """I take a dictionary that maps share identifiers (small integers)
        to RemoteReferences that provide RIBucketReader. This must be called
        before start()."""

    def start():  # reconstructed header
        """I start the download. This process involves retrieving data and
        hash chains from the shareholders, using the hashes to validate the
        data, decoding the shares into segments, decrypting the segments,
        then writing the resulting plaintext to the output file.

        I return a Deferred that will fire (with self) when the download is
        complete.
        """
# NOTE(review): the 'def' lines tagged 'reconstructed' were missing from the
# reviewed copy of this file. Their names come from the method names the
# docstrings mention; parameter names are assumed where the docstrings do not
# quote them -- confirm against upstream.
class IDownloadTarget(Interface):
    def open(size):  # reconstructed header
        """Called before any calls to write() or close(). If an error
        occurs before any data is available, fail() may be called without
        a previous call to open().

        'size' is the length of the file being downloaded, in bytes."""

    def write(data):  # reconstructed header; parameter name assumed
        """Output some data to the target."""

    def close():  # reconstructed header
        """Inform the target that there is no more data to be written."""

    def fail(why):  # reconstructed header
        """fail() is called to indicate that the download has failed. 'why'
        is a Failure object indicating what went wrong. No further methods
        will be invoked on the IDownloadTarget after fail()."""

    def register_canceller(cb):
        """The FileDownloader uses this to register a no-argument function
        that the target can call to cancel the download. Once this canceller
        is invoked, no further calls to write() or close() will be made."""

    def finish():  # reconstructed header
        """When the FileDownloader is done, this finish() function will be
        called. Whatever it returns will be returned to the invoker of
        Downloader.download.
        """
class IDownloader(Interface):
    def download(uri, target):
        """Perform a CHK download, sending the data to the given target.
        'target' must provide IDownloadTarget.

        Returns a Deferred that fires (with the results of target.finish)
        when the download is finished, or errbacks if something went wrong."""
# NOTE(review): short lines were missing from the copy of this file that was
# reviewed. Method headers tagged 'reconstructed' and a few docstring
# sentence endings were inferred from context -- confirm against upstream.
class IEncryptedUploadable(Interface):
    def set_upload_status(upload_status):
        """Provide an IUploadStatus object that should be filled with status
        information. The IEncryptedUploadable is responsible for setting
        key-determination progress ('chk'), size, storage_index, and
        ciphertext-fetch progress. It may delegate some of this
        responsibility to others, in particular to the IUploadable."""

    def get_size():  # reconstructed header
        """This behaves just like IUploadable.get_size()."""

    def get_all_encoding_parameters():
        """Return a Deferred that fires with a tuple of
        (k,happy,n,segment_size). The segment_size will be used as-is, and
        must match the following constraints: it must be a multiple of k, and
        it shouldn't be unreasonably larger than the file size (if
        segment_size is larger than filesize, the difference must be stored
        as padding).

        This usually passes through to the IUploadable method of the same
        name.

        The encoder strictly obeys the values returned by this method. To
        make an upload use non-default encoding parameters, you must arrange
        to control the values that this method returns.
        """

    def get_storage_index():
        """Return a Deferred that fires with a 16-byte storage index.
        """

    def read_encrypted(length, hash_only):
        """This behaves just like IUploadable.read(), but returns crypttext
        instead of plaintext. If hash_only is True, then this discards the
        data (and returns an empty list); this improves efficiency when
        resuming an interrupted upload (where we need to compute the
        plaintext hashes, but don't need the redundant encrypted data)."""

    def get_plaintext_hashtree_leaves(first, last, num_segments):
        """Get the leaf nodes of a merkle hash tree over the plaintext
        segments, i.e. get the tagged hashes of the given segments. The
        segment size is expected to be generated by the IEncryptedUploadable
        before any plaintext is read or ciphertext produced, so that the
        segment hashes can be generated with only a single pass.

        This returns a Deferred which fires with a sequence of hashes, using:

         tuple(segment_hashes[first:last])

        'num_segments' is used to assert that the number of segments that the
        IEncryptedUploadable handled matches the number of segments that the
        encoder was expecting.

        This method must not be called until the final byte has been read
        from read_encrypted(). Once this method is called, read_encrypted()
        can never be called again.
        """

    def get_plaintext_hash():
        """Get the hash of the whole plaintext.

        This returns a Deferred which fires with a tagged SHA-256 hash of the
        whole plaintext, obtained from hashutil.plaintext_hash(data).
        """

    def close():  # reconstructed header
        """Just like IUploadable.close()."""
# NOTE(review): short lines were missing from the copy of this file that was
# reviewed. Method headers tagged 'reconstructed' and a few docstring
# sentence endings were inferred from context -- confirm against upstream.
class IUploadable(Interface):
    def set_upload_status(upload_status):
        """Provide an IUploadStatus object that should be filled with status
        information. The IUploadable is responsible for setting
        key-determination progress ('chk')."""

    def set_default_encoding_parameters(params):
        """Set the default encoding parameters, which must be a dict mapping
        strings to ints. The meaningful keys are 'k', 'happy', 'n', and
        'max_segment_size'. These might have an influence on the final
        encoding parameters returned by get_all_encoding_parameters(), if the
        Uploadable doesn't have more specific preferences.

        This call is optional: if it is not used, the Uploadable will use
        some built-in defaults. If used, this method must be called before
        any other IUploadable methods to have any effect.
        """

    def get_size():  # reconstructed header
        """Return a Deferred that will fire with the length of the data to be
        uploaded, in bytes. This will be called before the data is actually
        used, to compute encoding parameters.
        """

    def get_all_encoding_parameters():
        """Return a Deferred that fires with a tuple of
        (k,happy,n,segment_size). The segment_size will be used as-is, and
        must match the following constraints: it must be a multiple of k, and
        it shouldn't be unreasonably larger than the file size (if
        segment_size is larger than filesize, the difference must be stored
        as padding).

        The relative values of k and n allow some IUploadables to request
        better redundancy than others (in exchange for consuming more space
        in the grid).

        Larger values of segment_size reduce hash overhead, while smaller
        values reduce memory footprint and cause data to be delivered in
        smaller pieces (which may provide a smoother and more predictable
        download experience).

        The encoder strictly obeys the values returned by this method. To
        make an upload use non-default encoding parameters, you must arrange
        to control the values that this method returns. One way to influence
        them may be to call set_default_encoding_parameters() before calling
        get_all_encoding_parameters().
        """

    def get_encryption_key():
        """Return a Deferred that fires with a 16-byte AES key. This key will
        be used to encrypt the data. The key will also be hashed to derive
        the storage index.

        Uploadables which want to achieve convergence should hash their file
        contents and the serialized_encoding_parameters to form the key
        (which of course requires a full pass over the data). Uploadables can
        use the upload.ConvergentUploadMixin class to achieve this
        automatically.

        Uploadables which do not care about convergence (or do not wish to
        make multiple passes over the data) can simply return a
        strongly-random 16 byte string.

        get_encryption_key() may be called multiple times: the IUploadable is
        required to return the same value each time.
        """

    def read(length):  # reconstructed header
        """Return a Deferred that fires with a list of strings (perhaps with
        only a single element) which, when concatenated together, contain the
        next 'length' bytes of data. If EOF is near, this may provide fewer
        than 'length' bytes. The total number of bytes provided by read()
        before it signals EOF must equal the size provided by get_size().

        If the data must be acquired through multiple internal read
        operations, returning a list instead of a single string may help to
        reduce string copies.

        'length' will typically be equal to (min(get_size(),1MB)/req_shares),
        so a 10kB file means length=3kB, 100kB file means length=30kB,
        and >=1MB file means length=300kB.

        This method provides for a single full pass through the data. Later
        use cases may desire multiple passes or access to only parts of the
        data (such as a mutable file making small edits-in-place). This API
        will be expanded once those use cases are better understood.
        """

    def close():  # reconstructed header
        """The upload is finished, and whatever filehandle was in use may be
        closed."""
# NOTE(review): the sentence "All of these may be ..." was cut short in the
# reviewed copy of this file; "None" was inferred -- confirm against upstream.
class IUploadResults(Interface):
    """I am returned by upload() methods. I contain a number of public
    attributes which can be read to determine the results of the upload. Some
    of these are functional, some are timing information. All of these may be
    None.::

     .file_size : the size of the file, in bytes
     .uri : the CHK read-cap for the file
     .ciphertext_fetched : how many bytes were fetched by the helper
     .sharemap : dict mapping share number to placement string
     .servermap : dict mapping server peerid to a set of share numbers
     .timings : dict of timing information, mapping name to seconds (float)
       total : total upload time, start to finish
       storage_index : time to compute the storage index
       peer_selection : time to decide which peers will be used
       contacting_helper : initial helper query to upload/no-upload decision
       existence_check : helper pre-upload existence check
       helper_total : initial helper query to helper finished pushing
       cumulative_fetch : helper waiting for ciphertext requests
       total_fetch : helper start to last ciphertext response
       cumulative_encoding : just time spent in zfec
       cumulative_sending : just time spent waiting for storage servers
       hashes_and_close : last segment push to shareholder close
       total_encode_and_push : first encode to shareholder close

    """
class IDownloadResults(Interface):
    """I am created internally by download() methods. I contain a number of
    public attributes which contain details about the download process.::

     .file_size : the size of the file, in bytes
     .servers_used : set of server peerids that were used during download
     .server_problems : dict mapping server peerid to a problem string. Only
                        servers that had problems (bad hashes, disconnects)
                        are listed here.
     .servermap : dict mapping server peerid to a set of share numbers. Only
                  servers that had any shares are listed here.
     .timings : dict of timing information, mapping name to seconds (float)
       peer_selection : time to ask servers about shares
       servers_peer_selection : dict of peerid to DYHB-query time
       uri_extension : time to fetch a copy of the URI extension block
       hashtrees : time to fetch the hash trees
       segments : time to fetch, decode, and deliver segments
       cumulative_fetch : time spent waiting for storage servers
       cumulative_decode : just time spent in zfec
       cumulative_decrypt : just time spent in decryption
       total : total download time, start to finish
       fetch_per_server : dict of peerid to list of per-segment fetch times

    """
class IUploader(Interface):
    def upload(uploadable):
        """Upload the file. 'uploadable' must implement IUploadable. This
        returns a Deferred which fires with an UploadResults instance, from
        which the URI of the file can be obtained as results.uri ."""

    def upload_ssk(write_capability, new_version, uploadable):
        """TODO: how should this work?"""
# NOTE(review): a few docstring sentence endings below were missing from the
# reviewed copy of this file and were inferred from context -- confirm
# against upstream.
class IChecker(Interface):
    def check(uri_to_check):
        """Accepts an IVerifierURI, and checks upon the health of its target.

        For now, uri_to_check must be an IVerifierURI. In the future we
        expect to relax that to be anything that can be adapted to
        IVerifierURI (like read-only or read-write dirnode/filenode URIs).

        This returns a Deferred. For dirnodes, this fires with either True or
        False (dirnodes are not distributed, so their health is a boolean).

        For filenodes, this fires with a tuple of (needed_shares,
        total_shares, found_shares, sharemap). The first three are ints. The
        basic health of the file is found_shares / needed_shares: if less
        than 1.0, the file is unrecoverable.

        The sharemap has a key for each sharenum. The value is a list of
        (binary) nodeids who hold that share. If two shares are kept on the
        same nodeid, they will fail as a pair, and overall reliability is
        decreased.

        The IChecker instance remembers the results of the check. By default,
        these results are stashed in RAM (and are forgotten at shutdown). If
        a file named 'checker_results.db' exists in the node's basedir, it is
        used as a sqlite database of results, making them persistent across
        runs. To start using this feature, just 'touch checker_results.db',
        and the node will initialize it properly the next time it is started.
        """

    def verify(uri_to_check):
        """Accepts an IVerifierURI, and verifies the crypttext of the target.

        This is a more-intensive form of checking. For verification, the
        file's crypttext contents are retrieved, and the associated hash
        checks are performed. If a storage server is holding a corrupted
        share, verification will detect the problem, but checking will not.
        This returns a Deferred that fires with True if the crypttext hashes
        look good, and will probably raise an exception if anything goes
        wrong.

        For dirnodes, 'verify' is the same as 'check', so the Deferred will
        fire with True or False.

        Verification currently only uses a minimal subset of peers, so a lot
        of share corruption will not be caught by it. We expect to improve
        this.
        """

    def checker_results_for(uri_to_check):
        """Accepts an IVerifierURI, and returns a list of previously recorded
        checker results. This method performs no checking itself: it merely
        reports the results of checks that have taken place in the past.

        Each element of the list is a two-entry tuple: (when, results).
        The 'when' values are timestamps (float seconds since epoch), and the
        results are as defined in the check() method.

        Note: at the moment, this is specified to return synchronously. We
        might need to back away from this in the future.
        """
# NOTE(review): a few docstring sentence endings below were missing from the
# reviewed copy of this file and were inferred from context -- confirm
# against upstream.
class IClient(Interface):
    def upload(uploadable):
        """Upload some data into a CHK, get back the UploadResults for it.
        @param uploadable: something that implements IUploadable
        @return: a Deferred that fires with the UploadResults instance.
                 To get the URI for this file, use results.uri .
        """

    def create_mutable_file(contents=""):
        """Create a new mutable file with contents, get back the URI string.
        @param contents: the initial contents to place in the file.
        @return: a Deferred that fires with the (string) SSK URI for the new
                 file.
        """

    def create_empty_dirnode():
        """Create a new dirnode, empty and unattached.
        @return: a Deferred that fires with the new IDirectoryNode instance.
        """

    def create_node_from_uri(uri):
        """Create a new IFilesystemNode instance from the uri, synchronously.
        @param uri: a string or IURI-providing instance. This could be for a
                    LiteralFileNode, a CHK file node, a mutable file node, or
                    a directory node
        @return: an instance that provides IFilesystemNode (or more usefully
                 one of its subclasses). File-specifying URIs will result in
                 IFileNode or IMutableFileNode -providing instances, like
                 FileNode, LiteralFileNode, or MutableFileNode.
                 Directory-specifying URIs will result in
                 IDirectoryNode-providing instances, like NewDirectoryNode.
        """
class IClientStatus(Interface):
    def list_all_uploads():
        """Return a list of uploader objects, one for each upload which
        currently has an object available (tracked with weakrefs). This is
        intended for debugging purposes."""

    def list_active_uploads():
        """Return a list of active IUploadStatus objects."""

    def list_recent_uploads():
        """Return a list of IUploadStatus objects for the most recently
        started uploads."""

    def list_all_downloads():
        """Return a list of downloader objects, one for each download which
        currently has an object available (tracked with weakrefs). This is
        intended for debugging purposes."""

    def list_active_downloads():
        """Return a list of active IDownloadStatus objects."""

    def list_recent_downloads():
        """Return a list of IDownloadStatus objects for the most recently
        started downloads."""
# NOTE(review): only get_storage_index() kept its 'def' line in the reviewed
# copy of this file. Every header tagged 'reconstructed' -- its name included
# -- was inferred from its docstring, as were a few docstring sentence
# endings. Confirm all of them against upstream before relying on them.
class IUploadStatus(Interface):
    def get_started():  # reconstructed header
        """Return a timestamp (float with seconds since epoch) indicating
        when the operation was started."""

    def get_storage_index():
        """Return a string with the (binary) storage index in use on this
        upload. Returns None if the storage index has not yet been
        calculated."""

    def get_size():  # reconstructed header
        """Return an integer with the number of bytes that will eventually
        be uploaded for this file. Returns None if the size is not yet known.
        """

    def using_helper():  # reconstructed header
        """Return True if this upload is using a Helper, False if not."""

    def get_status():  # reconstructed header
        """Return a string describing the current state of the upload
        process."""

    def get_progress():  # reconstructed header
        """Returns a tuple of floats, (chk, ciphertext, encode_and_push),
        each from 0.0 to 1.0 . 'chk' describes how much progress has been
        made towards hashing the file to determine a CHK encryption key: if
        non-convergent encryption is in use, this will be trivial, otherwise
        the whole file must be hashed. 'ciphertext' describes how much of the
        ciphertext has been pushed to the helper, and is '1.0' for non-helper
        uploads. 'encode_and_push' describes how much of the encode-and-push
        process has finished: for helper uploads this is dependent upon the
        helper providing progress reports. It might be reasonable to add all
        three numbers and report the sum to the user."""

    def get_active():  # reconstructed header
        """Return True if the upload is currently active, False if not."""

    def get_results():  # reconstructed header
        """Return an instance of UploadResults (which contains timing and
        sharemap information). Might return None if the upload is not yet
        finished."""

    def get_counter():  # reconstructed header
        """Each upload status gets a unique number: this method returns that
        number. This provides a handle to this particular upload, so a web
        page can generate a suitable hyperlink."""
# NOTE(review): only get_storage_index() kept its 'def' line in the reviewed
# copy of this file. Every header tagged 'reconstructed' -- its name included
# -- was inferred from its docstring, as were a few docstring sentence
# endings. Confirm all of them against upstream before relying on them.
class IDownloadStatus(Interface):
    def get_started():  # reconstructed header
        """Return a timestamp (float with seconds since epoch) indicating
        when the operation was started."""

    def get_storage_index():
        """Return a string with the (binary) storage index in use on this
        download. This may be None if there is no storage index (i.e. LIT
        URI)."""

    def get_size():  # reconstructed header
        """Return an integer with the number of bytes that will eventually be
        retrieved for this file. Returns None if the size is not yet known.
        """

    def using_helper():  # reconstructed header
        """Return True if this download is using a Helper, False if not."""

    def get_status():  # reconstructed header
        """Return a string describing the current state of the download
        process."""

    def get_progress():  # reconstructed header
        """Returns a float (from 0.0 to 1.0) describing the amount of the
        download that has completed. This value will remain at 0.0 until the
        first byte of plaintext is pushed to the download target."""

    def get_active():  # reconstructed header
        """Return True if the download is currently active, False if not."""

    def get_counter():  # reconstructed header
        """Each download status gets a unique number: this method returns
        that number. This provides a handle to this particular download, so a
        web page can generate a suitable hyperlink."""
1563 class IPublishStatus(Interface):
1565 class IRetrieveStatus(Interface):
1568 class NotCapableError(Exception):
1569 """You have tried to write to a read-only node."""
1571 class BadWriteEnablerError(Exception):
1574 class RIControlClient(RemoteInterface):
1576 def wait_for_client_connections(num_clients=int):
1577 """Do not return until we have connections to at least NUM_CLIENTS
1581 def upload_from_file_to_uri(filename=str, convergence=ChoiceOf(None, StringConstraint(2**20))):
1582 """Upload a file to the grid. This accepts a filename (which must be
1583 absolute) that points to a file on the node's local disk. The node will
1584 read the contents of this file, upload it to the grid, then return the
1585 URI at which it was uploaded. If convergence is None then a random
1586 encryption key will be used, else the plaintext will be hashed, then
1587 that hash will be mixed together with the "convergence" string to form
1592 def download_from_uri_to_file(uri=URI, filename=str):
1593 """Download a file from the grid, placing it on the node's local disk
1594 at the given filename (which must be absolute[?]). Returns the
1595 absolute filename where the file was written."""
1600 def get_memory_usage():
1601 """Return a dict describing the amount of memory currently in use. The
1602 keys are 'VmPeak', 'VmSize', and 'VmData'. The values are integers,
1603 measuring memory consumption in bytes."""
1604 return DictOf(str, int)
1606 def speed_test(count=int, size=int, mutable=Any()):
1607 """Write 'count' tempfiles to disk, all of the given size. Measure
1608 how long (in seconds) it takes to upload them all to the servers.
1609 Then measure how long it takes to download all of them. If 'mutable'
1610 is 'create', time creation of mutable files. If 'mutable' is
1611 'upload', then time access to the same mutable file instead of
1614 Returns a tuple of (upload_time, download_time).
1616 return (float, float)
1618 def measure_peer_response_time():
1619 """Send a short message to each connected peer, and measure the time
1620 it takes for them to respond to it. This is a rough measure of the
1621 application-level round trip time.
1623 @return: a dictionary mapping peerid to a float (RTT time in seconds)
1626 return DictOf(Nodeid, float)
1628 UploadResults = Any() #DictOf(str, str)
1630 class RIEncryptedUploadable(RemoteInterface):
1631 __remote_name__ = "RIEncryptedUploadable.tahoe.allmydata.com"
1636 def get_all_encoding_parameters():
1637 return (int, int, int, long)
1639 def read_encrypted(offset=Offset, length=ReadSize):
1642 def get_plaintext_hashtree_leaves(first=int, last=int, num_segments=int):
1645 def get_plaintext_hash():
1652 class RICHKUploadHelper(RemoteInterface):
1653 __remote_name__ = "RIUploadHelper.tahoe.allmydata.com"
1655 def upload(reader=RIEncryptedUploadable):
1656 return UploadResults
1659 class RIHelper(RemoteInterface):
1660 __remote_name__ = "RIHelper.tahoe.allmydata.com"
1662 def upload_chk(si=StorageIndex):
1663 """See if a file with a given storage index needs uploading. The
1664 helper will ask the appropriate storage servers to see if the file
1665 has already been uploaded. If so, the helper will return a set of
1666 'upload results' that includes whatever hashes are needed to build
1667 the read-cap, and perhaps a truncated sharemap.
1669 If the file has not yet been uploaded (or if it was only partially
1670 uploaded), the helper will return an empty upload-results dictionary
1671 and also an RICHKUploadHelper object that will take care of the
1672 upload process. The client should call upload() on this object and
1673 pass it a reference to an RIEncryptedUploadable object that will
1674 provide ciphertext. When the upload is finished, the upload() method
1675 will finish and return the upload results.
1677 return (UploadResults, ChoiceOf(RICHKUploadHelper, None))
1680 class RIStatsProvider(RemoteInterface):
1681 __remote_name__ = "RIStatsProvider.tahoe.allmydata.com"
1683 Provides access to statistics and monitoring information.
1688 returns a dictionary containing 'counters' and 'stats', each a dictionary
1689 with string counter/stat name keys, and numeric values. counters are
1690 monotonically increasing measures of work done, and stats are instantaneous
1691 measures (potentially time averaged internally)
1693 return DictOf(str, DictOf(str, ChoiceOf(float, int, long)))
1695 class RIStatsGatherer(RemoteInterface):
1696 __remote_name__ = "RIStatsGatherer.tahoe.allmydata.com"
1698 Provides a monitoring service for centralised collection of stats
1701 def provide(provider=RIStatsProvider, nickname=str):
1703 @param provider: a stats collector instance which should be polled
1704 periodically by the gatherer to collect stats.
1705 @param nickname: a name useful to identify the provided client
1710 class IStatsProducer(Interface):
1713 returns a dictionary, with str keys representing the names of stats
1714 to be monitored, and numeric values.