src/allmydata/interfaces.py

   1
   2 from zope.interface import Interface
   3 from foolscap.schema import StringConstraint, ListOf, TupleOf, SetOf, DictOf, \
   4      ChoiceOf
   5 from foolscap import RemoteInterface, Referenceable
   6
   7 HASH_SIZE=32 # length, in bytes, of the binary hashes accepted by Hash
   8
   9 Hash = StringConstraint(maxLength=HASH_SIZE,
  10                         minLength=HASH_SIZE)# binary format 32-byte SHA256 hash
  11 Nodeid = StringConstraint(maxLength=20,
  12                           minLength=20) # binary format 20-byte SHA1 hash
     # NOTE(review): the single positional argument used below is presumably
     # StringConstraint's maxLength -- confirm against foolscap.schema
  13 FURL = StringConstraint(1000)
  14 StorageIndex = StringConstraint(16)
  15 URI = StringConstraint(300) # kind of arbitrary
  16
  17 MAX_BUCKETS = 200  # per peer; caps the share sets/dicts used by RIStorageServer
  18
  19 # MAX_SEGMENT_SIZE in encode.py is 1 MiB (this constraint allows k = 1)
  20 ShareData = StringConstraint(2**20)
  21 URIExtensionData = StringConstraint(1000)
  22 LeaseRenewSecret = Hash # used to protect bucket lease renewal requests
  23 LeaseCancelSecret = Hash # used to protect bucket lease cancellation requests
  24
  25 # Announcements are (FURL, service_name, remoteinterface_name,
  26 #                    nickname, my_version, oldest_supported)
  27 #  the (FURL, service_name, remoteinterface_name) refer to the service being
  28 #  announced. The (nickname, my_version, oldest_supported) refer to the
  29 #  client as a whole. The my_version/oldest_supported strings can be parsed
  30 #  by an allmydata.util.version.Version instance, and then compared. The
  31 #  first goal is to make sure that nodes are not confused by speaking to an
  32 #  incompatible peer. The second goal is to enable the development of
  33 #  backwards-compatibility code.
  34
     # six fields, in the order listed in the comment above
  35 Announcement = TupleOf(FURL, str, str,
  36                        str, str, str)
  37
  38 class RIIntroducerSubscriberClient(RemoteInterface):
         """I am the subscriber's side of the introducer protocol. I accept
         announcements from the publisher, and advice about the recommended
         encoding parameters for this grid."""
  39     __remote_name__ = "RIIntroducerSubscriberClient.tahoe.allmydata.com"
  40
  41     def announce(announcements=SetOf(Announcement)):
  42         """I accept announcements from the publisher."""
  43         return None
  44
  45     def set_encoding_parameters(parameters=(int, int, int)):
  46         """Advise the client of the recommended k-of-n encoding parameters
  47         for this grid. 'parameters' is a tuple of (k, desired, n), where 'n'
  48         is the total number of shares that will be created for any given
  49         file, while 'k' is the number of shares that must be retrieved to
  50         recover that file, and 'desired' is the minimum number of shares that
  51         must be placed before the uploader will consider its job a success.
  52         n/k is the expansion ratio, while k determines the robustness.
  53
  54         Introducers should specify 'n' according to the expected size of the
  55         grid (there is no point to producing more shares than there are
  56         peers), and k according to the desired reliability-vs-overhead goals.
  57
  58         Note that setting k=1 is equivalent to simple replication.
  59         """
  60         return None
  61
  62 # When Foolscap can handle multiple interfaces (Foolscap#17), the
  63 # full-powered introducer will implement both RIIntroducerPublisher and
  64 # RIIntroducerSubscriberService. Until then, we define
  65 # RIIntroducerPublisherAndSubscriberService as a combination of the two, and
  66 # make everybody use that.
  67
  68 class RIIntroducerPublisher(RemoteInterface):
  69     """To publish a service to the world, connect to me and give me your
  70     announcement message. I will deliver a copy to all connected subscribers."""
  71     __remote_name__ = "RIIntroducerPublisher.tahoe.allmydata.com"
  72
  73     def publish(announcement=Announcement):
  74         # canary? (open question: should publish() also accept a canary?)
  75         return None
  76
  77 class RIIntroducerSubscriberService(RemoteInterface):
  78     __remote_name__ = "RIIntroducerSubscriberService.tahoe.allmydata.com"
  79
  80     def subscribe(subscriber=RIIntroducerSubscriberClient, service_name=str):
  81         """Give me a subscriber reference, and I will call its announce()
  82         method with any announcements that match the desired service name. I
  83         will ignore duplicate subscriptions.
  84         """
  85         return None
  86
  87 class RIIntroducerPublisherAndSubscriberService(RemoteInterface):
         # Combined interface: publish() and subscribe() carry the same
         # signatures as the methods of RIIntroducerPublisher and
         # RIIntroducerSubscriberService respectively.
  88     __remote_name__ = "RIIntroducerPublisherAndSubscriberService.tahoe.allmydata.com"
  89     def publish(announcement=Announcement):
  90         return None
  91     def subscribe(subscriber=RIIntroducerSubscriberClient, service_name=str):
  92         return None
  93
  94 class IIntroducerClient(Interface):
  95     """I provide service introduction facilities for a node. I help nodes
  96     publish their services to the rest of the world, and I help them learn
  97     about services available on other nodes."""
  98
  99     def publish(furl, service_name, remoteinterface_name):
 100         """Once you call this, I will tell the world that the Referenceable
 101         available at FURL is available to provide a service named
 102         SERVICE_NAME. The precise definition of the service being provided is
 103         identified by the Foolscap 'remote interface name' in the last
 104         parameter: this is supposed to be a globally-unique string that
 105         identifies the RemoteInterface that is implemented."""
 106
 107     def subscribe_to(service_name):
 108         """Call this if you will eventually want to use services with the
 109         given SERVICE_NAME. This will prompt me to subscribe to announcements
 110         of those services. You can pick up the announcements later by calling
 111         get_all_connections_for() or get_permuted_peers().
 112         """
 113
 114     def get_all_connections():
 115         """Return a frozenset of (nodeid, service_name, rref) tuples, one for
 116         each active connection we've established to a remote service. This is
 117         mostly useful for unit tests that need to wait until a certain number
 118         of connections have been made."""
 119
 120     def get_all_connectors():
 121         """Return a dict that maps from (nodeid, service_name) to a
 122         RemoteServiceConnector instance for all services that we are actively
 123         trying to connect to. Each RemoteServiceConnector has the following
 124         public attributes::
 125
 126           service_name: the type of service provided, like 'storage'
 127           announcement_time: when we first heard about this service
 128           last_connect_time: when we last established a connection
 129           last_loss_time: when we last lost a connection
 130
 131           version: the peer's version, from the most recent connection
 132           oldest_supported: the peer's oldest supported version, likewise
               from the most recent connection
 133
 134           rref: the RemoteReference, if connected, otherwise None
 135           remote_host: the IAddress, if connected, otherwise None
 136
 137         This method is intended for monitoring interfaces, such as a web page
 138         which describes connecting and connected peers.
 139         """
 140
 141     def get_all_peerids():
 142         """Return a frozenset of all peerids to whom we have a connection (to
 143         one or more services) established. Mostly useful for unit tests."""
 144
 145     def get_all_connections_for(service_name):
 146         """Return a frozenset of (nodeid, service_name, rref) tuples, one
 147         for each active connection that provides the given SERVICE_NAME."""
 148
 149     def get_permuted_peers(service_name, key):
 150         """Return an ordered list of (peerid, rref) tuples, selecting from
 151         the connections that provide SERVICE_NAME, using a hash-based
 152         permutation keyed by KEY. This randomizes the service list in a
 153         repeatable way, to distribute load over many peers.
 154         """
 155
 156     def connected_to_introducer():
 157         """Return a boolean, True if we are currently connected to the
 158         introducer, False if not."""
 159
 160
 161 class RIBucketWriter(RemoteInterface):
         """I accept share data through write(). close() finishes the write
         (the server keeps the data unless it is incomplete or inconsistent)
         and abort() abandons everything written so far."""
 162     def write(offset=int, data=ShareData):
             # Takes an integer offset and a chunk of ShareData.
             # NOTE(review): presumably the chunk is stored at that offset
             # within the share -- confirm against the server implementation.
 163         return None
 164
 165     def close():
 166         """
 167         If the data that has been written is incomplete or inconsistent then
 168         the server will throw the data away, else it will store it for future
 169         retrieval.
 170         """
 171         return None
 172
 173     def abort():
 174         """Abandon all the data that has been written.
 175         """
 176         return None
 177
 178 class RIBucketReader(RemoteInterface):
         # read() takes an integer offset and length and returns ShareData.
         # NOTE(review): presumably the bytes [offset:offset+length] of the
         # share -- confirm against the server implementation.
 179     def read(offset=int, length=int):
 180         return ShareData
 181
 182 TestVector = ListOf(TupleOf(int, int, str, str))
 183 # elements are (offset, length, operator, specimen)
 184 # operator is one of "lt, le, eq, ne, ge, gt", or "nop"
 185 # nop always passes and is used to fetch data while writing.
 186 # you should use length==len(specimen) for everything except nop
 187 DataVector = ListOf(TupleOf(int, ShareData))
 188 # (offset, data). This limits us to 30 writes of 1MiB each per call
 189 TestAndWriteVectorsForShares = DictOf(int,
 190                                       TupleOf(TestVector,
 191                                               DataVector,
 192                                               ChoiceOf(None, int))) # new_length
 193 ReadVector = ListOf(TupleOf(int, int))
     # elements are (offset, length)
 194 ReadData = ListOf(ShareData)
 195 # returns data[offset:offset+length] for each element of ReadVector
 196
 197 class RIStorageServer(RemoteInterface):
 198     __remote_name__ = "RIStorageServer.tahoe.allmydata.com"
 199
 200     def get_versions():
 201         """Return a tuple of (my_version, oldest_supported) strings.
 202         Each string can be parsed by an allmydata.util.version.Version
 203         instance, and then compared. The first goal is to make sure that
 204         nodes are not confused by speaking to an incompatible peer. The
 205         second goal is to enable the development of backwards-compatibility
 206         code.
 207
 208         This method is likely to change in incompatible ways until we get the
 209         whole compatibility scheme nailed down.
 210         """
 211         return TupleOf(str, str)
 212
 213     def allocate_buckets(storage_index=StorageIndex,
 214                          renew_secret=LeaseRenewSecret,
 215                          cancel_secret=LeaseCancelSecret,
 216                          sharenums=SetOf(int, maxLength=MAX_BUCKETS),
 217                          allocated_size=int, canary=Referenceable):
 218         """
 219         @param storage_index: the index of the bucket to be created or
 220                               increfed.
 221         @param sharenums: these are the share numbers (probably between 0 and
 222                           99) that the sender is proposing to store on this
 223                           server.
 224         @param renew_secret: This is the secret used to protect bucket refresh
 225                              This secret is generated by the client and
 226                              stored for later comparison by the server. Each
 227                              server is given a different secret.
 228         @param cancel_secret: Like renew_secret, but protects bucket decref.
 229         @param canary: If the canary is lost before close(), the bucket is
 230                        deleted.
 231         @return: tuple of (alreadygot, allocated), where alreadygot is what we
 232                  already have and allocated is what we hereby agree to
 233                  accept. New leases are added for shares in both lists.
 234         """
 235         return TupleOf(SetOf(int, maxLength=MAX_BUCKETS),
 236                        DictOf(int, RIBucketWriter, maxKeys=MAX_BUCKETS))
 237
 238     def renew_lease(storage_index=StorageIndex, renew_secret=LeaseRenewSecret):
 239         """
 240         Renew the lease on a given bucket. Some networks will use this, some
 241         will not.
 242         """
 243
 244     def cancel_lease(storage_index=StorageIndex,
 245                      cancel_secret=LeaseCancelSecret):
 246         """
 247         Cancel the lease on a given bucket. If this was the last lease on the
 248         bucket, the bucket will be deleted.
 249         """
 250
 251     def get_buckets(storage_index=StorageIndex):
             # Returns a dict with int keys (presumably share numbers --
             # confirm) and RIBucketReader values, at most MAX_BUCKETS entries.
 252         return DictOf(int, RIBucketReader, maxKeys=MAX_BUCKETS)
 253
 254
 255
 256     def slot_readv(storage_index=StorageIndex,
 257                    shares=ListOf(int), readv=ReadVector):
 258         """Read a vector from the numbered shares associated with the given
 259         storage index. An empty shares list means to return data from all
 260         known shares. Returns a dictionary with one key per share."""
 261         return DictOf(int, ReadData) # shnum -> results
 262
 263     def slot_testv_and_readv_and_writev(storage_index=StorageIndex,
 264                                         secrets=TupleOf(Hash, Hash, Hash),
 265                                         tw_vectors=TestAndWriteVectorsForShares,
 266                                         r_vector=ReadVector,
 267                                         ):
 268         """General-purpose test-and-set operation for mutable slots. Perform
 269         a bunch of comparisons against the existing shares. If they all pass,
 270         then apply a bunch of write vectors to those shares. Then use the
 271         read vectors to extract data from all the shares and return the data.
 272
 273         This method is, um, large. The goal is to allow clients to update all
 274         the shares associated with a mutable file in a single round trip.
 275
 276         @param storage_index: the index of the bucket to be created or
 277                               increfed.
 278         @param write_enabler: a secret that is stored along with the slot.
 279                               Writes are accepted from any caller who can
 280                               present the matching secret. A different secret
 281                               should be used for each slot*server pair.
 282         @param renew_secret: This is the secret used to protect bucket refresh
 283                              This secret is generated by the client and
 284                              stored for later comparison by the server. Each
 285                              server is given a different secret.
 286         @param cancel_secret: Like renew_secret, but protects bucket decref.
 287
 288         The 'secrets' argument is a tuple of (write_enabler, renew_secret,
 289         cancel_secret). The first is required to perform any write. The
 290         latter two are used when allocating new shares. To simply acquire a
 291         new lease on existing shares, use an empty testv and an empty writev.
 292
 293         Each share can have a separate test vector (i.e. a list of
 294         comparisons to perform). If all vectors for all shares pass, then all
 295         writes for all shares are recorded. Each comparison is a 4-tuple of
 296         (offset, length, operator, specimen), which effectively does a bool(
 297         (read(offset, length)) OPERATOR specimen ) and only performs the
 298         write if all these evaluate to True. Basic test-and-set uses 'eq'.
 299         Write-if-newer uses a seqnum and (offset, length, 'lt', specimen).
 300         Write-if-same-or-newer uses 'le'.
 301
 302         Reads past the end of the container are truncated, and missing shares
 303         behave like empty ones, so to assert that a share doesn't exist (for
 304         use when creating a new share), use (0, 1, 'eq', '').
 305
 306         The write vector will be applied to the given share, expanding it if
 307         necessary. A write vector applied to a share number that did not
 308         exist previously will cause that share to be created.
 309
 310         Each write vector is accompanied by a 'new_length' argument. If
 311         new_length is not None, use it to set the size of the container. This
 312         can be used to pre-allocate space for a series of upcoming writes, or
 313         truncate existing data. If the container is growing, new_length will
 314         be applied before datav. If the container is shrinking, it will be
 315         applied afterwards.
 316
 317         The read vector is used to extract data from all known shares,
 318         *before* any writes have been applied. The same vector is used for
 319         all shares. This captures the state that was tested by the test
 320         vector.
 321
 322         This method returns two values: a boolean and a dict. The boolean is
 323         True if the write vectors were applied, False if not. The dict is
 324         keyed by share number, and each value contains a list of strings, one
 325         for each element of the read vector.
 326
 327         If the write_enabler is wrong, this will raise BadWriteEnablerError.
 328         To enable share migration, the exception will have the nodeid used
 329         for the old write enabler embedded in it, in the following string::
 330
 331          The write enabler was recorded by nodeid '%s'.
 332
 333         Note that the nodeid here is encoded using the same base32 encoding
 334         used by Foolscap and allmydata.util.idlib.nodeid_b2a().
 335
 336         """
 337         return TupleOf(bool, DictOf(int, ReadData))
 338
 339 class IStorageBucketWriter(Interface):
         """I describe the methods used to write the pieces of a share into a
         bucket: blocks, hash lists, and the URI extension. Every method
         returns a Deferred that fires (with None) when the operation
         completes. The share is not finalized until close() is called."""
 340     def put_block(segmentnum=int, data=ShareData):
 341         """@param data: For most segments, this data will be 'blocksize'
 342         bytes in length. The last segment might be shorter.
 343         @return: a Deferred that fires (with None) when the operation completes
 344         """
 345
 346     def put_plaintext_hashes(hashes=ListOf(Hash, maxLength=2**20)):
 347         """
 348         @return: a Deferred that fires (with None) when the operation completes
 349         """
 350
 351     def put_crypttext_hashes(hashes=ListOf(Hash, maxLength=2**20)):
 352         """
 353         @return: a Deferred that fires (with None) when the operation completes
 354         """
 355
 356     def put_block_hashes(blockhashes=ListOf(Hash, maxLength=2**20)):
 357         """
 358         @return: a Deferred that fires (with None) when the operation completes
 359         """
 360
 361     def put_share_hashes(sharehashes=ListOf(TupleOf(int, Hash),
 362                                             maxLength=2**20)):
 363         """
 364         @return: a Deferred that fires (with None) when the operation completes
 365         """
 366
 367     def put_uri_extension(data=URIExtensionData):
 368         """This block of data contains integrity-checking information (hashes
 369         of plaintext, crypttext, and shares), as well as encoding parameters
 370         that are necessary to recover the data. This is a serialized dict
 371         mapping strings to other strings. The hash of this data is kept in
 372         the URI and verified before any of the data is used. All buckets for
 373         a given file contain identical copies of this data.
 374
 375         The serialization format is specified with the following pseudocode:
 376         for k in sorted(dict.keys()):
 377             assert re.match(r'^[a-zA-Z_\-]+$', k)
 378             write(k + ':' + netstring(dict[k]))
 379
 380         @return: a Deferred that fires (with None) when the operation completes
 381         """
 382
 383     def close():
 384         """Finish writing and close the bucket. The share is not finalized
 385         until this method is called: if the uploading client disconnects
 386         before calling close(), the partially-written share will be
 387         discarded.
 388
 389         @return: a Deferred that fires (with None) when the operation completes
 390         """
 391
 392 class IStorageBucketReader(Interface):
         """I describe the methods used to fetch the pieces of a share:
         blocks, hash lists, and the URI extension. My get_* methods mirror
         the put_* methods of IStorageBucketWriter."""
 393
 394     def get_block(blocknum=int):
 395         """Most blocks will be the same size. The last block might be shorter
 396         than the others.
 397
 398         @return: ShareData
 399         """
 400
 401     def get_plaintext_hashes():
 402         """
 403         @return: ListOf(Hash, maxLength=2**20)
 404         """
 405
 406     def get_crypttext_hashes():
 407         """
 408         @return: ListOf(Hash, maxLength=2**20)
 409         """
 410
 411     def get_block_hashes():
 412         """
 413         @return: ListOf(Hash, maxLength=2**20)
 414         """
 415
 416     def get_share_hashes():
 417         """
 418         @return: ListOf(TupleOf(int, Hash), maxLength=2**20)
 419         """
 420
 421     def get_uri_extension():
 422         """
 423         @return: URIExtensionData
 424         """
 425
 426
 427
 428 # hm, we need a solution for forward references in schemas
 429 from foolscap.schema import Any
 430
 431 FileNode_ = Any() # TODO: foolscap needs constraints on copyables
 432 DirectoryNode_ = Any() # TODO: same (foolscap needs constraints on copyables)
 433 AnyNode_ = ChoiceOf(FileNode_, DirectoryNode_) # either kind of node
 434 EncryptedThing = str
 435
 436 class IURI(Interface):
 437     def init_from_string(uri):
 438         """Accept a string (as created by my to_string() method) and populate
 439         this instance with its data. I am not normally called directly,
 440         please use the module-level uri.from_string() function to convert
 441         arbitrary URI strings into IURI-providing instances."""
 442
 443     def is_readonly():
 444         """Return False if this URI can be used to modify the data. Return
 445         True if this URI cannot be used to modify the data."""
 446
 447     def is_mutable():
 448         """Return True if the data can be modified by *somebody* (perhaps
 449         someone who has a more powerful URI than this one)."""
 450
 451     def get_readonly():
 452         """Return another IURI instance, which represents a read-only form of
 453         this one. If is_readonly() is True, this returns self."""
 454
 455     def get_verifier():
 456         """Return an instance that provides IVerifierURI, which can be used
 457         to check on the availability of the file or directory, without
 458         providing enough capabilities to actually read or modify the
 459         contents. This may return None if the file does not need checking or
 460         verification (e.g. LIT URIs).
 461         """
 462
 463     def to_string():
 464         """Return a string of printable ASCII characters, suitable for
 465         passing into init_from_string."""
 466
 467 class IVerifierURI(Interface):
         """I can be used to check on the availability of a file or directory,
         without providing enough capabilities to actually read or modify the
         contents. IURI.get_verifier() returns instances that provide me."""
 468     def init_from_string(uri):
 469         """Accept a string (as created by my to_string() method) and populate
 470         this instance with its data. I am not normally called directly,
 471         please use the module-level uri.from_string() function to convert
 472         arbitrary URI strings into IURI-providing instances."""
 473
 474     def to_string():
 475         """Return a string of printable ASCII characters, suitable for
 476         passing into init_from_string."""
 477
 478 class IDirnodeURI(Interface):
 479     """I am a URI which represents a dirnode. I am a marker interface: I
         declare no methods of my own."""
 480
 481
 482 class IFileURI(Interface):
 483     """I am a URI which represents a filenode. I can report the size of the
         file that I represent."""
 484     def get_size():
 485         """Return the length (in bytes) of the file that I represent."""
 486
 487 class IMutableFileURI(Interface):
 488     """I am a URI which represents a mutable filenode. I am a marker
         interface: I declare no methods of my own."""
 489 class INewDirectoryURI(Interface):
         # Marker interface with no methods.
         # NOTE(review): presumably provided by read-write URIs of new-style
         # directories -- confirm against the uri module.
 490     pass
 491 class IReadonlyNewDirectoryURI(Interface):
         # Marker interface with no methods.
         # NOTE(review): presumably the read-only counterpart of
         # INewDirectoryURI -- confirm against the uri module.
 492     pass
 493
 494
 495 class IFilesystemNode(Interface):
 496     def get_uri():
 497         """
 498         Return the URI that can be used by others to get access to this
 499         node. If this node is read-only, the URI will only offer read-only
 500         access. If this node is read-write, the URI will offer read-write
 501         access.
 502
 503         If you have read-write access to a node and wish to share merely
 504         read-only access with others, use get_readonly_uri().
 505         """
 506
 507     def get_readonly_uri():
 508         """Return the URI that can be used by others to get read-only
 509         access to this node. The result is a read-only URI, regardless of
 510         whether this node is read-only or read-write.
 511
 512         If you have merely read-only access to this node,
 513         get_readonly_uri() will return the same thing as get_uri().
 514         """
 515
 516     def get_verifier():
 517         """Return an IVerifierURI instance that represents the
 518         'verify/refresh capability' for this node. The holder of this
 519         capability will be able to renew the lease for this node, protecting
 520         it from garbage-collection. They will also be able to ask a server if
 521         it holds a share for the file or directory.
 522         """
 523
 524     def check():
 525         """Perform a file check. See IChecker.check for details."""
 526
 527     def is_readonly():
 528         """Return True if this reference provides only read-only access to
 529         the given file or directory (i.e. if you cannot modify it), or False
 530         if it provides mutable access. Note that even if this reference is
 531         read-only, someone else may hold a read-write reference to it."""
 532
 533     def is_mutable():
 534         """Return True if this file or directory is mutable (by *somebody*,
 535         not necessarily you), False if it is immutable. Note that a file
 536         might be mutable overall, but your reference to it might be
 537         read-only. On the other hand, all references to an immutable file
 538         will be read-only; there are no read-write references to an immutable
 539         file.
 540         """
 541
 542 class IMutableFilesystemNode(IFilesystemNode):
         # Marker interface: adds no methods beyond those of IFilesystemNode.
 543     pass
 544
 545 class IFileNode(IFilesystemNode):
         """I am a filesystem node that represents a file: my contents can be
         downloaded and I can report their size."""
 546     def download(target):
 547         """Download the file's contents to a given IDownloadTarget"""
 548
 549     def download_to_data():
 550         """Download the file's contents. Return a Deferred that fires
 551         with those contents."""
 552
 553     def get_size():
 554         """Return the length (in bytes) of the data this node represents."""
 555
 556 class IMutableFileNode(IFileNode, IMutableFilesystemNode):
         """I am a file node whose contents can be replaced. I also expose
         the writekey, for callers that hold write-capability."""
 557     def download_to_data():
 558         """Download the file's contents. Return a Deferred that fires with
 559         those contents. If there are multiple retrievable versions in the
 560         grid (because you failed to avoid simultaneous writes, see
 561         docs/mutable.txt), this will return the first version that it can
 562         reconstruct, and will silently ignore the others. In the future, a
 563         more advanced API will signal and provide access to the multiple
 564         heads."""
 565
 566     def replace(newdata):
 567         """Replace the old contents with the new data. Returns a Deferred
 568         that fires (with None) when the operation is complete.
 569
 570         If the node detects that there are multiple outstanding versions of
 571         the file, this will raise ConsistencyError, and may leave the
 572         distributed file in an unusual state (the node will try to ensure
 573         that at least one version of the file remains retrievable, but it may
 574         or may not be the one you just tried to upload). You should respond
 575         to this by downloading the current contents of the file and retrying
 576         the replace() operation.
 577         """
 578
 579     def get_writekey():
 580         """Return this filenode's writekey, or None if the node does not have
 581         write-capability. This may be used to assist with data structures
 582         that need to make certain data available only to writers, such as the
 583         read-write child caps in dirnodes. The recommended process is to have
 584         reader-visible data be submitted to the filenode in the clear (where
 585         it will be encrypted by the filenode using the readkey), but encrypt
 586         writer-visible data using this writekey.
 587         """
 588
 589 class IDirectoryNode(IMutableFilesystemNode):
 590     def get_uri():
 591         """
 592         The dirnode ('1') URI returned by this method can be used in
 593         set_uri() on a different directory ('2') to 'mount' a reference to
 594         this directory ('1') under the other ('2'). This URI is just a
 595         string, so it can be passed around through email or other out-of-band
 596         protocol.
 597         """
 598
 599     def get_readonly_uri():
 600         """
 601         The dirnode ('1') URI returned by this method can be used in
 602         set_uri() on a different directory ('2') to 'mount' a reference to
 603         this directory ('1') under the other ('2'). This URI is just a
 604         string, so it can be passed around through email or other out-of-band
 605         protocol.
 606         """
 607
 608     def list():
 609         """I return a Deferred that fires with a dictionary mapping child
 610         name to (node, metadata_dict) tuples, in which 'node' is either an
 611         IFileNode or IDirectoryNode, and 'metadata_dict' is a dictionary of
 612         metadata."""
 613
 614     def has_child(name):
 615         """I return a Deferred that fires with a boolean, True if there
 616         exists a child of the given name, False if not."""
 617
 618     def get(name):
 619         """I return a Deferred that fires with a specific named child node,
 620         either an IFileNode or an IDirectoryNode."""
 621
 622     def get_metadata_for(name):
 623         """I return a Deferred that fires with the metadata dictionary for a
 624         specific named child node. This metadata is stored in the *edge*, not
 625         in the child, so it is attached to the parent dirnode rather than the
 626         child dir-or-file-node."""
 627
 628     def set_metadata_for(name, metadata):
 629         """I replace any existing metadata for the named child with the new
 630         metadata. This metadata is stored in the *edge*, not in the child, so
 631         it is attached to the parent dirnode rather than the child
 632         dir-or-file-node. I return a Deferred (that fires with this dirnode)
 633         when the operation is complete."""
 634
 635     def get_child_at_path(path):
 636         """Transform a child path into an IDirectoryNode or IFileNode.
 637
 638         I perform a recursive series of 'get' operations to find the named
 639         descendant node. I return a Deferred that fires with the node, or
 640         errbacks with IndexError if the node could not be found.
 641
 642         The path can be either a single string (slash-separated) or a list of
 643         path-name elements.
 644         """
 645
    def set_uri(name, child_uri, metadata=None):
        """I add a child (by URI) at the specific name, replacing any
        existing child of the same name. I return a Deferred that fires when
        the operation finishes.

        The child_uri could be for a file, or for a directory (either
        read-write or read-only, using a URI that came from get_uri() ).

        If metadata= is provided, I will use it as the metadata for the named
        edge. This will replace any existing metadata. If metadata= is left
        as the default value of None, I will set ['mtime'] to the current
        time, and I will set ['ctime'] to the current time if there was not
        already a child by this name present. This roughly matches the
        ctime/mtime semantics of traditional filesystems.

        If this directory node is read-only, the Deferred will errback with a
        NotMutableError."""
 663
    def set_uris(entries):
        """Add multiple children (by URI) to a directory node in one
        operation.

        'entries' provides the children to add, each as a (name, child_uri)
        pair or a (name, child_uri, metadata) triple. Returns a Deferred that
        fires (with None) when the operation finishes. This is equivalent to
        calling set_uri() multiple times, but is much more efficient.
        """
 670
    def set_node(name, child, metadata=None):
        """I add a child node at the specific name, replacing any existing
        child of the same name. I return a Deferred that fires when the
        operation finishes; it fires with the child node that was just added.

        If metadata= is provided, I will use it as the metadata for the named
        edge. This will replace any existing metadata. If metadata= is left
        as the default value of None, I will set ['mtime'] to the current
        time, and I will set ['ctime'] to the current time if there was not
        already a child by this name present. This roughly matches the
        ctime/mtime semantics of traditional filesystems.

        If this directory node is read-only, the Deferred will errback with a
        NotMutableError."""
 686
    def set_nodes(entries):
        """Add multiple child nodes to a directory node in one operation.

        'entries' provides the children to add, each as a (name, child_node)
        pair or a (name, child_node, metadata) triple. Returns a Deferred
        that fires (with None) when the operation finishes. This is
        equivalent to calling set_node() multiple times, but is much more
        efficient."""
 692
 693
    def add_file(name, uploadable, metadata=None):
        """I upload a file (using the given IUploadable), then attach the
        resulting FileNode to the directory at the given name.

        I set metadata the same way as set_uri() and set_node(): an explicit
        metadata= value is used for the edge, otherwise the default
        ctime/mtime handling described there applies.

        I return a Deferred that fires (with the IFileNode of the uploaded
        file) when the operation completes."""
 701
    def delete(name):
        """I remove the child at the specific name. I return a Deferred that
        fires when the operation finishes.

        NOTE(review): what happens when no child has this name, or when this
        directory node is read-only, is not specified here -- confirm against
        the implementation."""
 705
    def create_empty_directory(name):
        """I create and attach an empty directory at the given name. I return
        a Deferred that fires when the operation finishes.

        NOTE(review): the value the Deferred fires with (e.g. the new
        directory node) is not specified here -- confirm against the
        implementation."""
 709
    def move_child_to(current_child_name, new_parent, new_child_name=None):
        """I take one of my children and move them to a new parent. The child
        is referenced by name. On the new parent, the child will live under
        'new_child_name', which defaults to 'current_child_name' (i.e. when
        new_child_name is left as None, the child keeps its current name).

        I return a Deferred that fires when the operation finishes.

        TODO: what should we do about metadata?"""
 716
    def build_manifest():
        """Return a frozenset of verifier-capability strings for all nodes
        (directories and files) reachable from this one.

        NOTE(review): this docstring does not say whether the frozenset is
        returned directly or delivered through a Deferred -- confirm against
        the implementation."""
 720
class ICodecEncoder(Interface):
    """I convert a single block of data into a number of shares, such that
    an ICodecDecoder can later use a subset of those shares to recover the
    original data. set_params() must be called first; encode() may then be
    invoked multiple times."""

    def set_params(data_size, required_shares, max_shares):
        """Set up the parameters of this encoder.

        This prepares the encoder to perform an operation that converts a
        single block of data into a number of shares, such that a future
        ICodecDecoder can use a subset of these shares to recover the
        original data. This operation is invoked by calling encode(). Once
        the encoding parameters are set up, the encode operation can be
        invoked multiple times.

        set_params() prepares the encoder to accept blocks of input data that
        are exactly 'data_size' bytes in length. The encoder will be prepared
        to produce 'max_shares' shares for each encode() operation (although
        see the 'desired_share_ids' argument of encode() to use less CPU).
        The encoding math will be chosen such that the decoder can get by
        with as few as 'required_shares' of these shares and still reproduce
        the original data. For example, set_params(1000, 5, 5) offers no
        redundancy at all, whereas set_params(1000, 1, 10) provides 10x
        redundancy.

        Numerical Restrictions: 'data_size' is required to be an integral
        multiple of 'required_shares'. In general, the caller should choose
        required_shares and max_shares based upon their reliability
        requirements and the number of peers available (the total storage
        space used is roughly equal to max_shares*data_size/required_shares),
        then choose data_size to achieve the memory footprint desired (larger
        data_size means more efficient operation, smaller data_size means
        smaller memory footprint).

        In addition, 'max_shares' must be equal to or greater than
        'required_shares'. Of course, setting them to be equal causes
        encode() to degenerate into a particularly slow form of the 'split'
        utility.

        See encode() for more details about how these parameters are used.

        set_params() must be called before any other ICodecEncoder methods
        may be invoked.
        """

    def get_encoder_type():
        """Return a short string that describes the type of this encoder.

        There is required to be a global table of encoder classes. This method
        returns an index into this table; the value at this index is an
        encoder class, and this encoder is an instance of that class.
        """

    def get_serialized_params(): # TODO: maybe, maybe not
        """Return a string that describes the parameters of this encoder.

        This string can be passed to the decoder to prepare it for handling
        the encoded shares we create. It might contain more information than
        was presented to set_params(), if there is some flexibility of
        parameter choice.

        This string is intended to be embedded in the URI, so there are
        several restrictions on its contents. At the moment I'm thinking that
        this means it may contain hex digits and hyphens, and nothing else.
        The idea is that the URI contains something like '%s:%s:%s' %
        (encoder.get_encoder_type(), encoder.get_serialized_params(),
        b2a(crypttext_hash)), and this is enough information to construct a
        compatible decoder.
        """

    def get_block_size():
        """Return the length of the shares that encode() will produce.
        """

    def encode_proposal(data, desired_share_ids=None):
        """Encode some data.

        'data' must be a string (or other buffer object), and len(data) must
        be equal to the 'data_size' value passed earlier to set_params().

        This will return a Deferred that will fire with two lists. The first
        is a list of shares, each of which is a string (or other buffer
        object) such that len(share) is the same as what get_block_size()
        returned earlier. The second is a list of shareids, in which each is
        an integer. The lengths of the two lists will always be equal to each
        other. The user should take care to keep each share closely
        associated with its shareid, as one is useless without the other.

        The length of this output list will normally be the same as the value
        provided to the 'max_shares' parameter of set_params(). This may be
        different if 'desired_share_ids' is provided.

        'desired_share_ids', if provided, is required to be a sequence of
        ints, each of which is required to be >= 0 and < max_shares. If not
        provided, encode() will produce 'max_shares' shares, as if
        'desired_share_ids' were set to range(max_shares). You might use this
        if you initially thought you were going to use 10 peers, started
        encoding, and then two of the peers dropped out: you could use
        desired_share_ids= to skip the work (both memory and CPU) of
        producing shares for the peers which are no longer available.

        """

    def encode(inshares, desired_share_ids=None):
        """Encode some data. This may be called multiple times. Each call is
        independent.

        inshares is a sequence of length required_shares, containing buffers
        (i.e. strings), where each buffer contains the next contiguous
        non-overlapping segment of the input data. Each buffer is required to
        be the same length, and the sum of the lengths of the buffers is
        required to be exactly the data_size promised by set_params(). (This
        implies that the data has to be padded before being passed to
        encode(), unless of course it already happens to be an even multiple
        of required_shares in length.)

         ALSO: the requirement to break up your data into 'required_shares'
         chunks before calling encode() feels a bit surprising, at least from
         the point of view of a user who doesn't know how FEC works. It feels
         like an implementation detail that has leaked outside the
         abstraction barrier. Can you imagine a use case in which the data to
         be encoded might already be available in pre-segmented chunks, such
         that it is faster or less work to make encode() take a list rather
         than splitting a single string?

         ALSO ALSO: I think 'inshares' is a misleading term, since encode()
         is supposed to *produce* shares, so what it *accepts* should be
         something other than shares. Other places in this interface use the
         word 'data' for that-which-is-not-shares.. maybe we should use that
         term?

        'desired_share_ids', if provided, is required to be a sequence of
        ints, each of which is required to be >= 0 and < max_shares. If not
        provided, encode() will produce 'max_shares' shares, as if
        'desired_share_ids' were set to range(max_shares). You might use this
        if you initially thought you were going to use 10 peers, started
        encoding, and then two of the peers dropped out: you could use
        desired_share_ids= to skip the work (both memory and CPU) of
        producing shares for the peers which are no longer available.

        For each call, encode() will return a Deferred that fires with two
        lists, one containing shares and the other containing the shareids.
        The get_block_size() method can be used to determine the length of
        the share strings returned by encode(). Each shareid is a small
        integer, exactly as passed into 'desired_share_ids' (or
        range(max_shares), if desired_share_ids was not provided).

        The shares and their corresponding shareids are required to be kept
        together during storage and retrieval. Specifically, the share data is
        useless by itself: the decoder needs to be told which share is which
        by providing it with both the shareid and the actual share data.

        This function will allocate an amount of memory roughly equal to::

         (max_shares - required_shares) * get_block_size()

        When combined with the memory that the caller must allocate to
        provide the input data, this leads to a memory footprint roughly
        equal to the size of the resulting encoded shares (i.e. the expansion
        factor times the size of the input segment).
        """

        # rejected ideas:
        #
        #  returning a list of (shareidN,shareN) tuples instead of a pair of
        #  lists (shareids..,shares..). Brian thought the tuples would
        #  encourage users to keep the share and shareid together throughout
        #  later processing, Zooko pointed out that the code to iterate
        #  through two lists is not really more complicated than using a list
        #  of tuples and there's also a performance improvement
        #
        #  having 'data_size' not required to be an integral multiple of
        #  'required_shares'. Doing this would require encode() to perform
        #  padding internally, and we'd prefer to have any padding be done
        #  explicitly by the caller. Yes, it is an abstraction leak, but
        #  hopefully not an onerous one.
 892
 893
class ICodecDecoder(Interface):
    """I turn a subset of the shares produced by an ICodecEncoder back into
    the original data. set_serialized_params() must be called before the
    other methods."""

    def set_serialized_params(params):
        """Set up the parameters of this decoder, from a string returned by
        encoder.get_serialized_params()."""

    def get_needed_shares():
        """Return the number of shares needed to reconstruct the data.
        set_serialized_params() is required to be called before this."""

    def decode(some_shares, their_shareids):
        """Decode a partial list of shares into data.

        'some_shares' is required to be a sequence of buffers of sharedata, a
        subset of the shares returned by ICodecEncoder.encode(). Each share is
        required to be of the same length.  The i'th element of their_shareids
        is required to be the shareid of the i'th buffer in some_shares.

        This returns a Deferred which fires with a sequence of buffers. This
        sequence will contain all of the segments of the original data, in
        order. The sum of the lengths of all of the buffers will be the
        'data_size' value passed into the original ICodecEncoder.set_params()
        call. To get back the single original input block of data, use
        ''.join(output_buffers), or you may wish to simply write them in
        order to an output file.

        Note that some of the elements in the result sequence may be
        references to the elements of the some_shares input sequence. In
        particular, this means that if those share objects are mutable (e.g.
        arrays) and if they are changed, then both the input (the
        'some_shares' parameter) and the output (the value given when the
        deferred is triggered) will change.

        The length of 'some_shares' is required to be exactly the value of
        'required_shares' passed into the original ICodecEncoder.set_params()
        call.
        """
 930
class IEncoder(Interface):
    """I take an object that provides IEncryptedUploadable, which provides
    encrypted data, and a collection of shareholders (see
    set_shareholders()). I then encode, hash, and deliver shares to those
    shareholders. I will compute all the necessary Merkle hash trees that are
    necessary to validate the crypttext that eventually comes back from the
    shareholders. I provide the URI Extension Block Hash, and the encoding
    parameters, both of which must be included in the URI.

    I do not choose shareholders, that is left to the IUploader. I must be
    given a dict of RemoteReferences to storage buckets that are ready and
    willing to receive data.
    """

    def set_size(size):
        """Specify the number of bytes that will be encoded. This must be
        performed before get_serialized_params() can be called.

        NOTE(review): this interface declares no get_serialized_params()
        method; the serialized parameters are exposed here through
        get_param('serialized_params'), and get_param() documents that
        set_size() may not be called after it.
        """
    def set_params(params):
        """Override the default encoding parameters. 'params' is a tuple of
        (k,d,n), where 'k' is the number of required shares, 'd' is the
        shares_of_happiness, and 'n' is the total number of shares that will
        be created.

        Encoding parameters can be set in three ways. 1: The Encoder class
        provides defaults (3/7/10). 2: the Encoder can be constructed with
        an 'options' dictionary, in which the
        'needed_and_happy_and_total_shares' key can be a (k,d,n) tuple. 3:
        set_params((k,d,n)) can be called.

        If you intend to use set_params(), you must call it before
        get_share_size or get_param are called.

        NOTE(review): get_share_size is not a method of this interface; the
        share size is available here through get_param('share_size').
        """

    def set_encrypted_uploadable(u):
        """Provide a source of encrypted upload data. 'u' must implement
        IEncryptedUploadable.

        When this is called, the IEncryptedUploadable will be queried for its
        length and the storage_index that should be used.

        This returns a Deferred that fires with this Encoder instance.

        This must be performed before start() can be called.
        """

    def get_param(name):
        """Return an encoding parameter, by name.

        'storage_index': return a string with the (16-byte truncated SHA-256
                         hash) storage index to which these shares should be
                         pushed.

        'share_counts': return a tuple describing how many shares are used:
                        (needed_shares, shares_of_happiness, total_shares)

        'num_segments': return an int with the number of segments that
                        will be encoded.

        'segment_size': return an int with the size of each segment.

        'block_size': return the size of the individual blocks that will
                      be delivered to a shareholder's put_block() method. By
                      knowing this, the shareholder will be able to keep all
                      blocks in a single file and still provide random access
                      when reading them. # TODO: can we avoid exposing this?

        'share_size': an int with the size of the data that will be stored
                      on each shareholder. This is the aggregate amount of
                      data that will be sent to the shareholder, summed over
                      all the put_block() calls I will ever make. It is
                      useful to determine this size before asking potential
                      shareholders whether they will grant a lease or not,
                      since their answers will depend upon how much space we
                      need. TODO: this might also include some amount of
                      overhead, like the size of all the hashes. We need to
                      decide whether this is useful or not.

        'serialized_params': a string with a concise description of the
                             codec name and its parameters. This may be passed
                             into the IUploadable to let it make sure that
                             the same file encoded with different parameters
                             will result in different storage indexes.

        Once this is called, set_size() and set_params() may not be called.
        """

    def set_shareholders(shareholders):
        """Tell the encoder where to put the encoded shares. 'shareholders'
        must be a dictionary that maps share number (an integer ranging from
        0 to n-1) to an instance that provides IStorageBucketWriter. This
        must be performed before start() can be called."""

    def start():
        """Begin the encode/upload process. This involves reading encrypted
        data from the IEncryptedUploadable, encoding it, uploading the shares
        to the shareholders, then sending the hash trees.

        set_encrypted_uploadable() and set_shareholders() must be called
        before this can be invoked.

        This returns a Deferred that fires with a tuple of
        (uri_extension_hash, needed_shares, total_shares, size) when the
        upload process is complete. This information, plus the encryption
        key, is sufficient to construct the URI.
        """
1037
class IDecoder(Interface):
    """I take a collection of shareholders (see set_shareholders()) and some
    setup information, then download, validate, decode, and decrypt data
    from them, writing the results to an output file.

    I do not locate the shareholders, that is left to the IDownloader. I must
    be given a dict of RemoteReferences to storage buckets that are ready to
    send data.
    """

    def setup(outfile):
        """I take a file-like object (providing write and close) to which all
        the plaintext data will be written.

        TODO: producer/consumer . Maybe write() should return a Deferred that
        indicates when it will accept more data? But probably having the
        IDecoder be a producer is easier to glue to IConsumer pieces.
        """

    def set_shareholders(shareholders):
        """I take a dictionary that maps share identifiers (small integers)
        to RemoteReferences that provide RIBucketReader. This must be called
        before start()."""

    def start():
        """I start the download. This process involves retrieving data and
        hash chains from the shareholders, using the hashes to validate the
        data, decoding the shares into segments, decrypting the segments,
        then writing the resulting plaintext to the output file (the one
        given to setup()).

        I return a Deferred that will fire (with self) when the download is
        complete.
        """
1071
class IDownloadTarget(Interface):
    """I receive the data produced by a download. The normal sequence of
    calls is open(), then write() zero or more times, then close(), then
    finish(); fail() replaces the remainder of that sequence when the
    download fails."""

    def open(size):
        """Called before any calls to write() or close(). If an error
        occurs before any data is available, fail() may be called without
        a previous call to open().

        'size' is the length of the file being downloaded, in bytes."""

    def write(data):
        """Output some data to the target."""
    def close():
        """Inform the target that there is no more data to be written."""
    def fail(why):
        """fail() is called to indicate that the download has failed. 'why'
        is a Failure object indicating what went wrong. No further methods
        will be invoked on the IDownloadTarget after fail()."""
    def register_canceller(cb):
        """The FileDownloader uses this to register a no-argument function
        ('cb') that the target can call to cancel the download. Once this
        canceller is invoked, no further calls to write() or close() will be
        made."""
    def finish():
        """When the FileDownloader is done, this finish() function will be
        called. Whatever it returns will be returned to the invoker of
        Downloader.download.
        """
1097
class IDownloader(Interface):
    """I download the file named by a URI and deliver its data to an
    IDownloadTarget."""

    def download(uri, target):
        """Perform a CHK download, sending the data to the given target.
        'target' must provide IDownloadTarget.

        Returns a Deferred that fires (with the results of target.finish)
        when the download is finished, or errbacks if something went wrong."""
1105
class IEncryptedUploadable(Interface):
    """I am a source of encrypted upload data (crypttext), as consumed by an
    IEncoder. Several of my methods mirror, or pass through to, the
    IUploadable methods of the same name."""

    def set_upload_status(upload_status):
        """Provide an IUploadStatus object that should be filled with status
        information. The IEncryptedUploadable is responsible for setting
        key-determination progress ('chk'), size, storage_index, and
        ciphertext-fetch progress. It may delegate some of this
        responsibility to others, in particular to the IUploadable."""

    def get_size():
        """This behaves just like IUploadable.get_size()."""

    def get_all_encoding_parameters():
        """Return a Deferred that fires with a tuple of
        (k,happy,n,segment_size). The segment_size will be used as-is, and
        must match the following constraints: it must be a multiple of k, and
        it shouldn't be unreasonably larger than the file size (if
        segment_size is larger than filesize, the difference must be stored
        as padding).

        This usually passes through to the IUploadable method of the same
        name.

        The encoder strictly obeys the values returned by this method. To
        make an upload use non-default encoding parameters, you must arrange
        to control the values that this method returns.
        """

    def get_storage_index():
        """Return a Deferred that fires with a 16-byte storage index.
        """

    def read_encrypted(length, hash_only):
        """This behaves just like IUploadable.read(), but returns crypttext
        instead of plaintext. If hash_only is True, then this discards the
        data (and returns an empty list); this improves efficiency when
        resuming an interrupted upload (where we need to compute the
        plaintext hashes, but don't need the redundant encrypted data)."""

    def get_plaintext_hashtree_leaves(first, last, num_segments):
        """Get the leaf nodes of a merkle hash tree over the plaintext
        segments, i.e. get the tagged hashes of the given segments. The
        segment size is expected to be generated by the IEncryptedUploadable
        before any plaintext is read or ciphertext produced, so that the
        segment hashes can be generated with only a single pass.

        This returns a Deferred which fires with a sequence of hashes, using:

         tuple(segment_hashes[first:last])

        'num_segments' is used to assert that the number of segments that the
        IEncryptedUploadable handled matches the number of segments that the
        encoder was expecting.

        This method must not be called until the final byte has been read
        from read_encrypted(). Once this method is called, read_encrypted()
        can never be called again.
        """

    def get_plaintext_hash():
        """Get the hash of the whole plaintext.

        This returns a Deferred which fires with a tagged SHA-256 hash of the
        whole plaintext, obtained from hashutil.plaintext_hash(data).
        """

    def close():
        """Just like IUploadable.close()."""
1173
class IUploadable(Interface):
    """I am a source of plaintext data to be uploaded. I report the size of
    the data, the encoding parameters and the encryption key to use for it,
    and I hand the data out through read()."""

    def set_upload_status(upload_status):
        """Provide an IUploadStatus object that should be filled with status
        information. The IUploadable is responsible for setting
        key-determination progress ('chk')."""

    def set_default_encoding_parameters(params):
        """Set the default encoding parameters, which must be a dict mapping
        strings to ints. The meaningful keys are 'k', 'happy', 'n', and
        'max_segment_size'. These might have an influence on the final
        encoding parameters returned by get_all_encoding_parameters(), if the
        Uploadable doesn't have more specific preferences.

        This call is optional: if it is not used, the Uploadable will use
        some built-in defaults. If used, this method must be called before
        any other IUploadable methods to have any effect.
        """

    def get_size():
        """Return a Deferred that will fire with the length of the data to be
        uploaded, in bytes. This will be called before the data is actually
        used, to compute encoding parameters.
        """

    def get_all_encoding_parameters():
        """Return a Deferred that fires with a tuple of
        (k,happy,n,segment_size). The segment_size will be used as-is, and
        must match the following constraints: it must be a multiple of k, and
        it shouldn't be unreasonably larger than the file size (if
        segment_size is larger than filesize, the difference must be stored
        as padding).

        The relative values of k and n allow some IUploadables to request
        better redundancy than others (in exchange for consuming more space
        in the grid).

        Larger values of segment_size reduce hash overhead, while smaller
        values reduce memory footprint and cause data to be delivered in
        smaller pieces (which may provide a smoother and more predictable
        download experience).

        The encoder strictly obeys the values returned by this method. To
        make an upload use non-default encoding parameters, you must arrange
        to control the values that this method returns. One way to influence
        them may be to call set_default_encoding_parameters() before calling
        get_all_encoding_parameters().
        """

    def get_encryption_key():
        """Return a Deferred that fires with a 16-byte AES key. This key will
        be used to encrypt the data. The key will also be hashed to derive
        the StorageIndex.

        Uploadables which want to achieve convergence should hash their file
        contents and the serialized_encoding_parameters to form the key
        (which of course requires a full pass over the data). Uploadables can
        use the upload.ConvergentUploadMixin class to achieve this
        automatically.

        Uploadables which do not care about convergence (or do not wish to
        make multiple passes over the data) can simply return a
        strongly-random 16 byte string.

        get_encryption_key() may be called multiple times: the IUploadable is
        required to return the same value each time.
        """

    def read(length):
        """Return a Deferred that fires with a list of strings (perhaps with
        only a single element) which, when concatenated together, contain the
        next 'length' bytes of data. If EOF is near, this may provide fewer
        than 'length' bytes. The total number of bytes provided by read()
        before it signals EOF must equal the size provided by get_size().

        If the data must be acquired through multiple internal read
        operations, returning a list instead of a single string may help to
        reduce string copies.

        'length' will typically be equal to (min(get_size(),1MB)/req_shares),
        so a 10kB file means length=3kB, 100kB file means length=30kB,
        and >=1MB file means length=300kB.

        This method provides for a single full pass through the data. Later
        use cases may desire multiple passes or access to only parts of the
        data (such as a mutable file making small edits-in-place). This API
        will be expanded once those use cases are better understood.
        """

    def close():
        """The upload is finished, and whatever filehandle was in use may be
        closed."""
1265
class IUploadResults(Interface):
    """I am the object handed back by upload() methods. The outcome of the
    upload is exposed through public attributes which callers may read::

     .uri : the CHK read-cap for the file

    """
1273
class IUploader(Interface):
    def upload(uploadable):
        """Upload the file.

        @param uploadable: the source of the data; it must implement
                           IUploadable.
        @return: a Deferred which fires with an UploadResults instance. The
                 URI of the file can be read from it as results.uri .
        """

    def upload_ssk(write_capability, new_version, uploadable):
        """TODO: how should this work? (Semantics not yet specified.)"""
1282
class IChecker(Interface):
    def check(uri_to_check):
        """Check upon the health of the target of an IVerifierURI.

        At present uri_to_check has to be an IVerifierURI. In the future we
        expect to loosen this, accepting anything that can be adapted to
        IVerifierURI (like read-only or read-write dirnode/filenode URIs).

        A Deferred is returned. What it fires with depends on the target:

         - dirnodes: either True or False (dirnodes are not distributed, so
           their health is a boolean).
         - filenodes: a tuple of (needed_shares, total_shares, found_shares,
           sharemap), of which the first three are ints. The basic health
           of the file is found_shares / needed_shares; a value below 1.0
           means the file is unrecoverable.

        The sharemap is keyed by sharenum. Each value is a list of the
        (binary) nodeids which hold that share. Two shares kept on the same
        nodeid will fail as a pair, which decreases overall reliability.

        The results of the check are remembered by the IChecker instance.
        By default they are stashed in RAM, and so are forgotten at
        shutdown. If the node's basedir contains a file named
        'checker_results.db', that file is used as a sqlite database of
        results, which makes them persistent across runs. To turn this
        feature on, just 'touch checker_results.db'; the node will
        initialize it properly the next time it is started.
        """

    def verify(uri_to_check):
        """Verify the crypttext of the target of an IVerifierURI.

        Verification is a more-intensive form of checking: the file's
        crypttext contents are retrieved and the associated hash checks are
        performed. A corrupted share held by a storage server will be
        detected by verification, but not by checking.

        This returns a Deferred that fires with True if the crypttext hashes
        look good, and will probably raise an exception if anything goes
        wrong.

        For dirnodes 'verify' is the same as 'check', so the Deferred will
        fire with True or False.

        Only a minimal subset of peers is currently used for verification,
        so a lot of share corruption will not be caught by it. We expect to
        improve this in the future.
        """

    def checker_results_for(uri_to_check):
        """Report previously recorded checker results for an IVerifierURI.

        No checking is performed by this method: it merely returns a list
        describing checks that have taken place in the past.

        Each list element is a two-entry tuple, (when, results). 'when' is a
        timestamp (float seconds since epoch) and 'results' has the form
        defined in the check() method.

        Note: at the moment, this is specified to return synchronously. We
        might need to back away from this in the future.
        """
1343
class IClient(Interface):
    def upload(uploadable):
        """Upload some data into a CHK and get back its UploadResults.
        @param uploadable: something that implements IUploadable
        @return: a Deferred that fires with the UploadResults instance.
                 The URI for this file is available as results.uri .
        """

    def create_mutable_file(contents=""):
        """Create a new mutable file holding 'contents' and get back the URI
        string.
        @param contents: the initial contents to place in the file.
        @return: a Deferred that fires with the (string) SSK URI for the new
                 file.
        """

    def create_empty_dirnode():
        """Create a new dirnode which is empty and unattached.
        @return: a Deferred that fires with the new IDirectoryNode instance.
        """

    def create_node_from_uri(uri):
        """Synchronously create a new IFilesystemNode instance from the uri.
        @param uri: a string or IURI-providing instance. It may refer to a
                    LiteralFileNode, a CHK file node, a mutable file node, or
                    a directory node
        @return: an instance that provides IFilesystemNode (or more usefully
                 one of its subclasses). For file-specifying URIs the result
                 provides IFileNode or IMutableFileNode, like FileNode,
                 LiteralFileNode, or MutableFileNode. For
                 directory-specifying URIs the result provides
                 IDirectoryNode, like NewDirectoryNode.
        """
1376
class IClientStatus(Interface):
    def list_uploads():
        """Return a list with one IUploadStatus object for each upload
        which is currently running."""

    def list_downloads():
        """Return a list with one IDownloadStatus object for each download
        which is currently running."""
1384
class IUploadStatus(Interface):
    def get_storage_index():
        """Return the (binary) storage index in use on this upload, as a
        string. Returns None if the storage index has not yet been
        calculated."""

    def get_size():
        """Return the number of bytes that will eventually be uploaded for
        this file, as an integer. Returns None if the size is not yet known.
        """

    def using_helper():
        """Return True if a Helper is being used for this upload, False if
        not."""

    def get_status():
        """Return a string which describes the current state of the upload
        process."""

    def get_progress():
        """Return a tuple of three floats, (chk, ciphertext,
        encode_and_push), each from 0.0 to 1.0 :

         - 'chk': how much progress has been made towards hashing the file
           to determine a CHK encryption key. With non-convergent encryption
           this will be trivial; otherwise the whole file must be hashed.
         - 'ciphertext': how much of the ciphertext has been pushed to the
           helper. It is '1.0' for non-helper uploads.
         - 'encode_and_push': how much of the encode-and-push process has
           finished. For helper uploads this is dependent upon the helper
           providing progress reports.

        It might be reasonable to add all three numbers and report the sum
        to the user."""
1410
class IDownloadStatus(Interface):
    def get_storage_index():
        """Return the (binary) storage index in use on this download, as a
        string. This may be None if there is no storage index (i.e. LIT
        files)."""

    def get_size():
        """Return the number of bytes that will eventually be retrieved for
        this file, as an integer. Returns None if the size is not yet known.
        """

    def using_helper():
        """Return True if a Helper is being used for this download, False if
        not."""

    def get_status():
        """Return a string which describes the current state of the download
        process."""

    def get_progress():
        """Return a float (from 0.0 to 1.0) giving the amount of the
        download that has completed. The value stays at 0.0 until the first
        byte of plaintext is pushed to the download target."""
1429
1430
class NotCapableError(Exception):
    """An attempt was made to write to a read-only node."""
1433
class BadWriteEnablerError(Exception):
    """NOTE(review): undocumented in the original. Presumably signals that a
    supplied write-enabler was not accepted -- confirm against the places
    that raise it."""
1436
class RIControlClient(RemoteInterface):
    """Remote control interface to a node: wait for connections, move files
    between the node's local disk and the grid, and run the debug and
    measurement helpers declared below."""

    def wait_for_client_connections(num_clients=int):
        """Block (do not return) until connections to at least NUM_CLIENTS
        storage servers have been established.
        """

    def upload_from_file_to_uri(filename=str):
        """Upload a file to the grid.

        'filename' (which must be absolute) names a file on the node's local
        disk. The node reads the contents of that file, uploads it to the
        grid, and then returns the URI at which it was uploaded.
        """
        return URI

    def download_from_uri_to_file(uri=URI, filename=str):
        """Download a file from the grid and place it on the node's local
        disk at the given filename (which must be absolute[?]). The absolute
        filename where the file was written is returned."""
        return str

    # debug stuff

    def get_memory_usage():
        """Return a dict describing the amount of memory currently in use.
        Its keys are 'VmPeak', 'VmSize', and 'VmData'; its values are
        integers which measure memory consumption in bytes."""
        return DictOf(str, int)

    def speed_test(count=int, size=int, mutable=Any()):
        """Time uploads and downloads of temporary files.

        'count' tempfiles, all of the given size, are written to disk. The
        time (in seconds) taken to upload them all to the servers is
        measured, and then the time taken to download all of them. If
        'mutable' is 'create', creation of mutable files is timed. If
        'mutable' is 'upload', access to the same mutable file is timed
        instead of creating one.

        Returns a tuple of (upload_time, download_time).
        """
        return (float, float)

    def measure_peer_response_time():
        """Send a short message to each connected peer and measure how long
        each takes to respond to it. This gives a rough measure of the
        application-level round trip time.

        @return: a dictionary mapping peerid to a float (RTT time in seconds)
        """
        return DictOf(Nodeid, float)
1487
# Wire schema for the 'upload results' value returned by
# RICHKUploadHelper.upload() and RIHelper.upload_chk(). It is currently
# unconstrained (Any); the trailing comment records a stricter
# DictOf(str, str) form which is not in effect.
# NOTE(review): 'Any' is not among the names imported from foolscap.schema
# in the visible top-of-file import -- confirm that it is in scope.
UploadResults = Any() #DictOf(str, str)
1489
class RIEncryptedUploadable(RemoteInterface):
    """Remote interface to the client-side object which an upload helper
    reads from: RIHelper.upload_chk() describes it as the object that will
    provide ciphertext to RICHKUploadHelper.upload()."""
    __remote_name__ = "RIEncryptedUploadable.tahoe.allmydata.com"

    def get_size():
        """Return an int.
        NOTE(review): presumably the size in bytes of the data being
        uploaded -- confirm against the implementation."""
        return int

    def get_all_encoding_parameters():
        """Return a 4-tuple of (int, int, int, long).
        NOTE(review): the meaning of each element is not stated here --
        confirm against the implementation."""
        return (int, int, int, long)

    def read_encrypted(offset=long, length=long):
        """Return a list of strings for the given 'offset' and 'length'.
        NOTE(review): presumably the ciphertext for that byte range, split
        into pieces -- confirm against the implementation."""
        return ListOf(str)

    def get_plaintext_hashtree_leaves(first=int, last=int, num_segments=int):
        """Return a list of Hash values.
        NOTE(review): whether 'last' is inclusive is not stated here --
        confirm against the implementation."""
        return ListOf(Hash)

    def get_plaintext_hash():
        """Return a single Hash value."""
        return Hash

    def close():
        """Return None.
        NOTE(review): presumably tells the uploadable that the helper is
        done reading -- confirm against the implementation."""
        return None
1510
1511
class RICHKUploadHelper(RemoteInterface):
    """Remote interface to the object which takes care of one
    helper-assisted upload. RIHelper.upload_chk() returns one of these when
    the file still needs to be uploaded."""
    # NOTE(review): the remote name below says 'RIUploadHelper', not
    # 'RICHKUploadHelper'. It is presumably the identifier used on the wire,
    # so check deployed peers before changing it to match the class name.
    __remote_name__ = "RIUploadHelper.tahoe.allmydata.com"

    def upload(reader=RIEncryptedUploadable):
        """Run the upload, taking ciphertext from 'reader' (a reference to
        an RIEncryptedUploadable). Returns the upload results once the
        upload is finished."""
        return UploadResults
1517
1518
class RIHelper(RemoteInterface):
    __remote_name__ = "RIHelper.tahoe.allmydata.com"

    def upload_chk(si=StorageIndex):
        """Find out whether a file with the given storage index needs
        uploading.

        The helper asks the appropriate storage servers whether the file
        has already been uploaded. There are two outcomes:

         - Already uploaded: the helper returns a set of 'upload results'
           which includes whatever hashes are needed to build the read-cap,
           and perhaps a truncated sharemap.
         - Not yet uploaded (or only partially uploaded): the helper
           returns an empty upload-results dictionary together with an
           RICHKUploadHelper object that will take care of the upload
           process. The client should call upload() on that object, passing
           a reference to an RIEncryptedUploadable object that will provide
           ciphertext. When the upload is finished, the upload() method
           will finish and return the upload results.
        """
        return (UploadResults, ChoiceOf(RICHKUploadHelper, None))
1538
1539
class RIStatsProvider(RemoteInterface):
    """
    Provides access to statistics and monitoring information.
    """
    # The docstring must be the first statement in the class body; placed
    # after __remote_name__ it was a discarded expression, not __doc__.
    __remote_name__ = "RIStatsProvider.tahoe.allmydata.com"

    def get_stats():
        """
        Return the current statistics of this provider.

        @return: a dictionary containing 'counters' and 'stats', each a
                 dictionary with string counter/stat name keys, and numeric
                 values. counters are monotonically increasing measures of
                 work done, and stats are instantaneous measures
                 (potentially time averaged internally)
        """
        return DictOf(str, DictOf(str, ChoiceOf(float, int, long)))
1554
class RIStatsGatherer(RemoteInterface):
    """
    Provides a monitoring service for centralised collection of stats
    """
    # The docstring must be the first statement in the class body; placed
    # after __remote_name__ it was a discarded expression, not __doc__.
    __remote_name__ = "RIStatsGatherer.tahoe.allmydata.com"

    def provide(provider=RIStatsProvider, nickname=str):
        """
        Hand a stats provider to the gatherer.

        @param provider: a stats collector instance which should be polled
                         periodically by the gatherer to collect stats.
        @param nickname: a name useful to identify the provided client
        @return: None
        """
        return None
1568
1569
class IStatsProducer(Interface):
    def get_stats():
        """
        Return the stats to be monitored.

        @return: a dictionary whose str keys are the names of the stats to
                 be monitored, and whose values are numeric.
        """
1576