src/allmydata/interfaces.py

   1
   2 from zope.interface import Interface
   3 from foolscap.api import StringConstraint, ListOf, TupleOf, SetOf, DictOf, \
   4      ChoiceOf, IntegerConstraint, Any, RemoteInterface, Referenceable
   5
   6 HASH_SIZE=32
   7
   8 Hash = StringConstraint(maxLength=HASH_SIZE,
   9                         minLength=HASH_SIZE)# binary format 32-byte SHA256 hash
  10 Nodeid = StringConstraint(maxLength=20,
  11                           minLength=20) # binary format 20-byte SHA1 hash
  12 FURL = StringConstraint(1000)
  13 StorageIndex = StringConstraint(16)
  14 URI = StringConstraint(300) # kind of arbitrary
  15
  16 MAX_BUCKETS = 256  # per peer -- zfec offers at most 256 shares per file
  17
  18 ShareData = StringConstraint(None)
  19 URIExtensionData = StringConstraint(1000)
  20 Number = IntegerConstraint(8) # 2**(8*8) == 16EiB ~= 18e18 ~= 18 exabytes
  21 Offset = Number
  22 ReadSize = int # the 'int' constraint is 2**31 == 2Gib -- large files are processed in not-so-large increments
  23 WriteEnablerSecret = Hash # used to protect mutable bucket modifications
  24 LeaseRenewSecret = Hash # used to protect bucket lease renewal requests
  25 LeaseCancelSecret = Hash # used to protect bucket lease cancellation requests
  26
  27 class RIStubClient(RemoteInterface):
  28     """Each client publishes a service announcement for a dummy object called
  29     the StubClient. This object doesn't actually offer any services, but the
  30     announcement helps the Introducer keep track of which clients are
  31     subscribed (so the grid admin can keep track of things like the size of
  32     the grid and the client versions in use. This is the (empty)
  33     RemoteInterface for the StubClient."""
  34
  35 class RIBucketWriter(RemoteInterface):
  36     """ Objects of this kind live on the server side. """
  37     def write(offset=Offset, data=ShareData):
  38         return None
  39
  40     def close():
  41         """
  42         If the data that has been written is incomplete or inconsistent then
  43         the server will throw the data away, else it will store it for future
  44         retrieval.
  45         """
  46         return None
  47
  48     def abort():
  49         """Abandon all the data that has been written.
  50         """
  51         return None
  52
  53 class RIBucketReader(RemoteInterface):
  54     def read(offset=Offset, length=ReadSize):
  55         return ShareData
  56
  57     def advise_corrupt_share(reason=str):
  58         """Clients who discover hash failures in shares that they have
  59         downloaded from me will use this method to inform me about the
  60         failures. I will record their concern so that my operator can
  61         manually inspect the shares in question. I return None.
  62
  63         This is a wrapper around RIStorageServer.advise_corrupt_share(),
  64         which is tied to a specific share, and therefore does not need the
  65         extra share-identifying arguments. Please see that method for full
  66         documentation.
  67         """
  68
  69 TestVector = ListOf(TupleOf(Offset, ReadSize, str, str))
  70 # elements are (offset, length, operator, specimen)
  71 # operator is one of "lt, le, eq, ne, ge, gt"
  72 # nop always passes and is used to fetch data while writing.
  73 # you should use length==len(specimen) for everything except nop
  74 DataVector = ListOf(TupleOf(Offset, ShareData))
  75 # (offset, data). This limits us to 30 writes of 1MiB each per call
  76 TestAndWriteVectorsForShares = DictOf(int,
  77                                       TupleOf(TestVector,
  78                                               DataVector,
  79                                               ChoiceOf(None, Offset), # new_length
  80                                               ))
  81 ReadVector = ListOf(TupleOf(Offset, ReadSize))
  82 ReadData = ListOf(ShareData)
  83 # returns data[offset:offset+length] for each element of TestVector
  84
  85 class RIStorageServer(RemoteInterface):
  86     __remote_name__ = "RIStorageServer.tahoe.allmydata.com"
  87
  88     def get_version():
  89         """
  90         Return a dictionary of version information.
  91         """
  92         return DictOf(str, Any())
  93
  94     def allocate_buckets(storage_index=StorageIndex,
  95                          renew_secret=LeaseRenewSecret,
  96                          cancel_secret=LeaseCancelSecret,
  97                          sharenums=SetOf(int, maxLength=MAX_BUCKETS),
  98                          allocated_size=Offset, canary=Referenceable):
  99         """
 100         @param storage_index: the index of the bucket to be created or
 101                               increfed.
 102         @param sharenums: these are the share numbers (probably between 0 and
 103                           99) that the sender is proposing to store on this
 104                           server.
 105         @param renew_secret: This is the secret used to protect bucket refresh
 106                              This secret is generated by the client and
 107                              stored for later comparison by the server. Each
 108                              server is given a different secret.
 109         @param cancel_secret: Like renew_secret, but protects bucket decref.
 110         @param canary: If the canary is lost before close(), the bucket is
 111                        deleted.
 112         @return: tuple of (alreadygot, allocated), where alreadygot is what we
 113                  already have and allocated is what we hereby agree to accept.
 114                  New leases are added for shares in both lists.
 115         """
 116         return TupleOf(SetOf(int, maxLength=MAX_BUCKETS),
 117                        DictOf(int, RIBucketWriter, maxKeys=MAX_BUCKETS))
 118
 119     def add_lease(storage_index=StorageIndex,
 120                   renew_secret=LeaseRenewSecret,
 121                   cancel_secret=LeaseCancelSecret):
 122         """
 123         Add a new lease on the given bucket. If the renew_secret matches an
 124         existing lease, that lease will be renewed instead. If there is no
 125         bucket for the given storage_index, return silently. (note that in
 126         tahoe-1.3.0 and earlier, IndexError was raised if there was no
 127         bucket)
 128         """
 129         return Any() # returns None now, but future versions might change
 130
 131     def renew_lease(storage_index=StorageIndex, renew_secret=LeaseRenewSecret):
 132         """
 133         Renew the lease on a given bucket, resetting the timer to 31 days.
 134         Some networks will use this, some will not. If there is no bucket for
 135         the given storage_index, IndexError will be raised.
 136
 137         For mutable shares, if the given renew_secret does not match an
 138         existing lease, IndexError will be raised with a note listing the
 139         server-nodeids on the existing leases, so leases on migrated shares
 140         can be renewed or cancelled. For immutable shares, IndexError
 141         (without the note) will be raised.
 142         """
 143         return Any()
 144
 145     def cancel_lease(storage_index=StorageIndex,
 146                      cancel_secret=LeaseCancelSecret):
 147         """
 148         Cancel the lease on a given bucket. If this was the last lease on the
 149         bucket, the bucket will be deleted. If there is no bucket for the
 150         given storage_index, IndexError will be raised.
 151
 152         For mutable shares, if the given cancel_secret does not match an
 153         existing lease, IndexError will be raised with a note listing the
 154         server-nodeids on the existing leases, so leases on migrated shares
 155         can be renewed or cancelled. For immutable shares, IndexError
 156         (without the note) will be raised.
 157         """
 158         return Any()
 159
 160     def get_buckets(storage_index=StorageIndex):
 161         return DictOf(int, RIBucketReader, maxKeys=MAX_BUCKETS)
 162
 163
 164
 165     def slot_readv(storage_index=StorageIndex,
 166                    shares=ListOf(int), readv=ReadVector):
 167         """Read a vector from the numbered shares associated with the given
 168         storage index. An empty shares list means to return data from all
 169         known shares. Returns a dictionary with one key per share."""
 170         return DictOf(int, ReadData) # shnum -> results
 171
 172     def slot_testv_and_readv_and_writev(storage_index=StorageIndex,
 173                                         secrets=TupleOf(WriteEnablerSecret,
 174                                                         LeaseRenewSecret,
 175                                                         LeaseCancelSecret),
 176                                         tw_vectors=TestAndWriteVectorsForShares,
 177                                         r_vector=ReadVector,
 178                                         ):
 179         """General-purpose test-and-set operation for mutable slots. Perform
 180         a bunch of comparisons against the existing shares. If they all pass,
 181         then apply a bunch of write vectors to those shares. Then use the
 182         read vectors to extract data from all the shares and return the data.
 183
 184         This method is, um, large. The goal is to allow clients to update all
 185         the shares associated with a mutable file in a single round trip.
 186
 187         @param storage_index: the index of the bucket to be created or
 188                               increfed.
 189         @param write_enabler: a secret that is stored along with the slot.
 190                               Writes are accepted from any caller who can
 191                               present the matching secret. A different secret
 192                               should be used for each slot*server pair.
 193         @param renew_secret: This is the secret used to protect bucket refresh
 194                              This secret is generated by the client and
 195                              stored for later comparison by the server. Each
 196                              server is given a different secret.
 197         @param cancel_secret: Like renew_secret, but protects bucket decref.
 198
 199         The 'secrets' argument is a tuple of (write_enabler, renew_secret,
 200         cancel_secret). The first is required to perform any write. The
 201         latter two are used when allocating new shares. To simply acquire a
 202         new lease on existing shares, use an empty testv and an empty writev.
 203
 204         Each share can have a separate test vector (i.e. a list of
 205         comparisons to perform). If all vectors for all shares pass, then all
 206         writes for all shares are recorded. Each comparison is a 4-tuple of
 207         (offset, length, operator, specimen), which effectively does a bool(
 208         (read(offset, length)) OPERATOR specimen ) and only performs the
 209         write if all these evaluate to True. Basic test-and-set uses 'eq'.
 210         Write-if-newer uses a seqnum and (offset, length, 'lt', specimen).
 211         Write-if-same-or-newer uses 'le'.
 212
 213         Reads from the end of the container are truncated, and missing shares
 214         behave like empty ones, so to assert that a share doesn't exist (for
 215         use when creating a new share), use (0, 1, 'eq', '').
 216
 217         The write vector will be applied to the given share, expanding it if
 218         necessary. A write vector applied to a share number that did not
 219         exist previously will cause that share to be created.
 220
 221         Each write vector is accompanied by a 'new_length' argument. If
 222         new_length is not None, use it to set the size of the container. This
 223         can be used to pre-allocate space for a series of upcoming writes, or
 224         truncate existing data. If the container is growing, new_length will
 225         be applied before datav. If the container is shrinking, it will be
 226         applied afterwards. If new_length==0, the share will be deleted.
 227
 228         The read vector is used to extract data from all known shares,
 229         *before* any writes have been applied. The same vector is used for
 230         all shares. This captures the state that was tested by the test
 231         vector.
 232
 233         This method returns two values: a boolean and a dict. The boolean is
 234         True if the write vectors were applied, False if not. The dict is
 235         keyed by share number, and each value contains a list of strings, one
 236         for each element of the read vector.
 237
 238         If the write_enabler is wrong, this will raise BadWriteEnablerError.
 239         To enable share migration (using update_write_enabler), the exception
 240         will have the nodeid used for the old write enabler embedded in it,
 241         in the following string::
 242
 243          The write enabler was recorded by nodeid '%s'.
 244
 245         Note that the nodeid here is encoded using the same base32 encoding
 246         used by Foolscap and allmydata.util.idlib.nodeid_b2a().
 247
 248         """
 249         return TupleOf(bool, DictOf(int, ReadData))
 250
 251     def advise_corrupt_share(share_type=str, storage_index=StorageIndex,
 252                              shnum=int, reason=str):
 253         """Clients who discover hash failures in shares that they have
 254         downloaded from me will use this method to inform me about the
 255         failures. I will record their concern so that my operator can
 256         manually inspect the shares in question. I return None.
 257
 258         'share_type' is either 'mutable' or 'immutable'. 'storage_index' is a
 259         (binary) storage index string, and 'shnum' is the integer share
 260         number. 'reason' is a human-readable explanation of the problem,
 261         probably including some expected hash values and the computed ones
 262         which did not match. Corruption advisories for mutable shares should
 263         include a hash of the public key (the same value that appears in the
 264         mutable-file verify-cap), since the current share format does not
 265         store that on disk.
 266         """
 267
 268 class IStorageBucketWriter(Interface):
 269     """
 270     Objects of this kind live on the client side.
 271     """
 272     def put_block(segmentnum=int, data=ShareData):
 273         """@param data: For most segments, this data will be 'blocksize'
 274         bytes in length. The last segment might be shorter.
 275         @return: a Deferred that fires (with None) when the operation completes
 276         """
 277
 278     def put_plaintext_hashes(hashes=ListOf(Hash)):
 279         """
 280         @return: a Deferred that fires (with None) when the operation completes
 281         """
 282
 283     def put_crypttext_hashes(hashes=ListOf(Hash)):
 284         """
 285         @return: a Deferred that fires (with None) when the operation completes
 286         """
 287
 288     def put_block_hashes(blockhashes=ListOf(Hash)):
 289         """
 290         @return: a Deferred that fires (with None) when the operation completes
 291         """
 292
 293     def put_share_hashes(sharehashes=ListOf(TupleOf(int, Hash))):
 294         """
 295         @return: a Deferred that fires (with None) when the operation completes
 296         """
 297
 298     def put_uri_extension(data=URIExtensionData):
 299         """This block of data contains integrity-checking information (hashes
 300         of plaintext, crypttext, and shares), as well as encoding parameters
 301         that are necessary to recover the data. This is a serialized dict
 302         mapping strings to other strings. The hash of this data is kept in
 303         the URI and verified before any of the data is used. All buckets for
 304         a given file contain identical copies of this data.
 305
 306         The serialization format is specified with the following pseudocode:
 307         for k in sorted(dict.keys()):
 308             assert re.match(r'^[a-zA-Z_\-]+$', k)
 309             write(k + ':' + netstring(dict[k]))
 310
 311         @return: a Deferred that fires (with None) when the operation completes
 312         """
 313
 314     def close():
 315         """Finish writing and close the bucket. The share is not finalized
 316         until this method is called: if the uploading client disconnects
 317         before calling close(), the partially-written share will be
 318         discarded.
 319
 320         @return: a Deferred that fires (with None) when the operation completes
 321         """
 322
 323 class IStorageBucketReader(Interface):
 324
 325     def get_block_data(blocknum=int, blocksize=int, size=int):
 326         """Most blocks will be the same size. The last block might be shorter
 327         than the others.
 328
 329         @return: ShareData
 330         """
 331
 332     def get_crypttext_hashes():
 333         """
 334         @return: ListOf(Hash)
 335         """
 336
 337     def get_block_hashes(at_least_these=SetOf(int)):
 338         """
 339         @return: ListOf(Hash)
 340         """
 341
 342     def get_share_hashes(at_least_these=SetOf(int)):
 343         """
 344         @return: ListOf(TupleOf(int, Hash))
 345         """
 346
 347     def get_uri_extension():
 348         """
 349         @return: URIExtensionData
 350         """
 351
 352 class IStorageBroker(Interface):
 353     def get_servers_for_index(peer_selection_index):
 354         """
 355         @return: list of (peerid, versioned-rref) tuples
 356         """
 357     def get_all_servers():
 358         """
 359         @return: frozenset of (peerid, versioned-rref) tuples
 360         """
 361     def get_all_serverids():
 362         """
 363         @return: frozenset of serverid strings
 364         """
 365     def get_nickname_for_serverid(serverid):
 366         """
 367         @return: unicode nickname, or None
 368         """
 369
 370     # methods moved from IntroducerClient, need review
 371     def get_all_connections():
 372         """Return a frozenset of (nodeid, service_name, rref) tuples, one for
 373         each active connection we've established to a remote service. This is
 374         mostly useful for unit tests that need to wait until a certain number
 375         of connections have been made."""
 376
 377     def get_all_connectors():
 378         """Return a dict that maps from (nodeid, service_name) to a
 379         RemoteServiceConnector instance for all services that we are actively
 380         trying to connect to. Each RemoteServiceConnector has the following
 381         public attributes::
 382
 383           service_name: the type of service provided, like 'storage'
 384           announcement_time: when we first heard about this service
 385           last_connect_time: when we last established a connection
 386           last_loss_time: when we last lost a connection
 387
 388           version: the peer's version, from the most recent connection
 389           oldest_supported: the peer's oldest supported version, same
 390
 391           rref: the RemoteReference, if connected, otherwise None
 392           remote_host: the IAddress, if connected, otherwise None
 393
 394         This method is intended for monitoring interfaces, such as a web page
 395         which describes connecting and connected peers.
 396         """
 397
 398     def get_all_peerids():
 399         """Return a frozenset of all peerids to whom we have a connection (to
 400         one or more services) established. Mostly useful for unit tests."""
 401
 402     def get_all_connections_for(service_name):
 403         """Return a frozenset of (nodeid, service_name, rref) tuples, one
 404         for each active connection that provides the given SERVICE_NAME."""
 405
 406     def get_permuted_peers(service_name, key):
 407         """Returns an ordered list of (peerid, rref) tuples, selecting from
 408         the connections that provide SERVICE_NAME, using a hash-based
 409         permutation keyed by KEY. This randomizes the service list in a
 410         repeatable way, to distribute load over many peers.
 411         """
 412
 413
 414 class IURI(Interface):
 415     def init_from_string(uri):
 416         """Accept a string (as created by my to_string() method) and populate
 417         this instance with its data. I am not normally called directly,
 418         please use the module-level uri.from_string() function to convert
 419         arbitrary URI strings into IURI-providing instances."""
 420
 421     def is_readonly():
 422         """Return False if this URI be used to modify the data. Return True
 423         if this URI cannot be used to modify the data."""
 424
 425     def is_mutable():
 426         """Return True if the data can be modified by *somebody* (perhaps
 427         someone who has a more powerful URI than this one)."""
 428
 429     def get_readonly():
 430         """Return another IURI instance, which represents a read-only form of
 431         this one. If is_readonly() is True, this returns self."""
 432
 433     def get_verify_cap():
 434         """Return an instance that provides IVerifierURI, which can be used
 435         to check on the availability of the file or directory, without
 436         providing enough capabilities to actually read or modify the
 437         contents. This may return None if the file does not need checking or
 438         verification (e.g. LIT URIs).
 439         """
 440
 441     def to_string():
 442         """Return a string of printable ASCII characters, suitable for
 443         passing into init_from_string."""
 444
 445 class IVerifierURI(Interface, IURI):
 446     def init_from_string(uri):
 447         """Accept a string (as created by my to_string() method) and populate
 448         this instance with its data. I am not normally called directly,
 449         please use the module-level uri.from_string() function to convert
 450         arbitrary URI strings into IURI-providing instances."""
 451
 452     def to_string():
 453         """Return a string of printable ASCII characters, suitable for
 454         passing into init_from_string."""
 455
 456 class IDirnodeURI(Interface):
 457     """I am a URI which represents a dirnode."""
 458
 459
 460 class IFileURI(Interface):
 461     """I am a URI which represents a filenode."""
 462     def get_size():
 463         """Return the length (in bytes) of the file that I represent."""
 464
 465 class IImmutableFileURI(IFileURI):
 466     pass
 467
 468 class IMutableFileURI(Interface):
 469     """I am a URI which represents a mutable filenode."""
 470 class IDirectoryURI(Interface):
 471     pass
 472 class IReadonlyDirectoryURI(Interface):
 473     pass
 474
 475 class CannotPackUnknownNodeError(Exception):
 476     """UnknownNodes (using filecaps from the future that we don't understand)
 477     cannot yet be copied safely, so I refuse to copy them."""
 478
 479 class UnhandledCapTypeError(Exception):
 480     """I recognize the cap/URI, but I cannot create an IFilesystemNode for
 481     it."""
 482
 483 class IFilesystemNode(Interface):
 484     def get_uri():
 485         """
 486         Return the URI string that can be used by others to get access to
 487         this node. If this node is read-only, the URI will only offer
 488         read-only access. If this node is read-write, the URI will offer
 489         read-write access.
 490
 491         If you have read-write access to a node and wish to share merely
 492         read-only access with others, use get_readonly_uri().
 493         """
 494
 495     def get_readonly_uri():
 496         """Return the URI string that can be used by others to get read-only
 497         access to this node. The result is a read-only URI, regardless of
 498         whether this node is read-only or read-write.
 499
 500         If you have merely read-only access to this node, get_readonly_uri()
 501         will return the same thing as get_uri().
 502         """
 503
 504     def get_repair_cap():
 505         """Return an IURI instance that can be used to repair the file, or
 506         None if this node cannot be repaired (either because it is not
 507         distributed, like a LIT file, or because the node does not represent
 508         sufficient authority to create a repair-cap, like a read-only RSA
 509         mutable file node [which cannot create the correct write-enablers]).
 510         """
 511
 512     def get_verify_cap():
 513         """Return an IVerifierURI instance that represents the
 514         'verifiy/refresh capability' for this node. The holder of this
 515         capability will be able to renew the lease for this node, protecting
 516         it from garbage-collection. They will also be able to ask a server if
 517         it holds a share for the file or directory.
 518         """
 519
 520     def get_storage_index():
 521         """Return a string with the (binary) storage index in use on this
 522         download. This may be None if there is no storage index (i.e. LIT
 523         files)."""
 524
 525     def is_readonly():
 526         """Return True if this reference provides mutable access to the given
 527         file or directory (i.e. if you can modify it), or False if not. Note
 528         that even if this reference is read-only, someone else may hold a
 529         read-write reference to it."""
 530
 531     def is_mutable():
 532         """Return True if this file or directory is mutable (by *somebody*,
 533         not necessarily you), False if it is is immutable. Note that a file
 534         might be mutable overall, but your reference to it might be
 535         read-only. On the other hand, all references to an immutable file
 536         will be read-only; there are no read-write references to an immutable
 537         file.
 538         """
 539
 540 class IMutableFilesystemNode(IFilesystemNode):
 541     pass
 542
 543 class IFileNode(IFilesystemNode):
 544     def download(target):
 545         """Download the file's contents to a given IDownloadTarget"""
 546
 547     def download_to_data():
 548         """Download the file's contents. Return a Deferred that fires
 549         with those contents."""
 550
 551     def get_size():
 552         """Return the length (in bytes) of the data this node represents."""
 553
 554     def read(consumer, offset=0, size=None):
 555         """Download a portion (possibly all) of the file's contents, making
 556         them available to the given IConsumer. Return a Deferred that fires
 557         (with the consumer) when the consumer is unregistered (either because
 558         the last byte has been given to it, or because the consumer threw an
 559         exception during write(), possibly because it no longer wants to
 560         receive data). The portion downloaded will start at 'offset' and
 561         contain 'size' bytes (or the remainder of the file if size==None).
 562
 563         The consumer will be used in non-streaming mode: an IPullProducer
 564         will be attached to it.
 565
 566         The consumer will not receive data right away: several network trips
 567         must occur first. The order of events will be::
 568
 569          consumer.registerProducer(p, streaming)
 570           (if streaming == False)::
 571            consumer does p.resumeProducing()
 572             consumer.write(data)
 573            consumer does p.resumeProducing()
 574             consumer.write(data).. (repeat until all data is written)
 575          consumer.unregisterProducer()
 576          deferred.callback(consumer)
 577
 578         If a download error occurs, or an exception is raised by
 579         consumer.registerProducer() or consumer.write(), I will call
 580         consumer.unregisterProducer() and then deliver the exception via
 581         deferred.errback(). To cancel the download, the consumer should call
 582         p.stopProducing(), which will result in an exception being delivered
 583         via deferred.errback().
 584
 585         A simple download-to-memory consumer example would look like this::
 586
 587          class MemoryConsumer:
 588            implements(IConsumer)
 589            def __init__(self):
 590              self.chunks = []
 591              self.done = False
 592            def registerProducer(self, p, streaming):
 593              assert streaming == False
 594              while not self.done:
 595                p.resumeProducing()
 596            def write(self, data):
 597              self.chunks.append(data)
 598            def unregisterProducer(self):
 599              self.done = True
 600          d = filenode.read(MemoryConsumer())
 601          d.addCallback(lambda mc: "".join(mc.chunks))
 602          return d
 603
 604         """
 605
 606 class IMutableFileNode(IFileNode, IMutableFilesystemNode):
 607     """I provide access to a 'mutable file', which retains its identity
 608     regardless of what contents are put in it.
 609
 610     The consistency-vs-availability problem means that there might be
 611     multiple versions of a file present in the grid, some of which might be
 612     unrecoverable (i.e. have fewer than 'k' shares). These versions are
 613     loosely ordered: each has a sequence number and a hash, and any version
 614     with seqnum=N was uploaded by a node which has seen at least one version
 615     with seqnum=N-1.
 616
 617     The 'servermap' (an instance of IMutableFileServerMap) is used to
 618     describe the versions that are known to be present in the grid, and which
 619     servers are hosting their shares. It is used to represent the 'state of
 620     the world', and is used for this purpose by my test-and-set operations.
 621     Downloading the contents of the mutable file will also return a
 622     servermap. Uploading a new version into the mutable file requires a
 623     servermap as input, and the semantics of the replace operation is
 624     'replace the file with my new version if it looks like nobody else has
 625     changed the file since my previous download'. Because the file is
 626     distributed, this is not a perfect test-and-set operation, but it will do
 627     its best. If the replace process sees evidence of a simultaneous write,
 628     it will signal an UncoordinatedWriteError, so that the caller can take
 629     corrective action.
 630
 631
 632     Most readers will want to use the 'best' current version of the file, and
 633     should use my 'download_best_version()' method.
 634
 635     To unconditionally replace the file, callers should use overwrite(). This
 636     is the mode that user-visible mutable files will probably use.
 637
 638     To apply some delta to the file, call modify() with a callable modifier
 639     function that can apply the modification that you want to make. This is
 640     the mode that dirnodes will use, since most directory modification
 641     operations can be expressed in terms of deltas to the directory state.
 642
 643
 644     Three methods are available for users who need to perform more complex
 645     operations. The first is get_servermap(), which returns an up-to-date
 646     servermap using a specified mode. The second is download_version(), which
 647     downloads a specific version (not necessarily the 'best' one). The third
 648     is 'upload', which accepts new contents and a servermap (which must have
 649     been updated with MODE_WRITE). The upload method will attempt to apply
 650     the new contents as long as no other node has modified the file since the
 651     servermap was updated. This might be useful to a caller who wants to
 652     merge multiple versions into a single new one.
 653
 654     Note that each time the servermap is updated, a specific 'mode' is used,
 655     which determines how many peers are queried. To use a servermap for my
 656     replace() method, that servermap must have been updated in MODE_WRITE.
 657     These modes are defined in allmydata.mutable.common, and consist of
 658     MODE_READ, MODE_WRITE, MODE_ANYTHING, and MODE_CHECK. Please look in
 659     allmydata/mutable/servermap.py for details about the differences.
 660
 661     Mutable files are currently limited in size (about 3.5MB max) and can
 662     only be retrieved and updated all-at-once, as a single big string. Future
 663     versions of our mutable files will remove this restriction.
 664     """
 665
 666     def download_best_version():
 667         """Download the 'best' available version of the file, meaning one of
 668         the recoverable versions with the highest sequence number. If no
 669         uncoordinated writes have occurred, and if enough shares are
 670         available, then this will be the most recent version that has been
 671         uploaded.
 672
 673         I update an internal servermap with MODE_READ, determine which
 674         version of the file is indicated by
 675         servermap.best_recoverable_version(), and return a Deferred that
 676         fires with its contents. If no version is recoverable, the Deferred
 677         will errback with UnrecoverableFileError.
 678         """
 679
 680     def get_size_of_best_version():
 681         """Find the size of the version that would be downloaded with
 682         download_best_version(), without actually downloading the whole file.
 683
 684         I return a Deferred that fires with an integer.
 685         """
 686
 687     def overwrite(new_contents):
 688         """Unconditionally replace the contents of the mutable file with new
 689         ones. This simply chains get_servermap(MODE_WRITE) and upload(). This
 690         is only appropriate to use when the new contents of the file are
 691         completely unrelated to the old ones, and you do not care about other
 692         clients' changes.
 693
 694         I return a Deferred that fires (with a PublishStatus object) when the
 695         update has completed.
 696         """
 697
 698     def modify(modifier_cb):
 699         """Modify the contents of the file, by downloading the current
 700         version, applying the modifier function (or bound method), then
 701         uploading the new version. I return a Deferred that fires (with a
 702         PublishStatus object) when the update is complete.
 703
 704         The modifier callable will be given three arguments: a string (with
 705         the old contents), a 'first_time' boolean, and a servermap. As with
 706         download_best_version(), the old contents will be from the best
 707         recoverable version, but the modifier can use the servermap to make
 708         other decisions (such as refusing to apply the delta if there are
 709         multiple parallel versions, or if there is evidence of a newer
 710         unrecoverable version). 'first_time' will be True the first time the
 711         modifier is called, and False on any subsequent calls.
 712
 713         The callable should return a string with the new contents. The
 714         callable must be prepared to be called multiple times, and must
 715         examine the input string to see if the change that it wants to make
 716         is already present in the old version. If it does not need to make
 717         any changes, it can either return None, or return its input string.
 718
 719         If the modifier raises an exception, it will be returned in the
 720         errback.
 721         """
 722
 723
 724     def get_servermap(mode):
 725         """Return a Deferred that fires with an IMutableFileServerMap
 726         instance, updated using the given mode.
 727         """
 728
 729     def download_version(servermap, version):
 730         """Download a specific version of the file, using the servermap
 731         as a guide to where the shares are located.
 732
 733         I return a Deferred that fires with the requested contents, or
 734         errbacks with UnrecoverableFileError. Note that a servermap which was
 735         updated with MODE_ANYTHING or MODE_READ may not know about shares for
 736         all versions (those modes stop querying servers as soon as they can
 737         fulfil their goals), so you may want to use MODE_CHECK (which checks
 738         everything) to get increased visibility.
 739         """
 740
 741     def upload(new_contents, servermap):
 742         """Replace the contents of the file with new ones. This requires a
 743         servermap that was previously updated with MODE_WRITE.
 744
 745         I attempt to provide test-and-set semantics, in that I will avoid
 746         modifying any share that is different than the version I saw in the
 747         servermap. However, if another node is writing to the file at the
 748         same time as me, I may manage to update some shares while they update
 749         others. If I see any evidence of this, I will signal
 750         UncoordinatedWriteError, and the file will be left in an inconsistent
 751         state (possibly the version you provided, possibly the old version,
 752         possibly somebody else's version, and possibly a mix of shares from
 753         all of these).
 754
 755         The recommended response to UncoordinatedWriteError is to either
 756         return it to the caller (since they failed to coordinate their
 757         writes), or to attempt some sort of recovery. It may be sufficient to
 758         wait a random interval (with exponential backoff) and repeat your
 759         operation. If I do not signal UncoordinatedWriteError, then I was
 760         able to write the new version without incident.
 761
 762         I return a Deferred that fires (with a PublishStatus object) when the
 763         publish has completed. I will update the servermap in-place with the
 764         location of all new shares.
 765         """
 766
 767     def get_writekey():
 768         """Return this filenode's writekey, or None if the node does not have
 769         write-capability. This may be used to assist with data structures
 770         that need to make certain data available only to writers, such as the
 771         read-write child caps in dirnodes. The recommended process is to have
 772         reader-visible data be submitted to the filenode in the clear (where
 773         it will be encrypted by the filenode using the readkey), but encrypt
 774         writer-visible data using this writekey.
 775         """
 776
 777 class NotEnoughSharesError(Exception):
 778     """Download was unable to get enough shares, or upload was unable to
 779     place 'shares_of_happiness' shares."""
 780
 781 class NoSharesError(Exception):
 782     """Upload or Download was unable to get any shares at all."""
 783
 784 class UnableToFetchCriticalDownloadDataError(Exception):
 785     """I was unable to fetch some piece of critical data which is supposed to
 786     be identically present in all shares."""
 787
 788 class NoServersError(Exception):
 789     """Upload wasn't given any servers to work with, usually indicating a
 790     network or Introducer problem."""
 791
 792 class ExistingChildError(Exception):
 793     """A directory node was asked to add or replace a child that already
 794     exists, and overwrite= was set to False."""
 795
 796 class NoSuchChildError(Exception):
 797     """A directory node was asked to fetch a child which does not exist."""
 798
 799 class IDirectoryNode(IMutableFilesystemNode):
 800     """I represent a name-to-child mapping, holding the tahoe equivalent of a
 801     directory. All child names are unicode strings, and all children are some
 802     sort of IFilesystemNode (either files or subdirectories).
 803     """
 804
 805     def get_uri():
 806         """
 807         The dirnode ('1') URI returned by this method can be used in
 808         set_uri() on a different directory ('2') to 'mount' a reference to
 809         this directory ('1') under the other ('2'). This URI is just a
 810         string, so it can be passed around through email or other out-of-band
 811         protocol.
 812         """
 813
 814     def get_readonly_uri():
 815         """
 816         The dirnode ('1') URI returned by this method can be used in
 817         set_uri() on a different directory ('2') to 'mount' a reference to
 818         this directory ('1') under the other ('2'). This URI is just a
 819         string, so it can be passed around through email or other out-of-band
 820         protocol.
 821         """
 822
 823     def list():
 824         """I return a Deferred that fires with a dictionary mapping child
 825         name (a unicode string) to (node, metadata_dict) tuples, in which
 826         'node' is either an IFileNode or IDirectoryNode, and 'metadata_dict'
 827         is a dictionary of metadata."""
 828
 829     def has_child(name):
 830         """I return a Deferred that fires with a boolean, True if there
 831         exists a child of the given name, False if not. The child name must
 832         be a unicode string."""
 833
 834     def get(name):
 835         """I return a Deferred that fires with a specific named child node,
 836         either an IFileNode or an IDirectoryNode. The child name must be a
 837         unicode string. I raise NoSuchChildError if I do not have a child by
 838         that name."""
 839
 840     def get_metadata_for(name):
 841         """I return a Deferred that fires with the metadata dictionary for a
 842         specific named child node. This metadata is stored in the *edge*, not
 843         in the child, so it is attached to the parent dirnode rather than the
 844         child dir-or-file-node. The child name must be a unicode string. I
 845         raise NoSuchChildError if I do not have a child by that name."""
 846
 847     def set_metadata_for(name, metadata):
 848         """I replace any existing metadata for the named child with the new
 849         metadata. The child name must be a unicode string. This metadata is
 850         stored in the *edge*, not in the child, so it is attached to the
 851         parent dirnode rather than the child dir-or-file-node. I return a
 852         Deferred (that fires with this dirnode) when the operation is
 853         complete. I raise NoSuchChildError if I do not have a child by that
 854         name."""
 855
 856     def get_child_at_path(path):
 857         """Transform a child path into an IDirectoryNode or IFileNode.
 858
 859         I perform a recursive series of 'get' operations to find the named
 860         descendant node. I return a Deferred that fires with the node, or
 861         errbacks with NoSuchChildError if the node could not be found.
 862
 863         The path can be either a single string (slash-separated) or a list of
 864         path-name elements. All elements must be unicode strings.
 865         """
 866
 867     def get_child_and_metadata_at_path(path):
 868         """Transform a child path into an IDirectoryNode/IFileNode and
 869         metadata.
 870
 871         I am like get_child_at_path(), but my Deferred fires with a tuple of
 872         (node, metadata). The metadata comes from the last edge. If the path
 873         is empty, the metadata will be an empty dictionary.
 874         """
 875
 876     def set_uri(name, writecap, readcap=None, metadata=None, overwrite=True):
 877         """I add a child (by writecap+readcap) at the specific name. I return
 878         a Deferred that fires when the operation finishes. If overwrite= is
 879         True, I will replace any existing child of the same name, otherwise
 880         an existing child will cause me to return ExistingChildError. The
 881         child name must be a unicode string.
 882
 883         The child caps could be for a file, or for a directory. If the new
 884         child is read/write, you will provide both writecap and readcap. If
 885         the child is read-only, you will provide the readcap write (i.e. the
 886         writecap= and readcap= arguments will both be the child's readcap).
 887         The filecaps are typically obtained from an IFilesystemNode with
 888         get_uri() and get_readonly_uri().
 889
 890         If metadata= is provided, I will use it as the metadata for the named
 891         edge. This will replace any existing metadata. If metadata= is left
 892         as the default value of None, I will set ['mtime'] to the current
 893         time, and I will set ['ctime'] to the current time if there was not
 894         already a child by this name present. This roughly matches the
 895         ctime/mtime semantics of traditional filesystems.
 896
 897         If this directory node is read-only, the Deferred will errback with a
 898         NotMutableError."""
 899
 900     def set_children(entries, overwrite=True):
 901         """Add multiple children (by writecap+readcap) to a directory node.
 902         Takes a dictionary, with childname as keys and (writecap, readcap)
 903         tuples (or (writecap, readcap, metadata) triples) as values. Returns
 904         a Deferred that fires (with this dirnode) when the operation
 905         finishes. This is equivalent to calling set_uri() multiple times, but
 906         is much more efficient. All child names must be unicode strings.
 907         """
 908
 909     def set_node(name, child, metadata=None, overwrite=True):
 910         """I add a child at the specific name. I return a Deferred that fires
 911         when the operation finishes. This Deferred will fire with the child
 912         node that was just added. I will replace any existing child of the
 913         same name. The child name must be a unicode string. The 'child'
 914         instance must be an instance providing IDirectoryNode or IFileNode.
 915
 916         If metadata= is provided, I will use it as the metadata for the named
 917         edge. This will replace any existing metadata. If metadata= is left
 918         as the default value of None, I will set ['mtime'] to the current
 919         time, and I will set ['ctime'] to the current time if there was not
 920         already a child by this name present. This roughly matches the
 921         ctime/mtime semantics of traditional filesystems.
 922
 923         If this directory node is read-only, the Deferred will errback with a
 924         NotMutableError."""
 925
 926     def set_nodes(entries, overwrite=True):
 927         """Add multiple children to a directory node. Takes a dict mapping
 928         unicode childname to (child_node, metdata) tuples. If metdata=None,
 929         the original metadata is left unmodified. Returns a Deferred that
 930         fires (with this dirnode) when the operation finishes. This is
 931         equivalent to calling set_node() multiple times, but is much more
 932         efficient."""
 933
 934     def add_file(name, uploadable, metadata=None, overwrite=True):
 935         """I upload a file (using the given IUploadable), then attach the
 936         resulting FileNode to the directory at the given name. I set metadata
 937         the same way as set_uri and set_node. The child name must be a
 938         unicode string.
 939
 940         I return a Deferred that fires (with the IFileNode of the uploaded
 941         file) when the operation completes."""
 942
 943     def delete(name):
 944         """I remove the child at the specific name. I return a Deferred that
 945         fires when the operation finishes. The child name must be a unicode
 946         string. I raise NoSuchChildError if I do not have a child by that
 947         name."""
 948
 949     def create_subdirectory(name, initial_children={}, overwrite=True):
 950         """I create and attach a directory at the given name. The new
 951         directory can be empty, or it can be populated with children
 952         according to 'initial_children', which takes a dictionary in the same
 953         format as set_nodes (i.e. mapping unicode child name to (childnode,
 954         metadata) tuples). The child name must be a unicode string. I return
 955         a Deferred that fires (with the new directory node) when the
 956         operation finishes."""
 957
 958     def move_child_to(current_child_name, new_parent, new_child_name=None,
 959                       overwrite=True):
 960         """I take one of my children and move them to a new parent. The child
 961         is referenced by name. On the new parent, the child will live under
 962         'new_child_name', which defaults to 'current_child_name'. TODO: what
 963         should we do about metadata? I return a Deferred that fires when the
 964         operation finishes. The child name must be a unicode string. I raise
 965         NoSuchChildError if I do not have a child by that name."""
 966
 967     def build_manifest():
 968         """I generate a table of everything reachable from this directory.
 969         I also compute deep-stats as described below.
 970
 971         I return a Monitor. The Monitor's results will be a dictionary with
 972         four elements:
 973
 974          res['manifest']: a list of (path, cap) tuples for all nodes
 975                           (directories and files) reachable from this one.
 976                           'path' will be a tuple of unicode strings. The
 977                           origin dirnode will be represented by an empty path
 978                           tuple.
 979          res['verifycaps']: a list of (printable) verifycap strings, one for
 980                             each reachable non-LIT node. This is a set:
 981                             it will contain no duplicates.
 982          res['storage-index']: a list of (base32) storage index strings,
 983                                one for each reachable non-LIT node. This is
 984                                a set: it will contain no duplicates.
 985          res['stats']: a dictionary, the same that is generated by
 986                        start_deep_stats() below.
 987
 988         The Monitor will also have an .origin_si attribute with the (binary)
 989         storage index of the starting point.
 990         """
 991
 992     def start_deep_stats():
 993         """Return a Monitor, examining all nodes (directories and files)
 994         reachable from this one. The Monitor's results will be a dictionary
 995         with the following keys::
 996
 997            count-immutable-files: count of how many CHK files are in the set
 998            count-mutable-files: same, for mutable files (does not include
 999                                 directories)
1000            count-literal-files: same, for LIT files
1001            count-files: sum of the above three
1002
1003            count-directories: count of directories
1004
1005            size-immutable-files: total bytes for all CHK files in the set
1006            size-mutable-files (TODO): same, for current version of all mutable
1007                                       files, does not include directories
1008            size-literal-files: same, for LIT files
1009            size-directories: size of mutable files used by directories
1010
1011            largest-directory: number of bytes in the largest directory
1012            largest-directory-children: number of children in the largest
1013                                        directory
1014            largest-immutable-file: number of bytes in the largest CHK file
1015
1016         size-mutable-files is not yet implemented, because it would involve
1017         even more queries than deep_stats does.
1018
1019         The Monitor will also have an .origin_si attribute with the (binary)
1020         storage index of the starting point.
1021
1022         This operation will visit every directory node underneath this one,
1023         and can take a long time to run. On a typical workstation with good
1024         bandwidth, this can examine roughly 15 directories per second (and
1025         takes several minutes of 100% CPU for ~1700 directories).
1026         """
1027
1028 class ICodecEncoder(Interface):
1029     def set_params(data_size, required_shares, max_shares):
1030         """Set up the parameters of this encoder.
1031
1032         This prepares the encoder to perform an operation that converts a
1033         single block of data into a number of shares, such that a future
1034         ICodecDecoder can use a subset of these shares to recover the
1035         original data. This operation is invoked by calling encode(). Once
1036         the encoding parameters are set up, the encode operation can be
1037         invoked multiple times.
1038
1039         set_params() prepares the encoder to accept blocks of input data that
1040         are exactly 'data_size' bytes in length. The encoder will be prepared
1041         to produce 'max_shares' shares for each encode() operation (although
1042         see the 'desired_share_ids' to use less CPU). The encoding math will
1043         be chosen such that the decoder can get by with as few as
1044         'required_shares' of these shares and still reproduce the original
1045         data. For example, set_params(1000, 5, 5) offers no redundancy at
1046         all, whereas set_params(1000, 1, 10) provides 10x redundancy.
1047
1048         Numerical Restrictions: 'data_size' is required to be an integral
1049         multiple of 'required_shares'. In general, the caller should choose
1050         required_shares and max_shares based upon their reliability
1051         requirements and the number of peers available (the total storage
1052         space used is roughly equal to max_shares*data_size/required_shares),
1053         then choose data_size to achieve the memory footprint desired (larger
1054         data_size means more efficient operation, smaller data_size means
1055         smaller memory footprint).
1056
1057         In addition, 'max_shares' must be equal to or greater than
1058         'required_shares'. Of course, setting them to be equal causes
1059         encode() to degenerate into a particularly slow form of the 'split'
1060         utility.
1061
1062         See encode() for more details about how these parameters are used.
1063
1064         set_params() must be called before any other ICodecEncoder methods
1065         may be invoked.
1066         """
1067
1068     def get_params():
1069         """Return the 3-tuple of data_size, required_shares, max_shares"""
1070
1071     def get_encoder_type():
1072         """Return a short string that describes the type of this encoder.
1073
1074         There is required to be a global table of encoder classes. This method
1075         returns an index into this table; the value at this index is an
1076         encoder class, and this encoder is an instance of that class.
1077         """
1078
1079     def get_block_size():
1080         """Return the length of the shares that encode() will produce.
1081         """
1082
1083     def encode_proposal(data, desired_share_ids=None):
1084         """Encode some data.
1085
1086         'data' must be a string (or other buffer object), and len(data) must
1087         be equal to the 'data_size' value passed earlier to set_params().
1088
1089         This will return a Deferred that will fire with two lists. The first
1090         is a list of shares, each of which is a string (or other buffer
1091         object) such that len(share) is the same as what get_share_size()
1092         returned earlier. The second is a list of shareids, in which each is
1093         an integer. The lengths of the two lists will always be equal to each
1094         other. The user should take care to keep each share closely
1095         associated with its shareid, as one is useless without the other.
1096
1097         The length of this output list will normally be the same as the value
1098         provided to the 'max_shares' parameter of set_params(). This may be
1099         different if 'desired_share_ids' is provided.
1100
1101         'desired_share_ids', if provided, is required to be a sequence of
1102         ints, each of which is required to be >= 0 and < max_shares. If not
1103         provided, encode() will produce 'max_shares' shares, as if
1104         'desired_share_ids' were set to range(max_shares). You might use this
1105         if you initially thought you were going to use 10 peers, started
1106         encoding, and then two of the peers dropped out: you could use
1107         desired_share_ids= to skip the work (both memory and CPU) of
1108         producing shares for the peers which are no longer available.
1109
1110         """
1111
1112     def encode(inshares, desired_share_ids=None):
1113         """Encode some data. This may be called multiple times. Each call is
1114         independent.
1115
1116         inshares is a sequence of length required_shares, containing buffers
1117         (i.e. strings), where each buffer contains the next contiguous
1118         non-overlapping segment of the input data. Each buffer is required to
1119         be the same length, and the sum of the lengths of the buffers is
1120         required to be exactly the data_size promised by set_params(). (This
1121         implies that the data has to be padded before being passed to
1122         encode(), unless of course it already happens to be an even multiple
1123         of required_shares in length.)
1124
1125          ALSO: the requirement to break up your data into 'required_shares'
1126          chunks before calling encode() feels a bit surprising, at least from
1127          the point of view of a user who doesn't know how FEC works. It feels
1128          like an implementation detail that has leaked outside the
1129          abstraction barrier. Can you imagine a use case in which the data to
1130          be encoded might already be available in pre-segmented chunks, such
1131          that it is faster or less work to make encode() take a list rather
1132          than splitting a single string?
1133
1134          ALSO ALSO: I think 'inshares' is a misleading term, since encode()
1135          is supposed to *produce* shares, so what it *accepts* should be
1136          something other than shares. Other places in this interface use the
1137          word 'data' for that-which-is-not-shares.. maybe we should use that
1138          term?
1139
1140         'desired_share_ids', if provided, is required to be a sequence of
1141         ints, each of which is required to be >= 0 and < max_shares. If not
1142         provided, encode() will produce 'max_shares' shares, as if
1143         'desired_share_ids' were set to range(max_shares). You might use this
1144         if you initially thought you were going to use 10 peers, started
1145         encoding, and then two of the peers dropped out: you could use
1146         desired_share_ids= to skip the work (both memory and CPU) of
1147         producing shares for the peers which are no longer available.
1148
1149         For each call, encode() will return a Deferred that fires with two
1150         lists, one containing shares and the other containing the shareids.
1151         The get_share_size() method can be used to determine the length of
1152         the share strings returned by encode(). Each shareid is a small
1153         integer, exactly as passed into 'desired_share_ids' (or
1154         range(max_shares), if desired_share_ids was not provided).
1155
1156         The shares and their corresponding shareids are required to be kept
1157         together during storage and retrieval. Specifically, the share data is
1158         useless by itself: the decoder needs to be told which share is which
1159         by providing it with both the shareid and the actual share data.
1160
1161         This function will allocate an amount of memory roughly equal to::
1162
1163          (max_shares - required_shares) * get_share_size()
1164
1165         When combined with the memory that the caller must allocate to
1166         provide the input data, this leads to a memory footprint roughly
1167         equal to the size of the resulting encoded shares (i.e. the expansion
1168         factor times the size of the input segment).
1169         """
1170
1171         # rejected ideas:
1172         #
1173         #  returning a list of (shareidN,shareN) tuples instead of a pair of
1174         #  lists (shareids..,shares..). Brian thought the tuples would
1175         #  encourage users to keep the share and shareid together throughout
1176         #  later processing, Zooko pointed out that the code to iterate
1177         #  through two lists is not really more complicated than using a list
1178         #  of tuples and there's also a performance improvement
1179         #
1180         #  having 'data_size' not required to be an integral multiple of
1181         #  'required_shares'. Doing this would require encode() to perform
1182         #  padding internally, and we'd prefer to have any padding be done
1183         #  explicitly by the caller. Yes, it is an abstraction leak, but
1184         #  hopefully not an onerous one.
1185
1186
1187 class ICodecDecoder(Interface):
1188     def set_params(data_size, required_shares, max_shares):
1189         """Set the params. They have to be exactly the same ones that were
1190         used for encoding."""
1191
1192     def get_needed_shares():
1193         """Return the number of shares needed to reconstruct the data.
1194         set_params() is required to be called before this."""
1195
1196     def decode(some_shares, their_shareids):
1197         """Decode a partial list of shares into data.
1198
1199         'some_shares' is required to be a sequence of buffers of sharedata, a
1200         subset of the shares returned by ICodecEncode.encode(). Each share is
1201         required to be of the same length.  The i'th element of their_shareids
1202         is required to be the shareid of the i'th buffer in some_shares.
1203
1204         This returns a Deferred which fires with a sequence of buffers. This
1205         sequence will contain all of the segments of the original data, in
1206         order. The sum of the lengths of all of the buffers will be the
1207         'data_size' value passed into the original ICodecEncode.set_params()
1208         call. To get back the single original input block of data, use
1209         ''.join(output_buffers), or you may wish to simply write them in
1210         order to an output file.
1211
1212         Note that some of the elements in the result sequence may be
1213         references to the elements of the some_shares input sequence. In
1214         particular, this means that if those share objects are mutable (e.g.
1215         arrays) and if they are changed, then both the input (the
1216         'some_shares' parameter) and the output (the value given when the
1217         deferred is triggered) will change.
1218
1219         The length of 'some_shares' is required to be exactly the value of
1220         'required_shares' passed into the original ICodecEncode.set_params()
1221         call.
1222         """
1223
1224 class IEncoder(Interface):
1225     """I take an object that provides IEncryptedUploadable, which provides
1226     encrypted data, and a list of shareholders. I then encode, hash, and
1227     deliver shares to those shareholders. I will compute all the necessary
1228     Merkle hash trees that are necessary to validate the crypttext that
1229     eventually comes back from the shareholders. I provide the URI Extension
1230     Block Hash, and the encoding parameters, both of which must be included
1231     in the URI.
1232
1233     I do not choose shareholders, that is left to the IUploader. I must be
1234     given a dict of RemoteReferences to storage buckets that are ready and
1235     willing to receive data.
1236     """
1237
1238     def set_size(size):
1239         """Specify the number of bytes that will be encoded. This must be
1240         peformed before get_serialized_params() can be called.
1241         """
1242     def set_params(params):
1243         """Override the default encoding parameters. 'params' is a tuple of
1244         (k,d,n), where 'k' is the number of required shares, 'd' is the
1245         shares_of_happiness, and 'n' is the total number of shares that will
1246         be created.
1247
1248         Encoding parameters can be set in three ways. 1: The Encoder class
1249         provides defaults (3/7/10). 2: the Encoder can be constructed with
1250         an 'options' dictionary, in which the
1251         needed_and_happy_and_total_shares' key can be a (k,d,n) tuple. 3:
1252         set_params((k,d,n)) can be called.
1253
1254         If you intend to use set_params(), you must call it before
1255         get_share_size or get_param are called.
1256         """
1257
1258     def set_encrypted_uploadable(u):
1259         """Provide a source of encrypted upload data. 'u' must implement
1260         IEncryptedUploadable.
1261
1262         When this is called, the IEncryptedUploadable will be queried for its
1263         length and the storage_index that should be used.
1264
1265         This returns a Deferred that fires with this Encoder instance.
1266
1267         This must be performed before start() can be called.
1268         """
1269
1270     def get_param(name):
1271         """Return an encoding parameter, by name.
1272
1273         'storage_index': return a string with the (16-byte truncated SHA-256
1274                          hash) storage index to which these shares should be
1275                          pushed.
1276
1277         'share_counts': return a tuple describing how many shares are used:
1278                         (needed_shares, shares_of_happiness, total_shares)
1279
1280         'num_segments': return an int with the number of segments that
1281                         will be encoded.
1282
1283         'segment_size': return an int with the size of each segment.
1284
1285         'block_size': return the size of the individual blocks that will
1286                       be delivered to a shareholder's put_block() method. By
1287                       knowing this, the shareholder will be able to keep all
1288                       blocks in a single file and still provide random access
1289                       when reading them. # TODO: can we avoid exposing this?
1290
1291         'share_size': an int with the size of the data that will be stored
1292                       on each shareholder. This is aggregate amount of data
1293                       that will be sent to the shareholder, summed over all
1294                       the put_block() calls I will ever make. It is useful to
1295                       determine this size before asking potential
1296                       shareholders whether they will grant a lease or not,
1297                       since their answers will depend upon how much space we
1298                       need. TODO: this might also include some amount of
1299                       overhead, like the size of all the hashes. We need to
1300                       decide whether this is useful or not.
1301
1302         'serialized_params': a string with a concise description of the
1303                              codec name and its parameters. This may be passed
1304                              into the IUploadable to let it make sure that
1305                              the same file encoded with different parameters
1306                              will result in different storage indexes.
1307
1308         Once this is called, set_size() and set_params() may not be called.
1309         """
1310
1311     def set_shareholders(shareholders):
1312         """Tell the encoder where to put the encoded shares. 'shareholders'
1313         must be a dictionary that maps share number (an integer ranging from
1314         0 to n-1) to an instance that provides IStorageBucketWriter. This
1315         must be performed before start() can be called."""
1316
1317     def start():
1318         """Begin the encode/upload process. This involves reading encrypted
1319         data from the IEncryptedUploadable, encoding it, uploading the shares
1320         to the shareholders, then sending the hash trees.
1321
1322         set_encrypted_uploadable() and set_shareholders() must be called
1323         before this can be invoked.
1324
1325         This returns a Deferred that fires with a verify cap when the upload
1326         process is complete. The verifycap, plus the encryption key, is
1327         sufficient to construct the read cap.
1328         """
1329
1330 class IDecoder(Interface):
1331     """I take a list of shareholders and some setup information, then
1332     download, validate, decode, and decrypt data from them, writing the
1333     results to an output file.
1334
1335     I do not locate the shareholders, that is left to the IDownloader. I must
1336     be given a dict of RemoteReferences to storage buckets that are ready to
1337     send data.
1338     """
1339
1340     def setup(outfile):
1341         """I take a file-like object (providing write and close) to which all
1342         the plaintext data will be written.
1343
1344         TODO: producer/consumer . Maybe write() should return a Deferred that
1345         indicates when it will accept more data? But probably having the
1346         IDecoder be a producer is easier to glue to IConsumer pieces.
1347         """
1348
1349     def set_shareholders(shareholders):
1350         """I take a dictionary that maps share identifiers (small integers)
1351         to RemoteReferences that provide RIBucketReader. This must be called
1352         before start()."""
1353
1354     def start():
1355         """I start the download. This process involves retrieving data and
1356         hash chains from the shareholders, using the hashes to validate the
1357         data, decoding the shares into segments, decrypting the segments,
1358         then writing the resulting plaintext to the output file.
1359
1360         I return a Deferred that will fire (with self) when the download is
1361         complete.
1362         """
1363
1364 class IDownloadTarget(Interface):
1365     # Note that if the IDownloadTarget is also an IConsumer, the downloader
1366     # will register itself as a producer. This allows the target to invoke
1367     # downloader.pauseProducing, resumeProducing, and stopProducing.
1368     def open(size):
1369         """Called before any calls to write() or close(). If an error
1370         occurs before any data is available, fail() may be called without
1371         a previous call to open().
1372
1373         'size' is the length of the file being downloaded, in bytes."""
1374
1375     def write(data):
1376         """Output some data to the target."""
1377     def close():
1378         """Inform the target that there is no more data to be written."""
1379     def fail(why):
1380         """fail() is called to indicate that the download has failed. 'why'
1381         is a Failure object indicating what went wrong. No further methods
1382         will be invoked on the IDownloadTarget after fail()."""
1383     def register_canceller(cb):
1384         """The CiphertextDownloader uses this to register a no-argument function
1385         that the target can call to cancel the download. Once this canceller
1386         is invoked, no further calls to write() or close() will be made."""
1387     def finish():
1388         """When the CiphertextDownloader is done, this finish() function will be
1389         called. Whatever it returns will be returned to the invoker of
1390         Downloader.download.
1391         """
1392     # The following methods are just because that target might be a
1393     # repairer.DownUpConnector, and just because the current CHKUpload object
1394     # expects to find the storage index and encoding parameters in its
1395     # Uploadable.
1396     def set_storageindex(storageindex):
1397         """ Set the storage index. """
1398     def set_encodingparams(encodingparams):
1399         """ Set the encoding parameters. """
1400
1401 class IDownloader(Interface):
1402     def download(uri, target):
1403         """Perform a CHK download, sending the data to the given target.
1404         'target' must provide IDownloadTarget.
1405
1406         Returns a Deferred that fires (with the results of target.finish)
1407         when the download is finished, or errbacks if something went wrong."""
1408
1409 class IEncryptedUploadable(Interface):
1410     def set_upload_status(upload_status):
1411         """Provide an IUploadStatus object that should be filled with status
1412         information. The IEncryptedUploadable is responsible for setting
1413         key-determination progress ('chk'), size, storage_index, and
1414         ciphertext-fetch progress. It may delegate some of this
1415         responsibility to others, in particular to the IUploadable."""
1416
1417     def get_size():
1418         """This behaves just like IUploadable.get_size()."""
1419
1420     def get_all_encoding_parameters():
1421         """Return a Deferred that fires with a tuple of
1422         (k,happy,n,segment_size). The segment_size will be used as-is, and
1423         must match the following constraints: it must be a multiple of k, and
1424         it shouldn't be unreasonably larger than the file size (if
1425         segment_size is larger than filesize, the difference must be stored
1426         as padding).
1427
1428         This usually passes through to the IUploadable method of the same
1429         name.
1430
1431         The encoder strictly obeys the values returned by this method. To
1432         make an upload use non-default encoding parameters, you must arrange
1433         to control the values that this method returns.
1434         """
1435
1436     def get_storage_index():
1437         """Return a Deferred that fires with a 16-byte storage index.
1438         """
1439
1440     def read_encrypted(length, hash_only):
1441         """This behaves just like IUploadable.read(), but returns crypttext
1442         instead of plaintext. If hash_only is True, then this discards the
1443         data (and returns an empty list); this improves efficiency when
1444         resuming an interrupted upload (where we need to compute the
1445         plaintext hashes, but don't need the redundant encrypted data)."""
1446
1447     def get_plaintext_hashtree_leaves(first, last, num_segments):
1448         """OBSOLETE; Get the leaf nodes of a merkle hash tree over the
1449         plaintext segments, i.e. get the tagged hashes of the given segments.
1450         The segment size is expected to be generated by the
1451         IEncryptedUploadable before any plaintext is read or ciphertext
1452         produced, so that the segment hashes can be generated with only a
1453         single pass.
1454
1455         This returns a Deferred which fires with a sequence of hashes, using:
1456
1457          tuple(segment_hashes[first:last])
1458
1459         'num_segments' is used to assert that the number of segments that the
1460         IEncryptedUploadable handled matches the number of segments that the
1461         encoder was expecting.
1462
1463         This method must not be called until the final byte has been read
1464         from read_encrypted(). Once this method is called, read_encrypted()
1465         can never be called again.
1466         """
1467
1468     def get_plaintext_hash():
1469         """OBSOLETE; Get the hash of the whole plaintext.
1470
1471         This returns a Deferred which fires with a tagged SHA-256 hash of the
1472         whole plaintext, obtained from hashutil.plaintext_hash(data).
1473         """
1474
1475     def close():
1476         """Just like IUploadable.close()."""
1477
1478 class IUploadable(Interface):
1479     def set_upload_status(upload_status):
1480         """Provide an IUploadStatus object that should be filled with status
1481         information. The IUploadable is responsible for setting
1482         key-determination progress ('chk')."""
1483
1484     def set_default_encoding_parameters(params):
1485         """Set the default encoding parameters, which must be a dict mapping
1486         strings to ints. The meaningful keys are 'k', 'happy', 'n', and
1487         'max_segment_size'. These might have an influence on the final
1488         encoding parameters returned by get_all_encoding_parameters(), if the
1489         Uploadable doesn't have more specific preferences.
1490
1491         This call is optional: if it is not used, the Uploadable will use
1492         some built-in defaults. If used, this method must be called before
1493         any other IUploadable methods to have any effect.
1494         """
1495
1496     def get_size():
1497         """Return a Deferred that will fire with the length of the data to be
1498         uploaded, in bytes. This will be called before the data is actually
1499         used, to compute encoding parameters.
1500         """
1501
1502     def get_all_encoding_parameters():
1503         """Return a Deferred that fires with a tuple of
1504         (k,happy,n,segment_size). The segment_size will be used as-is, and
1505         must match the following constraints: it must be a multiple of k, and
1506         it shouldn't be unreasonably larger than the file size (if
1507         segment_size is larger than filesize, the difference must be stored
1508         as padding).
1509
1510         The relative values of k and n allow some IUploadables to request
1511         better redundancy than others (in exchange for consuming more space
1512         in the grid).
1513
1514         Larger values of segment_size reduce hash overhead, while smaller
1515         values reduce memory footprint and cause data to be delivered in
1516         smaller pieces (which may provide a smoother and more predictable
1517         download experience).
1518
1519         The encoder strictly obeys the values returned by this method. To
1520         make an upload use non-default encoding parameters, you must arrange
1521         to control the values that this method returns. One way to influence
1522         them may be to call set_encoding_parameters() before calling
1523         get_all_encoding_parameters().
1524         """
1525
1526     def get_encryption_key():
1527         """Return a Deferred that fires with a 16-byte AES key. This key will
1528         be used to encrypt the data. The key will also be hashed to derive
1529         the StorageIndex.
1530
1531         Uploadables which want to achieve convergence should hash their file
1532         contents and the serialized_encoding_parameters to form the key
1533         (which of course requires a full pass over the data). Uploadables can
1534         use the upload.ConvergentUploadMixin class to achieve this
1535         automatically.
1536
1537         Uploadables which do not care about convergence (or do not wish to
1538         make multiple passes over the data) can simply return a
1539         strongly-random 16 byte string.
1540
1541         get_encryption_key() may be called multiple times: the IUploadable is
1542         required to return the same value each time.
1543         """
1544
1545     def read(length):
1546         """Return a Deferred that fires with a list of strings (perhaps with
1547         only a single element) which, when concatenated together, contain the
1548         next 'length' bytes of data. If EOF is near, this may provide fewer
1549         than 'length' bytes. The total number of bytes provided by read()
1550         before it signals EOF must equal the size provided by get_size().
1551
1552         If the data must be acquired through multiple internal read
1553         operations, returning a list instead of a single string may help to
1554         reduce string copies.
1555
1556         'length' will typically be equal to (min(get_size(),1MB)/req_shares),
1557         so a 10kB file means length=3kB, 100kB file means length=30kB,
1558         and >=1MB file means length=300kB.
1559
1560         This method provides for a single full pass through the data. Later
1561         use cases may desire multiple passes or access to only parts of the
1562         data (such as a mutable file making small edits-in-place). This API
1563         will be expanded once those use cases are better understood.
1564         """
1565
1566     def close():
1567         """The upload is finished, and whatever filehandle was in use may be
1568         closed."""
1569
1570 class IUploadResults(Interface):
1571     """I am returned by upload() methods. I contain a number of public
1572     attributes which can be read to determine the results of the upload. Some
1573     of these are functional, some are timing information. All of these may be
1574     None.
1575
1576      .file_size : the size of the file, in bytes
1577      .uri : the CHK read-cap for the file
1578      .ciphertext_fetched : how many bytes were fetched by the helper
1579      .sharemap: dict mapping share identifier to set of serverids
1580                    (binary strings). This indicates which servers were given
1581                    which shares. For immutable files, the shareid is an
1582                    integer (the share number, from 0 to N-1). For mutable
1583                    files, it is a string of the form 'seq%d-%s-sh%d',
1584                    containing the sequence number, the roothash, and the
1585                    share number.
1586      .servermap : dict mapping server peerid to a set of share numbers
1587      .timings : dict of timing information, mapping name to seconds (float)
1588        total : total upload time, start to finish
1589        storage_index : time to compute the storage index
1590        peer_selection : time to decide which peers will be used
1591        contacting_helper : initial helper query to upload/no-upload decision
1592        existence_check : helper pre-upload existence check
1593        helper_total : initial helper query to helper finished pushing
1594        cumulative_fetch : helper waiting for ciphertext requests
1595        total_fetch : helper start to last ciphertext response
1596        cumulative_encoding : just time spent in zfec
1597        cumulative_sending : just time spent waiting for storage servers
1598        hashes_and_close : last segment push to shareholder close
1599        total_encode_and_push : first encode to shareholder close
1600
1601     """
1602
1603 class IDownloadResults(Interface):
1604     """I am created internally by download() methods. I contain a number of
1605     public attributes which contain details about the download process.::
1606
1607      .file_size : the size of the file, in bytes
1608      .servers_used : set of server peerids that were used during download
1609      .server_problems : dict mapping server peerid to a problem string. Only
1610                         servers that had problems (bad hashes, disconnects)
1611                         are listed here.
1612      .servermap : dict mapping server peerid to a set of share numbers. Only
1613                   servers that had any shares are listed here.
1614      .timings : dict of timing information, mapping name to seconds (float)
1615        peer_selection : time to ask servers about shares
1616        servers_peer_selection : dict of peerid to DYHB-query time
1617        uri_extension : time to fetch a copy of the URI extension block
1618        hashtrees : time to fetch the hash trees
1619        segments : time to fetch, decode, and deliver segments
1620        cumulative_fetch : time spent waiting for storage servers
1621        cumulative_decode : just time spent in zfec
1622        cumulative_decrypt : just time spent in decryption
1623        total : total download time, start to finish
1624        fetch_per_server : dict of peerid to list of per-segment fetch times
1625
1626     """
1627
1628 class IUploader(Interface):
1629     def upload(uploadable):
1630         """Upload the file. 'uploadable' must impement IUploadable. This
1631         returns a Deferred which fires with an UploadResults instance, from
1632         which the URI of the file can be obtained as results.uri ."""
1633
1634     def upload_ssk(write_capability, new_version, uploadable):
1635         """TODO: how should this work?"""
1636
1637 class ICheckable(Interface):
1638     def check(monitor, verify=False, add_lease=False):
1639         """Check upon my health, optionally repairing any problems.
1640
1641         This returns a Deferred that fires with an instance that provides
1642         ICheckResults, or None if the object is non-distributed (i.e. LIT
1643         files).
1644
1645         The monitor will be checked periodically to see if the operation has
1646         been cancelled. If so, no new queries will be sent, and the Deferred
1647         will fire (with a OperationCancelledError) immediately.
1648
1649         Filenodes and dirnodes (which provide IFilesystemNode) are also
1650         checkable. Instances that represent verifier-caps will be checkable
1651         but not downloadable. Some objects (like LIT files) do not actually
1652         live in the grid, and their checkers return None (non-distributed
1653         files are always healthy).
1654
1655         If verify=False, a relatively lightweight check will be performed: I
1656         will ask all servers if they have a share for me, and I will believe
1657         whatever they say. If there are at least N distinct shares on the
1658         grid, my results will indicate r.is_healthy()==True. This requires a
1659         roundtrip to each server, but does not transfer very much data, so
1660         the network bandwidth is fairly low.
1661
1662         If verify=True, a more resource-intensive check will be performed:
1663         every share will be downloaded, and the hashes will be validated on
1664         every bit. I will ignore any shares that failed their hash checks. If
1665         there are at least N distinct valid shares on the grid, my results
1666         will indicate r.is_healthy()==True. This requires N/k times as much
1667         download bandwidth (and server disk IO) as a regular download. If a
1668         storage server is holding a corrupt share, or is experiencing memory
1669         failures during retrieval, or is malicious or buggy, then
1670         verification will detect the problem, but checking will not.
1671
1672         If add_lease=True, I will ensure that an up-to-date lease is present
1673         on each share. The lease secrets will be derived from by node secret
1674         (in BASEDIR/private/secret), so either I will add a new lease to the
1675         share, or I will merely renew the lease that I already had. In a
1676         future version of the storage-server protocol (once Accounting has
1677         been implemented), there may be additional options here to define the
1678         kind of lease that is obtained (which account number to claim, etc).
1679
1680         TODO: any problems seen during checking will be reported to the
1681         health-manager.furl, a centralized object which is responsible for
1682         figuring out why files are unhealthy so corrective action can be
1683         taken.
1684         """
1685
1686     def check_and_repair(monitor, verify=False, add_lease=False):
1687         """Like check(), but if the file/directory is not healthy, attempt to
1688         repair the damage.
1689
1690         Any non-healthy result will cause an immediate repair operation, to
1691         generate and upload new shares. After repair, the file will be as
1692         healthy as we can make it. Details about what sort of repair is done
1693         will be put in the check-and-repair results. The Deferred will not
1694         fire until the repair is complete.
1695
1696         This returns a Deferred which fires with an instance of
1697         ICheckAndRepairResults."""
1698
1699 class IDeepCheckable(Interface):
1700     def start_deep_check(verify=False, add_lease=False):
1701         """Check upon the health of me and everything I can reach.
1702
1703         This is a recursive form of check(), useable only on dirnodes.
1704
1705         I return a Monitor, with results that are an IDeepCheckResults
1706         object.
1707
1708         TODO: If any of the directories I traverse are unrecoverable, the
1709         Monitor will report failure. If any of the files I check upon are
1710         unrecoverable, those problems will be reported in the
1711         IDeepCheckResults as usual, and the Monitor will not report a
1712         failure.
1713         """
1714
1715     def start_deep_check_and_repair(verify=False, add_lease=False):
1716         """Check upon the health of me and everything I can reach. Repair
1717         anything that isn't healthy.
1718
1719         This is a recursive form of check_and_repair(), useable only on
1720         dirnodes.
1721
1722         I return a Monitor, with results that are an
1723         IDeepCheckAndRepairResults object.
1724
1725         TODO: If any of the directories I traverse are unrecoverable, the
1726         Monitor will report failure. If any of the files I check upon are
1727         unrecoverable, those problems will be reported in the
1728         IDeepCheckResults as usual, and the Monitor will not report a
1729         failure.
1730         """
1731
1732 class ICheckResults(Interface):
1733     """I contain the detailed results of a check/verify operation.
1734     """
1735
1736     def get_storage_index():
1737         """Return a string with the (binary) storage index."""
1738     def get_storage_index_string():
1739         """Return a string with the (printable) abbreviated storage index."""
1740     def get_uri():
1741         """Return the (string) URI of the object that was checked."""
1742
1743     def is_healthy():
1744         """Return a boolean, True if the file/dir is fully healthy, False if
1745         it is damaged in any way. Non-distributed LIT files always return
1746         True."""
1747
1748     def is_recoverable():
1749         """Return a boolean, True if the file/dir can be recovered, False if
1750         not. Unrecoverable files are obviously unhealthy. Non-distributed LIT
1751         files always return True."""
1752
1753     def needs_rebalancing():
1754         """Return a boolean, True if the file/dir's reliability could be
1755         improved by moving shares to new servers. Non-distributed LIT files
1756         always return False."""
1757
1758
1759     def get_data():
1760         """Return a dictionary that describes the state of the file/dir. LIT
1761         files always return an empty dictionary. Normal files and directories
1762         return a dictionary with the following keys (note that these use
1763         binary strings rather than base32-encoded ones) (also note that for
1764         mutable files, these counts are for the 'best' version):
1765
1766          count-shares-good: the number of distinct good shares that were found
1767          count-shares-needed: 'k', the number of shares required for recovery
1768          count-shares-expected: 'N', the number of total shares generated
1769          count-good-share-hosts: the number of distinct storage servers with
1770                                  good shares. If this number is less than
1771                                  count-shares-good, then some shares are
1772                                  doubled up, increasing the correlation of
1773                                  failures. This indicates that one or more
1774                                  shares should be moved to an otherwise unused
1775                                  server, if one is available.
1776          count-corrupt-shares: the number of shares with integrity failures
1777          list-corrupt-shares: a list of 'share locators', one for each share
1778                               that was found to be corrupt. Each share
1779                               locator is a list of (serverid, storage_index,
1780                               sharenum).
1781          count-incompatible-shares: the number of shares which are of a share
1782                                     format unknown to this checker
1783          list-incompatible-shares: a list of 'share locators', one for each
1784                                    share that was found to be of an unknown
1785                                    format. Each share locator is a list of
1786                                    (serverid, storage_index, sharenum).
1787          servers-responding: list of (binary) storage server identifiers,
1788                              one for each server which responded to the share
1789                              query (even if they said they didn't have
1790                              shares, and even if they said they did have
1791                              shares but then didn't send them when asked, or
1792                              dropped the connection, or returned a Failure,
1793                              and even if they said they did have shares and
1794                              sent incorrect ones when asked)
1795          sharemap: dict mapping share identifier to list of serverids
1796                    (binary strings). This indicates which servers are holding
1797                    which shares. For immutable files, the shareid is an
1798                    integer (the share number, from 0 to N-1). For mutable
1799                    files, it is a string of the form 'seq%d-%s-sh%d',
1800                    containing the sequence number, the roothash, and the
1801                    share number.
1802
1803         The following keys are most relevant for mutable files, but immutable
1804         files will provide sensible values too::
1805
1806          count-wrong-shares: the number of shares for versions other than the
1807                              'best' one (which is defined as being the
1808                              recoverable version with the highest sequence
1809                              number, then the highest roothash). These are
1810                              either leftover shares from an older version
1811                              (perhaps on a server that was offline when an
1812                              update occurred), shares from an unrecoverable
1813                              newer version, or shares from an alternate
1814                              current version that results from an
1815                              uncoordinated write collision. For a healthy
1816                              file, this will equal 0.
1817
1818          count-recoverable-versions: the number of recoverable versions of
1819                                      the file. For a healthy file, this will
1820                                      equal 1.
1821
1822          count-unrecoverable-versions: the number of unrecoverable versions
1823                                        of the file. For a healthy file, this
1824                                        will be 0.
1825
1826         """
1827
1828     def get_summary():
1829         """Return a string with a brief (one-line) summary of the results."""
1830
1831     def get_report():
1832         """Return a list of strings with more detailed results."""
1833
1834 class ICheckAndRepairResults(Interface):
1835     """I contain the detailed results of a check/verify/repair operation.
1836
1837     The IFilesystemNode.check()/verify()/repair() methods all return
1838     instances that provide ICheckAndRepairResults.
1839     """
1840
1841     def get_storage_index():
1842         """Return a string with the (binary) storage index."""
1843     def get_storage_index_string():
1844         """Return a string with the (printable) abbreviated storage index."""
1845     def get_repair_attempted():
1846         """Return a boolean, True if a repair was attempted. We might not
1847         attempt to repair the file because it was healthy, or healthy enough
1848         (i.e. some shares were missing but not enough to exceed some
1849         threshold), or because we don't know how to repair this object."""
1850     def get_repair_successful():
1851         """Return a boolean, True if repair was attempted and the file/dir
1852         was fully healthy afterwards. False if no repair was attempted or if
1853         a repair attempt failed."""
1854     def get_pre_repair_results():
1855         """Return an ICheckResults instance that describes the state of the
1856         file/dir before any repair was attempted."""
1857     def get_post_repair_results():
1858         """Return an ICheckResults instance that describes the state of the
1859         file/dir after any repair was attempted. If no repair was attempted,
1860         the pre-repair and post-repair results will be identical."""
1861
1862
1863 class IDeepCheckResults(Interface):
1864     """I contain the results of a deep-check operation.
1865
1866     This is returned by a call to ICheckable.deep_check().
1867     """
1868
1869     def get_root_storage_index_string():
1870         """Return the storage index (abbreviated human-readable string) of
1871         the first object checked."""
1872     def get_counters():
1873         """Return a dictionary with the following keys::
1874
1875              count-objects-checked: count of how many objects were checked
1876              count-objects-healthy: how many of those objects were completely
1877                                     healthy
1878              count-objects-unhealthy: how many were damaged in some way
1879              count-objects-unrecoverable: how many were unrecoverable
1880              count-corrupt-shares: how many shares were found to have
1881                                    corruption, summed over all objects
1882                                    examined
1883         """
1884
1885     def get_corrupt_shares():
1886         """Return a set of (serverid, storage_index, sharenum) for all shares
1887         that were found to be corrupt. Both serverid and storage_index are
1888         binary.
1889         """
1890     def get_all_results():
1891         """Return a dictionary mapping pathname (a tuple of strings, ready to
1892         be slash-joined) to an ICheckResults instance, one for each object
1893         that was checked."""
1894
1895     def get_results_for_storage_index(storage_index):
1896         """Retrive the ICheckResults instance for the given (binary)
1897         storage index. Raises KeyError if there are no results for that
1898         storage index."""
1899
1900     def get_stats():
1901         """Return a dictionary with the same keys as
1902         IDirectoryNode.deep_stats()."""
1903
1904 class IDeepCheckAndRepairResults(Interface):
1905     """I contain the results of a deep-check-and-repair operation.
1906
1907     This is returned by a call to ICheckable.deep_check_and_repair().
1908     """
1909
1910     def get_root_storage_index_string():
1911         """Return the storage index (abbreviated human-readable string) of
1912         the first object checked."""
1913     def get_counters():
1914         """Return a dictionary with the following keys::
1915
1916              count-objects-checked: count of how many objects were checked
1917              count-objects-healthy-pre-repair: how many of those objects were
1918                                                completely healthy (before any
1919                                                repair)
1920              count-objects-unhealthy-pre-repair: how many were damaged in
1921                                                  some way
1922              count-objects-unrecoverable-pre-repair: how many were unrecoverable
1923              count-objects-healthy-post-repair: how many of those objects were
1924                                                 completely healthy (after any
1925                                                 repair)
1926              count-objects-unhealthy-post-repair: how many were damaged in
1927                                                   some way
1928              count-objects-unrecoverable-post-repair: how many were
1929                                                       unrecoverable
1930              count-repairs-attempted: repairs were attempted on this many
1931                                       objects. The count-repairs- keys will
1932                                       always be provided, however unless
1933                                       repair=true is present, they will all
1934                                       be zero.
1935              count-repairs-successful: how many repairs resulted in healthy
1936                                        objects
1937              count-repairs-unsuccessful: how many repairs resulted did not
1938                                          results in completely healthy objects
1939              count-corrupt-shares-pre-repair: how many shares were found to
1940                                               have corruption, summed over all
1941                                               objects examined (before any
1942                                               repair)
1943              count-corrupt-shares-post-repair: how many shares were found to
1944                                                have corruption, summed over all
1945                                                objects examined (after any
1946                                                repair)
1947         """
1948
1949     def get_stats():
1950         """Return a dictionary with the same keys as
1951         IDirectoryNode.deep_stats()."""
1952
1953     def get_corrupt_shares():
1954         """Return a set of (serverid, storage_index, sharenum) for all shares
1955         that were found to be corrupt before any repair was attempted. Both
1956         serverid and storage_index are binary.
1957         """
1958     def get_remaining_corrupt_shares():
1959         """Return a set of (serverid, storage_index, sharenum) for all shares
1960         that were found to be corrupt after any repair was completed. Both
1961         serverid and storage_index are binary. These are shares that need
1962         manual inspection and probably deletion.
1963         """
1964     def get_all_results():
1965         """Return a dictionary mapping pathname (a tuple of strings, ready to
1966         be slash-joined) to an ICheckAndRepairResults instance, one for each
1967         object that was checked."""
1968
1969     def get_results_for_storage_index(storage_index):
1970         """Retrive the ICheckAndRepairResults instance for the given (binary)
1971         storage index. Raises KeyError if there are no results for that
1972         storage index."""
1973
1974
1975 class IRepairable(Interface):
1976     def repair(check_results):
1977         """Attempt to repair the given object. Returns a Deferred that fires
1978         with a IRepairResults object.
1979
1980         I must be called with an object that implements ICheckResults, as
1981         proof that you have actually discovered a problem with this file. I
1982         will use the data in the checker results to guide the repair process,
1983         such as which servers provided bad data and should therefore be
1984         avoided. The ICheckResults object is inside the
1985         ICheckAndRepairResults object, which is returned by the
1986         ICheckable.check() method::
1987
1988          d = filenode.check(repair=False)
1989          def _got_results(check_and_repair_results):
1990              check_results = check_and_repair_results.get_pre_repair_results()
1991              return filenode.repair(check_results)
1992          d.addCallback(_got_results)
1993          return d
1994         """
1995
1996 class IRepairResults(Interface):
1997     """I contain the results of a repair operation."""
1998
1999
2000 class IClient(Interface):
2001     def upload(uploadable):
2002         """Upload some data into a CHK, get back the UploadResults for it.
2003         @param uploadable: something that implements IUploadable
2004         @return: a Deferred that fires with the UploadResults instance.
2005                  To get the URI for this file, use results.uri .
2006         """
2007
2008     def create_mutable_file(contents=""):
2009         """Create a new mutable file (with initial) contents, get back the
2010         URI string.
2011
2012         @param contents: (bytestring, callable, or None): this provides the
2013         initial contents of the mutable file. If 'contents' is a bytestring,
2014         it will be used as-is. If 'contents' is a callable, it will be
2015         invoked with the new MutableFileNode instance and is expected to
2016         return a bytestring with the initial contents of the file (the
2017         callable can use node.get_writekey() to decide how to encrypt the
2018         initial contents, e.g. for a brand new dirnode with initial
2019         children). contents=None is equivalent to an empty string. Using
2020         content_maker= is more efficient than creating a mutable file and
2021         setting its contents in two separate operations.
2022
2023         @return: a Deferred that fires with tne (string) SSK URI for the new
2024                  file.
2025         """
2026
2027     def create_dirnode(initial_children={}):
2028         """Create a new unattached dirnode, possibly with initial children.
2029
2030         @param initial_children: dict with keys that are unicode child names,
2031         and values that are (childnode, metadata) tuples.
2032
2033         @return: a Deferred that fires with the new IDirectoryNode instance.
2034         """
2035
2036     def create_node_from_uri(uri, rouri):
2037         """Create a new IFilesystemNode instance from the uri, synchronously.
2038         @param uri: a string or IURI-providing instance, or None. This could
2039                     be for a LiteralFileNode, a CHK file node, a mutable file
2040                     node, or a directory node
2041         @param rouri: a string or IURI-providing instance, or None. If the
2042                       main uri is None, I will use the rouri instead. If I
2043                       recognize the format of the main uri, I will ignore the
2044                       rouri (because it can be derived from the writecap).
2045
2046         @return: an instance that provides IFilesystemNode (or more usefully
2047                  one of its subclasses). File-specifying URIs will result in
2048                  IFileNode or IMutableFileNode -providing instances, like
2049                  FileNode, LiteralFileNode, or MutableFileNode.
2050                  Directory-specifying URIs will result in
2051                  IDirectoryNode-providing instances, like DirectoryNode.
2052         """
2053
2054 class INodeMaker(Interface):
2055     """The NodeMaker is used to create IFilesystemNode instances. It can
2056     accept a filecap/dircap string and return the node right away. It can
2057     also create new nodes (i.e. upload a file, or create a mutable file)
2058     asynchronously. Once you have one of these nodes, you can use other
2059     methods to determine whether it is a file or directory, and to download
2060     or modify its contents.
2061
2062     The NodeMaker encapsulates all the authorities that these
2063     IfilesystemNodes require (like references to the StorageFarmBroker). Each
2064     Tahoe process will typically have a single NodeMaker, but unit tests may
2065     create simplified/mocked forms for testing purposes.
2066     """
2067     def create_from_cap(writecap, readcap=None):
2068         """I create an IFilesystemNode from the given writecap/readcap. I can
2069         only provide nodes for existing file/directory objects: use my other
2070         methods to create new objects. I return synchronously."""
2071
2072     def create_mutable_file(contents=None, keysize=None):
2073         """I create a new mutable file, and return a Deferred which will fire
2074         with the IMutableFileNode instance when it is ready. If contents= is
2075         provided (a bytestring), it will be used as the initial contents of
2076         the new file, otherwise the file will contain zero bytes. keysize= is
2077         for use by unit tests, to create mutable files that are smaller than
2078         usual."""
2079
2080     def create_new_mutable_directory(initial_children={}):
2081         """I create a new mutable directory, and return a Deferred which will
2082         fire with the IDirectoryNode instance when it is ready. If
2083         initial_children= is provided (a dict mapping unicode child name to
2084         (childnode, metadata_dict) tuples), the directory will be populated
2085         with those children, otherwise it will be empty."""
2086
2087 class IClientStatus(Interface):
2088     def list_all_uploads():
2089         """Return a list of uploader objects, one for each upload which
2090         currently has an object available (tracked with weakrefs). This is
2091         intended for debugging purposes."""
2092     def list_active_uploads():
2093         """Return a list of active IUploadStatus objects."""
2094     def list_recent_uploads():
2095         """Return a list of IUploadStatus objects for the most recently
2096         started uploads."""
2097
2098     def list_all_downloads():
2099         """Return a list of downloader objects, one for each download which
2100         currently has an object available (tracked with weakrefs). This is
2101         intended for debugging purposes."""
2102     def list_active_downloads():
2103         """Return a list of active IDownloadStatus objects."""
2104     def list_recent_downloads():
2105         """Return a list of IDownloadStatus objects for the most recently
2106         started downloads."""
2107
2108 class IUploadStatus(Interface):
2109     def get_started():
2110         """Return a timestamp (float with seconds since epoch) indicating
2111         when the operation was started."""
2112     def get_storage_index():
2113         """Return a string with the (binary) storage index in use on this
2114         upload. Returns None if the storage index has not yet been
2115         calculated."""
2116     def get_size():
2117         """Return an integer with the number of bytes that will eventually
2118         be uploaded for this file. Returns None if the size is not yet known.
2119         """
2120     def using_helper():
2121         """Return True if this upload is using a Helper, False if not."""
2122     def get_status():
2123         """Return a string describing the current state of the upload
2124         process."""
2125     def get_progress():
2126         """Returns a tuple of floats, (chk, ciphertext, encode_and_push),
2127         each from 0.0 to 1.0 . 'chk' describes how much progress has been
2128         made towards hashing the file to determine a CHK encryption key: if
2129         non-convergent encryption is in use, this will be trivial, otherwise
2130         the whole file must be hashed. 'ciphertext' describes how much of the
2131         ciphertext has been pushed to the helper, and is '1.0' for non-helper
2132         uploads. 'encode_and_push' describes how much of the encode-and-push
2133         process has finished: for helper uploads this is dependent upon the
2134         helper providing progress reports. It might be reasonable to add all
2135         three numbers and report the sum to the user."""
2136     def get_active():
2137         """Return True if the upload is currently active, False if not."""
2138     def get_results():
2139         """Return an instance of UploadResults (which contains timing and
2140         sharemap information). Might return None if the upload is not yet
2141         finished."""
2142     def get_counter():
2143         """Each upload status gets a unique number: this method returns that
2144         number. This provides a handle to this particular upload, so a web
2145         page can generate a suitable hyperlink."""
2146
2147 class IDownloadStatus(Interface):
2148     def get_started():
2149         """Return a timestamp (float with seconds since epoch) indicating
2150         when the operation was started."""
2151     def get_storage_index():
2152         """Return a string with the (binary) storage index in use on this
2153         download. This may be None if there is no storage index (i.e. LIT
2154         files)."""
2155     def get_size():
2156         """Return an integer with the number of bytes that will eventually be
2157         retrieved for this file. Returns None if the size is not yet known.
2158         """
2159     def using_helper():
2160         """Return True if this download is using a Helper, False if not."""
2161     def get_status():
2162         """Return a string describing the current state of the download
2163         process."""
2164     def get_progress():
2165         """Returns a float (from 0.0 to 1.0) describing the amount of the
2166         download that has completed. This value will remain at 0.0 until the
2167         first byte of plaintext is pushed to the download target."""
2168     def get_active():
2169         """Return True if the download is currently active, False if not."""
2170     def get_counter():
2171         """Each download status gets a unique number: this method returns
2172         that number. This provides a handle to this particular download, so a
2173         web page can generate a suitable hyperlink."""
2174
2175 class IServermapUpdaterStatus(Interface):
2176     pass
2177 class IPublishStatus(Interface):
2178     pass
2179 class IRetrieveStatus(Interface):
2180     pass
2181
2182 class NotCapableError(Exception):
2183     """You have tried to write to a read-only node."""
2184
2185 class BadWriteEnablerError(Exception):
2186     pass
2187
2188 class RIControlClient(RemoteInterface):
2189
2190     def wait_for_client_connections(num_clients=int):
2191         """Do not return until we have connections to at least NUM_CLIENTS
2192         storage servers.
2193         """
2194
2195     def upload_from_file_to_uri(filename=str,
2196                                 convergence=ChoiceOf(None,
2197                                                      StringConstraint(2**20))):
2198         """Upload a file to the grid. This accepts a filename (which must be
2199         absolute) that points to a file on the node's local disk. The node will
2200         read the contents of this file, upload it to the grid, then return the
2201         URI at which it was uploaded.  If convergence is None then a random
2202         encryption key will be used, else the plaintext will be hashed, then
2203         that hash will be mixed together with the "convergence" string to form
2204         the encryption key.
2205         """
2206         return URI
2207
2208     def download_from_uri_to_file(uri=URI, filename=str):
2209         """Download a file from the grid, placing it on the node's local disk
2210         at the given filename (which must be absolute[?]). Returns the
2211         absolute filename where the file was written."""
2212         return str
2213
2214     # debug stuff
2215
2216     def get_memory_usage():
2217         """Return a dict describes the amount of memory currently in use. The
2218         keys are 'VmPeak', 'VmSize', and 'VmData'. The values are integers,
2219         measuring memory consupmtion in bytes."""
2220         return DictOf(str, int)
2221
2222     def speed_test(count=int, size=int, mutable=Any()):
2223         """Write 'count' tempfiles to disk, all of the given size. Measure
2224         how long (in seconds) it takes to upload them all to the servers.
2225         Then measure how long it takes to download all of them. If 'mutable'
2226         is 'create', time creation of mutable files. If 'mutable' is
2227         'upload', then time access to the same mutable file instead of
2228         creating one.
2229
2230         Returns a tuple of (upload_time, download_time).
2231         """
2232         return (float, float)
2233
2234     def measure_peer_response_time():
2235         """Send a short message to each connected peer, and measure the time
2236         it takes for them to respond to it. This is a rough measure of the
2237         application-level round trip time.
2238
2239         @return: a dictionary mapping peerid to a float (RTT time in seconds)
2240         """
2241
2242         return DictOf(Nodeid, float)
2243
2244 UploadResults = Any() #DictOf(str, str)
2245
2246 class RIEncryptedUploadable(RemoteInterface):
2247     __remote_name__ = "RIEncryptedUploadable.tahoe.allmydata.com"
2248
2249     def get_size():
2250         return Offset
2251
2252     def get_all_encoding_parameters():
2253         return (int, int, int, long)
2254
2255     def read_encrypted(offset=Offset, length=ReadSize):
2256         return ListOf(str)
2257
2258     def close():
2259         return None
2260
2261
2262 class RICHKUploadHelper(RemoteInterface):
2263     __remote_name__ = "RIUploadHelper.tahoe.allmydata.com"
2264
2265     def get_version():
2266         """
2267         Return a dictionary of version information.
2268         """
2269         return DictOf(str, Any())
2270
2271     def upload(reader=RIEncryptedUploadable):
2272         return UploadResults
2273
2274
2275 class RIHelper(RemoteInterface):
2276     __remote_name__ = "RIHelper.tahoe.allmydata.com"
2277
2278     def get_version():
2279         """
2280         Return a dictionary of version information.
2281         """
2282         return DictOf(str, Any())
2283
2284     def upload_chk(si=StorageIndex):
2285         """See if a file with a given storage index needs uploading. The
2286         helper will ask the appropriate storage servers to see if the file
2287         has already been uploaded. If so, the helper will return a set of
2288         'upload results' that includes whatever hashes are needed to build
2289         the read-cap, and perhaps a truncated sharemap.
2290
2291         If the file has not yet been uploaded (or if it was only partially
2292         uploaded), the helper will return an empty upload-results dictionary
2293         and also an RICHKUploadHelper object that will take care of the
2294         upload process. The client should call upload() on this object and
2295         pass it a reference to an RIEncryptedUploadable object that will
2296         provide ciphertext. When the upload is finished, the upload() method
2297         will finish and return the upload results.
2298         """
2299         return (UploadResults, ChoiceOf(RICHKUploadHelper, None))
2300
2301
2302 class RIStatsProvider(RemoteInterface):
2303     __remote_name__ = "RIStatsProvider.tahoe.allmydata.com"
2304     """
2305     Provides access to statistics and monitoring information.
2306     """
2307
2308     def get_stats():
2309         """
2310         returns a dictionary containing 'counters' and 'stats', each a
2311         dictionary with string counter/stat name keys, and numeric values.
2312         counters are monotonically increasing measures of work done, and
2313         stats are instantaneous measures (potentially time averaged
2314         internally)
2315         """
2316         return DictOf(str, DictOf(str, ChoiceOf(float, int, long)))
2317
2318 class RIStatsGatherer(RemoteInterface):
2319     __remote_name__ = "RIStatsGatherer.tahoe.allmydata.com"
2320     """
2321     Provides a monitoring service for centralised collection of stats
2322     """
2323
2324     def provide(provider=RIStatsProvider, nickname=str):
2325         """
2326         @param provider: a stats collector instance which should be polled
2327                          periodically by the gatherer to collect stats.
2328         @param nickname: a name useful to identify the provided client
2329         """
2330         return None
2331
2332
2333 class IStatsProducer(Interface):
2334     def get_stats():
2335         """
2336         returns a dictionary, with str keys representing the names of stats
2337         to be monitored, and numeric values.
2338         """
2339
2340 class RIKeyGenerator(RemoteInterface):
2341     __remote_name__ = "RIKeyGenerator.tahoe.allmydata.com"
2342     """
2343     Provides a service offering to make RSA key pairs.
2344     """
2345
2346     def get_rsa_key_pair(key_size=int):
2347         """
2348         @param key_size: the size of the signature key.
2349         @return: tuple(verifying_key, signing_key)
2350         """
2351         return TupleOf(str, str)
2352
2353
2354 class FileTooLargeError(Exception):
2355     pass
2356
2357 class IValidatedThingProxy(Interface):
2358     def start():
2359         """ Acquire a thing and validate it. Return a deferred which is
2360         eventually fired with self if the thing is valid or errbacked if it
2361         can't be acquired or validated."""
2362
2363 class InsufficientVersionError(Exception):
2364     def __init__(self, needed, got):
2365         self.needed = needed
2366         self.got = got
2367     def __repr__(self):
2368         return "InsufficientVersionError(need '%s', got %s)" % (self.needed,
2369                                                                 self.got)