src/allmydata/interfaces.py

   1
   2 from zope.interface import Interface
   3 from foolscap.schema import StringConstraint, ListOf, TupleOf, SetOf, DictOf, \
   4      ChoiceOf, IntegerConstraint
   5 from foolscap import RemoteInterface, Referenceable
   6
   7 HASH_SIZE=32
   8
   9 Hash = StringConstraint(maxLength=HASH_SIZE,
  10                         minLength=HASH_SIZE)# binary format 32-byte SHA256 hash
  11 Nodeid = StringConstraint(maxLength=20,
  12                           minLength=20) # binary format 20-byte SHA1 hash
  13 FURL = StringConstraint(1000)
  14 StorageIndex = StringConstraint(16)
  15 URI = StringConstraint(300) # kind of arbitrary
  16
  17 MAX_BUCKETS = 256  # per peer -- zfec offers at most 256 shares per file
  18
  19 ShareData = StringConstraint(None)
  20 URIExtensionData = StringConstraint(1000)
  21 Number = IntegerConstraint(8) # 2**(8*8) == 16EiB ~= 18e18 ~= 18 exabytes
  22 Offset = Number
  23 ReadSize = int # the 'int' constraint is 2**31 == 2Gib -- large files are processed in not-so-large increments
  24 WriteEnablerSecret = Hash # used to protect mutable bucket modifications
  25 LeaseRenewSecret = Hash # used to protect bucket lease renewal requests
  26 LeaseCancelSecret = Hash # used to protect bucket lease cancellation requests
  27
  28 class RIStubClient(RemoteInterface):
  29     """Each client publishes a service announcement for a dummy object called
  30     the StubClient. This object doesn't actually offer any services, but the
  31     announcement helps the Introducer keep track of which clients are
  32     subscribed (so the grid admin can keep track of things like the size of
  33     the grid and the client versions in use. This is the (empty)
  34     RemoteInterface for the StubClient."""
  35
  36 class RIBucketWriter(RemoteInterface):
  37     """ Objects of this kind live on the server side. """
  38     def write(offset=Offset, data=ShareData):
  39         return None
  40
  41     def close():
  42         """
  43         If the data that has been written is incomplete or inconsistent then
  44         the server will throw the data away, else it will store it for future
  45         retrieval.
  46         """
  47         return None
  48
  49     def abort():
  50         """Abandon all the data that has been written.
  51         """
  52         return None
  53
  54 class RIBucketReader(RemoteInterface):
  55     def read(offset=Offset, length=ReadSize):
  56         return ShareData
  57
  58 TestVector = ListOf(TupleOf(Offset, ReadSize, str, str))
  59 # elements are (offset, length, operator, specimen)
  60 # operator is one of "lt, le, eq, ne, ge, gt"
  61 # nop always passes and is used to fetch data while writing.
  62 # you should use length==len(specimen) for everything except nop
  63 DataVector = ListOf(TupleOf(Offset, ShareData))
  64 # (offset, data). This limits us to 30 writes of 1MiB each per call
  65 TestAndWriteVectorsForShares = DictOf(int,
  66                                       TupleOf(TestVector,
  67                                               DataVector,
  68                                               ChoiceOf(None, Offset), # new_length
  69                                               ))
  70 ReadVector = ListOf(TupleOf(Offset, ReadSize))
  71 ReadData = ListOf(ShareData)
  72 # returns data[offset:offset+length] for each element of TestVector
  73
  74 class RIStorageServer(RemoteInterface):
  75     __remote_name__ = "RIStorageServer.tahoe.allmydata.com"
  76
  77     def get_versions():
  78         """
  79         Return a tuple of (my_version, oldest_supported) strings.  Each string can be parsed by
  80         a pyutil.version_class.Version instance or a distutils.version.LooseVersion instance,
  81         and then compared. The first goal is to make sure that nodes are not confused by
  82         speaking to an incompatible peer. The second goal is to enable the development of
  83         backwards-compatibility code.
  84
  85         The meaning of the oldest_supported element is that if you treat this storage server as
  86         though it were of that version, then you will not be disappointed.
  87
  88         The precise meaning of this method might change in incompatible ways until we get the
  89         whole compatibility scheme nailed down.
  90         """
  91         return TupleOf(str, str)
  92
  93     def allocate_buckets(storage_index=StorageIndex,
  94                          renew_secret=LeaseRenewSecret,
  95                          cancel_secret=LeaseCancelSecret,
  96                          sharenums=SetOf(int, maxLength=MAX_BUCKETS),
  97                          allocated_size=Offset, canary=Referenceable):
  98         """
  99         @param storage_index: the index of the bucket to be created or
 100                               increfed.
 101         @param sharenums: these are the share numbers (probably between 0 and
 102                           99) that the sender is proposing to store on this
 103                           server.
 104         @param renew_secret: This is the secret used to protect bucket refresh
 105                              This secret is generated by the client and
 106                              stored for later comparison by the server. Each
 107                              server is given a different secret.
 108         @param cancel_secret: Like renew_secret, but protects bucket decref.
 109         @param canary: If the canary is lost before close(), the bucket is
 110                        deleted.
 111         @return: tuple of (alreadygot, allocated), where alreadygot is what we
 112                  already have and is what we hereby agree to accept. New
 113                  leases are added for shares in both lists.
 114         """
 115         return TupleOf(SetOf(int, maxLength=MAX_BUCKETS),
 116                        DictOf(int, RIBucketWriter, maxKeys=MAX_BUCKETS))
 117
 118     def add_lease(storage_index=StorageIndex,
 119                   renew_secret=LeaseRenewSecret,
 120                   cancel_secret=LeaseCancelSecret):
 121         """
 122         Add a new lease on the given bucket. If the renew_secret matches an
 123         existing lease, that lease will be renewed instead.
 124         """
 125         return None
 126
 127     def renew_lease(storage_index=StorageIndex, renew_secret=LeaseRenewSecret):
 128         """
 129         Renew the lease on a given bucket. Some networks will use this, some
 130         will not.
 131         """
 132         return None
 133
 134     def cancel_lease(storage_index=StorageIndex,
 135                      cancel_secret=LeaseCancelSecret):
 136         """
 137         Cancel the lease on a given bucket. If this was the last lease on the
 138         bucket, the bucket will be deleted.
 139         """
 140         return None
 141
 142     def get_buckets(storage_index=StorageIndex):
 143         return DictOf(int, RIBucketReader, maxKeys=MAX_BUCKETS)
 144
 145
 146
 147     def slot_readv(storage_index=StorageIndex,
 148                    shares=ListOf(int), readv=ReadVector):
 149         """Read a vector from the numbered shares associated with the given
 150         storage index. An empty shares list means to return data from all
 151         known shares. Returns a dictionary with one key per share."""
 152         return DictOf(int, ReadData) # shnum -> results
 153
 154     def slot_testv_and_readv_and_writev(storage_index=StorageIndex,
 155                                         secrets=TupleOf(WriteEnablerSecret,
 156                                                         LeaseRenewSecret,
 157                                                         LeaseCancelSecret),
 158                                         tw_vectors=TestAndWriteVectorsForShares,
 159                                         r_vector=ReadVector,
 160                                         ):
 161         """General-purpose test-and-set operation for mutable slots. Perform
 162         a bunch of comparisons against the existing shares. If they all pass,
 163         then apply a bunch of write vectors to those shares. Then use the
 164         read vectors to extract data from all the shares and return the data.
 165
 166         This method is, um, large. The goal is to allow clients to update all
 167         the shares associated with a mutable file in a single round trip.
 168
 169         @param storage_index: the index of the bucket to be created or
 170                               increfed.
 171         @param write_enabler: a secret that is stored along with the slot.
 172                               Writes are accepted from any caller who can
 173                               present the matching secret. A different secret
 174                               should be used for each slot*server pair.
 175         @param renew_secret: This is the secret used to protect bucket refresh
 176                              This secret is generated by the client and
 177                              stored for later comparison by the server. Each
 178                              server is given a different secret.
 179         @param cancel_secret: Like renew_secret, but protects bucket decref.
 180
 181         The 'secrets' argument is a tuple of (write_enabler, renew_secret,
 182         cancel_secret). The first is required to perform any write. The
 183         latter two are used when allocating new shares. To simply acquire a
 184         new lease on existing shares, use an empty testv and an empty writev.
 185
 186         Each share can have a separate test vector (i.e. a list of
 187         comparisons to perform). If all vectors for all shares pass, then all
 188         writes for all shares are recorded. Each comparison is a 4-tuple of
 189         (offset, length, operator, specimen), which effectively does a bool(
 190         (read(offset, length)) OPERATOR specimen ) and only performs the
 191         write if all these evaluate to True. Basic test-and-set uses 'eq'.
 192         Write-if-newer uses a seqnum and (offset, length, 'lt', specimen).
 193         Write-if-same-or-newer uses 'le'.
 194
 195         Reads from the end of the container are truncated, and missing shares
 196         behave like empty ones, so to assert that a share doesn't exist (for
 197         use when creating a new share), use (0, 1, 'eq', '').
 198
 199         The write vector will be applied to the given share, expanding it if
 200         necessary. A write vector applied to a share number that did not
 201         exist previously will cause that share to be created.
 202
 203         Each write vector is accompanied by a 'new_length' argument. If
 204         new_length is not None, use it to set the size of the container. This
 205         can be used to pre-allocate space for a series of upcoming writes, or
 206         truncate existing data. If the container is growing, new_length will
 207         be applied before datav. If the container is shrinking, it will be
 208         applied afterwards.
 209
 210         The read vector is used to extract data from all known shares,
 211         *before* any writes have been applied. The same vector is used for
 212         all shares. This captures the state that was tested by the test
 213         vector.
 214
 215         This method returns two values: a boolean and a dict. The boolean is
 216         True if the write vectors were applied, False if not. The dict is
 217         keyed by share number, and each value contains a list of strings, one
 218         for each element of the read vector.
 219
 220         If the write_enabler is wrong, this will raise BadWriteEnablerError.
 221         To enable share migration (using update_write_enabler), the exception
 222         will have the nodeid used for the old write enabler embedded in it,
 223         in the following string::
 224
 225          The write enabler was recorded by nodeid '%s'.
 226
 227         Note that the nodeid here is encoded using the same base32 encoding
 228         used by Foolscap and allmydata.util.idlib.nodeid_b2a().
 229
 230         """
 231         return TupleOf(bool, DictOf(int, ReadData))
 232
 233 class IStorageBucketWriter(Interface):
 234     """
 235     Objects of this kind live on the client side.
 236     """
 237     def put_block(segmentnum=int, data=ShareData):
 238         """@param data: For most segments, this data will be 'blocksize'
 239         bytes in length. The last segment might be shorter.
 240         @return: a Deferred that fires (with None) when the operation completes
 241         """
 242
 243     def put_plaintext_hashes(hashes=ListOf(Hash)):
 244         """
 245         @return: a Deferred that fires (with None) when the operation completes
 246         """
 247
 248     def put_crypttext_hashes(hashes=ListOf(Hash)):
 249         """
 250         @return: a Deferred that fires (with None) when the operation completes
 251         """
 252
 253     def put_block_hashes(blockhashes=ListOf(Hash)):
 254         """
 255         @return: a Deferred that fires (with None) when the operation completes
 256         """
 257
 258     def put_share_hashes(sharehashes=ListOf(TupleOf(int, Hash))):
 259         """
 260         @return: a Deferred that fires (with None) when the operation completes
 261         """
 262
 263     def put_uri_extension(data=URIExtensionData):
 264         """This block of data contains integrity-checking information (hashes
 265         of plaintext, crypttext, and shares), as well as encoding parameters
 266         that are necessary to recover the data. This is a serialized dict
 267         mapping strings to other strings. The hash of this data is kept in
 268         the URI and verified before any of the data is used. All buckets for
 269         a given file contain identical copies of this data.
 270
 271         The serialization format is specified with the following pseudocode:
 272         for k in sorted(dict.keys()):
 273             assert re.match(r'^[a-zA-Z_\-]+$', k)
 274             write(k + ':' + netstring(dict[k]))
 275
 276         @return: a Deferred that fires (with None) when the operation completes
 277         """
 278
 279     def close():
 280         """Finish writing and close the bucket. The share is not finalized
 281         until this method is called: if the uploading client disconnects
 282         before calling close(), the partially-written share will be
 283         discarded.
 284
 285         @return: a Deferred that fires (with None) when the operation completes
 286         """
 287
 288 class IStorageBucketReader(Interface):
 289
 290     def get_block(blocknum=int):
 291         """Most blocks will be the same size. The last block might be shorter
 292         than the others.
 293
 294         @return: ShareData
 295         """
 296
 297     def get_plaintext_hashes():
 298         """
 299         @return: ListOf(Hash)
 300         """
 301
 302     def get_crypttext_hashes():
 303         """
 304         @return: ListOf(Hash)
 305         """
 306
 307     def get_block_hashes():
 308         """
 309         @return: ListOf(Hash)
 310         """
 311
 312     def get_share_hashes():
 313         """
 314         @return: ListOf(TupleOf(int, Hash))
 315         """
 316
 317     def get_uri_extension():
 318         """
 319         @return: URIExtensionData
 320         """
 321
 322
 323
 324 # hm, we need a solution for forward references in schemas
 325 from foolscap.schema import Any
 326
 327 FileNode_ = Any() # TODO: foolscap needs constraints on copyables
 328 DirectoryNode_ = Any() # TODO: same
 329 AnyNode_ = ChoiceOf(FileNode_, DirectoryNode_)
 330 EncryptedThing = str
 331
 332 class IURI(Interface):
 333     def init_from_string(uri):
 334         """Accept a string (as created by my to_string() method) and populate
 335         this instance with its data. I am not normally called directly,
 336         please use the module-level uri.from_string() function to convert
 337         arbitrary URI strings into IURI-providing instances."""
 338
 339     def is_readonly():
 340         """Return False if this URI be used to modify the data. Return True
 341         if this URI cannot be used to modify the data."""
 342
 343     def is_mutable():
 344         """Return True if the data can be modified by *somebody* (perhaps
 345         someone who has a more powerful URI than this one)."""
 346
 347     def get_readonly():
 348         """Return another IURI instance, which represents a read-only form of
 349         this one. If is_readonly() is True, this returns self."""
 350
 351     def get_verifier():
 352         """Return an instance that provides IVerifierURI, which can be used
 353         to check on the availability of the file or directory, without
 354         providing enough capabilities to actually read or modify the
 355         contents. This may return None if the file does not need checking or
 356         verification (e.g. LIT URIs).
 357         """
 358
 359     def to_string():
 360         """Return a string of printable ASCII characters, suitable for
 361         passing into init_from_string."""
 362
 363 class IVerifierURI(Interface):
 364     def init_from_string(uri):
 365         """Accept a string (as created by my to_string() method) and populate
 366         this instance with its data. I am not normally called directly,
 367         please use the module-level uri.from_string() function to convert
 368         arbitrary URI strings into IURI-providing instances."""
 369
 370     def to_string():
 371         """Return a string of printable ASCII characters, suitable for
 372         passing into init_from_string."""
 373
 374 class IDirnodeURI(Interface):
 375     """I am a URI which represents a dirnode."""
 376
 377
 378 class IFileURI(Interface):
 379     """I am a URI which represents a filenode."""
 380     def get_size():
 381         """Return the length (in bytes) of the file that I represent."""
 382
 383 class IMutableFileURI(Interface):
 384     """I am a URI which represents a mutable filenode."""
 385 class INewDirectoryURI(Interface):
 386     pass
 387 class IReadonlyNewDirectoryURI(Interface):
 388     pass
 389
 390
 391 class IFilesystemNode(Interface):
 392     def get_uri():
 393         """
 394         Return the URI that can be used by others to get access to this
 395         node. If this node is read-only, the URI will only offer read-only
 396         access. If this node is read-write, the URI will offer read-write
 397         access.
 398
 399         If you have read-write access to a node and wish to share merely
 400         read-only access with others, use get_readonly_uri().
 401         """
 402
 403     def get_readonly_uri():
 404         """Return the directory URI that can be used by others to get
 405         read-only access to this directory node. The result is a read-only
 406         URI, regardless of whether this dirnode is read-only or read-write.
 407
 408         If you have merely read-only access to this dirnode,
 409         get_readonly_uri() will return the same thing as get_uri().
 410         """
 411
 412     def get_verifier():
 413         """Return an IVerifierURI instance that represents the
 414         'verifiy/refresh capability' for this node. The holder of this
 415         capability will be able to renew the lease for this node, protecting
 416         it from garbage-collection. They will also be able to ask a server if
 417         it holds a share for the file or directory.
 418         """
 419
 420     def get_storage_index():
 421         """Return a string with the (binary) storage index in use on this
 422         download. This may be None if there is no storage index (i.e. LIT
 423         files)."""
 424
 425     def is_readonly():
 426         """Return True if this reference provides mutable access to the given
 427         file or directory (i.e. if you can modify it), or False if not. Note
 428         that even if this reference is read-only, someone else may hold a
 429         read-write reference to it."""
 430
 431     def is_mutable():
 432         """Return True if this file or directory is mutable (by *somebody*,
 433         not necessarily you), False if it is is immutable. Note that a file
 434         might be mutable overall, but your reference to it might be
 435         read-only. On the other hand, all references to an immutable file
 436         will be read-only; there are no read-write references to an immutable
 437         file.
 438         """
 439
 440 class IMutableFilesystemNode(IFilesystemNode):
 441     pass
 442
 443 class IFileNode(IFilesystemNode):
 444     def download(target):
 445         """Download the file's contents to a given IDownloadTarget"""
 446
 447     def download_to_data():
 448         """Download the file's contents. Return a Deferred that fires
 449         with those contents."""
 450
 451     def get_size():
 452         """Return the length (in bytes) of the data this node represents."""
 453
 454 class IMutableFileNode(IFileNode, IMutableFilesystemNode):
 455     """I provide access to a 'mutable file', which retains its identity
 456     regardless of what contents are put in it.
 457
 458     The consistency-vs-availability problem means that there might be
 459     multiple versions of a file present in the grid, some of which might be
 460     unrecoverable (i.e. have fewer than 'k' shares). These versions are
 461     loosely ordered: each has a sequence number and a hash, and any version
 462     with seqnum=N was uploaded by a node which has seen at least one version
 463     with seqnum=N-1.
 464
 465     The 'servermap' (an instance of IMutableFileServerMap) is used to
 466     describe the versions that are known to be present in the grid, and which
 467     servers are hosting their shares. It is used to represent the 'state of
 468     the world', and is used for this purpose by my test-and-set operations.
 469     Downloading the contents of the mutable file will also return a
 470     servermap. Uploading a new version into the mutable file requires a
 471     servermap as input, and the semantics of the replace operation is
 472     'replace the file with my new version if it looks like nobody else has
 473     changed the file since my previous download'. Because the file is
 474     distributed, this is not a perfect test-and-set operation, but it will do
 475     its best. If the replace process sees evidence of a simultaneous write,
 476     it will signal an UncoordinatedWriteError, so that the caller can take
 477     corrective action.
 478
 479
 480     Most readers will want to use the 'best' current version of the file, and
 481     should use my 'download_best_version()' method.
 482
 483     To unconditionally replace the file, callers should use overwrite(). This
 484     is the mode that user-visible mutable files will probably use.
 485
 486     To apply some delta to the file, call modify() with a callable modifier
 487     function that can apply the modification that you want to make. This is
 488     the mode that dirnodes will use, since most directory modification
 489     operations can be expressed in terms of deltas to the directory state.
 490
 491
 492     Three methods are available for users who need to perform more complex
 493     operations. The first is get_servermap(), which returns an up-to-date
 494     servermap using a specified mode. The second is download_version(), which
 495     downloads a specific version (not necessarily the 'best' one). The third
 496     is 'upload', which accepts new contents and a servermap (which must have
 497     been updated with MODE_WRITE). The upload method will attempt to apply
 498     the new contents as long as no other node has modified the file since the
 499     servermap was updated. This might be useful to a caller who wants to
 500     merge multiple versions into a single new one.
 501
 502     Note that each time the servermap is updated, a specific 'mode' is used,
 503     which determines how many peers are queried. To use a servermap for my
 504     replace() method, that servermap must have been updated in MODE_WRITE.
 505     These modes are defined in allmydata.mutable.common, and consist of
 506     MODE_READ, MODE_WRITE, MODE_ANYTHING, and MODE_CHECK. Please look in
 507     allmydata/mutable/servermap.py for details about the differences.
 508
 509     Mutable files are currently limited in size (about 3.5MB max) and can
 510     only be retrieved and updated all-at-once, as a single big string. Future
 511     versions of our mutable files will remove this restriction.
 512     """
 513
 514     def download_best_version():
 515         """Download the 'best' available version of the file, meaning one of
 516         the recoverable versions with the highest sequence number. If no
 517         uncoordinated writes have occurred, and if enough shares are
 518         available, then this will be the most recent version that has been
 519         uploaded.
 520
 521         I return a Deferred that fires with a (contents, servermap) pair. The
 522         servermap is updated with MODE_READ. The contents will be the version
 523         of the file indicated by servermap.best_recoverable_version(). If no
 524         version is recoverable, the Deferred will errback with
 525         UnrecoverableFileError.
 526         """
 527
 528     def get_size_of_best_version():
 529         """Find the size of the version that would be downloaded with
 530         download_best_version(), without actually downloading the whole file.
 531
 532         I return a Deferred that fires with an integer.
 533         """
 534
 535     def overwrite(new_contents):
 536         """Unconditionally replace the contents of the mutable file with new
 537         ones. This simply chains get_servermap(MODE_WRITE) and upload(). This
 538         is only appropriate to use when the new contents of the file are
 539         completely unrelated to the old ones, and you do not care about other
 540         clients' changes.
 541
 542         I return a Deferred that fires (with a PublishStatus object) when the
 543         update has completed.
 544         """
 545
 546     def modify(modifier_cb):
 547         """Modify the contents of the file, by downloading the current
 548         version, applying the modifier function (or bound method), then
 549         uploading the new version. I return a Deferred that fires (with a
 550         PublishStatus object) when the update is complete.
 551
 552         The modifier callable will be given two arguments: a string (with the
 553         old contents) and a servermap. As with download_best_version(), the
 554         old contents will be from the best recoverable version, but the
 555         modifier can use the servermap to make other decisions (such as
 556         refusing to apply the delta if there are multiple parallel versions,
 557         or if there is evidence of a newer unrecoverable version).
 558
 559         The callable should return a string with the new contents. The
 560         callable must be prepared to be called multiple times, and must
 561         examine the input string to see if the change that it wants to make
 562         is already present in the old version. If it does not need to make
 563         any changes, it can either return None, or return its input string.
 564
 565         If the modifier raises an exception, it will be returned in the
 566         errback.
 567         """
 568
 569
 570     def get_servermap(mode):
 571         """Return a Deferred that fires with an IMutableFileServerMap
 572         instance, updated using the given mode.
 573         """
 574
 575     def download_version(servermap, version):
 576         """Download a specific version of the file, using the servermap
 577         as a guide to where the shares are located.
 578
 579         I return a Deferred that fires with the requested contents, or
 580         errbacks with UnrecoverableFileError. Note that a servermap which was
 581         updated with MODE_ANYTHING or MODE_READ may not know about shares for
 582         all versions (those modes stop querying servers as soon as they can
 583         fulfil their goals), so you may want to use MODE_CHECK (which checks
 584         everything) to get increased visibility.
 585         """
 586
 587     def upload(new_contents, servermap):
 588         """Replace the contents of the file with new ones. This requires a
 589         servermap that was previously updated with MODE_WRITE.
 590
 591         I attempt to provide test-and-set semantics, in that I will avoid
 592         modifying any share that is different than the version I saw in the
 593         servermap. However, if another node is writing to the file at the
 594         same time as me, I may manage to update some shares while they update
 595         others. If I see any evidence of this, I will signal
 596         UncoordinatedWriteError, and the file will be left in an inconsistent
 597         state (possibly the version you provided, possibly the old version,
 598         possibly somebody else's version, and possibly a mix of shares from
 599         all of these).
 600
 601         The recommended response to UncoordinatedWriteError is to either
 602         return it to the caller (since they failed to coordinate their
 603         writes), or to attempt some sort of recovery. It may be sufficient to
 604         wait a random interval (with exponential backoff) and repeat your
 605         operation. If I do not signal UncoordinatedWriteError, then I was
 606         able to write the new version without incident.
 607
 608         I return a Deferred that fires (with a PublishStatus object) when the
 609         publish has completed. I will update the servermap in-place with the
 610         location of all new shares.
 611         """
 612
 613     def get_writekey():
 614         """Return this filenode's writekey, or None if the node does not have
 615         write-capability. This may be used to assist with data structures
 616         that need to make certain data available only to writers, such as the
 617         read-write child caps in dirnodes. The recommended process is to have
 618         reader-visible data be submitted to the filenode in the clear (where
 619         it will be encrypted by the filenode using the readkey), but encrypt
 620         writer-visible data using this writekey.
 621         """
 622
 623 class ExistingChildError(Exception):
 624     """A directory node was asked to add or replace a child that already
 625     exists, and overwrite= was set to False."""
 626
 627 class IDirectoryNode(IMutableFilesystemNode):
 628     """I represent a name-to-child mapping, holding the tahoe equivalent of a
 629     directory. All child names are unicode strings, and all children are some
 630     sort of IFilesystemNode (either files or subdirectories).
 631     """
 632
 633     def get_uri():
 634         """
 635         The dirnode ('1') URI returned by this method can be used in
 636         set_uri() on a different directory ('2') to 'mount' a reference to
 637         this directory ('1') under the other ('2'). This URI is just a
 638         string, so it can be passed around through email or other out-of-band
 639         protocol.
 640         """
 641
 642     def get_readonly_uri():
 643         """
 644         The dirnode ('1') URI returned by this method can be used in
 645         set_uri() on a different directory ('2') to 'mount' a reference to
 646         this directory ('1') under the other ('2'). This URI is just a
 647         string, so it can be passed around through email or other out-of-band
 648         protocol.
 649         """
 650
 651     def list():
 652         """I return a Deferred that fires with a dictionary mapping child
 653         name (a unicode string) to (node, metadata_dict) tuples, in which
 654         'node' is either an IFileNode or IDirectoryNode, and 'metadata_dict'
 655         is a dictionary of metadata."""
 656
 657     def has_child(name):
 658         """I return a Deferred that fires with a boolean, True if there
 659         exists a child of the given name, False if not. The child name must
 660         be a unicode string."""
 661
 662     def get(name):
 663         """I return a Deferred that fires with a specific named child node,
 664         either an IFileNode or an IDirectoryNode. The child name must be a
 665         unicode string."""
 666
 667     def get_metadata_for(name):
 668         """I return a Deferred that fires with the metadata dictionary for a
 669         specific named child node. This metadata is stored in the *edge*, not
 670         in the child, so it is attached to the parent dirnode rather than the
 671         child dir-or-file-node. The child name must be a unicode string."""
 672
 673     def set_metadata_for(name, metadata):
 674         """I replace any existing metadata for the named child with the new
 675         metadata. The child name must be a unicode string. This metadata is
 676         stored in the *edge*, not in the child, so it is attached to the
 677         parent dirnode rather than the child dir-or-file-node. I return a
 678         Deferred (that fires with this dirnode) when the operation is
 679         complete."""
 680
 681     def get_child_at_path(path):
 682         """Transform a child path into an IDirectoryNode or IFileNode.
 683
 684         I perform a recursive series of 'get' operations to find the named
 685         descendant node. I return a Deferred that fires with the node, or
 686         errbacks with IndexError if the node could not be found.
 687
 688         The path can be either a single string (slash-separated) or a list of
 689         path-name elements. All elements must be unicode strings.
 690         """
 691
 692     def get_child_and_metadata_at_path(path):
 693         """Transform a child path into an IDirectoryNode/IFileNode and
 694         metadata.
 695
 696         I am like get_child_at_path(), but my Deferred fires with a tuple of
 697         (node, metadata). The metadata comes from the last edge. If the path
 698         is empty, the metadata will be an empty dictionary.
 699         """
 700
 701     def set_uri(name, child_uri, metadata=None, overwrite=True):
 702         """I add a child (by URI) at the specific name. I return a Deferred
 703         that fires when the operation finishes. If overwrite= is True, I will
 704         replace any existing child of the same name, otherwise an existing
 705         child will cause me to return ExistingChildError. The child name must
 706         be a unicode string.
 707
 708         The child_uri could be for a file, or for a directory (either
 709         read-write or read-only, using a URI that came from get_uri() ).
 710
 711         If metadata= is provided, I will use it as the metadata for the named
 712         edge. This will replace any existing metadata. If metadata= is left
 713         as the default value of None, I will set ['mtime'] to the current
 714         time, and I will set ['ctime'] to the current time if there was not
 715         already a child by this name present. This roughly matches the
 716         ctime/mtime semantics of traditional filesystems.
 717
 718         If this directory node is read-only, the Deferred will errback with a
 719         NotMutableError."""
 720
 721     def set_children(entries, overwrite=True):
 722         """Add multiple (name, child_uri) pairs (or (name, child_uri,
 723         metadata) triples) to a directory node. Returns a Deferred that fires
 724         (with None) when the operation finishes. This is equivalent to
 725         calling set_uri() multiple times, but is much more efficient. All
 726         child names must be unicode strings.
 727         """
 728
 729     def set_node(name, child, metadata=None, overwrite=True):
 730         """I add a child at the specific name. I return a Deferred that fires
 731         when the operation finishes. This Deferred will fire with the child
 732         node that was just added. I will replace any existing child of the
 733         same name. The child name must be a unicode string. The 'child'
 734         instance must be an instance providing IDirectoryNode or IFileNode.
 735
 736         If metadata= is provided, I will use it as the metadata for the named
 737         edge. This will replace any existing metadata. If metadata= is left
 738         as the default value of None, I will set ['mtime'] to the current
 739         time, and I will set ['ctime'] to the current time if there was not
 740         already a child by this name present. This roughly matches the
 741         ctime/mtime semantics of traditional filesystems.
 742
 743         If this directory node is read-only, the Deferred will errback with a
 744         NotMutableError."""
 745
 746     def set_nodes(entries, overwrite=True):
 747         """Add multiple (name, child_node) pairs (or (name, child_node,
 748         metadata) triples) to a directory node. Returns a Deferred that fires
 749         (with None) when the operation finishes. This is equivalent to
 750         calling set_node() multiple times, but is much more efficient. All
 751         child names must be unicode strings."""
 752
 753
 754     def add_file(name, uploadable, metadata=None, overwrite=True):
 755         """I upload a file (using the given IUploadable), then attach the
 756         resulting FileNode to the directory at the given name. I set metadata
 757         the same way as set_uri and set_node. The child name must be a
 758         unicode string.
 759
 760         I return a Deferred that fires (with the IFileNode of the uploaded
 761         file) when the operation completes."""
 762
 763     def delete(name):
 764         """I remove the child at the specific name. I return a Deferred that
 765         fires when the operation finishes. The child name must be a unicode
 766         string."""
 767
 768     def create_empty_directory(name, overwrite=True):
 769         """I create and attach an empty directory at the given name. The
 770         child name must be a unicode string. I return a Deferred that fires
 771         when the operation finishes."""
 772
 773     def move_child_to(current_child_name, new_parent, new_child_name=None,
 774                       overwrite=True):
 775         """I take one of my children and move them to a new parent. The child
 776         is referenced by name. On the new parent, the child will live under
 777         'new_child_name', which defaults to 'current_child_name'. TODO: what
 778         should we do about metadata? I return a Deferred that fires when the
 779         operation finishes. The child name must be a unicode string."""
 780
 781     def build_manifest():
 782         """Return a Monitor. The Monitor's results will be a list of (path,
 783         cap) tuples for nodes (directories and files) reachable from this
 784         one. 'path' will be a tuple of unicode strings. The origin dirnode
 785         will be represented by an empty path tuple. The Monitor will also
 786         have an .origin_si attribute with the (binary) storage index of the
 787         starting point.
 788         """
 789
 790     def start_deep_stats():
 791         """Return a Monitor, examining all nodes (directories and files)
 792         reachable from this one. The Monitor's results will be a dictionary
 793         with the following keys::
 794
 795            count-immutable-files: count of how many CHK files are in the set
 796            count-mutable-files: same, for mutable files (does not include
 797                                 directories)
 798            count-literal-files: same, for LIT files
 799            count-files: sum of the above three
 800
 801            count-directories: count of directories
 802
 803            size-immutable-files: total bytes for all CHK files in the set
 804            size-mutable-files (TODO): same, for current version of all mutable
 805                                       files, does not include directories
 806            size-literal-files: same, for LIT files
 807            size-directories: size of mutable files used by directories
 808
 809            largest-directory: number of bytes in the largest directory
 810            largest-directory-children: number of children in the largest
 811                                        directory
 812            largest-immutable-file: number of bytes in the largest CHK file
 813
 814         size-mutable-files is not yet implemented, because it would involve
 815         even more queries than deep_stats does.
 816
 817         The Monitor will also have an .origin_si attribute with the (binary)
 818         storage index of the starting point.
 819
 820         This operation will visit every directory node underneath this one,
 821         and can take a long time to run. On a typical workstation with good
 822         bandwidth, this can examine roughly 15 directories per second (and
 823         takes several minutes of 100% CPU for ~1700 directories).
 824         """
 825
 826 class ICodecEncoder(Interface):
 827     def set_params(data_size, required_shares, max_shares):
 828         """Set up the parameters of this encoder.
 829
 830         This prepares the encoder to perform an operation that converts a
 831         single block of data into a number of shares, such that a future
 832         ICodecDecoder can use a subset of these shares to recover the
 833         original data. This operation is invoked by calling encode(). Once
 834         the encoding parameters are set up, the encode operation can be
 835         invoked multiple times.
 836
 837         set_params() prepares the encoder to accept blocks of input data that
 838         are exactly 'data_size' bytes in length. The encoder will be prepared
 839         to produce 'max_shares' shares for each encode() operation (although
 840         see the 'desired_share_ids' to use less CPU). The encoding math will
 841         be chosen such that the decoder can get by with as few as
 842         'required_shares' of these shares and still reproduce the original
 843         data. For example, set_params(1000, 5, 5) offers no redundancy at
 844         all, whereas set_params(1000, 1, 10) provides 10x redundancy.
 845
 846         Numerical Restrictions: 'data_size' is required to be an integral
 847         multiple of 'required_shares'. In general, the caller should choose
 848         required_shares and max_shares based upon their reliability
 849         requirements and the number of peers available (the total storage
 850         space used is roughly equal to max_shares*data_size/required_shares),
 851         then choose data_size to achieve the memory footprint desired (larger
 852         data_size means more efficient operation, smaller data_size means
 853         smaller memory footprint).
 854
 855         In addition, 'max_shares' must be equal to or greater than
 856         'required_shares'. Of course, setting them to be equal causes
 857         encode() to degenerate into a particularly slow form of the 'split'
 858         utility.
 859
 860         See encode() for more details about how these parameters are used.
 861
 862         set_params() must be called before any other ICodecEncoder methods
 863         may be invoked.
 864         """
 865
 866     def get_encoder_type():
 867         """Return a short string that describes the type of this encoder.
 868
 869         There is required to be a global table of encoder classes. This method
 870         returns an index into this table; the value at this index is an
 871         encoder class, and this encoder is an instance of that class.
 872         """
 873
 874     def get_serialized_params(): # TODO: maybe, maybe not
 875         """Return a string that describes the parameters of this encoder.
 876
 877         This string can be passed to the decoder to prepare it for handling
 878         the encoded shares we create. It might contain more information than
 879         was presented to set_params(), if there is some flexibility of
 880         parameter choice.
 881
 882         This string is intended to be embedded in the URI, so there are
 883         several restrictions on its contents. At the moment I'm thinking that
 884         this means it may contain hex digits and hyphens, and nothing else.
 885         The idea is that the URI contains something like '%s:%s:%s' %
 886         (encoder.get_encoder_name(), encoder.get_serialized_params(),
 887         b2a(crypttext_hash)), and this is enough information to construct a
 888         compatible decoder.
 889         """
 890
 891     def get_block_size():
 892         """Return the length of the shares that encode() will produce.
 893         """
 894
 895     def encode_proposal(data, desired_share_ids=None):
 896         """Encode some data.
 897
 898         'data' must be a string (or other buffer object), and len(data) must
 899         be equal to the 'data_size' value passed earlier to set_params().
 900
 901         This will return a Deferred that will fire with two lists. The first
 902         is a list of shares, each of which is a string (or other buffer
 903         object) such that len(share) is the same as what get_share_size()
 904         returned earlier. The second is a list of shareids, in which each is
 905         an integer. The lengths of the two lists will always be equal to each
 906         other. The user should take care to keep each share closely
 907         associated with its shareid, as one is useless without the other.
 908
 909         The length of this output list will normally be the same as the value
 910         provided to the 'max_shares' parameter of set_params(). This may be
 911         different if 'desired_share_ids' is provided.
 912
 913         'desired_share_ids', if provided, is required to be a sequence of
 914         ints, each of which is required to be >= 0 and < max_shares. If not
 915         provided, encode() will produce 'max_shares' shares, as if
 916         'desired_share_ids' were set to range(max_shares). You might use this
 917         if you initially thought you were going to use 10 peers, started
 918         encoding, and then two of the peers dropped out: you could use
 919         desired_share_ids= to skip the work (both memory and CPU) of
 920         producing shares for the peers which are no longer available.
 921
 922         """
 923
 924     def encode(inshares, desired_share_ids=None):
 925         """Encode some data. This may be called multiple times. Each call is
 926         independent.
 927
 928         inshares is a sequence of length required_shares, containing buffers
 929         (i.e. strings), where each buffer contains the next contiguous
 930         non-overlapping segment of the input data. Each buffer is required to
 931         be the same length, and the sum of the lengths of the buffers is
 932         required to be exactly the data_size promised by set_params(). (This
 933         implies that the data has to be padded before being passed to
 934         encode(), unless of course it already happens to be an even multiple
 935         of required_shares in length.)
 936
 937          ALSO: the requirement to break up your data into 'required_shares'
 938          chunks before calling encode() feels a bit surprising, at least from
 939          the point of view of a user who doesn't know how FEC works. It feels
 940          like an implementation detail that has leaked outside the
 941          abstraction barrier. Can you imagine a use case in which the data to
 942          be encoded might already be available in pre-segmented chunks, such
 943          that it is faster or less work to make encode() take a list rather
 944          than splitting a single string?
 945
 946          ALSO ALSO: I think 'inshares' is a misleading term, since encode()
 947          is supposed to *produce* shares, so what it *accepts* should be
 948          something other than shares. Other places in this interface use the
 949          word 'data' for that-which-is-not-shares.. maybe we should use that
 950          term?
 951
 952         'desired_share_ids', if provided, is required to be a sequence of
 953         ints, each of which is required to be >= 0 and < max_shares. If not
 954         provided, encode() will produce 'max_shares' shares, as if
 955         'desired_share_ids' were set to range(max_shares). You might use this
 956         if you initially thought you were going to use 10 peers, started
 957         encoding, and then two of the peers dropped out: you could use
 958         desired_share_ids= to skip the work (both memory and CPU) of
 959         producing shares for the peers which are no longer available.
 960
 961         For each call, encode() will return a Deferred that fires with two
 962         lists, one containing shares and the other containing the shareids.
 963         The get_share_size() method can be used to determine the length of
 964         the share strings returned by encode(). Each shareid is a small
 965         integer, exactly as passed into 'desired_share_ids' (or
 966         range(max_shares), if desired_share_ids was not provided).
 967
 968         The shares and their corresponding shareids are required to be kept
 969         together during storage and retrieval. Specifically, the share data is
 970         useless by itself: the decoder needs to be told which share is which
 971         by providing it with both the shareid and the actual share data.
 972
 973         This function will allocate an amount of memory roughly equal to::
 974
 975          (max_shares - required_shares) * get_share_size()
 976
 977         When combined with the memory that the caller must allocate to
 978         provide the input data, this leads to a memory footprint roughly
 979         equal to the size of the resulting encoded shares (i.e. the expansion
 980         factor times the size of the input segment).
 981         """
 982
 983         # rejected ideas:
 984         #
 985         #  returning a list of (shareidN,shareN) tuples instead of a pair of
 986         #  lists (shareids..,shares..). Brian thought the tuples would
 987         #  encourage users to keep the share and shareid together throughout
 988         #  later processing, Zooko pointed out that the code to iterate
 989         #  through two lists is not really more complicated than using a list
 990         #  of tuples and there's also a performance improvement
 991         #
 992         #  having 'data_size' not required to be an integral multiple of
 993         #  'required_shares'. Doing this would require encode() to perform
 994         #  padding internally, and we'd prefer to have any padding be done
 995         #  explicitly by the caller. Yes, it is an abstraction leak, but
 996         #  hopefully not an onerous one.
 997
 998
 999 class ICodecDecoder(Interface):
1000     def set_serialized_params(params):
1001         """Set up the parameters of this encoder, from a string returned by
1002         encoder.get_serialized_params()."""
1003
1004     def get_needed_shares():
1005         """Return the number of shares needed to reconstruct the data.
1006         set_serialized_params() is required to be called before this."""
1007
1008     def decode(some_shares, their_shareids):
1009         """Decode a partial list of shares into data.
1010
1011         'some_shares' is required to be a sequence of buffers of sharedata, a
1012         subset of the shares returned by ICodecEncode.encode(). Each share is
1013         required to be of the same length.  The i'th element of their_shareids
1014         is required to be the shareid of the i'th buffer in some_shares.
1015
1016         This returns a Deferred which fires with a sequence of buffers. This
1017         sequence will contain all of the segments of the original data, in
1018         order. The sum of the lengths of all of the buffers will be the
1019         'data_size' value passed into the original ICodecEncode.set_params()
1020         call. To get back the single original input block of data, use
1021         ''.join(output_buffers), or you may wish to simply write them in
1022         order to an output file.
1023
1024         Note that some of the elements in the result sequence may be
1025         references to the elements of the some_shares input sequence. In
1026         particular, this means that if those share objects are mutable (e.g.
1027         arrays) and if they are changed, then both the input (the
1028         'some_shares' parameter) and the output (the value given when the
1029         deferred is triggered) will change.
1030
1031         The length of 'some_shares' is required to be exactly the value of
1032         'required_shares' passed into the original ICodecEncode.set_params()
1033         call.
1034         """
1035
1036 class IEncoder(Interface):
1037     """I take an object that provides IEncryptedUploadable, which provides
1038     encrypted data, and a list of shareholders. I then encode, hash, and
1039     deliver shares to those shareholders. I will compute all the necessary
1040     Merkle hash trees that are necessary to validate the crypttext that
1041     eventually comes back from the shareholders. I provide the URI Extension
1042     Block Hash, and the encoding parameters, both of which must be included
1043     in the URI.
1044
1045     I do not choose shareholders, that is left to the IUploader. I must be
1046     given a dict of RemoteReferences to storage buckets that are ready and
1047     willing to receive data.
1048     """
1049
1050     def set_size(size):
1051         """Specify the number of bytes that will be encoded. This must be
1052         peformed before get_serialized_params() can be called.
1053         """
1054     def set_params(params):
1055         """Override the default encoding parameters. 'params' is a tuple of
1056         (k,d,n), where 'k' is the number of required shares, 'd' is the
1057         shares_of_happiness, and 'n' is the total number of shares that will
1058         be created.
1059
1060         Encoding parameters can be set in three ways. 1: The Encoder class
1061         provides defaults (3/7/10). 2: the Encoder can be constructed with
1062         an 'options' dictionary, in which the
1063         needed_and_happy_and_total_shares' key can be a (k,d,n) tuple. 3:
1064         set_params((k,d,n)) can be called.
1065
1066         If you intend to use set_params(), you must call it before
1067         get_share_size or get_param are called.
1068         """
1069
1070     def set_encrypted_uploadable(u):
1071         """Provide a source of encrypted upload data. 'u' must implement
1072         IEncryptedUploadable.
1073
1074         When this is called, the IEncryptedUploadable will be queried for its
1075         length and the storage_index that should be used.
1076
1077         This returns a Deferred that fires with this Encoder instance.
1078
1079         This must be performed before start() can be called.
1080         """
1081
1082     def get_param(name):
1083         """Return an encoding parameter, by name.
1084
1085         'storage_index': return a string with the (16-byte truncated SHA-256
1086                          hash) storage index to which these shares should be
1087                          pushed.
1088
1089         'share_counts': return a tuple describing how many shares are used:
1090                         (needed_shares, shares_of_happiness, total_shares)
1091
1092         'num_segments': return an int with the number of segments that
1093                         will be encoded.
1094
1095         'segment_size': return an int with the size of each segment.
1096
1097         'block_size': return the size of the individual blocks that will
1098                       be delivered to a shareholder's put_block() method. By
1099                       knowing this, the shareholder will be able to keep all
1100                       blocks in a single file and still provide random access
1101                       when reading them. # TODO: can we avoid exposing this?
1102
1103         'share_size': an int with the size of the data that will be stored
1104                       on each shareholder. This is aggregate amount of data
1105                       that will be sent to the shareholder, summed over all
1106                       the put_block() calls I will ever make. It is useful to
1107                       determine this size before asking potential
1108                       shareholders whether they will grant a lease or not,
1109                       since their answers will depend upon how much space we
1110                       need. TODO: this might also include some amount of
1111                       overhead, like the size of all the hashes. We need to
1112                       decide whether this is useful or not.
1113
1114         'serialized_params': a string with a concise description of the
1115                              codec name and its parameters. This may be passed
1116                              into the IUploadable to let it make sure that
1117                              the same file encoded with different parameters
1118                              will result in different storage indexes.
1119
1120         Once this is called, set_size() and set_params() may not be called.
1121         """
1122
1123     def set_shareholders(shareholders):
1124         """Tell the encoder where to put the encoded shares. 'shareholders'
1125         must be a dictionary that maps share number (an integer ranging from
1126         0 to n-1) to an instance that provides IStorageBucketWriter. This
1127         must be performed before start() can be called."""
1128
1129     def start():
1130         """Begin the encode/upload process. This involves reading encrypted
1131         data from the IEncryptedUploadable, encoding it, uploading the shares
1132         to the shareholders, then sending the hash trees.
1133
1134         set_encrypted_uploadable() and set_shareholders() must be called
1135         before this can be invoked.
1136
1137         This returns a Deferred that fires with a tuple of
1138         (uri_extension_hash, needed_shares, total_shares, size) when the
1139         upload process is complete. This information, plus the encryption
1140         key, is sufficient to construct the URI.
1141         """
1142
1143 class IDecoder(Interface):
1144     """I take a list of shareholders and some setup information, then
1145     download, validate, decode, and decrypt data from them, writing the
1146     results to an output file.
1147
1148     I do not locate the shareholders, that is left to the IDownloader. I must
1149     be given a dict of RemoteReferences to storage buckets that are ready to
1150     send data.
1151     """
1152
1153     def setup(outfile):
1154         """I take a file-like object (providing write and close) to which all
1155         the plaintext data will be written.
1156
1157         TODO: producer/consumer . Maybe write() should return a Deferred that
1158         indicates when it will accept more data? But probably having the
1159         IDecoder be a producer is easier to glue to IConsumer pieces.
1160         """
1161
1162     def set_shareholders(shareholders):
1163         """I take a dictionary that maps share identifiers (small integers)
1164         to RemoteReferences that provide RIBucketReader. This must be called
1165         before start()."""
1166
1167     def start():
1168         """I start the download. This process involves retrieving data and
1169         hash chains from the shareholders, using the hashes to validate the
1170         data, decoding the shares into segments, decrypting the segments,
1171         then writing the resulting plaintext to the output file.
1172
1173         I return a Deferred that will fire (with self) when the download is
1174         complete.
1175         """
1176
1177 class IDownloadTarget(Interface):
1178     # Note that if the IDownloadTarget is also an IConsumer, the downloader
1179     # will register itself as a producer. This allows the target to invoke
1180     # downloader.pauseProducing, resumeProducing, and stopProducing.
1181     def open(size):
1182         """Called before any calls to write() or close(). If an error
1183         occurs before any data is available, fail() may be called without
1184         a previous call to open().
1185
1186         'size' is the length of the file being downloaded, in bytes."""
1187
1188     def write(data):
1189         """Output some data to the target."""
1190     def close():
1191         """Inform the target that there is no more data to be written."""
1192     def fail(why):
1193         """fail() is called to indicate that the download has failed. 'why'
1194         is a Failure object indicating what went wrong. No further methods
1195         will be invoked on the IDownloadTarget after fail()."""
1196     def register_canceller(cb):
1197         """The FileDownloader uses this to register a no-argument function
1198         that the target can call to cancel the download. Once this canceller
1199         is invoked, no further calls to write() or close() will be made."""
1200     def finish():
1201         """When the FileDownloader is done, this finish() function will be
1202         called. Whatever it returns will be returned to the invoker of
1203         Downloader.download.
1204         """
1205
1206 class IDownloader(Interface):
1207     def download(uri, target):
1208         """Perform a CHK download, sending the data to the given target.
1209         'target' must provide IDownloadTarget.
1210
1211         Returns a Deferred that fires (with the results of target.finish)
1212         when the download is finished, or errbacks if something went wrong."""
1213
1214 class IEncryptedUploadable(Interface):
1215     def set_upload_status(upload_status):
1216         """Provide an IUploadStatus object that should be filled with status
1217         information. The IEncryptedUploadable is responsible for setting
1218         key-determination progress ('chk'), size, storage_index, and
1219         ciphertext-fetch progress. It may delegate some of this
1220         responsibility to others, in particular to the IUploadable."""
1221
1222     def get_size():
1223         """This behaves just like IUploadable.get_size()."""
1224
1225     def get_all_encoding_parameters():
1226         """Return a Deferred that fires with a tuple of
1227         (k,happy,n,segment_size). The segment_size will be used as-is, and
1228         must match the following constraints: it must be a multiple of k, and
1229         it shouldn't be unreasonably larger than the file size (if
1230         segment_size is larger than filesize, the difference must be stored
1231         as padding).
1232
1233         This usually passes through to the IUploadable method of the same
1234         name.
1235
1236         The encoder strictly obeys the values returned by this method. To
1237         make an upload use non-default encoding parameters, you must arrange
1238         to control the values that this method returns.
1239         """
1240
1241     def get_storage_index():
1242         """Return a Deferred that fires with a 16-byte storage index.
1243         """
1244
1245     def read_encrypted(length, hash_only):
1246         """This behaves just like IUploadable.read(), but returns crypttext
1247         instead of plaintext. If hash_only is True, then this discards the
1248         data (and returns an empty list); this improves efficiency when
1249         resuming an interrupted upload (where we need to compute the
1250         plaintext hashes, but don't need the redundant encrypted data)."""
1251
1252     def get_plaintext_hashtree_leaves(first, last, num_segments):
1253         """Get the leaf nodes of a merkle hash tree over the plaintext
1254         segments, i.e. get the tagged hashes of the given segments. The
1255         segment size is expected to be generated by the IEncryptedUploadable
1256         before any plaintext is read or ciphertext produced, so that the
1257         segment hashes can be generated with only a single pass.
1258
1259         This returns a Deferred which fires with a sequence of hashes, using:
1260
1261          tuple(segment_hashes[first:last])
1262
1263         'num_segments' is used to assert that the number of segments that the
1264         IEncryptedUploadable handled matches the number of segments that the
1265         encoder was expecting.
1266
1267         This method must not be called until the final byte has been read
1268         from read_encrypted(). Once this method is called, read_encrypted()
1269         can never be called again.
1270         """
1271
1272     def get_plaintext_hash():
1273         """Get the hash of the whole plaintext.
1274
1275         This returns a Deferred which fires with a tagged SHA-256 hash of the
1276         whole plaintext, obtained from hashutil.plaintext_hash(data).
1277         """
1278
1279     def close():
1280         """Just like IUploadable.close()."""
1281
1282 class IUploadable(Interface):
1283     def set_upload_status(upload_status):
1284         """Provide an IUploadStatus object that should be filled with status
1285         information. The IUploadable is responsible for setting
1286         key-determination progress ('chk')."""
1287
1288     def set_default_encoding_parameters(params):
1289         """Set the default encoding parameters, which must be a dict mapping
1290         strings to ints. The meaningful keys are 'k', 'happy', 'n', and
1291         'max_segment_size'. These might have an influence on the final
1292         encoding parameters returned by get_all_encoding_parameters(), if the
1293         Uploadable doesn't have more specific preferences.
1294
1295         This call is optional: if it is not used, the Uploadable will use
1296         some built-in defaults. If used, this method must be called before
1297         any other IUploadable methods to have any effect.
1298         """
1299
1300     def get_size():
1301         """Return a Deferred that will fire with the length of the data to be
1302         uploaded, in bytes. This will be called before the data is actually
1303         used, to compute encoding parameters.
1304         """
1305
1306     def get_all_encoding_parameters():
1307         """Return a Deferred that fires with a tuple of
1308         (k,happy,n,segment_size). The segment_size will be used as-is, and
1309         must match the following constraints: it must be a multiple of k, and
1310         it shouldn't be unreasonably larger than the file size (if
1311         segment_size is larger than filesize, the difference must be stored
1312         as padding).
1313
1314         The relative values of k and n allow some IUploadables to request
1315         better redundancy than others (in exchange for consuming more space
1316         in the grid).
1317
1318         Larger values of segment_size reduce hash overhead, while smaller
1319         values reduce memory footprint and cause data to be delivered in
1320         smaller pieces (which may provide a smoother and more predictable
1321         download experience).
1322
1323         The encoder strictly obeys the values returned by this method. To
1324         make an upload use non-default encoding parameters, you must arrange
1325         to control the values that this method returns. One way to influence
1326         them may be to call set_encoding_parameters() before calling
1327         get_all_encoding_parameters().
1328         """
1329
1330     def get_encryption_key():
1331         """Return a Deferred that fires with a 16-byte AES key. This key will
1332         be used to encrypt the data. The key will also be hashed to derive
1333         the StorageIndex.
1334
1335         Uploadables which want to achieve convergence should hash their file
1336         contents and the serialized_encoding_parameters to form the key
1337         (which of course requires a full pass over the data). Uploadables can
1338         use the upload.ConvergentUploadMixin class to achieve this
1339         automatically.
1340
1341         Uploadables which do not care about convergence (or do not wish to
1342         make multiple passes over the data) can simply return a
1343         strongly-random 16 byte string.
1344
1345         get_encryption_key() may be called multiple times: the IUploadable is
1346         required to return the same value each time.
1347         """
1348
1349     def read(length):
1350         """Return a Deferred that fires with a list of strings (perhaps with
1351         only a single element) which, when concatenated together, contain the
1352         next 'length' bytes of data. If EOF is near, this may provide fewer
1353         than 'length' bytes. The total number of bytes provided by read()
1354         before it signals EOF must equal the size provided by get_size().
1355
1356         If the data must be acquired through multiple internal read
1357         operations, returning a list instead of a single string may help to
1358         reduce string copies.
1359
1360         'length' will typically be equal to (min(get_size(),1MB)/req_shares),
1361         so a 10kB file means length=3kB, 100kB file means length=30kB,
1362         and >=1MB file means length=300kB.
1363
1364         This method provides for a single full pass through the data. Later
1365         use cases may desire multiple passes or access to only parts of the
1366         data (such as a mutable file making small edits-in-place). This API
1367         will be expanded once those use cases are better understood.
1368         """
1369
1370     def close():
1371         """The upload is finished, and whatever filehandle was in use may be
1372         closed."""
1373
1374 class IUploadResults(Interface):
1375     """I am returned by upload() methods. I contain a number of public
1376     attributes which can be read to determine the results of the upload. Some
1377     of these are functional, some are timing information. All of these may be
1378     None.::
1379
1380      .file_size : the size of the file, in bytes
1381      .uri : the CHK read-cap for the file
1382      .ciphertext_fetched : how many bytes were fetched by the helper
1383      .sharemap : dict mapping share number to placement string
1384      .servermap : dict mapping server peerid to a set of share numbers
1385      .timings : dict of timing information, mapping name to seconds (float)
1386        total : total upload time, start to finish
1387        storage_index : time to compute the storage index
1388        peer_selection : time to decide which peers will be used
1389        contacting_helper : initial helper query to upload/no-upload decision
1390        existence_check : helper pre-upload existence check
1391        helper_total : initial helper query to helper finished pushing
1392        cumulative_fetch : helper waiting for ciphertext requests
1393        total_fetch : helper start to last ciphertext response
1394        cumulative_encoding : just time spent in zfec
1395        cumulative_sending : just time spent waiting for storage servers
1396        hashes_and_close : last segment push to shareholder close
1397        total_encode_and_push : first encode to shareholder close
1398
1399     """
1400
1401 class IDownloadResults(Interface):
1402     """I am created internally by download() methods. I contain a number of
1403     public attributes which contain details about the download process.::
1404
1405      .file_size : the size of the file, in bytes
1406      .servers_used : set of server peerids that were used during download
1407      .server_problems : dict mapping server peerid to a problem string. Only
1408                         servers that had problems (bad hashes, disconnects) are
1409                         listed here.
1410      .servermap : dict mapping server peerid to a set of share numbers. Only
1411                   servers that had any shares are listed here.
1412      .timings : dict of timing information, mapping name to seconds (float)
1413        peer_selection : time to ask servers about shares
1414        servers_peer_selection : dict of peerid to DYHB-query time
1415        uri_extension : time to fetch a copy of the URI extension block
1416        hashtrees : time to fetch the hash trees
1417        segments : time to fetch, decode, and deliver segments
1418        cumulative_fetch : time spent waiting for storage servers
1419        cumulative_decode : just time spent in zfec
1420        cumulative_decrypt : just time spent in decryption
1421        total : total download time, start to finish
1422        fetch_per_server : dict of peerid to list of per-segment fetch times
1423
1424     """
1425
1426 class IUploader(Interface):
1427     def upload(uploadable):
1428         """Upload the file. 'uploadable' must impement IUploadable. This
1429         returns a Deferred which fires with an UploadResults instance, from
1430         which the URI of the file can be obtained as results.uri ."""
1431
1432     def upload_ssk(write_capability, new_version, uploadable):
1433         """TODO: how should this work?"""
1434
1435 class ICheckable(Interface):
1436     def check(monitor, verify=False):
1437         """Check upon my health, optionally repairing any problems.
1438
1439         This returns a Deferred that fires with an instance that provides
1440         ICheckerResults, or None if the object is non-distributed (i.e. LIT
1441         files).
1442
1443         The monitor will be checked periodically to see if the operation has
1444         been cancelled. If so, no new queries will be sent, and the Deferred
1445         will fire (with a OperationCancelledError) immediately.
1446
1447         Filenodes and dirnodes (which provide IFilesystemNode) are also
1448         checkable. Instances that represent verifier-caps will be checkable
1449         but not downloadable. Some objects (like LIT files) do not actually
1450         live in the grid, and their checkers return None (non-distributed
1451         files are always healthy).
1452
1453         If verify=False, a relatively lightweight check will be performed: I
1454         will ask all servers if they have a share for me, and I will believe
1455         whatever they say. If there are at least N distinct shares on the
1456         grid, my results will indicate r.is_healthy()==True. This requires a
1457         roundtrip to each server, but does not transfer very much data, so
1458         the network bandwidth is fairly low.
1459
1460         If verify=True, a more resource-intensive check will be performed:
1461         every share will be downloaded, and the hashes will be validated on
1462         every bit. I will ignore any shares that failed their hash checks. If
1463         there are at least N distinct valid shares on the grid, my results
1464         will indicate r.is_healthy()==True. This requires N/k times as much
1465         download bandwidth (and server disk IO) as a regular download. If a
1466         storage server is holding a corrupt share, or is experiencing memory
1467         failures during retrieval, or is malicious or buggy, then
1468         verification will detect the problem, but checking will not.
1469
1470         TODO: any problems seen during checking will be reported to the
1471         health-manager.furl, a centralized object which is responsible for
1472         figuring out why files are unhealthy so corrective action can be
1473         taken.
1474         """
1475
1476     def check_and_repair(monitor, verify=False):
1477         """Like check(), but if the file/directory is not healthy, attempt to
1478         repair the damage.
1479
1480         Any non-healthy result will cause an immediate repair operation, to
1481         generate and upload new shares. After repair, the file will be as
1482         healthy as we can make it. Details about what sort of repair is done
1483         will be put in the check-and-repair results. The Deferred will not
1484         fire until the repair is complete.
1485
1486         This returns a Deferred which fires with an instance of
1487         ICheckAndRepairResults."""
1488
1489 class IDeepCheckable(Interface):
1490     def start_deep_check(verify=False):
1491         """Check upon the health of me and everything I can reach.
1492
1493         This is a recursive form of check(), useable only on dirnodes.
1494
1495         I return a Monitor, with results that are an IDeepCheckResults
1496         object.
1497         """
1498
1499     def start_deep_check_and_repair(verify=False):
1500         """Check upon the health of me and everything I can reach. Repair
1501         anything that isn't healthy.
1502
1503         This is a recursive form of check_and_repair(), useable only on
1504         dirnodes.
1505
1506         I return a Monitor, with results that are an
1507         IDeepCheckAndRepairResults object.
1508         """
1509
1510 class ICheckerResults(Interface):
1511     """I contain the detailed results of a check/verify operation.
1512     """
1513
1514     def get_storage_index():
1515         """Return a string with the (binary) storage index."""
1516     def get_storage_index_string():
1517         """Return a string with the (printable) abbreviated storage index."""
1518
1519     def is_healthy():
1520         """Return a boolean, True if the file/dir is fully healthy, False if
1521         it is damaged in any way. Non-distributed LIT files always return
1522         True."""
1523
1524     def needs_rebalancing():
1525         """Return a boolean, True if the file/dir's reliability could be
1526         improved by moving shares to new servers. Non-distributed LIT files
1527         always returne False."""
1528
1529
1530     def get_data():
1531         """Return a dictionary that describes the state of the file/dir.
1532         Non-distributed LIT files always return an empty dictionary. Normal
1533         files and directories return a dictionary with the following keys
1534         (note that these use base32-encoded strings rather than binary ones)
1535         (also note that for mutable files, these counts are for the 'best'
1536         version)::
1537
1538          count-shares-good: the number of distinct good shares that were found
1539          count-shares-needed: 'k', the number of shares required for recovery
1540          count-shares-expected: 'N', the number of total shares generated
1541          count-good-share-hosts: the number of distinct storage servers with
1542                                  good shares. If this number is less than
1543                                  count-shares-good, then some shares are
1544                                  doubled up, increasing the correlation of
1545                                  failures. This indicates that one or more
1546                                  shares should be moved to an otherwise unused
1547                                  server, if one is available.
1548          count-corrupt-shares: the number of shares with integrity failures
1549          list-corrupt-shares: a list of 'share locators', one for each share
1550                               that was found to be corrupt. Each share
1551                               locator is a list of (serverid, storage_index,
1552                               sharenum).
1553          servers-responding: list of (binary) storage server identifiers,
1554                              one for each server which responded to the share
1555                              query.
1556          sharemap: dict mapping share identifier to list of serverids
1557                    (binary strings). This indicates which servers are holding
1558                    which shares. For immutable files, the shareid is an
1559                    integer (the share number, from 0 to N-1). For immutable
1560                    files, it is a string of the form 'seq%d-%s-sh%d',
1561                    containing the sequence number, the roothash, and the
1562                    share number.
1563
1564         The following keys are most relevant for mutable files, but immutable
1565         files will provide sensible values too::
1566
1567          count-wrong-shares: the number of shares for versions other than the
1568                              'best' one (which is defined as being the
1569                              recoverable version with the highest sequence
1570                              number, then the highest roothash). These are
1571                              either leftover shares from an older version
1572                              (perhaps on a server that was offline when an
1573                              update occurred), shares from an unrecoverable
1574                              newer version, or shares from an alternate
1575                              current version that results from an
1576                              uncoordinated write collision. For a healthy
1577                              file, this will equal 0.
1578
1579          count-recoverable-versions: the number of recoverable versions of
1580                                      the file. For a healthy file, this will
1581                                      equal 1.
1582
1583          count-unrecoverable-versions: the number of unrecoverable versions
1584                                        of the file. For a healthy file, this
1585                                        will be 0.
1586
1587         """
1588
1589     def get_summary():
1590         """Return a string with a brief (one-line) summary of the results."""
1591
1592     def get_report():
1593         """Return a list of strings with more detailed results."""
1594
1595 class ICheckAndRepairResults(Interface):
1596     """I contain the detailed results of a check/verify/repair operation.
1597
1598     The IFilesystemNode.check()/verify()/repair() methods all return
1599     instances that provide ICheckAndRepairResults.
1600     """
1601
1602     def get_storage_index():
1603         """Return a string with the (binary) storage index."""
1604     def get_storage_index_string():
1605         """Return a string with the (printable) abbreviated storage index."""
1606     def get_repair_attempted():
1607         """Return a boolean, True if a repair was attempted."""
1608     def get_repair_successful():
1609         """Return a boolean, True if repair was attempted and the file/dir
1610         was fully healthy afterwards. False if no repair was attempted or if
1611         a repair attempt failed."""
1612     def get_pre_repair_results():
1613         """Return an ICheckerResults instance that describes the state of the
1614         file/dir before any repair was attempted."""
1615     def get_post_repair_results():
1616         """Return an ICheckerResults instance that describes the state of the
1617         file/dir after any repair was attempted. If no repair was attempted,
1618         the pre-repair and post-repair results will be identical."""
1619
1620
1621 class IDeepCheckResults(Interface):
1622     """I contain the results of a deep-check operation.
1623
1624     This is returned by a call to ICheckable.deep_check().
1625     """
1626
1627     def get_root_storage_index_string():
1628         """Return the storage index (abbreviated human-readable string) of
1629         the first object checked."""
1630     def get_counters():
1631         """Return a dictionary with the following keys::
1632
1633              count-objects-checked: count of how many objects were checked
1634              count-objects-healthy: how many of those objects were completely
1635                                     healthy
1636              count-objects-unhealthy: how many were damaged in some way
1637              count-corrupt-shares: how many shares were found to have
1638                                    corruption, summed over all objects
1639                                    examined
1640         """
1641
1642     def get_corrupt_shares():
1643         """Return a set of (serverid, storage_index, sharenum) for all shares
1644         that were found to be corrupt. Both serverid and storage_index are
1645         binary.
1646         """
1647     def get_all_results():
1648         """Return a dictionary mapping pathname (a tuple of strings, ready to
1649         be slash-joined) to an ICheckerResults instance, one for each object
1650         that was checked."""
1651
1652     def get_stats():
1653         """Return a dictionary with the same keys as
1654         IDirectoryNode.deep_stats()."""
1655
1656 class IDeepCheckAndRepairResults(Interface):
1657     """I contain the results of a deep-check-and-repair operation.
1658
1659     This is returned by a call to ICheckable.deep_check_and_repair().
1660     """
1661
1662     def get_root_storage_index_string():
1663         """Return the storage index (abbreviated human-readable string) of
1664         the first object checked."""
1665     def get_counters():
1666         """Return a dictionary with the following keys::
1667
1668              count-objects-checked: count of how many objects were checked
1669              count-objects-healthy-pre-repair: how many of those objects were
1670                                                completely healthy (before any
1671                                                repair)
1672              count-objects-unhealthy-pre-repair: how many were damaged in
1673                                                  some way
1674              count-objects-healthy-post-repair: how many of those objects were
1675                                                 completely healthy (after any
1676                                                 repair)
1677              count-objects-unhealthy-post-repair: how many were damaged in
1678                                                   some way
1679              count-repairs-attempted: repairs were attempted on this many
1680                                       objects. The count-repairs- keys will
1681                                       always be provided, however unless
1682                                       repair=true is present, they will all
1683                                       be zero.
1684              count-repairs-successful: how many repairs resulted in healthy
1685                                        objects
1686              count-repairs-unsuccessful: how many repairs resulted did not
1687                                          results in completely healthy objects
1688              count-corrupt-shares-pre-repair: how many shares were found to
1689                                               have corruption, summed over all
1690                                               objects examined (before any
1691                                               repair)
1692              count-corrupt-shares-post-repair: how many shares were found to
1693                                                have corruption, summed over all
1694                                                objects examined (after any
1695                                                repair)
1696         """
1697
1698     def get_stats():
1699         """Return a dictionary with the same keys as
1700         IDirectoryNode.deep_stats()."""
1701
1702     def get_corrupt_shares():
1703         """Return a set of (serverid, storage_index, sharenum) for all shares
1704         that were found to be corrupt before any repair was attempted. Both
1705         serverid and storage_index are binary.
1706         """
1707     def get_remaining_corrupt_shares():
1708         """Return a set of (serverid, storage_index, sharenum) for all shares
1709         that were found to be corrupt after any repair was completed. Both
1710         serverid and storage_index are binary. These are shares that need
1711         manual inspection and probably deletion.
1712         """
1713     def get_all_results():
1714         """Return a dictionary mapping pathname (a tuple of strings, ready to
1715         be slash-joined) to an ICheckAndRepairResults instance, one for each
1716         object that was checked."""
1717
1718
1719 class IRepairable(Interface):
1720     def repair(checker_results):
1721         """Attempt to repair the given object. Returns a Deferred that fires
1722         with a IRepairResults object.
1723
1724         I must be called with an object that implements ICheckerResults, as
1725         proof that you have actually discovered a problem with this file. I
1726         will use the data in the checker results to guide the repair process,
1727         such as which servers provided bad data and should therefore be
1728         avoided. The ICheckerResults object is inside the
1729         ICheckAndRepairResults object, which is returned by the
1730         ICheckable.check() method::
1731
1732          d = filenode.check(repair=False)
1733          def _got_results(check_and_repair_results):
1734              check_results = check_and_repair_results.get_pre_repair_results()
1735              return filenode.repair(check_results)
1736          d.addCallback(_got_results)
1737          return d
1738         """
1739
1740 class IRepairResults(Interface):
1741     """I contain the results of a repair operation."""
1742
1743
1744 class IClient(Interface):
1745     def upload(uploadable):
1746         """Upload some data into a CHK, get back the UploadResults for it.
1747         @param uploadable: something that implements IUploadable
1748         @return: a Deferred that fires with the UploadResults instance.
1749                  To get the URI for this file, use results.uri .
1750         """
1751
1752     def create_mutable_file(contents=""):
1753         """Create a new mutable file with contents, get back the URI string.
1754         @param contents: the initial contents to place in the file.
1755         @return: a Deferred that fires with tne (string) SSK URI for the new
1756                  file.
1757         """
1758
1759     def create_empty_dirnode():
1760         """Create a new dirnode, empty and unattached.
1761         @return: a Deferred that fires with the new IDirectoryNode instance.
1762         """
1763
1764     def create_node_from_uri(uri):
1765         """Create a new IFilesystemNode instance from the uri, synchronously.
1766         @param uri: a string or IURI-providing instance. This could be for a
1767                     LiteralFileNode, a CHK file node, a mutable file node, or
1768                     a directory node
1769         @return: an instance that provides IFilesystemNode (or more usefully one
1770                  of its subclasses). File-specifying URIs will result in
1771                  IFileNode or IMutableFileNode -providing instances, like
1772                  FileNode, LiteralFileNode, or MutableFileNode.
1773                  Directory-specifying URIs will result in
1774                  IDirectoryNode-providing instances, like NewDirectoryNode.
1775         """
1776
1777 class IClientStatus(Interface):
1778     def list_all_uploads():
1779         """Return a list of uploader objects, one for each upload which
1780         currently has an object available (tracked with weakrefs). This is
1781         intended for debugging purposes."""
1782     def list_active_uploads():
1783         """Return a list of active IUploadStatus objects."""
1784     def list_recent_uploads():
1785         """Return a list of IUploadStatus objects for the most recently
1786         started uploads."""
1787
1788     def list_all_downloads():
1789         """Return a list of downloader objects, one for each download which
1790         currently has an object available (tracked with weakrefs). This is
1791         intended for debugging purposes."""
1792     def list_active_downloads():
1793         """Return a list of active IDownloadStatus objects."""
1794     def list_recent_downloads():
1795         """Return a list of IDownloadStatus objects for the most recently
1796         started downloads."""
1797
1798 class IUploadStatus(Interface):
1799     def get_started():
1800         """Return a timestamp (float with seconds since epoch) indicating
1801         when the operation was started."""
1802     def get_storage_index():
1803         """Return a string with the (binary) storage index in use on this
1804         upload. Returns None if the storage index has not yet been
1805         calculated."""
1806     def get_size():
1807         """Return an integer with the number of bytes that will eventually
1808         be uploaded for this file. Returns None if the size is not yet known.
1809         """
1810     def using_helper():
1811         """Return True if this upload is using a Helper, False if not."""
1812     def get_status():
1813         """Return a string describing the current state of the upload
1814         process."""
1815     def get_progress():
1816         """Returns a tuple of floats, (chk, ciphertext, encode_and_push),
1817         each from 0.0 to 1.0 . 'chk' describes how much progress has been
1818         made towards hashing the file to determine a CHK encryption key: if
1819         non-convergent encryption is in use, this will be trivial, otherwise
1820         the whole file must be hashed. 'ciphertext' describes how much of the
1821         ciphertext has been pushed to the helper, and is '1.0' for non-helper
1822         uploads. 'encode_and_push' describes how much of the encode-and-push
1823         process has finished: for helper uploads this is dependent upon the
1824         helper providing progress reports. It might be reasonable to add all
1825         three numbers and report the sum to the user."""
1826     def get_active():
1827         """Return True if the upload is currently active, False if not."""
1828     def get_results():
1829         """Return an instance of UploadResults (which contains timing and
1830         sharemap information). Might return None if the upload is not yet
1831         finished."""
1832     def get_counter():
1833         """Each upload status gets a unique number: this method returns that
1834         number. This provides a handle to this particular upload, so a web
1835         page can generate a suitable hyperlink."""
1836
1837 class IDownloadStatus(Interface):
1838     def get_started():
1839         """Return a timestamp (float with seconds since epoch) indicating
1840         when the operation was started."""
1841     def get_storage_index():
1842         """Return a string with the (binary) storage index in use on this
1843         download. This may be None if there is no storage index (i.e. LIT
1844         files)."""
1845     def get_size():
1846         """Return an integer with the number of bytes that will eventually be
1847         retrieved for this file. Returns None if the size is not yet known.
1848         """
1849     def using_helper():
1850         """Return True if this download is using a Helper, False if not."""
1851     def get_status():
1852         """Return a string describing the current state of the download
1853         process."""
1854     def get_progress():
1855         """Returns a float (from 0.0 to 1.0) describing the amount of the
1856         download that has completed. This value will remain at 0.0 until the
1857         first byte of plaintext is pushed to the download target."""
1858     def get_active():
1859         """Return True if the download is currently active, False if not."""
1860     def get_counter():
1861         """Each download status gets a unique number: this method returns
1862         that number. This provides a handle to this particular download, so a
1863         web page can generate a suitable hyperlink."""
1864
1865 class IServermapUpdaterStatus(Interface):
1866     pass
1867 class IPublishStatus(Interface):
1868     pass
1869 class IRetrieveStatus(Interface):
1870     pass
1871
1872 class NotCapableError(Exception):
1873     """You have tried to write to a read-only node."""
1874
1875 class BadWriteEnablerError(Exception):
1876     pass
1877
1878 class RIControlClient(RemoteInterface):
1879
1880     def wait_for_client_connections(num_clients=int):
1881         """Do not return until we have connections to at least NUM_CLIENTS
1882         storage servers.
1883         """
1884
1885     def upload_from_file_to_uri(filename=str, convergence=ChoiceOf(None, StringConstraint(2**20))):
1886         """Upload a file to the grid. This accepts a filename (which must be
1887         absolute) that points to a file on the node's local disk. The node will
1888         read the contents of this file, upload it to the grid, then return the
1889         URI at which it was uploaded.  If convergence is None then a random
1890         encryption key will be used, else the plaintext will be hashed, then
1891         that hash will be mixed together with the "convergence" string to form
1892         the encryption key.
1893         """
1894         return URI
1895
1896     def download_from_uri_to_file(uri=URI, filename=str):
1897         """Download a file from the grid, placing it on the node's local disk
1898         at the given filename (which must be absolute[?]). Returns the
1899         absolute filename where the file was written."""
1900         return str
1901
1902     # debug stuff
1903
1904     def get_memory_usage():
1905         """Return a dict describes the amount of memory currently in use. The
1906         keys are 'VmPeak', 'VmSize', and 'VmData'. The values are integers,
1907         measuring memory consupmtion in bytes."""
1908         return DictOf(str, int)
1909
1910     def speed_test(count=int, size=int, mutable=Any()):
1911         """Write 'count' tempfiles to disk, all of the given size. Measure
1912         how long (in seconds) it takes to upload them all to the servers.
1913         Then measure how long it takes to download all of them. If 'mutable'
1914         is 'create', time creation of mutable files. If 'mutable' is
1915         'upload', then time access to the same mutable file instead of
1916         creating one.
1917
1918         Returns a tuple of (upload_time, download_time).
1919         """
1920         return (float, float)
1921
1922     def measure_peer_response_time():
1923         """Send a short message to each connected peer, and measure the time
1924         it takes for them to respond to it. This is a rough measure of the
1925         application-level round trip time.
1926
1927         @return: a dictionary mapping peerid to a float (RTT time in seconds)
1928         """
1929
1930         return DictOf(Nodeid, float)
1931
1932 UploadResults = Any() #DictOf(str, str)
1933
1934 class RIEncryptedUploadable(RemoteInterface):
1935     __remote_name__ = "RIEncryptedUploadable.tahoe.allmydata.com"
1936
1937     def get_size():
1938         return Offset
1939
1940     def get_all_encoding_parameters():
1941         return (int, int, int, long)
1942
1943     def read_encrypted(offset=Offset, length=ReadSize):
1944         return ListOf(str)
1945
1946     def get_plaintext_hashtree_leaves(first=int, last=int, num_segments=int):
1947         return ListOf(Hash)
1948
1949     def get_plaintext_hash():
1950         return Hash
1951
1952     def close():
1953         return None
1954
1955
1956 class RICHKUploadHelper(RemoteInterface):
1957     __remote_name__ = "RIUploadHelper.tahoe.allmydata.com"
1958
1959     def upload(reader=RIEncryptedUploadable):
1960         return UploadResults
1961
1962
1963 class RIHelper(RemoteInterface):
1964     __remote_name__ = "RIHelper.tahoe.allmydata.com"
1965
1966     def upload_chk(si=StorageIndex):
1967         """See if a file with a given storage index needs uploading. The
1968         helper will ask the appropriate storage servers to see if the file
1969         has already been uploaded. If so, the helper will return a set of
1970         'upload results' that includes whatever hashes are needed to build
1971         the read-cap, and perhaps a truncated sharemap.
1972
1973         If the file has not yet been uploaded (or if it was only partially
1974         uploaded), the helper will return an empty upload-results dictionary
1975         and also an RICHKUploadHelper object that will take care of the
1976         upload process. The client should call upload() on this object and
1977         pass it a reference to an RIEncryptedUploadable object that will
1978         provide ciphertext. When the upload is finished, the upload() method
1979         will finish and return the upload results.
1980         """
1981         return (UploadResults, ChoiceOf(RICHKUploadHelper, None))
1982
1983
1984 class RIStatsProvider(RemoteInterface):
1985     __remote_name__ = "RIStatsProvider.tahoe.allmydata.com"
1986     """
1987     Provides access to statistics and monitoring information.
1988     """
1989
1990     def get_stats():
1991         """
1992         returns a dictionary containing 'counters' and 'stats', each a dictionary
1993         with string counter/stat name keys, and numeric values.  counters are
1994         monotonically increasing measures of work done, and stats are instantaneous
1995         measures (potentially time averaged internally)
1996         """
1997         return DictOf(str, DictOf(str, ChoiceOf(float, int, long)))
1998
1999 class RIStatsGatherer(RemoteInterface):
2000     __remote_name__ = "RIStatsGatherer.tahoe.allmydata.com"
2001     """
2002     Provides a monitoring service for centralised collection of stats
2003     """
2004
2005     def provide(provider=RIStatsProvider, nickname=str):
2006         """
2007         @param provider: a stats collector instance which should be polled
2008                          periodically by the gatherer to collect stats.
2009         @param nickname: a name useful to identify the provided client
2010         """
2011         return None
2012
2013
2014 class IStatsProducer(Interface):
2015     def get_stats():
2016         """
2017         returns a dictionary, with str keys representing the names of stats
2018         to be monitored, and numeric values.
2019         """
2020
2021 class RIKeyGenerator(RemoteInterface):
2022     __remote_name__ = "RIKeyGenerator.tahoe.allmydata.com"
2023     """
2024     Provides a service offering to make RSA key pairs.
2025     """
2026
2027     def get_rsa_key_pair(key_size=int):
2028         """
2029         @param key_size: the size of the signature key.
2030         @return: tuple(verifying_key, signing_key)
2031         """
2032         return TupleOf(str, str)
2033
2034
2035 class FileTooLargeError(Exception):
2036     pass
2037