src/allmydata/interfaces.py

   1
   2 from zope.interface import Interface
   3 from foolscap.schema import StringConstraint, ListOf, TupleOf, SetOf, DictOf, \
   4      ChoiceOf, IntegerConstraint
   5 from foolscap import RemoteInterface, Referenceable
   6
# Length, in bytes, required of every value matched by the Hash constraint.
HASH_SIZE=32

Hash = StringConstraint(maxLength=HASH_SIZE,
                        minLength=HASH_SIZE)# binary format 32-byte SHA256 hash
Nodeid = StringConstraint(maxLength=20,
                          minLength=20) # binary format 20-byte SHA1 hash
FURL = StringConstraint(1000)
StorageIndex = StringConstraint(16)
URI = StringConstraint(300) # kind of arbitrary

MAX_BUCKETS = 200  # per peer

# MAX_SEGMENT_SIZE in encode.py is 1 MiB (this constraint allows k = 1)
ShareData = StringConstraint(2**20)
URIExtensionData = StringConstraint(1000)
Number = IntegerConstraint(8) # 2**(8*8) == 16EiB ~= 18e18 ~= 18 exabytes
Offset = Number
ReadSize = int # the 'int' constraint is 2**31 == 2GiB
LeaseRenewSecret = Hash # used to protect bucket lease renewal requests
LeaseCancelSecret = Hash # used to protect bucket lease cancellation requests

# Announcements are (FURL, service_name, remoteinterface_name,
#                    nickname, my_version, oldest_supported)
#  the (FURL, service_name, remoteinterface_name) refer to the service being
#  announced. The (nickname, my_version, oldest_supported) refer to the
#  client as a whole. The my_version/oldest_supported strings can be parsed
#  by an allmydata.util.version.Version instance, and then compared. The
#  first goal is to make sure that nodes are not confused by speaking to an
#  incompatible peer. The second goal is to enable the development of
#  backwards-compatibility code.

Announcement = TupleOf(FURL, str, str,
                       str, str, str)
  40
class RIIntroducerSubscriberClient(RemoteInterface):
    """I am the subscriber's side of an introducer subscription: a reference
    to me is handed to RIIntroducerSubscriberService.subscribe(), and I then
    receive announcements and the recommended encoding parameters."""
    __remote_name__ = "RIIntroducerSubscriberClient.tahoe.allmydata.com"

    def announce(announcements=SetOf(Announcement)):
        """I accept announcements from the publisher. 'announcements' is a
        set of Announcement tuples."""
        return None

    def set_encoding_parameters(parameters=(int, int, int)):
        """Advise the client of the recommended k-of-n encoding parameters
        for this grid. 'parameters' is a tuple of (k, desired, n), where 'n'
        is the total number of shares that will be created for any given
        file, while 'k' is the number of shares that must be retrieved to
        recover that file, and 'desired' is the minimum number of shares that
        must be placed before the uploader will consider its job a success.
        n/k is the expansion ratio, while k determines the robustness.

        Introducers should specify 'n' according to the expected size of the
        grid (there is no point to producing more shares than there are
        peers), and k according to the desired reliability-vs-overhead goals.

        Note that setting k=1 is equivalent to simple replication.
        """
        return None
  64
  65 # When Foolscap can handle multiple interfaces (Foolscap#17), the
  66 # full-powered introducer will implement both RIIntroducerPublisher and
  67 # RIIntroducerSubscriberService. Until then, we define
  68 # RIIntroducerPublisherAndSubscriberService as a combination of the two, and
  69 # make everybody use that.
  70
class RIIntroducerPublisher(RemoteInterface):
    """To publish a service to the world, connect to me and give me your
    announcement message. I will deliver a copy to all connected subscribers."""
    __remote_name__ = "RIIntroducerPublisher.tahoe.allmydata.com"

    def publish(announcement=Announcement):
        """Accept a single Announcement tuple for delivery to subscribers."""
        # canary?
        return None
  79
class RIIntroducerSubscriberService(RemoteInterface):
    __remote_name__ = "RIIntroducerSubscriberService.tahoe.allmydata.com"

    def subscribe(subscriber=RIIntroducerSubscriberClient, service_name=str):
        """Give me a subscriber reference, and I will call its announce()
        method with any announcements that match the desired service name. I
        will ignore duplicate subscriptions.
        """
        return None
  89
class RIIntroducerPublisherAndSubscriberService(RemoteInterface):
    """I combine RIIntroducerPublisher and RIIntroducerSubscriberService in
    a single RemoteInterface. I exist only until Foolscap can handle
    multiple interfaces (Foolscap#17); my publish() and subscribe() methods
    have the same signatures as the ones declared on those two interfaces."""
    __remote_name__ = "RIIntroducerPublisherAndSubscriberService.tahoe.allmydata.com"
    def publish(announcement=Announcement):
        return None
    def subscribe(subscriber=RIIntroducerSubscriberClient, service_name=str):
        return None
  96
class IIntroducerClient(Interface):
    """I provide service introduction facilities for a node. I help nodes
    publish their services to the rest of the world, and I help them learn
    about services available on other nodes."""

    def publish(furl, service_name, remoteinterface_name):
        """Once you call this, I will tell the world that the Referenceable
        available at FURL is available to provide a service named
        SERVICE_NAME. The precise definition of the service being provided is
        identified by the Foolscap 'remote interface name' in the last
        parameter (REMOTEINTERFACE_NAME): this is supposed to be a
        globally-unique string that identifies the RemoteInterface that is
        implemented."""

    def subscribe_to(service_name):
        """Call this if you will eventually want to use services with the
        given SERVICE_NAME. This will prompt me to subscribe to announcements
        of those services. You can pick up the announcements later by calling
        get_all_connections_for() or get_permuted_peers().
        """

    def get_all_connections():
        """Return a frozenset of (nodeid, service_name, rref) tuples, one for
        each active connection we've established to a remote service. This is
        mostly useful for unit tests that need to wait until a certain number
        of connections have been made."""

    def get_all_connectors():
        """Return a dict that maps from (nodeid, service_name) to a
        RemoteServiceConnector instance for all services that we are actively
        trying to connect to. Each RemoteServiceConnector has the following
        public attributes::

          service_name: the type of service provided, like 'storage'
          announcement_time: when we first heard about this service
          last_connect_time: when we last established a connection
          last_loss_time: when we last lost a connection

          version: the peer's version, from the most recent connection
          oldest_supported: the peer's oldest supported version, same

          rref: the RemoteReference, if connected, otherwise None
          remote_host: the IAddress, if connected, otherwise None

        This method is intended for monitoring interfaces, such as a web page
        which describes connecting and connected peers.
        """

    def get_all_peerids():
        """Return a frozenset of all peerids to whom we have a connection (to
        one or more services) established. Mostly useful for unit tests."""

    def get_all_connections_for(service_name):
        """Return a frozenset of (nodeid, service_name, rref) tuples, one
        for each active connection that provides the given SERVICE_NAME."""

    def get_permuted_peers(service_name, key):
        """Return an ordered list of (peerid, rref) tuples, selecting from
        the connections that provide SERVICE_NAME, using a hash-based
        permutation keyed by KEY. This randomizes the service list in a
        repeatable way, to distribute load over many peers.
        """

    def connected_to_introducer():
        """Return a boolean: True if we are currently connected to the
        introducer, False if not."""
 162
class RIStubClient(RemoteInterface):
    """Each client publishes a service announcement for a dummy object called
    the StubClient. This object doesn't actually offer any services, but the
    announcement helps the Introducer keep track of which clients are
    subscribed (so the grid admin can keep track of things like the size of
    the grid and the client versions in use). This is the (empty)
    RemoteInterface for the StubClient."""
 170
class RIBucketWriter(RemoteInterface):
    """I am the remote interface used to fill a bucket with share data.
    References to objects providing me are returned by
    RIStorageServer.allocate_buckets()."""
    def write(offset=Offset, data=ShareData):
        """Accept one chunk of share data for the given offset. 'data' is
        limited by the ShareData constraint, so anything larger has to be
        sent as several write() calls.
        """
        return None

    def close():
        """
        If the data that has been written is incomplete or inconsistent then
        the server will throw the data away, else it will store it for future
        retrieval.
        """
        return None

    def abort():
        """Abandon all the data that has been written.
        """
        return None
 187
class RIBucketReader(RemoteInterface):
    """I am the remote interface used to read back stored share data.
    References to objects providing me are returned by
    RIStorageServer.get_buckets()."""
    def read(offset=Offset, length=ReadSize):
        # ShareData is limited to 1MiB, so we don't need length= to be any
        # larger than that. Large files must be read in pieces.
        return ShareData
 193
# Schema pieces used by the RIStorageServer slot_* methods.
TestVector = ListOf(TupleOf(Offset, ReadSize, str, str))
# elements are (offset, length, operator, specimen)
# operator is one of "lt", "le", "eq", "ne", "ge", "gt", or "nop"
# nop always passes and is used to fetch data while writing.
# you should use length==len(specimen) for everything except nop
DataVector = ListOf(TupleOf(Offset, ShareData))
# (offset, data). This limits us to 30 writes of 1MiB each per call
TestAndWriteVectorsForShares = DictOf(int,
                                      TupleOf(TestVector,
                                              DataVector,
                                              ChoiceOf(None, Offset), # new_length
                                              ))
ReadVector = ListOf(TupleOf(Offset, ReadSize))
# elements are (offset, length)
ReadData = ListOf(ShareData)
# returns data[offset:offset+length] for each element of ReadVector
 209
class RIStorageServer(RemoteInterface):
    """I am the remote interface of a storage server. Buckets are created
    with allocate_buckets() and fetched with get_buckets(); leases on them
    are handled by renew_lease() and cancel_lease(). Mutable slots are read
    with slot_readv() and updated with slot_testv_and_readv_and_writev()."""
    __remote_name__ = "RIStorageServer.tahoe.allmydata.com"

    def get_versions():
        """Return a tuple of (my_version, oldest_supported) strings.
        Each string can be parsed by an allmydata.util.version.Version
        instance, and then compared. The first goal is to make sure that
        nodes are not confused by speaking to an incompatible peer. The
        second goal is to enable the development of backwards-compatibility
        code.

        This method is likely to change in incompatible ways until we get the
        whole compatibility scheme nailed down.
        """
        return TupleOf(str, str)

    def allocate_buckets(storage_index=StorageIndex,
                         renew_secret=LeaseRenewSecret,
                         cancel_secret=LeaseCancelSecret,
                         sharenums=SetOf(int, maxLength=MAX_BUCKETS),
                         allocated_size=Offset, canary=Referenceable):
        """
        @param storage_index: the index of the bucket to be created or
                              increfed.
        @param sharenums: these are the share numbers (probably between 0 and
                          99) that the sender is proposing to store on this
                          server.
        @param renew_secret: This is the secret used to protect bucket
                             refresh. This secret is generated by the client
                             and stored for later comparison by the server.
                             Each server is given a different secret.
        @param cancel_secret: Like renew_secret, but protects bucket decref.
        @param canary: If the canary is lost before close(), the bucket is
                       deleted.
        @return: tuple of (alreadygot, allocated), where alreadygot is what we
                 already have and allocated is what we hereby agree to
                 accept. New leases are added for shares in both lists.
        """
        return TupleOf(SetOf(int, maxLength=MAX_BUCKETS),
                       DictOf(int, RIBucketWriter, maxKeys=MAX_BUCKETS))

    def renew_lease(storage_index=StorageIndex, renew_secret=LeaseRenewSecret):
        """
        Renew the lease on a given bucket. Some networks will use this, some
        will not.
        """

    def cancel_lease(storage_index=StorageIndex,
                     cancel_secret=LeaseCancelSecret):
        """
        Cancel the lease on a given bucket. If this was the last lease on the
        bucket, the bucket will be deleted.
        """

    def get_buckets(storage_index=StorageIndex):
        # Returns a dict of RIBucketReader references for the given storage
        # index, with at most MAX_BUCKETS entries.
        # NOTE(review): the int keys are presumably share numbers -- confirm
        # against the server implementation.
        return DictOf(int, RIBucketReader, maxKeys=MAX_BUCKETS)



    def slot_readv(storage_index=StorageIndex,
                   shares=ListOf(int), readv=ReadVector):
        """Read a vector from the numbered shares associated with the given
        storage index. An empty shares list means to return data from all
        known shares. Returns a dictionary with one key per share."""
        return DictOf(int, ReadData) # shnum -> results

    def slot_testv_and_readv_and_writev(storage_index=StorageIndex,
                                        secrets=TupleOf(Hash, Hash, Hash),
                                        tw_vectors=TestAndWriteVectorsForShares,
                                        r_vector=ReadVector,
                                        ):
        """General-purpose test-and-set operation for mutable slots. Perform
        a bunch of comparisons against the existing shares. If they all pass,
        then apply a bunch of write vectors to those shares. Then use the
        read vectors to extract data from all the shares and return the data.

        This method is, um, large. The goal is to allow clients to update all
        the shares associated with a mutable file in a single round trip.

        @param storage_index: the index of the bucket to be created or
                              increfed.
        @param write_enabler: a secret that is stored along with the slot.
                              Writes are accepted from any caller who can
                              present the matching secret. A different secret
                              should be used for each slot*server pair.
        @param renew_secret: This is the secret used to protect bucket
                             refresh. This secret is generated by the client
                             and stored for later comparison by the server.
                             Each server is given a different secret.
        @param cancel_secret: Like renew_secret, but protects bucket decref.

        write_enabler, renew_secret, and cancel_secret are not passed as
        separate arguments: the 'secrets' argument is a tuple of
        (write_enabler, renew_secret, cancel_secret). The first is required
        to perform any write. The latter two are used when allocating new
        shares. To simply acquire a new lease on existing shares, use an
        empty testv and an empty writev.

        Each share can have a separate test vector (i.e. a list of
        comparisons to perform). If all vectors for all shares pass, then all
        writes for all shares are recorded. Each comparison is a 4-tuple of
        (offset, length, operator, specimen), which effectively does a bool(
        (read(offset, length)) OPERATOR specimen ) and only performs the
        write if all these evaluate to True. Basic test-and-set uses 'eq'.
        Write-if-newer uses a seqnum and (offset, length, 'lt', specimen).
        Write-if-same-or-newer uses 'le'.

        Reads from the end of the container are truncated, and missing shares
        behave like empty ones, so to assert that a share doesn't exist (for
        use when creating a new share), use (0, 1, 'eq', '').

        The write vector will be applied to the given share, expanding it if
        necessary. A write vector applied to a share number that did not
        exist previously will cause that share to be created.

        Each write vector is accompanied by a 'new_length' argument. If
        new_length is not None, use it to set the size of the container. This
        can be used to pre-allocate space for a series of upcoming writes, or
        truncate existing data. If the container is growing, new_length will
        be applied before datav. If the container is shrinking, it will be
        applied afterwards.

        The read vector is used to extract data from all known shares,
        *before* any writes have been applied. The same vector is used for
        all shares. This captures the state that was tested by the test
        vector.

        This method returns two values: a boolean and a dict. The boolean is
        True if the write vectors were applied, False if not. The dict is
        keyed by share number, and each value contains a list of strings, one
        for each element of the read vector.

        If the write_enabler is wrong, this will raise BadWriteEnablerError.
        To enable share migration, the exception will have the nodeid used
        for the old write enabler embedded in it, in the following string::

         The write enabler was recorded by nodeid '%s'.

        Note that the nodeid here is encoded using the same base32 encoding
        used by Foolscap and allmydata.util.idlib.nodeid_b2a().

        """
        return TupleOf(bool, DictOf(int, ReadData))
 351
class IStorageBucketWriter(Interface):
    """I accept the pieces that make up a share (blocks, hash lists, and the
    URI extension) for storage in a bucket. Each of my methods returns a
    Deferred that fires with None when the operation completes."""
    def put_block(segmentnum=int, data=ShareData):
        """@param data: For most segments, this data will be 'blocksize'
        bytes in length. The last segment might be shorter.
        @return: a Deferred that fires (with None) when the operation completes
        """

    def put_plaintext_hashes(hashes=ListOf(Hash, maxLength=2**20)):
        """
        @return: a Deferred that fires (with None) when the operation completes
        """

    def put_crypttext_hashes(hashes=ListOf(Hash, maxLength=2**20)):
        """
        @return: a Deferred that fires (with None) when the operation completes
        """

    def put_block_hashes(blockhashes=ListOf(Hash, maxLength=2**20)):
        """
        @return: a Deferred that fires (with None) when the operation completes
        """

    def put_share_hashes(sharehashes=ListOf(TupleOf(int, Hash),
                                            maxLength=2**20)):
        """
        @return: a Deferred that fires (with None) when the operation completes
        """

    def put_uri_extension(data=URIExtensionData):
        """This block of data contains integrity-checking information (hashes
        of plaintext, crypttext, and shares), as well as encoding parameters
        that are necessary to recover the data. This is a serialized dict
        mapping strings to other strings. The hash of this data is kept in
        the URI and verified before any of the data is used. All buckets for
        a given file contain identical copies of this data.

        The serialization format is specified with the following pseudocode:
        for k in sorted(dict.keys()):
            assert re.match(r'^[a-zA-Z_\-]+$', k)
            write(k + ':' + netstring(dict[k]))

        @return: a Deferred that fires (with None) when the operation completes
        """

    def close():
        """Finish writing and close the bucket. The share is not finalized
        until this method is called: if the uploading client disconnects
        before calling close(), the partially-written share will be
        discarded.

        @return: a Deferred that fires (with None) when the operation completes
        """
 404
class IStorageBucketReader(Interface):
    """I provide read access to the pieces of a stored share: its blocks,
    its hash lists, and its URI extension. The @return lines below name the
    schema of each result.

    NOTE(review): whether results are delivered directly or through a
    Deferred is not stated in this interface -- confirm against the
    implementation.
    """

    def get_block(blocknum=int):
        """Most blocks will be the same size. The last block might be shorter
        than the others.

        @return: ShareData
        """

    def get_plaintext_hashes():
        """
        @return: ListOf(Hash, maxLength=2**20)
        """

    def get_crypttext_hashes():
        """
        @return: ListOf(Hash, maxLength=2**20)
        """

    def get_block_hashes():
        """
        @return: ListOf(Hash, maxLength=2**20)
        """

    def get_share_hashes():
        """
        @return: ListOf(TupleOf(int, Hash), maxLength=2**20)
        """

    def get_uri_extension():
        """
        @return: URIExtensionData
        """
 438
 439
 440
# hm, we need a solution for forward references in schemas
from foolscap.schema import Any

# Placeholder constraints: until the TODOs below are resolved, these accept
# anything.
FileNode_ = Any() # TODO: foolscap needs constraints on copyables
DirectoryNode_ = Any() # TODO: same as FileNode_
AnyNode_ = ChoiceOf(FileNode_, DirectoryNode_)
EncryptedThing = str
 448
class IURI(Interface):
    def init_from_string(uri):
        """Accept a string (as created by my to_string() method) and populate
        this instance with its data. I am not normally called directly,
        please use the module-level uri.from_string() function to convert
        arbitrary URI strings into IURI-providing instances."""

    def is_readonly():
        """Return False if this URI can be used to modify the data. Return
        True if this URI cannot be used to modify the data."""

    def is_mutable():
        """Return True if the data can be modified by *somebody* (perhaps
        someone who has a more powerful URI than this one)."""

    def get_readonly():
        """Return another IURI instance, which represents a read-only form of
        this one. If is_readonly() is True, this returns self."""

    def get_verifier():
        """Return an instance that provides IVerifierURI, which can be used
        to check on the availability of the file or directory, without
        providing enough capabilities to actually read or modify the
        contents. This may return None if the file does not need checking or
        verification (e.g. LIT URIs).
        """

    def to_string():
        """Return a string of printable ASCII characters, suitable for
        passing into init_from_string."""
 479
class IVerifierURI(Interface):
    """I am the kind of object returned by IURI.get_verifier(): I can be
    used to check on the availability of a file or directory, without
    providing enough capabilities to actually read or modify the contents."""
    def init_from_string(uri):
        """Accept a string (as created by my to_string() method) and populate
        this instance with its data. I am not normally called directly,
        please use the module-level uri.from_string() function to convert
        arbitrary URI strings into IURI-providing instances."""

    def to_string():
        """Return a string of printable ASCII characters, suitable for
        passing into init_from_string."""
 490
class IDirnodeURI(Interface):
    """I am a URI which represents a dirnode."""
    # Marker interface: no methods are declared on it.
 493
 494
class IFileURI(Interface):
    """I am a URI which represents a filenode."""
    def get_size():
        """Return the length (in bytes) of the file that I represent."""
        # NOTE(review): the docstring does not say whether the length is
        # returned directly or via a Deferred -- confirm against the
        # implementation.
 499
class IMutableFileURI(Interface):
    """I am a URI which represents a mutable filenode."""
    # Marker interface: no methods are declared on it.
class INewDirectoryURI(Interface):
    # Marker interface: declares no methods and has no docstring.
    # NOTE(review): presumably marks URIs that refer to directories, with
    # IReadonlyNewDirectoryURI as the read-only counterpart -- confirm
    # against the uri module.
    pass
class IReadonlyNewDirectoryURI(Interface):
    # Marker interface: declares no methods and has no docstring.
    # NOTE(review): presumably marks read-only URIs that refer to
    # directories -- confirm against the uri module.
    pass
 506
 507
class IFilesystemNode(Interface):
    def get_uri():
        """
        Return the URI that can be used by others to get access to this
        node. If this node is read-only, the URI will only offer read-only
        access. If this node is read-write, the URI will offer read-write
        access.

        If you have read-write access to a node and wish to share merely
        read-only access with others, use get_readonly_uri().
        """

    def get_readonly_uri():
        """Return the URI that can be used by others to get read-only access
        to this node. The result is a read-only URI, regardless of whether
        this node is read-only or read-write.

        If you have merely read-only access to this node,
        get_readonly_uri() will return the same thing as get_uri().
        """

    def get_verifier():
        """Return an IVerifierURI instance that represents the
        'verify/refresh capability' for this node. The holder of this
        capability will be able to renew the lease for this node, protecting
        it from garbage-collection. They will also be able to ask a server if
        it holds a share for the file or directory.
        """

    def check():
        """Perform a file check. See IChecker.check for details."""

    def is_readonly():
        """Return True if this reference provides only read-only access to
        the given file or directory (i.e. if you cannot modify it), or False
        if it provides mutable access. Note that even if this reference is
        read-only, someone else may hold a read-write reference to it."""

    def is_mutable():
        """Return True if this file or directory is mutable (by *somebody*,
        not necessarily you), False if it is immutable. Note that a file
        might be mutable overall, but your reference to it might be
        read-only. On the other hand, all references to an immutable file
        will be read-only; there are no read-write references to an immutable
        file.
        """
 554
class IMutableFilesystemNode(IFilesystemNode):
    # Adds no methods of its own; in this file it serves as a base for
    # IMutableFileNode and IDirectoryNode.
    pass
 557
class IFileNode(IFilesystemNode):
    """I represent a file. In addition to the IFilesystemNode methods, I can
    download my contents and report their size."""
    def download(target):
        """Download the file's contents to a given IDownloadTarget"""

    def download_to_data():
        """Download the file's contents. Return a Deferred that fires
        with those contents."""

    def get_size():
        """Return the length (in bytes) of the data this node represents."""
 568
class IMutableFileNode(IFileNode, IMutableFilesystemNode):
    """I represent a file whose contents can be replaced, either with
    update() (read-modify-write) or with overwrite() (unconditional)."""
    def download_to_data():
        """Download the file's contents. Return a Deferred that fires with
        those contents. If there are multiple retrievable versions in the
        grid (because you failed to avoid simultaneous writes, see
        docs/mutable.txt), this will return the first version that it can
        reconstruct, and will silently ignore the others. In the future, a
        more advanced API will signal and provide access to the multiple
        heads."""

    def update(newdata):
        """Attempt to replace the old contents with the new data.

        download_to_data() must have been called before calling update().

        Returns a Deferred. If the Deferred fires successfully, the update
        appeared to succeed. However, another writer (who read before your
        changes were published) might still clobber your changes: they will
        discover a problem but you will not. (see ticket #347 for details).

        If the mutable file has been changed (by some other writer) since the
        last call to download_to_data(), this will raise
        UncoordinatedWriteError and the file will be left in an inconsistent
        state (possibly the version you provided, possibly the old version,
        possibly somebody else's version, and possibly a mix of shares from
        all of these). The recommended response to UncoordinatedWriteError is
        to either return it to the caller (since they failed to coordinate
        their writes), or to do a new download_to_data() / modify-data /
        update() loop.

        update() is appropriate to use in a read-modify-write sequence, such
        as a directory modification.
        """

    def overwrite(newdata):
        """Attempt to replace the old contents with the new data.

        Unlike update(), overwrite() does not require a previous call to
        download_to_data(). It will unconditionally replace the old contents
        with new data.

        overwrite() is implemented by doing download_to_data() and update()
        in rapid succession, so there remains a (smaller) possibility of
        UncoordinatedWriteError. A future version will remove the full
        download_to_data step, making this faster than update().

        overwrite() is only appropriate to use when the new contents of the
        mutable file are completely unrelated to the old ones, and you do not
        care about other clients' changes to the file.
        """

    def get_writekey():
        """Return this filenode's writekey, or None if the node does not have
        write-capability. This may be used to assist with data structures
        that need to make certain data available only to writers, such as the
        read-write child caps in dirnodes. The recommended process is to have
        reader-visible data be submitted to the filenode in the clear (where
        it will be encrypted by the filenode using the readkey), but encrypt
        writer-visible data using this writekey.
        """
 629
 630 class IDirectoryNode(IMutableFilesystemNode):
 631     """I represent a name-to-child mapping, holding the tahoe equivalent of a
 632     directory. All child names are unicode strings, and all children are some
 633     sort of IFilesystemNode (either files or subdirectories).
 634     """
 635
 636     def get_uri():
 637         """
 638         The dirnode ('1') URI returned by this method can be used in
 639         set_uri() on a different directory ('2') to 'mount' a reference to
 640         this directory ('1') under the other ('2'). This URI is just a
 641         string, so it can be passed around through email or other out-of-band
 642         protocol.
 643         """
 644
 645     def get_readonly_uri():
 646         """
 647         The read-only dirnode ('1') URI returned by this method can be used
 648         in set_uri() on a different directory ('2') to 'mount' a read-only
 649         reference to this directory ('1') under the other ('2'). This URI is
 650         just a string, so it can be passed around through email or other
 651         out-of-band protocol.
 652         """
 653
 654     def list():
 655         """I return a Deferred that fires with a dictionary mapping child
 656         name (a unicode string) to (node, metadata_dict) tuples, in which
 657         'node' is either an IFileNode or IDirectoryNode, and 'metadata_dict'
 658         is a dictionary of metadata."""
 659
 660     def has_child(name):
 661         """I return a Deferred that fires with a boolean, True if there
 662         exists a child of the given name, False if not. The child name must
 663         be a unicode string."""
 664
 665     def get(name):
 666         """I return a Deferred that fires with a specific named child node,
 667         either an IFileNode or an IDirectoryNode. The child name must be a
 668         unicode string."""
 669
 670     def get_metadata_for(name):
 671         """I return a Deferred that fires with the metadata dictionary for a
 672         specific named child node. This metadata is stored in the *edge*, not
 673         in the child, so it is attached to the parent dirnode rather than the
 674         child dir-or-file-node. The child name must be a unicode string."""
 675
 676     def set_metadata_for(name, metadata):
 677         """I replace any existing metadata for the named child with the new
 678         metadata. The child name must be a unicode string. This metadata is
 679         stored in the *edge*, not in the child, so it is attached to the
 680         parent dirnode rather than the child dir-or-file-node. I return a
 681         Deferred (that fires with this dirnode) when the operation is
 682         complete."""
 683
 684     def get_child_at_path(path):
 685         """Transform a child path into an IDirectoryNode or IFileNode.
 686
 687         I perform a recursive series of 'get' operations to find the named
 688         descendant node. I return a Deferred that fires with the node, or
 689         errbacks with IndexError if the node could not be found.
 690
 691         The path can be either a single string (slash-separated) or a list of
 692         path-name elements. All elements must be unicode strings.
 693         """
 694
 695     def set_uri(name, child_uri, metadata=None):
 696         """I add a child (by URI) at the specific name. I return a Deferred
 697         that fires when the operation finishes. I will replace any existing
 698         child of the same name. The child name must be a unicode string.
 699
 700         The child_uri could be for a file, or for a directory (either
 701         read-write or read-only, using a URI that came from get_uri()).
 702
 703         If metadata= is provided, I will use it as the metadata for the named
 704         edge. This will replace any existing metadata. If metadata= is left
 705         as the default value of None, I will set ['mtime'] to the current
 706         time, and I will set ['ctime'] to the current time if there was not
 707         already a child by this name present. This roughly matches the
 708         ctime/mtime semantics of traditional filesystems.
 709
 710         If this directory node is read-only, the Deferred will errback with a
 711         NotMutableError."""
 712
 713     def set_children(entries):
 714         """Add multiple (name, child_uri) pairs (or (name, child_uri,
 715         metadata) triples) to a directory node. Returns a Deferred that fires
 716         (with None) when the operation finishes. This is equivalent to
 717         calling set_uri() multiple times, but is much more efficient. All
 718         child names must be unicode strings.
 719         """
 720
 721     def set_node(name, child, metadata=None):
 722         """I add a child at the specific name. I return a Deferred that fires
 723         when the operation finishes. This Deferred will fire with the child
 724         node that was just added. I will replace any existing child of the
 725         same name. The child name must be a unicode string.
 726
 727         If metadata= is provided, I will use it as the metadata for the named
 728         edge. This will replace any existing metadata. If metadata= is left
 729         as the default value of None, I will set ['mtime'] to the current
 730         time, and I will set ['ctime'] to the current time if there was not
 731         already a child by this name present. This roughly matches the
 732         ctime/mtime semantics of traditional filesystems.
 733
 734         If this directory node is read-only, the Deferred will errback with a
 735         NotMutableError."""
 736
 737     def set_nodes(entries):
 738         """Add multiple (name, child_node) pairs (or (name, child_node,
 739         metadata) triples) to a directory node. Returns a Deferred that fires
 740         (with None) when the operation finishes. This is equivalent to
 741         calling set_node() multiple times, but is much more efficient. All
 742         child names must be unicode strings."""
 743
 744
 745     def add_file(name, uploadable, metadata=None):
 746         """I upload a file (using the given IUploadable), then attach the
 747         resulting FileNode to the directory at the given name. I set metadata
 748         the same way as set_uri and set_node. The child name must be a
 749         unicode string.
 750
 751         I return a Deferred that fires (with the IFileNode of the uploaded
 752         file) when the operation completes."""
 753
 754     def delete(name):
 755         """I remove the child at the specific name. I return a Deferred that
 756         fires when the operation finishes. The child name must be a unicode
 757         string."""
 758
 759     def create_empty_directory(name):
 760         """I create and attach an empty directory at the given name. The
 761         child name must be a unicode string. I return a Deferred that fires
 762         when the operation finishes."""
 763
 764     def move_child_to(current_child_name, new_parent, new_child_name=None):
 765         """I take one of my children and move it to a new parent. The child
 766         is referenced by name. On the new parent, the child will live under
 767         'new_child_name', which defaults to 'current_child_name'. TODO: what
 768         should we do about metadata? I return a Deferred that fires when the
 769         operation finishes. Both child names must be unicode strings."""
 770
 771     def build_manifest():
 772         """Return a frozenset of verifier-capability strings for all nodes
 773         (directories and files) reachable from this one."""
 774
 775 class ICodecEncoder(Interface):
 776     def set_params(data_size, required_shares, max_shares):
 777         """Set up the parameters of this encoder.
 778
 779         This prepares the encoder to perform an operation that converts a
 780         single block of data into a number of shares, such that a future
 781         ICodecDecoder can use a subset of these shares to recover the
 782         original data. This operation is invoked by calling encode(). Once
 783         the encoding parameters are set up, the encode operation can be
 784         invoked multiple times.
 785
 786         set_params() prepares the encoder to accept blocks of input data that
 787         are exactly 'data_size' bytes in length. The encoder will be prepared
 788         to produce 'max_shares' shares for each encode() operation (although
 789         see encode()'s 'desired_share_ids' to use less CPU). The encoding math
 790         will be chosen such that the decoder can get by with as few as
 791         'required_shares' of these shares and still reproduce the original
 792         data. For example, set_params(1000, 5, 5) offers no redundancy at
 793         all, whereas set_params(1000, 1, 10) provides 10x redundancy.
 794
 795         Numerical Restrictions: 'data_size' is required to be an integral
 796         multiple of 'required_shares'. In general, the caller should choose
 797         required_shares and max_shares based upon their reliability
 798         requirements and the number of peers available (the total storage
 799         space used is roughly equal to max_shares*data_size/required_shares),
 800         then choose data_size to achieve the memory footprint desired (larger
 801         data_size means more efficient operation, smaller data_size means
 802         smaller memory footprint).
 803
 804         In addition, 'max_shares' must be equal to or greater than
 805         'required_shares'. Of course, setting them to be equal causes
 806         encode() to degenerate into a particularly slow form of the 'split'
 807         utility.
 808
 809         See encode() for more details about how these parameters are used.
 810
 811         set_params() must be called before any other ICodecEncoder methods
 812         may be invoked.
 813         """
 814
 815     def get_encoder_type():
 816         """Return a short string that describes the type of this encoder.
 817
 818         There is required to be a global table of encoder classes. This method
 819         returns an index into this table; the value at this index is an
 820         encoder class, and this encoder is an instance of that class.
 821         """
 822
 823     def get_serialized_params(): # TODO: maybe, maybe not
 824         """Return a string that describes the parameters of this encoder.
 825
 826         This string can be passed to the decoder to prepare it for handling
 827         the encoded shares we create. It might contain more information than
 828         was presented to set_params(), if there is some flexibility of
 829         parameter choice.
 830
 831         This string is intended to be embedded in the URI, so there are
 832         several restrictions on its contents. At the moment I'm thinking that
 833         this means it may contain hex digits and hyphens, and nothing else.
 834         The idea is that the URI contains something like '%s:%s:%s' %
 835         (encoder.get_encoder_type(), encoder.get_serialized_params(),
 836         b2a(crypttext_hash)), and this is enough information to construct a
 837         compatible decoder.
 838         """
 839
 840     def get_block_size():
 841         """Return the length of the shares that encode() will produce.
 842         """
 843
 844     def encode_proposal(data, desired_share_ids=None):
 845         """Encode some data.
 846
 847         'data' must be a string (or other buffer object), and len(data) must
 848         be equal to the 'data_size' value passed earlier to set_params().
 849
 850         This will return a Deferred that will fire with two lists. The first
 851         is a list of shares, each of which is a string (or other buffer
 852         object) such that len(share) is the same as what get_block_size()
 853         returned earlier. The second is a list of shareids, in which each is
 854         an integer. The lengths of the two lists will always be equal to each
 855         other. The user should take care to keep each share closely
 856         associated with its shareid, as one is useless without the other.
 857
 858         The length of this output list will normally be the same as the value
 859         provided to the 'max_shares' parameter of set_params(). This may be
 860         different if 'desired_share_ids' is provided.
 861
 862         'desired_share_ids', if provided, is required to be a sequence of
 863         ints, each of which is required to be >= 0 and < max_shares. If not
 864         provided, encode() will produce 'max_shares' shares, as if
 865         'desired_share_ids' were set to range(max_shares). You might use this
 866         if you initially thought you were going to use 10 peers, started
 867         encoding, and then two of the peers dropped out: you could use
 868         desired_share_ids= to skip the work (both memory and CPU) of
 869         producing shares for the peers which are no longer available.
 870
 871         """
 872
 873     def encode(inshares, desired_share_ids=None):
 874         """Encode some data. This may be called multiple times. Each call is
 875         independent.
 876
 877         inshares is a sequence of length required_shares, containing buffers
 878         (i.e. strings), where each buffer contains the next contiguous
 879         non-overlapping segment of the input data. Each buffer is required to
 880         be the same length, and the sum of the lengths of the buffers is
 881         required to be exactly the data_size promised by set_params(). (This
 882         implies that the data has to be padded before being passed to
 883         encode(), unless of course it already happens to be an even multiple
 884         of required_shares in length.)
 885
 886          ALSO: the requirement to break up your data into 'required_shares'
 887          chunks before calling encode() feels a bit surprising, at least from
 888          the point of view of a user who doesn't know how FEC works. It feels
 889          like an implementation detail that has leaked outside the
 890          abstraction barrier. Can you imagine a use case in which the data to
 891          be encoded might already be available in pre-segmented chunks, such
 892          that it is faster or less work to make encode() take a list rather
 893          than splitting a single string?
 894
 895          ALSO ALSO: I think 'inshares' is a misleading term, since encode()
 896          is supposed to *produce* shares, so what it *accepts* should be
 897          something other than shares. Other places in this interface use the
 898          word 'data' for that-which-is-not-shares.. maybe we should use that
 899          term?
 900
 901         'desired_share_ids', if provided, is required to be a sequence of
 902         ints, each of which is required to be >= 0 and < max_shares. If not
 903         provided, encode() will produce 'max_shares' shares, as if
 904         'desired_share_ids' were set to range(max_shares). You might use this
 905         if you initially thought you were going to use 10 peers, started
 906         encoding, and then two of the peers dropped out: you could use
 907         desired_share_ids= to skip the work (both memory and CPU) of
 908         producing shares for the peers which are no longer available.
 909
 910         For each call, encode() will return a Deferred that fires with two
 911         lists, one containing shares and the other containing the shareids.
 912         The get_block_size() method can be used to determine the length of
 913         the share strings returned by encode(). Each shareid is a small
 914         integer, exactly as passed into 'desired_share_ids' (or
 915         range(max_shares), if desired_share_ids was not provided).
 916
 917         The shares and their corresponding shareids are required to be kept
 918         together during storage and retrieval. Specifically, the share data is
 919         useless by itself: the decoder needs to be told which share is which
 920         by providing it with both the shareid and the actual share data.
 921
 922         This function will allocate an amount of memory roughly equal to::
 923
 924          (max_shares - required_shares) * get_block_size()
 925
 926         When combined with the memory that the caller must allocate to
 927         provide the input data, this leads to a memory footprint roughly
 928         equal to the size of the resulting encoded shares (i.e. the expansion
 929         factor times the size of the input segment).
 930         """
 931
 932         # rejected ideas:
 933         #
 934         #  returning a list of (shareidN,shareN) tuples instead of a pair of
 935         #  lists (shareids..,shares..). Brian thought the tuples would
 936         #  encourage users to keep the share and shareid together throughout
 937         #  later processing, Zooko pointed out that the code to iterate
 938         #  through two lists is not really more complicated than using a list
 939         #  of tuples and there's also a performance improvement
 940         #
 941         #  having 'data_size' not required to be an integral multiple of
 942         #  'required_shares'. Doing this would require encode() to perform
 943         #  padding internally, and we'd prefer to have any padding be done
 944         #  explicitly by the caller. Yes, it is an abstraction leak, but
 945         #  hopefully not an onerous one.
 946
 947
 948 class ICodecDecoder(Interface):
 949     def set_serialized_params(params):
 950         """Set up the parameters of this decoder, from a string returned by
 951         encoder.get_serialized_params()."""
 952
 953     def get_needed_shares():
 954         """Return the number of shares needed to reconstruct the data.
 955         set_serialized_params() is required to be called before this."""
 956
 957     def decode(some_shares, their_shareids):
 958         """Decode a partial list of shares into data.
 959
 960         'some_shares' is required to be a sequence of buffers of sharedata, a
 961         subset of the shares returned by ICodecEncoder.encode(). Each share is
 962         required to be of the same length.  The i'th element of their_shareids
 963         is required to be the shareid of the i'th buffer in some_shares.
 964
 965         This returns a Deferred which fires with a sequence of buffers. This
 966         sequence will contain all of the segments of the original data, in
 967         order. The sum of the lengths of all of the buffers will be the
 968         'data_size' value passed into the original ICodecEncoder.set_params()
 969         call. To get back the single original input block of data, use
 970         ''.join(output_buffers), or you may wish to simply write them in
 971         order to an output file.
 972
 973         Note that some of the elements in the result sequence may be
 974         references to the elements of the some_shares input sequence. In
 975         particular, this means that if those share objects are mutable (e.g.
 976         arrays) and if they are changed, then both the input (the
 977         'some_shares' parameter) and the output (the value given when the
 978         deferred is triggered) will change.
 979
 980         The length of 'some_shares' is required to be exactly the value of
 981         'required_shares' passed into the original ICodecEncoder.set_params()
 982         call.
 983         """
 984
 985 class IEncoder(Interface):
 986     """I take an object that provides IEncryptedUploadable, which provides
 987     encrypted data, and a list of shareholders. I then encode, hash, and
 988     deliver shares to those shareholders. I will compute all the necessary
 989     Merkle hash trees that are necessary to validate the crypttext that
 990     eventually comes back from the shareholders. I provide the URI Extension
 991     Block Hash, and the encoding parameters, both of which must be included
 992     in the URI.
 993
 994     I do not choose shareholders; that is left to the IUploader. I must be
 995     given a dict of RemoteReferences to storage buckets that are ready and
 996     willing to receive data.
 997     """
 998
 999     def set_size(size):
1000         """Specify the number of bytes that will be encoded. This must be
1001         performed before get_serialized_params() can be called.
1002         """
1003     def set_params(params):
1004         """Override the default encoding parameters. 'params' is a tuple of
1005         (k,d,n), where 'k' is the number of required shares, 'd' is the
1006         shares_of_happiness, and 'n' is the total number of shares that will
1007         be created.
1008
1009         Encoding parameters can be set in three ways. 1: The Encoder class
1010         provides defaults (3/7/10). 2: the Encoder can be constructed with
1011         an 'options' dictionary, in which the
1012         'needed_and_happy_and_total_shares' key can be a (k,d,n) tuple. 3:
1013         set_params((k,d,n)) can be called.
1014
1015         If you intend to use set_params(), you must call it before
1016         get_share_size or get_param are called.
1017         """
1018
1019     def set_encrypted_uploadable(u):
1020         """Provide a source of encrypted upload data. 'u' must implement
1021         IEncryptedUploadable.
1022
1023         When this is called, the IEncryptedUploadable will be queried for its
1024         length and the storage_index that should be used.
1025
1026         This returns a Deferred that fires with this Encoder instance.
1027
1028         This must be performed before start() can be called.
1029         """
1030
1031     def get_param(name):
1032         """Return an encoding parameter, by name.
1033
1034         'storage_index': return a string with the (16-byte truncated SHA-256
1035                          hash) storage index to which these shares should be
1036                          pushed.
1037
1038         'share_counts': return a tuple describing how many shares are used:
1039                         (needed_shares, shares_of_happiness, total_shares)
1040
1041         'num_segments': return an int with the number of segments that
1042                         will be encoded.
1043
1044         'segment_size': return an int with the size of each segment.
1045
1046         'block_size': return the size of the individual blocks that will
1047                       be delivered to a shareholder's put_block() method. By
1048                       knowing this, the shareholder will be able to keep all
1049                       blocks in a single file and still provide random access
1050                       when reading them. # TODO: can we avoid exposing this?
1051
1052         'share_size': an int with the size of the data that will be stored
1053                       on each shareholder. This is the aggregate amount of data
1054                       that will be sent to the shareholder, summed over all
1055                       the put_block() calls I will ever make. It is useful to
1056                       determine this size before asking potential
1057                       shareholders whether they will grant a lease or not,
1058                       since their answers will depend upon how much space we
1059                       need. TODO: this might also include some amount of
1060                       overhead, like the size of all the hashes. We need to
1061                       decide whether this is useful or not.
1062
1063         'serialized_params': a string with a concise description of the
1064                              codec name and its parameters. This may be passed
1065                              into the IUploadable to let it make sure that
1066                              the same file encoded with different parameters
1067                              will result in different storage indexes.
1068
1069         Once this is called, set_size() and set_params() may not be called.
1070         """
1071
1072     def set_shareholders(shareholders):
1073         """Tell the encoder where to put the encoded shares. 'shareholders'
1074         must be a dictionary that maps share number (an integer ranging from
1075         0 to n-1) to an instance that provides IStorageBucketWriter. This
1076         must be performed before start() can be called."""
1077
1078     def start():
1079         """Begin the encode/upload process. This involves reading encrypted
1080         data from the IEncryptedUploadable, encoding it, uploading the shares
1081         to the shareholders, then sending the hash trees.
1082
1083         set_encrypted_uploadable() and set_shareholders() must be called
1084         before this can be invoked.
1085
1086         This returns a Deferred that fires with a tuple of
1087         (uri_extension_hash, needed_shares, total_shares, size) when the
1088         upload process is complete. This information, plus the encryption
1089         key, is sufficient to construct the URI.
1090         """
1091
1092 class IDecoder(Interface):
1093     """I take a list of shareholders and some setup information, then
1094     download, validate, decode, and decrypt data from them, writing the
1095     results to an output file.
1096
1097     I do not locate the shareholders; that is left to the IDownloader. I must
1098     be given a dict of RemoteReferences to storage buckets that are ready to
1099     send data.
1100     """
1101
1102     def setup(outfile):
1103         """I take a file-like object (providing write and close) to which all
1104         the plaintext data will be written.
1105
1106         TODO: producer/consumer. Maybe write() should return a Deferred that
1107         indicates when it will accept more data? But probably having the
1108         IDecoder be a producer is easier to glue to IConsumer pieces.
1109         """
1110
1111     def set_shareholders(shareholders):
1112         """I take a dictionary that maps share identifiers (small integers)
1113         to RemoteReferences that provide RIBucketReader. This must be called
1114         before start()."""
1115
1116     def start():
1117         """I start the download. This process involves retrieving data and
1118         hash chains from the shareholders, using the hashes to validate the
1119         data, decoding the shares into segments, decrypting the segments,
1120         then writing the resulting plaintext to the output file.
1121
1122         I return a Deferred that will fire (with self) when the download is
1123         complete.
1124         """
1125
1126 class IDownloadTarget(Interface):
1127     def open(size):
1128         """Called before any calls to write() or close(). If an error
1129         occurs before any data is available, fail() may be called without
1130         a previous call to open().
1131
1132         'size' is the length of the file being downloaded, in bytes."""
1133
1134     def write(data):
1135         """Output some data to the target."""
1136     def close():
1137         """Inform the target that there is no more data to be written."""
1138     def fail(why):
1139         """fail() is called to indicate that the download has failed. 'why'
1140         is a Failure object indicating what went wrong. No further methods
1141         will be invoked on the IDownloadTarget after fail()."""
1142     def register_canceller(cb):
1143         """The FileDownloader uses this to register a no-argument function
1144         that the target can call to cancel the download. Once this canceller
1145         is invoked, no further calls to write() or close() will be made."""
1146     def finish():
1147         """When the FileDownloader is done, this finish() function will be
1148         called. Whatever it returns will be used to fire the Deferred that
1149         IDownloader.download() returned to its invoker.
1150         """
1151
1152 class IDownloader(Interface):
1153     def download(uri, target):
1154         """Perform a CHK download, sending the data to the given target.
1155         'target' must provide IDownloadTarget.
1156
1157         Returns a Deferred that fires (with the result of target.finish())
1158         when the download is finished, or errbacks if something went wrong."""
1159
1160 class IEncryptedUploadable(Interface):
1161     def set_upload_status(upload_status):
1162         """Provide an IUploadStatus object that should be filled with status
1163         information. The IEncryptedUploadable is responsible for setting
1164         key-determination progress ('chk'), size, storage_index, and
1165         ciphertext-fetch progress. It may delegate some of this
1166         responsibility to others, in particular to the IUploadable."""
1167
1168     def get_size():
1169         """This behaves just like IUploadable.get_size()."""
1170
1171     def get_all_encoding_parameters():
1172         """Return a Deferred that fires with a tuple of
1173         (k,happy,n,segment_size). The segment_size will be used as-is, and
1174         must match the following constraints: it must be a multiple of k, and
1175         it shouldn't be unreasonably larger than the file size (if
1176         segment_size is larger than filesize, the difference must be stored
1177         as padding).
1178
1179         This usually passes through to the IUploadable method of the same
1180         name.
1181
1182         The encoder strictly obeys the values returned by this method. To
1183         make an upload use non-default encoding parameters, you must arrange
1184         to control the values that this method returns.
1185         """
1186
1187     def get_storage_index():
1188         """Return a Deferred that fires with a 16-byte storage index.
1189         """
1190
1191     def read_encrypted(length, hash_only):
1192         """This behaves just like IUploadable.read(), but returns crypttext
1193         instead of plaintext. If hash_only is True, then this discards the
1194         data (and returns an empty list); this improves efficiency when
1195         resuming an interrupted upload (where we need to compute the
1196         plaintext hashes, but don't need the redundant encrypted data)."""
1197
1198     def get_plaintext_hashtree_leaves(first, last, num_segments):
1199         """Get the leaf nodes of a Merkle hash tree over the plaintext
1200         segments, i.e. get the tagged hashes of the given segments. The
1201         segment size is expected to be generated by the IEncryptedUploadable
1202         before any plaintext is read or ciphertext produced, so that the
1203         segment hashes can be generated with only a single pass.
1204
1205         This returns a Deferred which fires with a sequence of hashes, using:
1206
1207          tuple(segment_hashes[first:last])
1208
1209         'num_segments' is used to assert that the number of segments that the
1210         IEncryptedUploadable handled matches the number of segments that the
1211         encoder was expecting.
1212
1213         This method must not be called until the final byte has been read
1214         from read_encrypted(). Once this method is called, read_encrypted()
1215         can never be called again.
1216         """
1217
1218     def get_plaintext_hash():
1219         """Get the hash of the whole plaintext.
1220
1221         This returns a Deferred which fires with a tagged SHA-256 hash of the
1222         whole plaintext, obtained from hashutil.plaintext_hash(data).
1223         """
1224
1225     def close():
1226         """This behaves just like IUploadable.close()."""
1227
class IUploadable(Interface):
    """I describe one file that is about to be uploaded: I report its size,
    its encoding parameters and its encryption key, and I hand out the data
    itself through read()."""

    def set_upload_status(upload_status):
        """Provide an IUploadStatus object that should be filled with status
        information. The IUploadable is responsible for setting
        key-determination progress ('chk')."""

    def set_default_encoding_parameters(params):
        """Set the default encoding parameters, which must be a dict mapping
        strings to ints. The meaningful keys are 'k', 'happy', 'n', and
        'max_segment_size'. These might have an influence on the final
        encoding parameters returned by get_all_encoding_parameters(), if the
        Uploadable doesn't have more specific preferences.

        This call is optional: if it is not used, the Uploadable will use
        some built-in defaults. If used, this method must be called before
        any other IUploadable methods to have any effect.
        """

    def get_size():
        """Return a Deferred that will fire with the length of the data to be
        uploaded, in bytes. This will be called before the data is actually
        used, to compute encoding parameters.
        """

    def get_all_encoding_parameters():
        """Return a Deferred that fires with a tuple of
        (k,happy,n,segment_size). The segment_size will be used as-is, and
        must match the following constraints: it must be a multiple of k, and
        it shouldn't be unreasonably larger than the file size (if
        segment_size is larger than filesize, the difference must be stored
        as padding).

        The relative values of k and n allow some IUploadables to request
        better redundancy than others (in exchange for consuming more space
        in the grid).

        Larger values of segment_size reduce hash overhead, while smaller
        values reduce memory footprint and cause data to be delivered in
        smaller pieces (which may provide a smoother and more predictable
        download experience).

        The encoder strictly obeys the values returned by this method. To
        make an upload use non-default encoding parameters, you must arrange
        to control the values that this method returns. One way to influence
        them may be to call set_default_encoding_parameters() before calling
        get_all_encoding_parameters().
        """

    def get_encryption_key():
        """Return a Deferred that fires with a 16-byte AES key. This key will
        be used to encrypt the data. The key will also be hashed to derive
        the StorageIndex.

        Uploadables which want to achieve convergence should hash their file
        contents and the serialized_encoding_parameters to form the key
        (which of course requires a full pass over the data). Uploadables can
        use the upload.ConvergentUploadMixin class to achieve this
        automatically.

        Uploadables which do not care about convergence (or do not wish to
        make multiple passes over the data) can simply return a
        strongly-random 16 byte string.

        get_encryption_key() may be called multiple times: the IUploadable is
        required to return the same value each time.
        """

    def read(length):
        """Return a Deferred that fires with a list of strings (perhaps with
        only a single element) which, when concatenated together, contain the
        next 'length' bytes of data. If EOF is near, this may provide fewer
        than 'length' bytes. The total number of bytes provided by read()
        before it signals EOF must equal the size provided by get_size().

        If the data must be acquired through multiple internal read
        operations, returning a list instead of a single string may help to
        reduce string copies.

        'length' will typically be equal to (min(get_size(),1MB)/req_shares),
        so a 10kB file means length=3kB, 100kB file means length=30kB,
        and >=1MB file means length=300kB.

        This method provides for a single full pass through the data. Later
        use cases may desire multiple passes or access to only parts of the
        data (such as a mutable file making small edits-in-place). This API
        will be expanded once those use cases are better understood.
        """

    def close():
        """The upload is finished, and whatever filehandle was in use may be
        closed."""
1319
class IUploadResults(Interface):
    """upload() methods return an object that provides me. My public
    attributes can be read to find out how the upload went: some carry
    functional results, others carry timing information. Any of them may be
    None.::

     .file_size : the size of the file, in bytes
     .uri : the CHK read-cap for the file
     .ciphertext_fetched : how many bytes were fetched by the helper
     .sharemap : dict mapping share number to placement string
     .servermap : dict mapping server peerid to a set of share numbers
     .timings : dict of timing information, mapping name to seconds (float)
       total : total upload time, start to finish
       storage_index : time to compute the storage index
       peer_selection : time to decide which peers will be used
       contacting_helper : initial helper query to upload/no-upload decision
       existence_check : helper pre-upload existence check
       helper_total : initial helper query to helper finished pushing
       cumulative_fetch : helper waiting for ciphertext requests
       total_fetch : helper start to last ciphertext response
       cumulative_encoding : just time spent in zfec
       cumulative_sending : just time spent waiting for storage servers
       hashes_and_close : last segment push to shareholder close
       total_encode_and_push : first encode to shareholder close

    """
1346
class IDownloadResults(Interface):
    """download() methods create an object that provides me for their own
    internal use. My public attributes record details about how the download
    went.::

     .file_size : the size of the file, in bytes
     .servers_used : set of server peerids that were used during download
     .server_problems : dict mapping server peerid to a problem string. Only
                        servers that had problems (bad hashes, disconnects) are
                        listed here.
     .servermap : dict mapping server peerid to a set of share numbers. Only
                  servers that had any shares are listed here.
     .timings : dict of timing information, mapping name to seconds (float)
       peer_selection : time to ask servers about shares
       servers_peer_selection : dict of peerid to DYHB-query time
       uri_extension : time to fetch a copy of the URI extension block
       hashtrees : time to fetch the hash trees
       segments : time to fetch, decode, and deliver segments
       cumulative_fetch : time spent waiting for storage servers
       cumulative_decode : just time spent in zfec
       cumulative_decrypt : just time spent in decryption
       total : total download time, start to finish
       fetch_per_server : dict of peerid to list of per-segment fetch times

    """
1371
class IUploader(Interface):
    def upload(uploadable):
        """Upload a file. The 'uploadable' argument must implement
        IUploadable. The return value is a Deferred that fires with an
        UploadResults instance; the URI of the file is available from it as
        results.uri ."""

    def upload_ssk(write_capability, new_version, uploadable):
        """TODO: how should this work?"""
1380
class IChecker(Interface):
    def check(uri_to_check):
        """Check the health of the target of an IVerifierURI.

        At present uri_to_check has to be an IVerifierURI. Later we expect
        to accept anything that can be adapted to IVerifierURI (such as
        read-only or read-write dirnode/filenode URIs).

        The return value is a Deferred. For a dirnode it fires with either
        True or False (dirnodes are not distributed, so their health is a
        boolean).

        For a filenode it fires with a tuple of (needed_shares,
        total_shares, found_shares, sharemap), the first three being ints.
        The file's basic health is found_shares / needed_shares: a value
        below 1.0 means the file is unrecoverable.

        The sharemap has one key per sharenum, and each value is a list of
        the (binary) nodeids that hold that share. Two shares kept on the
        same nodeid will fail as a pair, which decreases overall
        reliability.

        The IChecker instance remembers the results of each check. By
        default they are stashed in RAM and forgotten at shutdown. If a file
        named 'checker_results.db' exists in the node's basedir, it is used
        as a sqlite database of results so that they persist across runs.
        To start using this feature, just 'touch checker_results.db'; the
        node will initialize it properly the next time it is started.
        """

    def verify(uri_to_check):
        """Verify the crypttext of the target of an IVerifierURI.

        Verification is a more intensive form of checking: the file's
        crypttext contents are retrieved and the associated hash checks are
        performed. A corrupted share held by a storage server is detected by
        verification but not by checking. The return value is a Deferred
        that fires with True if the crypttext hashes look good; if anything
        goes wrong it will probably raise an exception.

        For dirnodes, 'verify' is the same as 'check', so the Deferred fires
        with True or False.

        Verification currently uses only a minimal subset of peers, so a lot
        of share corruption will go uncaught. We expect to improve this in
        the future.
        """

    def checker_results_for(uri_to_check):
        """Return the previously recorded checker results for an
        IVerifierURI, as a list. No checking is performed by this method: it
        only reports the results of checks that took place in the past.

        Every list element is a two-entry tuple of (when, results). The
        'when' values are timestamps (float seconds since epoch), and the
        results have the form defined by the check() method.

        Note: for now this is specified to return synchronously. We might
        need to back away from this in the future.
        """
1441
class IClient(Interface):
    def upload(uploadable):
        """Upload data into a CHK and return the UploadResults for it.
        @param uploadable: an object that implements IUploadable
        @return: a Deferred that fires with the UploadResults instance.
                 The URI for the file is available as results.uri .
        """

    def create_mutable_file(contents=""):
        """Create a new mutable file holding 'contents' and return its URI
        string.
        @param contents: the initial contents to place in the file.
        @return: a Deferred that fires with the (string) SSK URI for the
                 new file.
        """

    def create_empty_dirnode():
        """Create a new dirnode that is both empty and unattached.
        @return: a Deferred that fires with the new IDirectoryNode instance.
        """

    def create_node_from_uri(uri):
        """Synchronously build a new IFilesystemNode instance from a uri.
        @param uri: a string or IURI-providing instance. It may refer to a
                    LiteralFileNode, a CHK file node, a mutable file node,
                    or a directory node
        @return: an instance that provides IFilesystemNode (or, more
                 usefully, one of its subclasses). A URI that specifies a
                 file yields an IFileNode or IMutableFileNode -providing
                 instance, like FileNode, LiteralFileNode, or
                 MutableFileNode. A URI that specifies a directory yields an
                 IDirectoryNode-providing instance, like NewDirectoryNode.
        """
1474
class IClientStatus(Interface):
    def list_all_uploads():
        """Return a list of uploader objects: one per upload for which an
        object is currently available (they are tracked with weakrefs).
        Intended for debugging purposes."""

    def list_active_uploads():
        """Return a list of the IUploadStatus objects that are active."""

    def list_recent_uploads():
        """Return a list of IUploadStatus objects covering the most recently
        started uploads."""

    def list_all_downloads():
        """Return a list of downloader objects: one per download for which
        an object is currently available (they are tracked with weakrefs).
        Intended for debugging purposes."""

    def list_active_downloads():
        """Return a list of the IDownloadStatus objects that are active."""

    def list_recent_downloads():
        """Return a list of IDownloadStatus objects covering the most
        recently started downloads."""
1495
class IUploadStatus(Interface):
    def get_started():
        """Return the time at which the operation was started, as a
        timestamp (float with seconds since epoch)."""

    def get_storage_index():
        """Return the (binary) storage index used by this upload, as a
        string. Returns None if the storage index has not been calculated
        yet."""

    def get_size():
        """Return the number of bytes that will eventually be uploaded for
        this file, as an integer. Returns None if the size is not yet known.
        """

    def using_helper():
        """Return True if a Helper is being used for this upload, False if
        not."""

    def get_status():
        """Return a string that describes the current state of the upload
        process."""

    def get_progress():
        """Returns a tuple of three floats, (chk, ciphertext,
        encode_and_push), each from 0.0 to 1.0 .

        'chk' tells how far the file has been hashed to determine a CHK
        encryption key: this is trivial when non-convergent encryption is in
        use, otherwise the whole file must be hashed.

        'ciphertext' tells how much of the ciphertext has been pushed to the
        helper, and is '1.0' for non-helper uploads.

        'encode_and_push' tells how much of the encode-and-push process has
        finished: for helper uploads this depends upon the helper providing
        progress reports.

        It might be reasonable to add all three numbers and report the sum
        to the user."""

    def get_active():
        """Return True while the upload is active, False otherwise."""

    def get_results():
        """Return an instance of UploadResults (it contains timing and
        sharemap information). Might return None if the upload has not
        finished yet."""

    def get_counter():
        """Return the unique number assigned to this upload status. It acts
        as a handle to this particular upload, so a web page can generate a
        suitable hyperlink."""
1534
class IDownloadStatus(Interface):
    def get_started():
        """Return the time at which the operation was started, as a
        timestamp (float with seconds since epoch)."""

    def get_storage_index():
        """Return the (binary) storage index used by this download, as a
        string. This may be None when there is no storage index (i.e. LIT
        files)."""

    def get_size():
        """Return the number of bytes that will eventually be retrieved for
        this file, as an integer. Returns None if the size is not yet known.
        """

    def using_helper():
        """Return True if a Helper is being used for this download, False
        if not."""

    def get_status():
        """Return a string that describes the current state of the download
        process."""

    def get_progress():
        """Returns a float (from 0.0 to 1.0) that tells how much of the
        download has completed. The value stays at 0.0 until the first byte
        of plaintext is pushed to the download target."""

    def get_active():
        """Return True while the download is active, False otherwise."""

    def get_counter():
        """Return the unique number assigned to this download status. It
        acts as a handle to this particular download, so a web page can
        generate a suitable hyperlink."""
1562
class IPublishStatus(Interface):
    """No methods are declared on me yet. (Presumably I mark status objects
    for publish operations -- confirm against the code that provides me.)"""
class IRetrieveStatus(Interface):
    """No methods are declared on me yet. (Presumably I mark status objects
    for retrieve operations -- confirm against the code that provides me.)"""
1567
class NotCapableError(Exception):
    """Signals an attempt to write to a read-only node."""
1570
class BadWriteEnablerError(Exception):
    """Plain Exception subclass that adds no behaviour of its own.

    NOTE(review): judging by the name, this presumably reports a write
    request that carried the wrong write-enabler -- confirm against the code
    that raises it.
    """
1573
class RIControlClient(RemoteInterface):

    def wait_for_client_connections(num_clients=int):
        """Hold off returning until connections to at least NUM_CLIENTS
        storage servers have been established.
        """

    def upload_from_file_to_uri(filename=str,
                                convergence=ChoiceOf(None,
                                                     StringConstraint(2**20))):
        """Upload a file to the grid. 'filename' must be an absolute path to
        a file on the node's local disk. The node reads the contents of that
        file, uploads them to the grid, and returns the URI at which they
        were uploaded. When convergence is None a random encryption key is
        used; otherwise the plaintext is hashed and that hash is mixed
        together with the "convergence" string to form the encryption key.
        """
        return URI

    def download_from_uri_to_file(uri=URI, filename=str):
        """Download a file from the grid and place it on the node's local
        disk at the given filename (which must be absolute[?]). Returns the
        absolute filename where the file was written."""
        return str

    # debug stuff

    def get_memory_usage():
        """Return a dict that describes the amount of memory currently in
        use. Its keys are 'VmPeak', 'VmSize', and 'VmData'. Its values are
        integers that measure memory consumption in bytes."""
        return DictOf(str, int)

    def speed_test(count=int, size=int, mutable=Any()):
        """Write 'count' tempfiles to disk, each of the given size. Measure
        how long (in seconds) it takes to upload all of them to the servers,
        then measure how long it takes to download all of them. If 'mutable'
        is 'create', the creation of mutable files is timed. If 'mutable' is
        'upload', access to one and the same mutable file is timed instead
        of creating one.

        Returns a tuple of (upload_time, download_time).
        """
        return (float, float)

    def measure_peer_response_time():
        """Send a short message to every connected peer and measure how long
        each one takes to respond. This gives a rough measure of the
        application-level round trip time.

        @return: a dictionary mapping peerid to a float (RTT time in seconds)
        """
        return DictOf(Nodeid, float)
1627
# Constraint for the upload results returned by RICHKUploadHelper.upload()
# and RIHelper.upload_chk(). It is currently just Any(); a stricter
# DictOf(str, str) was tried at some point and left disabled.
UploadResults = Any()
1629
class RIEncryptedUploadable(RemoteInterface):
    # A reference to an object providing this interface is what gets passed
    # as the 'reader' argument of RICHKUploadHelper.upload().
    __remote_name__ = "RIEncryptedUploadable.tahoe.allmydata.com"

    def get_size():
        """Returns an Offset (an integer constrained to 8 bytes)."""
        return Offset

    def get_all_encoding_parameters():
        """Returns a 4-tuple of (int, int, int, long).

        NOTE(review): presumably (k, happy, n, segment_size), as described
        by IUploadable.get_all_encoding_parameters() -- confirm.
        """
        return (int, int, int, long)

    def read_encrypted(offset=Offset, length=ReadSize):
        """Takes an Offset and a ReadSize (a plain int), and returns a list
        of strings."""
        return ListOf(str)

    def get_plaintext_hashtree_leaves(first=int, last=int, num_segments=int):
        """Takes three ints and returns a list of Hash values (32-byte
        strings)."""
        return ListOf(Hash)

    def get_plaintext_hash():
        """Returns a single Hash (a 32-byte string)."""
        return Hash

    def close():
        """Takes no arguments and returns None."""
        return None
1650
1651
class RICHKUploadHelper(RemoteInterface):
    # NOTE(review): the remote name below says "RIUploadHelper", not
    # "RICHKUploadHelper". Presumably it is kept for compatibility with
    # existing peers -- confirm before changing it.
    __remote_name__ = "RIUploadHelper.tahoe.allmydata.com"

    def upload(reader=RIEncryptedUploadable):
        """Takes a reference to an RIEncryptedUploadable and returns the
        upload results."""
        return UploadResults
1657
1658
class RIHelper(RemoteInterface):
    __remote_name__ = "RIHelper.tahoe.allmydata.com"

    def upload_chk(si=StorageIndex):
        """Find out whether the file with the given storage index needs to
        be uploaded. The helper asks the appropriate storage servers whether
        the file has already been uploaded. If it has, the helper returns a
        set of 'upload results' that includes whatever hashes are needed to
        build the read-cap, and perhaps a truncated sharemap.

        If the file has not been uploaded yet (or was only partially
        uploaded), the helper returns an empty upload-results dictionary
        together with an RICHKUploadHelper object that will take care of
        the upload process. The client should call upload() on that object,
        passing it a reference to an RIEncryptedUploadable object that will
        provide ciphertext. Once the upload is finished, the upload() method
        finishes and returns the upload results.
        """
        return (UploadResults, ChoiceOf(RICHKUploadHelper, None))
1678
1679
class RIStatsProvider(RemoteInterface):
    """
    Provides access to statistics and monitoring information.
    """
    # The docstring has to be the first statement of the class body to be
    # picked up as documentation, so it goes ahead of __remote_name__.
    __remote_name__ = "RIStatsProvider.tahoe.allmydata.com"

    def get_stats():
        """
        returns a dictionary containing 'counters' and 'stats', each a dictionary
        with string counter/stat name keys, and numeric values.  counters are
        monotonically increasing measures of work done, and stats are instantaneous
        measures (potentially time averaged internally)
        """
        return DictOf(str, DictOf(str, ChoiceOf(float, int, long)))
1694
class RIStatsGatherer(RemoteInterface):
    """
    Provides a monitoring service for centralised collection of stats
    """
    # The docstring has to be the first statement of the class body to be
    # picked up as documentation, so it goes ahead of __remote_name__.
    __remote_name__ = "RIStatsGatherer.tahoe.allmydata.com"

    def provide(provider=RIStatsProvider, nickname=str):
        """
        @param provider: a stats collector instance which should be polled
                         periodically by the gatherer to collect stats.
        @param nickname: a name useful to identify the provided client
        """
        return None
1708
1709
class IStatsProducer(Interface):
    def get_stats():
        """
        returns a dictionary whose str keys name the stats to be monitored
        and whose values are numeric.
        """
1716