After a server disconnects, make the IServer retain the dead RemoteReference, and...
diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py
index 4a8e40617b8fbbeaa5325b6cb56284a4a204d38f..a9d20eafd17e3b66ff34800f029b5d60317e66b9 100644
@@ -24,7 +24,7 @@ from allmydata.monitor import Monitor
 from allmydata.mutable.common import NotWriteableError
 from allmydata.mutable import layout as mutable_layout
 from allmydata.mutable.publish import MutableData
-from foolscap.api import DeadReferenceError
+from foolscap.api import DeadReferenceError, fireEventually
 from twisted.python.failure import Failure
 from twisted.web.client import getPage
 from twisted.web.error import Error
@@ -122,7 +122,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
             return d1
         d.addCallback(_do_upload)
         def _upload_done(results):
-            theuri = results.uri
+            theuri = results.get_uri()
             log.msg("upload finished: uri is %s" % (theuri,))
             self.uri = theuri
             assert isinstance(self.uri, str), self.uri
@@ -194,7 +194,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
                     facility="tahoe.tests")
             d1 = download_to_data(badnode)
             def _baduri_should_fail(res):
-                log.msg("finished downloading non-existend URI",
+                log.msg("finished downloading non-existent URI",
                         level=log.UNUSUAL, facility="tahoe.tests")
                 self.failUnless(isinstance(res, Failure))
                 self.failUnless(res.check(NoSharesError),
@@ -213,12 +213,18 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
             self.extra_node.DEFAULT_ENCODING_PARAMETERS['happy'] = 5
         d.addCallback(_added)
 
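+        # Wait until the extra node's Uploader reports a live connection to
+        # the helper before exercising helper uploads; otherwise the first
+        # upload could be attempted before the helper is reachable.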
+        def _has_helper():
+            uploader = self.extra_node.getServiceNamed("uploader")
+            furl, connected = uploader.get_helper_info()
+            return connected
+        d.addCallback(lambda ign: self.poll(_has_helper))
+
         HELPER_DATA = "Data that needs help to upload" * 1000
         def _upload_with_helper(res):
             u = upload.Data(HELPER_DATA, convergence=convergence)
             d = self.extra_node.upload(u)
             def _uploaded(results):
-                n = self.clients[1].create_node_from_uri(results.uri)
+                n = self.clients[1].create_node_from_uri(results.get_uri())
                 return download_to_data(n)
             d.addCallback(_uploaded)
             def _check(newdata):
@@ -232,7 +238,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
             u.debug_stash_RemoteEncryptedUploadable = True
             d = self.extra_node.upload(u)
             def _uploaded(results):
-                n = self.clients[1].create_node_from_uri(results.uri)
+                n = self.clients[1].create_node_from_uri(results.get_uri())
                 return download_to_data(n)
             d.addCallback(_uploaded)
             def _check(newdata):
@@ -244,17 +250,22 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
         if convergence is not None:
             d.addCallback(_upload_duplicate_with_helper)
 
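+        # fireEventually() returns a Deferred that fires on a later reactor
+        # turn, letting any queued events settle before the
+        # interrupted-upload test below begins.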
+        d.addCallback(fireEventually)
+
         def _upload_resumable(res):
             DATA = "Data that needs help to upload and gets interrupted" * 1000
             u1 = CountingDataUploadable(DATA, convergence=convergence)
             u2 = CountingDataUploadable(DATA, convergence=convergence)
 
             # we interrupt the connection after about 5kB by shutting down
-            # the helper, then restartingit.
+            # the helper, then restarting it.
             u1.interrupt_after = 5000
             u1.interrupt_after_d = defer.Deferred()
-            u1.interrupt_after_d.addCallback(lambda res:
-                                             self.bounce_client(0))
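+            # bounced_d fires once bounce_client() has finished shutting the
+            # node down and restarting it; addBoth ensures it fires even if
+            # the bounce itself fails.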
+            bounced_d = defer.Deferred()
+            def _do_bounce(res):
+                d = self.bounce_client(0)
+                d.addBoth(bounced_d.callback)
+            u1.interrupt_after_d.addCallback(_do_bounce)
 
             # sneak into the helper and reduce its chunk size, so that our
             # debug_interrupt will sever the connection on about the fifth
@@ -265,34 +276,27 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
             # this same test run, but I'm not currently worried about it.
             offloaded.CHKCiphertextFetcher.CHUNK_SIZE = 1000
 
-            d = self.extra_node.upload(u1)
-
-            def _should_not_finish(res):
-                self.fail("interrupted upload should have failed, not finished"
-                          " with result %s" % (res,))
-            def _interrupted(f):
-                f.trap(DeadReferenceError)
-
-                # make sure we actually interrupted it before finishing the
-                # file
-                self.failUnless(u1.bytes_read < len(DATA),
-                                "read %d out of %d total" % (u1.bytes_read,
-                                                             len(DATA)))
-
-                log.msg("waiting for reconnect", level=log.NOISY,
-                        facility="tahoe.test.test_system")
-                # now, we need to give the nodes a chance to notice that this
-                # connection has gone away. When this happens, the storage
-                # servers will be told to abort their uploads, removing the
-                # partial shares. Unfortunately this involves TCP messages
-                # going through the loopback interface, and we can't easily
-                # predict how long that will take. If it were all local, we
-                # could use fireEventually() to stall. Since we don't have
-                # the right introduction hooks, the best we can do is use a
-                # fixed delay. TODO: this is fragile.
-                u1.interrupt_after_d.addCallback(self.stall, 2.0)
-                return u1.interrupt_after_d
-            d.addCallbacks(_should_not_finish, _interrupted)
+            upload_d = self.extra_node.upload(u1)
+            # The upload will start, and bounce_client() will be called after
+            # about 5kB. bounced_d will fire after bounce_client() finishes
+            # shutting down and restarting the node.
+            d = bounced_d
+            def _bounced(ign):
+                # By this point, the upload should have failed because of the
+                # interruption. upload_d will fire in a moment.
+                def _should_not_finish(res):
+                    self.fail("interrupted upload should have failed, not"
+                              " finished with result %s" % (res,))
+                def _interrupted(f):
+                    f.trap(DeadReferenceError)
+                    # make sure we actually interrupted it before finishing
+                    # the file
+                    self.failUnless(u1.bytes_read < len(DATA),
+                                    "read %d out of %d total" %
+                                    (u1.bytes_read, len(DATA)))
+                upload_d.addCallbacks(_should_not_finish, _interrupted)
+                return upload_d
+            d.addCallback(_bounced)
 
             def _disconnected(res):
                 # check to make sure the storage servers aren't still hanging
@@ -306,13 +310,12 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
                     self.failIf(os.path.exists(incdir) and os.listdir(incdir))
             d.addCallback(_disconnected)
 
-            # then we need to give the reconnector a chance to
-            # reestablish the connection to the helper.
             d.addCallback(lambda res:
-                          log.msg("wait_for_connections", level=log.NOISY,
+                          log.msg("wait_for_helper", level=log.NOISY,
                                   facility="tahoe.test.test_system"))
-            d.addCallback(lambda res: self.wait_for_connections())
-
+            # then we need to wait for the extra node to reestablish its
+            # connection to the helper.
+            d.addCallback(lambda ign: self.poll(_has_helper))
 
             d.addCallback(lambda res:
                           log.msg("uploading again", level=log.NOISY,
@@ -320,7 +323,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
             d.addCallback(lambda res: self.extra_node.upload(u2))
 
             def _uploaded(results):
-                cap = results.uri
+                cap = results.get_uri()
                 log.msg("Second upload complete", level=log.NOISY,
                         facility="tahoe.test.test_system")
 
@@ -1218,8 +1221,12 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
             return self.GET("status/publish-%d" % self._publish_status)
         d.addCallback(_got_update)
         def _got_publish(res):
+            self.failUnlessIn("Publish Results", res)
             return self.GET("status/retrieve-%d" % self._retrieve_status)
         d.addCallback(_got_publish)
+        def _got_retrieve(res):
+            self.failUnlessIn("Retrieve Results", res)
+        d.addCallback(_got_retrieve)
 
         # check that the helper status page exists
         d.addCallback(lambda res:
@@ -1876,3 +1883,33 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
             return d
         d.addCallback(_got_lit_filenode)
         return d
+
+class Connections(SystemTestMixin, unittest.TestCase):
+    def test_rref(self):
+        self.basedir = "system/Connections/rref"
+        d = self.set_up_nodes(2)
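+        # clients[0] is the observer; the storage server belonging to
+        # clients[1] is the one we will shut down below.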
+        def _start(ign):
+            self.c0 = self.clients[0]
+            for s in self.c0.storage_broker.get_connected_servers():
+                if "pub-"+s.get_longname() != self.c0.node_key_s:
+                    break
+            self.s1 = s  # s1 is the server, not c0
+            self.s1_rref = s.get_rref()
+            self.failIfEqual(self.s1_rref, None)
+            self.failUnless(self.s1.is_connected())
+        d.addCallback(_start)
+
+        # now shut down the server
+        d.addCallback(lambda ign: self.clients[1].disownServiceParent())
+        # and wait for the client to notice
+        def _poll():
+            return len(self.c0.storage_broker.get_connected_servers()) < 2
+        d.addCallback(lambda ign: self.poll(_poll))
+
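+        # Even though the server is gone, the IServer should retain the
+        # (now dead) RemoteReference rather than forgetting it.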
+        def _down(ign):
+            self.failIf(self.s1.is_connected())
+            rref = self.s1.get_rref()
+            self.failUnless(rref)
+            self.failUnlessIdentical(rref, self.s1_rref)
+        d.addCallback(_down)
+        return d