Better name for the file that causes a node to exit after a timeout when running...
author Daira Hopwood <daira@jacaranda.org>
Sun, 17 Aug 2014 14:36:57 +0000 (15:36 +0100)
committer Daira Hopwood <daira@jacaranda.org>
Sun, 17 Aug 2014 14:51:19 +0000 (15:51 +0100)
Signed-off-by: Daira Hopwood <daira@jacaranda.org>
src/allmydata/client.py
src/allmydata/test/check_memory.py
src/allmydata/test/test_client.py
src/allmydata/test/test_runner.py

index e3214762a771367bd6440597ccc1dd54e1f34843..64b09c076c2b06d8f8963b383eef2c0941367cad 100644 (file)
@@ -109,7 +109,7 @@ class Client(node.Node, pollmixin.PollMixin):
     PORTNUMFILE = "client.port"
     STOREDIR = 'storage'
     NODETYPE = "client"
-    SUICIDE_PREVENTION_HOTLINE_FILE = "suicide_prevention_hotline"
+    EXIT_TRIGGER_FILE = "exit_trigger"
 
     # This means that if a storage server treats me as though I were a
     # 1.0.0 storage client, it will work as they expect.
@@ -150,13 +150,16 @@ class Client(node.Node, pollmixin.PollMixin):
         self.init_sftp_server()
         self.init_drop_uploader()
 
-        hotline_file = os.path.join(self.basedir,
-                                    self.SUICIDE_PREVENTION_HOTLINE_FILE)
-        if os.path.exists(hotline_file):
-            age = time.time() - os.stat(hotline_file)[stat.ST_MTIME]
-            self.log("hotline file noticed (%ds old), starting timer" % age)
-            hotline = TimerService(1.0, self._check_hotline, hotline_file)
-            hotline.setServiceParent(self)
+        # If the node sees an exit_trigger file, it will poll every second to see
+        # whether the file still exists, and what its mtime is. If the file does not
+        # exist or has not been modified for a given timeout, the node will exit.
+        exit_trigger_file = os.path.join(self.basedir,
+                                         self.EXIT_TRIGGER_FILE)
+        if os.path.exists(exit_trigger_file):
+            age = time.time() - os.stat(exit_trigger_file)[stat.ST_MTIME]
+            self.log("%s file noticed (%ds old), starting timer" % (self.EXIT_TRIGGER_FILE, age))
+            exit_trigger = TimerService(1.0, self._check_exit_trigger, exit_trigger_file)
+            exit_trigger.setServiceParent(self)
 
         # this needs to happen last, so it can use getServiceNamed() to
         # acquire references to StorageServer and other web-statusable things
@@ -492,15 +495,15 @@ class Client(node.Node, pollmixin.PollMixin):
             except Exception, e:
                 self.log("couldn't start drop-uploader: %r", args=(e,))
 
-    def _check_hotline(self, hotline_file):
-        if os.path.exists(hotline_file):
-            mtime = os.stat(hotline_file)[stat.ST_MTIME]
+    def _check_exit_trigger(self, exit_trigger_file):
+        if os.path.exists(exit_trigger_file):
+            mtime = os.stat(exit_trigger_file)[stat.ST_MTIME]
             if mtime > time.time() - 120.0:
                 return
             else:
-                self.log("hotline file too old, shutting down")
+                self.log("%s file too old, shutting down" % (self.EXIT_TRIGGER_FILE,))
         else:
-            self.log("hotline file missing, shutting down")
+            self.log("%s file missing, shutting down" % (self.EXIT_TRIGGER_FILE,))
         reactor.stop()
 
     def get_encoding_parameters(self):
index b9d79014e4a0316564454b65b7cdee5f18c3ee5d..ce6b33a7c380c6baf01ade6f0752d1ea093433fa 100644 (file)
@@ -145,7 +145,7 @@ class SystemFramework(pollmixin.PollMixin):
 
     def tearDown(self, passthrough):
         # the client node will shut down in a few seconds
-        #os.remove(os.path.join(self.clientdir, "suicide_prevention_hotline"))
+        #os.remove(os.path.join(self.clientdir, client.Client.EXIT_TRIGGER_FILE))
         log.msg("shutting down SystemTest services")
         if self.keepalive_file and os.path.exists(self.keepalive_file):
             age = time.time() - os.stat(self.keepalive_file)[stat.ST_MTIME]
@@ -255,7 +255,7 @@ this file are ignored.
             pass
         f.close()
         self.keepalive_file = os.path.join(clientdir,
-                                           "suicide_prevention_hotline")
+                                           client.Client.EXIT_TRIGGER_FILE)
         # now start updating the mtime.
         self.touch_keepalive()
         ts = internet.TimerService(1.0, self.touch_keepalive)
index c134249e7bf82c05d5cf66303e4883223f975aef..531215f6129cfbf53f1c1bbe96e3be15a410b0e1 100644 (file)
@@ -317,7 +317,7 @@ class Run(unittest.TestCase, testutil.StallMixin):
         os.mkdir(basedir)
         dummy = "pb://wl74cyahejagspqgy4x5ukrvfnevlknt@127.0.0.1:58889/bogus"
         fileutil.write(os.path.join(basedir, "tahoe.cfg"), BASECONFIG_I % dummy)
-        fileutil.write(os.path.join(basedir, "suicide_prevention_hotline"), "")
+        fileutil.write(os.path.join(basedir, client.Client.EXIT_TRIGGER_FILE), "")
         client.Client(basedir)
 
     def test_reloadable(self):
@@ -340,13 +340,13 @@ class Run(unittest.TestCase, testutil.StallMixin):
         d.addCallback(self.stall, delay=2.0)
         def _restart(res):
             # TODO: pause for slightly over one second, to let
-            # Client._check_hotline poll the file once. That will exercise
+            # Client._check_exit_trigger poll the file once. That will exercise
             # another few lines. Then add another test in which we don't
-            # update the file at all, and watch to see the node shutdown. (to
-            # do this, use a modified node which overrides Node.shutdown(),
-            # also change _check_hotline to use it instead of a raw
+            # update the file at all, and watch to see the node shutdown.
+            # (To do this, use a modified node which overrides Node.shutdown(),
+            # also change _check_exit_trigger to use it instead of a raw
             # reactor.stop, also instrument the shutdown event in an
-            # attribute that we can check)
+            # attribute that we can check.)
             c2 = client.Client(basedir)
             c2.setServiceParent(self.sparent)
             return c2.disownServiceParent()
index b5ccaa568639a6573da3ed48fc3c1a2574026feb..bfb59f7c929ffa4804dfd80d88a8fe822073eeac 100644 (file)
@@ -1,17 +1,19 @@
+import os.path, re, sys, subprocess
+from cStringIO import StringIO
+
 from twisted.trial import unittest
 
 from twisted.python import usage, runtime
 from twisted.internet import threads
 
-import os.path, re, sys, subprocess
-from cStringIO import StringIO
 from allmydata.util import fileutil, pollmixin
 from allmydata.util.encodingutil import unicode_to_argv, unicode_to_output, get_filesystem_encoding
 from allmydata.scripts import runner
-
+from allmydata.client import Client
 from allmydata.test import common_util
 import allmydata
 
+
 timeout = 240
 
 def get_root_from_file(src):
@@ -357,7 +359,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
         self.skip_if_cannot_daemonize()
         basedir = self.workdir("test_introducer")
         c1 = os.path.join(basedir, "c1")
-        HOTLINE_FILE = os.path.join(c1, "suicide_prevention_hotline")
+        exit_trigger_file = os.path.join(c1, Client.EXIT_TRIGGER_FILE)
         TWISTD_PID_FILE = os.path.join(c1, "twistd.pid")
         INTRODUCER_FURL_FILE = os.path.join(c1, "private", "introducer.furl")
         PORTNUM_FILE = os.path.join(c1, "introducer.port")
@@ -378,7 +380,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
             # by writing this file, we get ten seconds before the node will
             # exit. This insures that even if the test fails (and the 'stop'
             # command doesn't work), the client should still terminate.
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             # now it's safe to start the node
         d.addCallback(_cb)
 
@@ -389,7 +391,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
         def _cb2(res):
             out, err, rc_or_sig = res
 
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err)
             self.failUnlessEqual(rc_or_sig, 0, errstr)
             self.failUnlessEqual(out, "", errstr)
@@ -416,7 +418,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
             self.failUnless(os.path.exists(PORTNUM_FILE))
             self.portnum = fileutil.read(PORTNUM_FILE)
 
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             self.failUnless(os.path.exists(TWISTD_PID_FILE))
             self.failUnless(os.path.exists(NODE_URL_FILE))
 
@@ -427,7 +429,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
 
         def _then(res):
             out, err, rc_or_sig = res
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err)
             self.failUnlessEqual(rc_or_sig, 0, errstr)
             self.failUnlessEqual(out, "", errstr)
@@ -451,7 +453,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
         # itself before we get a chance to, especially if spawning the
         # 'tahoe stop' command takes a while.
         def _stop(res):
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             self.failUnless(os.path.exists(TWISTD_PID_FILE))
 
             return self.run_bintahoe(["--quiet", "stop", c1])
@@ -459,7 +461,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
 
         def _after_stopping(res):
             out, err, rc_or_sig = res
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             # the parent has exited by now
             errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err)
             self.failUnlessEqual(rc_or_sig, 0, errstr)
@@ -470,7 +472,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
             # gone by now.
             self.failIf(os.path.exists(TWISTD_PID_FILE))
         d.addCallback(_after_stopping)
-        d.addBoth(self._remove, HOTLINE_FILE)
+        d.addBoth(self._remove, exit_trigger_file)
         return d
     # This test has hit a 240-second timeout on our feisty2.5 buildslave, and a 480-second timeout
     # on Francois's Lenny-armv5tel buildslave.
@@ -481,7 +483,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
 
         basedir = self.workdir("test_client_no_noise")
         c1 = os.path.join(basedir, "c1")
-        HOTLINE_FILE = os.path.join(c1, "suicide_prevention_hotline")
+        exit_trigger_file = os.path.join(c1, Client.EXIT_TRIGGER_FILE)
         TWISTD_PID_FILE = os.path.join(c1, "twistd.pid")
         PORTNUM_FILE = os.path.join(c1, "client.port")
 
@@ -495,7 +497,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
             # By writing this file, we get two minutes before the client will exit. This ensures
             # that even if the 'stop' command doesn't work (and the test fails), the client should
             # still terminate.
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             # now it's safe to start the node
         d.addCallback(_cb)
 
@@ -506,7 +508,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
         def _cb2(res):
             out, err, rc_or_sig = res
             errstr = "cc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err)
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             self.failUnlessEqual(rc_or_sig, 0, errstr)
             self.failUnlessEqual(out, "", errstr) # If you emit noise, you fail this test.
             errlines = err.split("\n")
@@ -536,14 +538,14 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
             self.failUnless(os.path.exists(TWISTD_PID_FILE), (TWISTD_PID_FILE, os.listdir(os.path.dirname(TWISTD_PID_FILE))))
             return self.run_bintahoe(["--quiet", "stop", c1])
         d.addCallback(_stop)
-        d.addBoth(self._remove, HOTLINE_FILE)
+        d.addBoth(self._remove, exit_trigger_file)
         return d
 
     def test_client(self):
         self.skip_if_cannot_daemonize()
         basedir = self.workdir("test_client")
         c1 = os.path.join(basedir, "c1")
-        HOTLINE_FILE = os.path.join(c1, "suicide_prevention_hotline")
+        exit_trigger_file = os.path.join(c1, Client.EXIT_TRIGGER_FILE)
         TWISTD_PID_FILE = os.path.join(c1, "twistd.pid")
         PORTNUM_FILE = os.path.join(c1, "client.port")
         NODE_URL_FILE = os.path.join(c1, "node.url")
@@ -561,7 +563,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
             # By writing this file, we get two minutes before the client will exit. This ensures
             # that even if the 'stop' command doesn't work (and the test fails), the client should
             # still terminate.
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             # now it's safe to start the node
         d.addCallback(_cb)
 
@@ -571,7 +573,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
 
         def _cb2(res):
             out, err, rc_or_sig = res
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err)
             self.failUnlessEqual(rc_or_sig, 0, errstr)
             self.failUnlessEqual(out, "", errstr)
@@ -597,7 +599,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
             # don't change on restart
             self.portnum = fileutil.read(PORTNUM_FILE)
 
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             self.failUnless(os.path.exists(TWISTD_PID_FILE))
 
             # rm this so we can detect when the second incarnation is ready
@@ -608,7 +610,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
         def _cb3(res):
             out, err, rc_or_sig = res
 
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err)
             self.failUnlessEqual(rc_or_sig, 0, errstr)
             self.failUnlessEqual(out, "", errstr)
@@ -627,7 +629,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
         # itself before we get a chance to, especially if spawning the
         # 'tahoe stop' command takes a while.
         def _stop(res):
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             self.failUnless(os.path.exists(TWISTD_PID_FILE),
                             (TWISTD_PID_FILE,
                              os.listdir(os.path.dirname(TWISTD_PID_FILE))))
@@ -637,7 +639,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
         def _cb4(res):
             out, err, rc_or_sig = res
 
-            fileutil.write(HOTLINE_FILE, "")
+            fileutil.write(exit_trigger_file, "")
             # the parent has exited by now
             errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err)
             self.failUnlessEqual(rc_or_sig, 0, errstr)
@@ -648,7 +650,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
             # gone by now.
             self.failIf(os.path.exists(TWISTD_PID_FILE))
         d.addCallback(_cb4)
-        d.addBoth(self._remove, HOTLINE_FILE)
+        d.addBoth(self._remove, exit_trigger_file)
         return d
 
     def _remove(self, res, file):