From: Daira Hopwood Date: Sun, 17 Aug 2014 14:36:57 +0000 (+0100) Subject: Better name for the file that causes a node to exit after a timeout when running... X-Git-Tag: allmydata-tahoe-1.10.1a1~173 X-Git-Url: https://git.rkrishnan.org/specifications/reliability?a=commitdiff_plain;h=647ebce6b993cc6d319ad6be0f4921909d159d73;p=tahoe-lafs%2Ftahoe-lafs.git Better name for the file that causes a node to exit after a timeout when running unit tests. refs #1336 Signed-off-by: Daira Hopwood --- diff --git a/src/allmydata/client.py b/src/allmydata/client.py index e3214762..64b09c07 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -109,7 +109,7 @@ class Client(node.Node, pollmixin.PollMixin): PORTNUMFILE = "client.port" STOREDIR = 'storage' NODETYPE = "client" - SUICIDE_PREVENTION_HOTLINE_FILE = "suicide_prevention_hotline" + EXIT_TRIGGER_FILE = "exit_trigger" # This means that if a storage server treats me as though I were a # 1.0.0 storage client, it will work as they expect. @@ -150,13 +150,16 @@ class Client(node.Node, pollmixin.PollMixin): self.init_sftp_server() self.init_drop_uploader() - hotline_file = os.path.join(self.basedir, - self.SUICIDE_PREVENTION_HOTLINE_FILE) - if os.path.exists(hotline_file): - age = time.time() - os.stat(hotline_file)[stat.ST_MTIME] - self.log("hotline file noticed (%ds old), starting timer" % age) - hotline = TimerService(1.0, self._check_hotline, hotline_file) - hotline.setServiceParent(self) + # If the node sees an exit_trigger file, it will poll every second to see + # whether the file still exists, and what its mtime is. If the file does not + # exist or has not been modified for a given timeout, the node will exit. + exit_trigger_file = os.path.join(self.basedir, + self.EXIT_TRIGGER_FILE) + if os.path.exists(exit_trigger_file): + age = time.time() - os.stat(exit_trigger_file)[stat.ST_MTIME] + self.log("%s file noticed (%ds old), starting timer" % (self.EXIT_TRIGGER_FILE, age)) + exit_trigger = TimerService(1.0, self._check_exit_trigger, exit_trigger_file) + exit_trigger.setServiceParent(self) # this needs to happen last, so it can use getServiceNamed() to # acquire references to StorageServer and other web-statusable things @@ -492,15 +495,15 @@ class Client(node.Node, pollmixin.PollMixin): except Exception, e: self.log("couldn't start drop-uploader: %r", args=(e,)) - def _check_hotline(self, hotline_file): - if os.path.exists(hotline_file): - mtime = os.stat(hotline_file)[stat.ST_MTIME] + def _check_exit_trigger(self, exit_trigger_file): + if os.path.exists(exit_trigger_file): + mtime = os.stat(exit_trigger_file)[stat.ST_MTIME] if mtime > time.time() - 120.0: return else: - self.log("hotline file too old, shutting down") + self.log("%s file too old, shutting down" % (self.EXIT_TRIGGER_FILE,)) else: - self.log("hotline file missing, shutting down") + self.log("%s file missing, shutting down" % (self.EXIT_TRIGGER_FILE,)) reactor.stop() def get_encoding_parameters(self): diff --git a/src/allmydata/test/check_memory.py b/src/allmydata/test/check_memory.py index b9d79014..ce6b33a7 100644 --- a/src/allmydata/test/check_memory.py +++ b/src/allmydata/test/check_memory.py @@ -145,7 +145,7 @@ class SystemFramework(pollmixin.PollMixin): def tearDown(self, passthrough): # the client node will shut down in a few seconds - #os.remove(os.path.join(self.clientdir, "suicide_prevention_hotline")) + #os.remove(os.path.join(self.clientdir, client.Client.EXIT_TRIGGER_FILE)) log.msg("shutting down SystemTest services") if self.keepalive_file and os.path.exists(self.keepalive_file): age = time.time() - os.stat(self.keepalive_file)[stat.ST_MTIME] @@ -255,7 +255,7 @@ this file are ignored. pass f.close() self.keepalive_file = os.path.join(clientdir, - "suicide_prevention_hotline") + client.Client.EXIT_TRIGGER_FILE) # now start updating the mtime. self.touch_keepalive() ts = internet.TimerService(1.0, self.touch_keepalive) diff --git a/src/allmydata/test/test_client.py b/src/allmydata/test/test_client.py index c134249e..531215f6 100644 --- a/src/allmydata/test/test_client.py +++ b/src/allmydata/test/test_client.py @@ -317,7 +317,7 @@ class Run(unittest.TestCase, testutil.StallMixin): os.mkdir(basedir) dummy = "pb://wl74cyahejagspqgy4x5ukrvfnevlknt@127.0.0.1:58889/bogus" fileutil.write(os.path.join(basedir, "tahoe.cfg"), BASECONFIG_I % dummy) - fileutil.write(os.path.join(basedir, "suicide_prevention_hotline"), "") + fileutil.write(os.path.join(basedir, client.Client.EXIT_TRIGGER_FILE), "") client.Client(basedir) def test_reloadable(self): @@ -340,13 +340,13 @@ class Run(unittest.TestCase, testutil.StallMixin): d.addCallback(self.stall, delay=2.0) def _restart(res): # TODO: pause for slightly over one second, to let - # Client._check_hotline poll the file once. That will exercise + # Client._check_exit_trigger poll the file once. That will exercise # another few lines. Then add another test in which we don't - # update the file at all, and watch to see the node shutdown. (to - # do this, use a modified node which overrides Node.shutdown(), - # also change _check_hotline to use it instead of a raw + # update the file at all, and watch to see the node shutdown. + # (To do this, use a modified node which overrides Node.shutdown(), + # also change _check_exit_trigger to use it instead of a raw # reactor.stop, also instrument the shutdown event in an - # attribute that we can check) + # attribute that we can check.) c2 = client.Client(basedir) c2.setServiceParent(self.sparent) return c2.disownServiceParent() diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index b5ccaa56..bfb59f7c 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -1,17 +1,19 @@ +import os.path, re, sys, subprocess +from cStringIO import StringIO + from twisted.trial import unittest from twisted.python import usage, runtime from twisted.internet import threads -import os.path, re, sys, subprocess -from cStringIO import StringIO from allmydata.util import fileutil, pollmixin from allmydata.util.encodingutil import unicode_to_argv, unicode_to_output, get_filesystem_encoding from allmydata.scripts import runner - +from allmydata.client import Client from allmydata.test import common_util import allmydata + timeout = 240 def get_root_from_file(src): @@ -357,7 +359,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, self.skip_if_cannot_daemonize() basedir = self.workdir("test_introducer") c1 = os.path.join(basedir, "c1") - HOTLINE_FILE = os.path.join(c1, "suicide_prevention_hotline") + exit_trigger_file = os.path.join(c1, Client.EXIT_TRIGGER_FILE) TWISTD_PID_FILE = os.path.join(c1, "twistd.pid") INTRODUCER_FURL_FILE = os.path.join(c1, "private", "introducer.furl") PORTNUM_FILE = os.path.join(c1, "introducer.port") @@ -378,7 +380,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, # by writing this file, we get ten seconds before the node will # exit. This insures that even if the test fails (and the 'stop' # command doesn't work), the client should still terminate. - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") # now it's safe to start the node d.addCallback(_cb) @@ -389,7 +391,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, def _cb2(res): out, err, rc_or_sig = res - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) @@ -416,7 +418,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, self.failUnless(os.path.exists(PORTNUM_FILE)) self.portnum = fileutil.read(PORTNUM_FILE) - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") self.failUnless(os.path.exists(TWISTD_PID_FILE)) self.failUnless(os.path.exists(NODE_URL_FILE)) @@ -427,7 +429,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, def _then(res): out, err, rc_or_sig = res - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) @@ -451,7 +453,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, # itself before we get a chance to, especially if spawning the # 'tahoe stop' command takes a while. def _stop(res): - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") self.failUnless(os.path.exists(TWISTD_PID_FILE)) return self.run_bintahoe(["--quiet", "stop", c1]) @@ -459,7 +461,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, def _after_stopping(res): out, err, rc_or_sig = res - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") # the parent has exited by now errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) @@ -470,7 +472,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, # gone by now. self.failIf(os.path.exists(TWISTD_PID_FILE)) d.addCallback(_after_stopping) - d.addBoth(self._remove, HOTLINE_FILE) + d.addBoth(self._remove, exit_trigger_file) return d # This test has hit a 240-second timeout on our feisty2.5 buildslave, and a 480-second timeout # on Francois's Lenny-armv5tel buildslave. @@ -481,7 +483,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, basedir = self.workdir("test_client_no_noise") c1 = os.path.join(basedir, "c1") - HOTLINE_FILE = os.path.join(c1, "suicide_prevention_hotline") + exit_trigger_file = os.path.join(c1, Client.EXIT_TRIGGER_FILE) TWISTD_PID_FILE = os.path.join(c1, "twistd.pid") PORTNUM_FILE = os.path.join(c1, "client.port") @@ -495,7 +497,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, # By writing this file, we get two minutes before the client will exit. This ensures # that even if the 'stop' command doesn't work (and the test fails), the client should # still terminate. - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") # now it's safe to start the node d.addCallback(_cb) @@ -506,7 +508,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, def _cb2(res): out, err, rc_or_sig = res errstr = "cc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) # If you emit noise, you fail this test. errlines = err.split("\n") @@ -536,14 +538,14 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, self.failUnless(os.path.exists(TWISTD_PID_FILE), (TWISTD_PID_FILE, os.listdir(os.path.dirname(TWISTD_PID_FILE)))) return self.run_bintahoe(["--quiet", "stop", c1]) d.addCallback(_stop) - d.addBoth(self._remove, HOTLINE_FILE) + d.addBoth(self._remove, exit_trigger_file) return d def test_client(self): self.skip_if_cannot_daemonize() basedir = self.workdir("test_client") c1 = os.path.join(basedir, "c1") - HOTLINE_FILE = os.path.join(c1, "suicide_prevention_hotline") + exit_trigger_file = os.path.join(c1, Client.EXIT_TRIGGER_FILE) TWISTD_PID_FILE = os.path.join(c1, "twistd.pid") PORTNUM_FILE = os.path.join(c1, "client.port") NODE_URL_FILE = os.path.join(c1, "node.url") @@ -561,7 +563,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, # By writing this file, we get two minutes before the client will exit. This ensures # that even if the 'stop' command doesn't work (and the test fails), the client should # still terminate. - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") # now it's safe to start the node d.addCallback(_cb) @@ -571,7 +573,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, def _cb2(res): out, err, rc_or_sig = res - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) @@ -597,7 +599,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, # don't change on restart self.portnum = fileutil.read(PORTNUM_FILE) - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") self.failUnless(os.path.exists(TWISTD_PID_FILE)) # rm this so we can detect when the second incarnation is ready @@ -608,7 +610,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, def _cb3(res): out, err, rc_or_sig = res - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) self.failUnlessEqual(out, "", errstr) @@ -627,7 +629,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, # itself before we get a chance to, especially if spawning the # 'tahoe stop' command takes a while. def _stop(res): - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") self.failUnless(os.path.exists(TWISTD_PID_FILE), (TWISTD_PID_FILE, os.listdir(os.path.dirname(TWISTD_PID_FILE)))) @@ -637,7 +639,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, def _cb4(res): out, err, rc_or_sig = res - fileutil.write(HOTLINE_FILE, "") + fileutil.write(exit_trigger_file, "") # the parent has exited by now errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) self.failUnlessEqual(rc_or_sig, 0, errstr) @@ -648,7 +650,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, # gone by now. self.failIf(os.path.exists(TWISTD_PID_FILE)) d.addCallback(_cb4) - d.addBoth(self._remove, HOTLINE_FILE) + d.addBoth(self._remove, exit_trigger_file) return d def _remove(self, res, file):