From: Brian Warner Date: Fri, 26 Nov 2010 22:45:38 +0000 (-0800) Subject: startstop_node.py: fix "tahoe start -m" by forking before non-final targets X-Git-Url: https://git.rkrishnan.org/Site/Content/Exhibitors/provisioning?a=commitdiff_plain;h=f3adb037ae0d22eb06c719c2faef75a834618442;p=tahoe-lafs%2Ftahoe-lafs.git startstop_node.py: fix "tahoe start -m" by forking before non-final targets * don't advertise -m flag on tahoe start/restart/run unless os.fork is available (i.e. windows) * test_runner.py: add test to exercise "start/stop/restart -m" --- diff --git a/src/allmydata/scripts/common.py b/src/allmydata/scripts/common.py index 96879b68..5e4bfb91 100644 --- a/src/allmydata/scripts/common.py +++ b/src/allmydata/scripts/common.py @@ -54,13 +54,18 @@ class BaseOptions(usage.Options): class BasedirMixin: default_nodedir = _default_nodedir allow_multiple = True + # startstop_node.py needs os.fork to implement the "-m" option on "tahoe + # start" option + can_start_multiple = hasattr(os, "fork") + optParameters = [ ["basedir", "C", None, "Same as --node-directory."], ] - optFlags = [ - ["multiple", "m", "Specify multiple node directories at once."], - ] + optFlags = [ ] + if can_start_multiple: + optFlags.append(["multiple", "m", + "Specify multiple node directories at once."]) def parseArgs(self, *args): if self['node-directory'] and self['basedir']: diff --git a/src/allmydata/scripts/startstop_node.py b/src/allmydata/scripts/startstop_node.py index bc2616ea..c76c9d89 100644 --- a/src/allmydata/scripts/startstop_node.py +++ b/src/allmydata/scripts/startstop_node.py @@ -26,10 +26,17 @@ class RunOptions(BasedirMixin, BaseOptions): optParameters = [ ["node-directory", "d", None, "Specify the directory of the node to be run. [default, for 'tahoe run' only: current directory]"], - ["multiple", "m", None, "['tahoe run' cannot accept multiple node directories]"], - ] - -def do_start(basedir, opts, out=sys.stdout, err=sys.stderr): + ] + optFlags = [ ] + if BasedirMixin.can_start_multiple: + # usage.Options doesn't let us remove flags that we inherit from a + # parent class, so at least provide a --help string that warns people + # away from using it. There is also code (switching on + # allow_multiple) to disable it at runtime. + optFlags.append(["multiple", "m", + "['tahoe run' cannot accept multiple node directories]"]) + +def do_start(basedir, opts, out=sys.stdout, err=sys.stderr, fork=False): print >>out, "STARTING", quote_output(basedir) if not os.path.isdir(basedir): print >>err, "%s does not look like a directory at all" % quote_output(basedir) @@ -57,6 +64,10 @@ def do_start(basedir, opts, out=sys.stdout, err=sys.stderr): if opts["profile"]: args.extend(["--profile=profiling_results.prof", "--savestats",]) # now we're committed + if fork: + if os.fork() != 0: + return 0 # parent + # we're in the child os.chdir(basedir) from twisted.scripts import twistd sys.argv = args @@ -123,8 +134,11 @@ def do_stop(basedir, out=sys.stdout, err=sys.stderr): def start(config, stdout, stderr): rc = 0 - for basedir in config['basedirs']: - rc = do_start(basedir, config, stdout, stderr) or rc + for basedir in config['basedirs'][:-1]: + # fork before starting all but the last one + rc = do_start(basedir, config, stdout, stderr, fork=True) or rc + # start the last one in the current process, to capture its exit code + rc = do_start(config['basedirs'][-1], config, stdout, stderr, fork=False) or rc return rc def stop(config, stdout, stderr): @@ -143,8 +157,7 @@ def restart(config, stdout, stderr): if rc: print >>stderr, "not restarting" return rc - for basedir in config['basedirs']: - rc = do_start(basedir, config, stdout, stderr) or rc + rc = start(config, stdout, stderr) or rc return rc def run(config, stdout, stderr): diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 6a49f6bb..ff516888 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -578,6 +578,143 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, d.addBoth(_remove_hotline) return d + def test_multiple_clients(self): + self.skip_if_cannot_daemonize() + basedir = self.workdir("test_multiple_clients") + # this furl must be syntactically correct, else 'tahoe start' will + # correctly report an error. It doesn't need to point at a real + # introducer. + introducer_furl = "pb://xrndsskn2zuuian5ltnxrte7lnuqdrkz@127.0.0.1:55617/introducer" + c1 = os.path.join(basedir, "c1") + HOTLINE_FILE_1 = os.path.join(c1, "suicide_prevention_hotline") + TWISTD_PID_FILE_1 = os.path.join(c1, "twistd.pid") + PORTNUMFILE_1 = os.path.join(c1, "client.port") + c1_args = ["--quiet", "create-node", "--basedir", c1, "--webport", "0", + "--introducer", introducer_furl] + + c2 = os.path.join(basedir, "c2") + HOTLINE_FILE_2 = os.path.join(c2, "suicide_prevention_hotline") + TWISTD_PID_FILE_2 = os.path.join(c2, "twistd.pid") + PORTNUMFILE_2 = os.path.join(c2, "client.port") + c2_args = ["--quiet", "create-node", "--basedir", c2, "--webport", "0", + "--introducer", introducer_furl] + + d = utils.getProcessOutputAndValue(bintahoe, args=c1_args, env=os.environ) + def _cb1(res): + out, err, rc_or_sig = res + self.failUnlessEqual(rc_or_sig, 0) + return utils.getProcessOutputAndValue(bintahoe, args=c2_args, + env=os.environ) + d.addCallback(_cb1) + def _cb2(res): + out, err, rc_or_sig = res + self.failUnlessEqual(rc_or_sig, 0) + # both nodes have been constructed, but they are not yet running + + # By writing this file, we get sixty seconds before the client + # will exit. This insures that even if the 'stop' command doesn't + # work (and the test fails), the client should still terminate. + open(HOTLINE_FILE_1, "w").write("") + open(HOTLINE_FILE_2, "w").write("") + # now it's safe to start the nodes + start_args = ["--quiet", "start", "-m", c1, c2] + return utils.getProcessOutputAndValue(bintahoe, args=start_args, + env=os.environ) + d.addCallback(_cb2) + + def _nodes_have_started(): + open(HOTLINE_FILE_1, "w").write("") + open(HOTLINE_FILE_2, "w").write("") + return (os.path.exists(PORTNUMFILE_1) + and os.path.exists(PORTNUMFILE_2)) + + def _cb3(res): + out, err, rc_or_sig = res + open(HOTLINE_FILE_1, "w").write("") + open(HOTLINE_FILE_2, "w").write("") + errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) + self.failUnlessEqual(rc_or_sig, 0, errstr) + self.failUnlessEqual(out, "", errstr) + # See test_client_no_noise -- for now we ignore noise. + # self.failUnlessEqual(err, "", errstr) + + # if 'tahoe start' exited without error, we're guaranteed that it + # successfully loaded the code and tahoe.cfg for the last node + # (argv[-1]), since these happen before twistd.run() calls + # fork(). There made have been a runtime error after fork(). + # Also, there may have been an import or config error while + # loading any of the earlier nodes (argv[:-1]), since 'tahoe + # start' must do an additional fork before calling twistd.run() + # on those. + + # so watch the filesystem to determine when the nodes have + # started running + + return self.poll(_nodes_have_started) + d.addCallback(_cb3) + + def _restart(ign): + # delete the port-number files, then restart the nodes. The new + # instances will re-write those files. + os.unlink(PORTNUMFILE_1) + os.unlink(PORTNUMFILE_2) + restart_args = ["--quiet", "restart", "-m", c1, c2] + return utils.getProcessOutputAndValue(bintahoe, args=restart_args, + env=os.environ) + d.addCallback(_restart) + def _did_restart(res): + out, err, rc_or_sig = res + errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) + self.failUnlessEqual(rc_or_sig, 0, errstr) + self.failUnlessEqual(out, "", errstr) + # See test_client_no_noise -- for now we ignore noise. + # self.failUnlessEqual(err, "", errstr) + + # we must still poll for the startup process to get far enough + return self.poll(_nodes_have_started) + d.addCallback(_did_restart) + # when that finishes, we know the nodes have restarted. If 'tahoe + # restart -m' is broken, that will time out. + + # now we can kill it. TODO: On a slow machine, the node might kill + # itself before we get a chance too, especially if spawning the + # 'tahoe stop' command takes a while. + def _stop(res): + open(HOTLINE_FILE_1, "w").write("") + open(HOTLINE_FILE_2, "w").write("") + self.failUnless(os.path.exists(TWISTD_PID_FILE_1), + (TWISTD_PID_FILE_1, os.listdir(c1))) + self.failUnless(os.path.exists(TWISTD_PID_FILE_2), + (TWISTD_PID_FILE_2, os.listdir(c2))) + stop_args = ["--quiet", "stop", "-m", c1, c2] + return utils.getProcessOutputAndValue(bintahoe, args=stop_args, + env=os.environ) + d.addCallback(_stop) + + def _cb4(res): + out, err, rc_or_sig = res + errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err) + self.failUnlessEqual(rc_or_sig, 0, errstr) + self.failUnlessEqual(out, "", errstr) + # See test_client_no_noise -- for now we ignore noise. + # self.failUnlessEqual(err, "", errstr) + + # the parent was supposed to poll and wait until it sees + # twistd.pid go away before it exits, so twistd.pid should be + # gone by now. + self.failIf(os.path.exists(TWISTD_PID_FILE_1)) + self.failIf(os.path.exists(TWISTD_PID_FILE_2)) + d.addCallback(_cb4) + def _remove_hotline(res): + # always remove these, so that if something went wrong, the nodes + # will quit eventually + os.unlink(HOTLINE_FILE_1) + os.unlink(HOTLINE_FILE_2) + return res + d.addBoth(_remove_hotline) + return d + + def test_baddir(self): self.skip_if_cannot_daemonize() basedir = self.workdir("test_baddir")