From f3adb037ae0d22eb06c719c2faef75a834618442 Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@lothar.com>
Date: Fri, 26 Nov 2010 14:45:38 -0800
Subject: [PATCH] startstop_node.py: fix "tahoe start -m" by forking before
 non-final targets

* don't advertise -m flag on tahoe start/restart/run unless os.fork is
  available (i.e. windows)
* test_runner.py: add test to exercise "start/stop/restart -m"
---
 src/allmydata/scripts/common.py         |  11 +-
 src/allmydata/scripts/startstop_node.py |  29 +++--
 src/allmydata/test/test_runner.py       | 137 ++++++++++++++++++++++++
 3 files changed, 166 insertions(+), 11 deletions(-)

diff --git a/src/allmydata/scripts/common.py b/src/allmydata/scripts/common.py
index 96879b68..5e4bfb91 100644
--- a/src/allmydata/scripts/common.py
+++ b/src/allmydata/scripts/common.py
@@ -54,13 +54,18 @@ class BaseOptions(usage.Options):
 class BasedirMixin:
     default_nodedir = _default_nodedir
     allow_multiple = True
+    # startstop_node.py needs os.fork to implement the "-m" option on "tahoe
+    # start" option
+    can_start_multiple = hasattr(os, "fork")
+
 
     optParameters = [
         ["basedir", "C", None, "Same as --node-directory."],
     ]
-    optFlags = [
-        ["multiple", "m", "Specify multiple node directories at once."],
-    ]
+    optFlags = [ ]
+    if can_start_multiple:
+        optFlags.append(["multiple", "m",
+                         "Specify multiple node directories at once."])
 
     def parseArgs(self, *args):
         if self['node-directory'] and self['basedir']:
diff --git a/src/allmydata/scripts/startstop_node.py b/src/allmydata/scripts/startstop_node.py
index bc2616ea..c76c9d89 100644
--- a/src/allmydata/scripts/startstop_node.py
+++ b/src/allmydata/scripts/startstop_node.py
@@ -26,10 +26,17 @@ class RunOptions(BasedirMixin, BaseOptions):
 
     optParameters = [
         ["node-directory", "d", None, "Specify the directory of the node to be run. [default, for 'tahoe run' only: current directory]"],
-        ["multiple", "m", None, "['tahoe run' cannot accept multiple node directories]"],
-    ]
-
-def do_start(basedir, opts, out=sys.stdout, err=sys.stderr):
+        ]
+    optFlags = [ ]
+    if BasedirMixin.can_start_multiple:
+        # usage.Options doesn't let us remove flags that we inherit from a
+        # parent class, so at least provide a --help string that warns people
+        # away from using it. There is also code (switching on
+        # allow_multiple) to disable it at runtime.
+        optFlags.append(["multiple", "m",
+                         "['tahoe run' cannot accept multiple node directories]"])
+
+def do_start(basedir, opts, out=sys.stdout, err=sys.stderr, fork=False):
     print >>out, "STARTING", quote_output(basedir)
     if not os.path.isdir(basedir):
         print >>err, "%s does not look like a directory at all" % quote_output(basedir)
@@ -57,6 +64,10 @@ def do_start(basedir, opts, out=sys.stdout, err=sys.stderr):
     if opts["profile"]:
         args.extend(["--profile=profiling_results.prof", "--savestats",])
     # now we're committed
+    if fork:
+        if os.fork() != 0:
+            return 0 # parent
+        # we're in the child
     os.chdir(basedir)
     from twisted.scripts import twistd
     sys.argv = args
@@ -123,8 +134,11 @@ def do_stop(basedir, out=sys.stdout, err=sys.stderr):
 
 def start(config, stdout, stderr):
     rc = 0
-    for basedir in config['basedirs']:
-        rc = do_start(basedir, config, stdout, stderr) or rc
+    for basedir in config['basedirs'][:-1]:
+        # fork before starting all but the last one
+        rc = do_start(basedir, config, stdout, stderr, fork=True) or rc
+    # start the last one in the current process, to capture its exit code
+    rc = do_start(config['basedirs'][-1], config, stdout, stderr, fork=False) or rc
     return rc
 
 def stop(config, stdout, stderr):
@@ -143,8 +157,7 @@ def restart(config, stdout, stderr):
     if rc:
         print >>stderr, "not restarting"
         return rc
-    for basedir in config['basedirs']:
-        rc = do_start(basedir, config, stdout, stderr) or rc
+    rc = start(config, stdout, stderr) or rc
     return rc
 
 def run(config, stdout, stderr):
diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py
index 6a49f6bb..ff516888 100644
--- a/src/allmydata/test/test_runner.py
+++ b/src/allmydata/test/test_runner.py
@@ -578,6 +578,143 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
         d.addBoth(_remove_hotline)
         return d
 
+    def test_multiple_clients(self):
+        self.skip_if_cannot_daemonize()
+        basedir = self.workdir("test_multiple_clients")
+        # this furl must be syntactically correct, else 'tahoe start' will
+        # correctly report an error. It doesn't need to point at a real
+        # introducer.
+        introducer_furl = "pb://xrndsskn2zuuian5ltnxrte7lnuqdrkz@127.0.0.1:55617/introducer"
+        c1 = os.path.join(basedir, "c1")
+        HOTLINE_FILE_1 = os.path.join(c1, "suicide_prevention_hotline")
+        TWISTD_PID_FILE_1 = os.path.join(c1, "twistd.pid")
+        PORTNUMFILE_1 = os.path.join(c1, "client.port")
+        c1_args = ["--quiet", "create-node", "--basedir", c1, "--webport", "0",
+                   "--introducer", introducer_furl]
+
+        c2 = os.path.join(basedir, "c2")
+        HOTLINE_FILE_2 = os.path.join(c2, "suicide_prevention_hotline")
+        TWISTD_PID_FILE_2 = os.path.join(c2, "twistd.pid")
+        PORTNUMFILE_2 = os.path.join(c2, "client.port")
+        c2_args = ["--quiet", "create-node", "--basedir", c2, "--webport", "0",
+                   "--introducer", introducer_furl]
+
+        d = utils.getProcessOutputAndValue(bintahoe, args=c1_args, env=os.environ)
+        def _cb1(res):
+            out, err, rc_or_sig = res
+            self.failUnlessEqual(rc_or_sig, 0)
+            return utils.getProcessOutputAndValue(bintahoe, args=c2_args,
+                                                  env=os.environ)
+        d.addCallback(_cb1)
+        def _cb2(res):
+            out, err, rc_or_sig = res
+            self.failUnlessEqual(rc_or_sig, 0)
+            # both nodes have been constructed, but they are not yet running
+
+            # By writing this file, we get sixty seconds before the client
+            # will exit. This insures that even if the 'stop' command doesn't
+            # work (and the test fails), the client should still terminate.
+            open(HOTLINE_FILE_1, "w").write("")
+            open(HOTLINE_FILE_2, "w").write("")
+            # now it's safe to start the nodes
+            start_args = ["--quiet", "start", "-m", c1, c2]
+            return utils.getProcessOutputAndValue(bintahoe, args=start_args,
+                                                  env=os.environ)
+        d.addCallback(_cb2)
+
+        def _nodes_have_started():
+            open(HOTLINE_FILE_1, "w").write("")
+            open(HOTLINE_FILE_2, "w").write("")
+            return (os.path.exists(PORTNUMFILE_1)
+                    and os.path.exists(PORTNUMFILE_2))
+
+        def _cb3(res):
+            out, err, rc_or_sig = res
+            open(HOTLINE_FILE_1, "w").write("")
+            open(HOTLINE_FILE_2, "w").write("")
+            errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err)
+            self.failUnlessEqual(rc_or_sig, 0, errstr)
+            self.failUnlessEqual(out, "", errstr)
+            # See test_client_no_noise -- for now we ignore noise.
+            # self.failUnlessEqual(err, "", errstr)
+
+            # if 'tahoe start' exited without error, we're guaranteed that it
+            # successfully loaded the code and tahoe.cfg for the last node
+            # (argv[-1]), since these happen before twistd.run() calls
+            # fork(). There made have been a runtime error after fork().
+            # Also, there may have been an import or config error while
+            # loading any of the earlier nodes (argv[:-1]), since 'tahoe
+            # start' must do an additional fork before calling twistd.run()
+            # on those.
+
+            # so watch the filesystem to determine when the nodes have
+            # started running
+
+            return self.poll(_nodes_have_started)
+        d.addCallback(_cb3)
+
+        def _restart(ign):
+            # delete the port-number files, then restart the nodes. The new
+            # instances will re-write those files.
+            os.unlink(PORTNUMFILE_1)
+            os.unlink(PORTNUMFILE_2)
+            restart_args = ["--quiet", "restart", "-m", c1, c2]
+            return utils.getProcessOutputAndValue(bintahoe, args=restart_args,
+                                                  env=os.environ)
+        d.addCallback(_restart)
+        def _did_restart(res):
+            out, err, rc_or_sig = res
+            errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err)
+            self.failUnlessEqual(rc_or_sig, 0, errstr)
+            self.failUnlessEqual(out, "", errstr)
+            # See test_client_no_noise -- for now we ignore noise.
+            # self.failUnlessEqual(err, "", errstr)
+
+            # we must still poll for the startup process to get far enough
+            return self.poll(_nodes_have_started)
+        d.addCallback(_did_restart)
+        # when that finishes, we know the nodes have restarted. If 'tahoe
+        # restart -m' is broken, that will time out.
+
+        # now we can kill it. TODO: On a slow machine, the node might kill
+        # itself before we get a chance too, especially if spawning the
+        # 'tahoe stop' command takes a while.
+        def _stop(res):
+            open(HOTLINE_FILE_1, "w").write("")
+            open(HOTLINE_FILE_2, "w").write("")
+            self.failUnless(os.path.exists(TWISTD_PID_FILE_1),
+                            (TWISTD_PID_FILE_1, os.listdir(c1)))
+            self.failUnless(os.path.exists(TWISTD_PID_FILE_2),
+                            (TWISTD_PID_FILE_2, os.listdir(c2)))
+            stop_args = ["--quiet", "stop", "-m", c1, c2]
+            return utils.getProcessOutputAndValue(bintahoe, args=stop_args,
+                                                  env=os.environ)
+        d.addCallback(_stop)
+
+        def _cb4(res):
+            out, err, rc_or_sig = res
+            errstr = "rc=%d, OUT: '%s', ERR: '%s'" % (rc_or_sig, out, err)
+            self.failUnlessEqual(rc_or_sig, 0, errstr)
+            self.failUnlessEqual(out, "", errstr)
+            # See test_client_no_noise -- for now we ignore noise.
+            # self.failUnlessEqual(err, "", errstr)
+
+            # the parent was supposed to poll and wait until it sees
+            # twistd.pid go away before it exits, so twistd.pid should be
+            # gone by now.
+            self.failIf(os.path.exists(TWISTD_PID_FILE_1))
+            self.failIf(os.path.exists(TWISTD_PID_FILE_2))
+        d.addCallback(_cb4)
+        def _remove_hotline(res):
+            # always remove these, so that if something went wrong, the nodes
+            # will quit eventually
+            os.unlink(HOTLINE_FILE_1)
+            os.unlink(HOTLINE_FILE_2)
+            return res
+        d.addBoth(_remove_hotline)
+        return d
+
+
     def test_baddir(self):
         self.skip_if_cannot_daemonize()
         basedir = self.workdir("test_baddir")
-- 
2.45.2