From c4d7b7b109b583b0c142bac9fae5d983f25fdd1c Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@lothar.com>
Date: Mon, 14 May 2012 13:32:03 -0700
Subject: [PATCH] write node.url and portnum files atomically, to fix race in
 test_runner

Previously, test_runner sometimes fails because the _node_has_started()
poller fires after the portnum file has been opened, but before it has
actually been filled, allowing the test process to observe an empty file,
which flunks the test.

This adds a new fileutil.write_atomically() function (using the usual
write-to-.tmp-then-rename approach), and uses it for both node.url and
client.port . These files are written a bit before the node is really up and
running, but they're late enough for test_runner's purposes, which is to know
when it's safe to read client.port and use 'tahoe restart' (and therefore
SIGINT) to restart the node.

The current node/client code doesn't offer any better "are you really done
with startup" indicator.. the ideal approach would be to either watch the
logfile, or connect to its flogport, but both are a hassle. Changing the node
to write out a new "all done" file would be intrusive for regular
operations.
---
 src/allmydata/node.py             | 2 +-
 src/allmydata/test/test_runner.py | 5 ++++-
 src/allmydata/test/test_util.py   | 9 +++++++++
 src/allmydata/util/fileutil.py    | 6 ++++++
 src/allmydata/webish.py           | 3 ++-
 5 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/src/allmydata/node.py b/src/allmydata/node.py
index 458c57e1..aedaf842 100644
--- a/src/allmydata/node.py
+++ b/src/allmydata/node.py
@@ -360,7 +360,7 @@ class Node(service.MultiService):
         portnum = l.getPortnum()
         # record which port we're listening on, so we can grab the same one
         # next time
-        open(self._portnumfile, "w").write("%d\n" % portnum)
+        fileutil.write_atomically(self._portnumfile, "%d\n" % portnum, mode="")
 
         base_location = ",".join([ "%s:%d" % (addr, portnum)
                                    for addr in local_addresses ])
diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py
index 6dd8b634..4e9f683d 100644
--- a/src/allmydata/test/test_runner.py
+++ b/src/allmydata/test/test_runner.py
@@ -588,6 +588,7 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
         d.addCallback(_cb2)
 
         def _node_has_started():
+            # this depends upon both files being created atomically
             return os.path.exists(NODE_URL_FILE) and os.path.exists(PORTNUM_FILE)
         d.addCallback(lambda res: self.poll(_node_has_started))
 
@@ -627,7 +628,9 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin,
         # 'tahoe stop' command takes a while.
         def _stop(res):
             fileutil.write(HOTLINE_FILE, "")
-            self.failUnless(os.path.exists(TWISTD_PID_FILE), (TWISTD_PID_FILE, os.listdir(os.path.dirname(TWISTD_PID_FILE))))
+            self.failUnless(os.path.exists(TWISTD_PID_FILE),
+                            (TWISTD_PID_FILE,
+                             os.listdir(os.path.dirname(TWISTD_PID_FILE))))
             return self.run_bintahoe(["--quiet", "stop", c1])
         d.addCallback(_stop)
 
diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py
index 54ef2551..575a7a49 100644
--- a/src/allmydata/test/test_util.py
+++ b/src/allmydata/test/test_util.py
@@ -420,6 +420,15 @@ class FileUtil(unittest.TestCase):
         fileutil.rm_dir(basedir)
         fileutil.remove_if_possible(fn) # should survive errors
 
+    def test_write_atomically(self):
+        basedir = "util/FileUtil/test_write_atomically"
+        fileutil.make_dirs(basedir)
+        fn = os.path.join(basedir, "here")
+        fileutil.write_atomically(fn, "one")
+        self.failUnlessEqual(fileutil.read(fn), "one")
+        fileutil.write_atomically(fn, "two", mode="") # non-binary
+        self.failUnlessEqual(fileutil.read(fn), "two")
+
     def test_open_or_create(self):
         basedir = "util/FileUtil/test_open_or_create"
         fileutil.make_dirs(basedir)
diff --git a/src/allmydata/util/fileutil.py b/src/allmydata/util/fileutil.py
index bfcffd8f..d6235509 100644
--- a/src/allmydata/util/fileutil.py
+++ b/src/allmydata/util/fileutil.py
@@ -247,6 +247,12 @@ def move_into_place(source, dest):
         remove_if_possible(dest)
     os.rename(source, dest)
 
+def write_atomically(target, contents, mode="b"):
+    f = open(target+".tmp", "w"+mode)
+    f.write(contents)
+    f.close()
+    move_into_place(target+".tmp", target)
+
 def write(path, data):
     wf = open(path, "wb")
     try:
diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py
index a8e0bff8..813856cb 100644
--- a/src/allmydata/webish.py
+++ b/src/allmydata/webish.py
@@ -163,7 +163,8 @@ class WebishServer(service.MultiService):
         if nodeurl_path:
             def _write_nodeurl_file(ign):
                 # this file will be created with default permissions
-                fileutil.write(nodeurl_path, self.getURL() + "\n")
+                line = self.getURL() + "\n"
+                fileutil.write_atomically(nodeurl_path, line, mode="")
             self._started.addCallback(_write_nodeurl_file)
 
     def getURL(self):
-- 
2.45.2