check_memory.py: finish the failsafe-shutdown code
author Brian Warner <warner@allmydata.com>
Fri, 25 May 2007 00:34:42 +0000 (17:34 -0700)
committer Brian Warner <warner@allmydata.com>
Fri, 25 May 2007 00:34:42 +0000 (17:34 -0700)
GNUmakefile
src/allmydata/client.py
src/allmydata/test/check_memory.py

index 6b4201a11a503d1b559fbbdfc25afd174ef1f4a3..611468853d22e2e5baab4a2b514d391cf7beb795 100644 (file)
@@ -173,7 +173,7 @@ count-lines:
        @echo -n "TODO: "
        @grep TODO `find src -name '*.py' |grep -v /build/` | wc --lines
 
-check-memory:
+check-memory: build
        $(PP) $(PYTHON) src/allmydata/test/check_memory.py
 
 test-darcs-boringfile:
index e4e142c185e302ecf0a6f0833bed10ead6e52674..272a770f29bb3e8f5919f1533c8e2f9a39c750f9 100644 (file)
@@ -1,11 +1,12 @@
 
-import os, sha
+import os, sha, stat, time
 from foolscap import Referenceable
 from zope.interface import implements
 from allmydata.interfaces import RIClient
 from allmydata import node
 
-from twisted.internet import defer
+from twisted.internet import defer, reactor
+from twisted.application.internet import TimerService
 
 import allmydata
 from allmydata.Crypto.Util.number import bytes_to_long
@@ -25,6 +26,7 @@ class Client(node.Node, Referenceable):
     WEBPORTFILE = "webport"
     INTRODUCER_FURL_FILE = "introducer.furl"
     GLOBAL_VDRIVE_FURL_FILE = "vdrive.furl"
+    SUICIDE_PREVENTION_HOTLINE_FILE = "suicide_prevention_hotline"
 
     # we're pretty narrow-minded right now
     OLDEST_SUPPORTED_VERSION = allmydata.__version__
@@ -57,6 +59,20 @@ class Client(node.Node, Referenceable):
         self.global_vdrive_furl = f.read().strip()
         f.close()
 
+        hotline_file = os.path.join(self.basedir,
+                                    self.SUICIDE_PREVENTION_HOTLINE_FILE)
+        if os.path.exists(hotline_file):
+            hotline = TimerService(5.0, self._check_hotline, hotline_file)
+            hotline.setServiceParent(self)
+
+    def _check_hotline(self, hotline_file):
+        if os.path.exists(hotline_file):
+            mtime = os.stat(hotline_file)[stat.ST_MTIME]
+            if mtime > time.time() - 10.0:
+                return
+        self.log("hotline missing or too old, shutting down")
+        reactor.stop()
+
     def tub_ready(self):
         self.log("tub_ready")
         self.my_furl = self.tub.registerReference(self)
index a9f0de16a3ff2051ada13cce32cb9246f857a23c..549a7b9b42cfa4be7d5521550bec0f04afd90ac1 100644 (file)
@@ -3,7 +3,7 @@
 import os, shutil
 
 from twisted.internet import defer, reactor, protocol, error
-from twisted.application import service
+from twisted.application import service, internet
 from allmydata import client, introducer_and_vdrive
 from allmydata.scripts import runner
 from foolscap.eventual import eventually, flushEventualQueue
@@ -57,9 +57,10 @@ class SystemFramework:
         return s
 
     def make_introducer_and_vdrive(self):
-        introducer_and_vdrive_dir = os.path.join(self.basedir, "introducer_and_vdrive")
-        os.mkdir(introducer_and_vdrive_dir)
-        self.introducer_and_vdrive = self.add_service(introducer_and_vdrive.IntroducerAndVdrive(basedir=introducer_and_vdrive_dir))
+        iv_basedir = os.path.join(self.basedir, "introducer_and_vdrive")
+        os.mkdir(iv_basedir)
+        iv = introducer_and_vdrive.IntroducerAndVdrive(basedir=iv_basedir)
+        self.introducer_and_vdrive = self.add_service(iv)
         d = self.introducer_and_vdrive.when_tub_ready()
         return d
 
@@ -84,25 +85,33 @@ class SystemFramework:
 
     def touch_keepalive(self):
         f = open(self.keepalive_file, "w")
-        f.write("If the node notices this file at startup, it will poll and\n")
-        f.write("terminate as soon as the file goes away. This prevents\n")
-        f.write("leaving processes around if the test harness has an\n")
-        f.write("internal failure and neglects to kill off the node\n")
-        f.write("itself. The contents of this file are ignored.\n")
+        f.write("""\
+If the node notices this file at startup, it will poll every 5 seconds and
+terminate if the file is more than 10 seconds old, or if it has been deleted.
+If the test harness has an internal failure and neglects to kill off the node
+itself, this helps to avoid leaving processes lying around. The contents of
+this file are ignored.
+        """)
         f.close()
 
     def start_client(self):
         log.msg("MAKING CLIENT")
         clientdir = self.clientdir = os.path.join(self.basedir, "client")
-        config = {'basedir': clientdir}
+        config = {'basedir': clientdir, 'quiet': False}
         runner.create_client(config)
         log.msg("DONE MAKING CLIENT")
         f = open(os.path.join(clientdir, "introducer.furl"), "w")
         f.write(self.introducer_furl + "\n")
         f.close()
-        self.keepalive_file = os.path.join(clientdir, "suicide_prevention_hotline")
-        self.touch_keepalive()
+        f = open(os.path.join(clientdir, "vdrive.furl"), "w")
+        f.write(self.introducer_furl + "\n")
+        f.close()
+        self.keepalive_file = os.path.join(clientdir,
+                                           "suicide_prevention_hotline")
         # now start updating the mtime.
+        self.touch_keepalive()
+        ts = internet.TimerService(4.0, self.touch_keepalive)
+        ts.setServiceParent(self.sparent)
 
         pp = ClientWatcher()
         cmd = ["twistd", "-y", "client.tac"]