From: Brian Warner Date: Fri, 25 May 2007 00:34:42 +0000 (-0700) Subject: check_memory.py: finish the failsafe-shutdown code X-Git-Tag: allmydata-tahoe-0.3.0~51 X-Git-Url: https://git.rkrishnan.org/simplejson/%22news.html/module-simplejson.tests.html?a=commitdiff_plain;h=c405c6117fe1d73ef52a2eb66c764e1ebfb9d6c0;p=tahoe-lafs%2Ftahoe-lafs.git check_memory.py: finish the failsafe-shutdown code --- diff --git a/GNUmakefile b/GNUmakefile index 6b4201a1..61146885 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -173,7 +173,7 @@ count-lines: @echo -n "TODO: " @grep TODO `find src -name '*.py' |grep -v /build/` | wc --lines -check-memory: +check-memory: build $(PP) $(PYTHON) src/allmydata/test/check_memory.py test-darcs-boringfile: diff --git a/src/allmydata/client.py b/src/allmydata/client.py index e4e142c1..272a770f 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -1,11 +1,12 @@ -import os, sha +import os, sha, stat, time from foolscap import Referenceable from zope.interface import implements from allmydata.interfaces import RIClient from allmydata import node -from twisted.internet import defer +from twisted.internet import defer, reactor +from twisted.application.internet import TimerService import allmydata from allmydata.Crypto.Util.number import bytes_to_long @@ -25,6 +26,7 @@ class Client(node.Node, Referenceable): WEBPORTFILE = "webport" INTRODUCER_FURL_FILE = "introducer.furl" GLOBAL_VDRIVE_FURL_FILE = "vdrive.furl" + SUICIDE_PREVENTION_HOTLINE_FILE = "suicide_prevention_hotline" # we're pretty narrow-minded right now OLDEST_SUPPORTED_VERSION = allmydata.__version__ @@ -57,6 +59,20 @@ class Client(node.Node, Referenceable): self.global_vdrive_furl = f.read().strip() f.close() + hotline_file = os.path.join(self.basedir, + self.SUICIDE_PREVENTION_HOTLINE_FILE) + if os.path.exists(hotline_file): + hotline = TimerService(5.0, self._check_hotline, hotline_file) + hotline.setServiceParent(self) + + def _check_hotline(self, hotline_file): + if os.path.exists(hotline_file): + mtime = os.stat(hotline_file)[stat.ST_MTIME] + if mtime > time.time() - 10.0: + return + self.log("hotline missing or too old, shutting down") + reactor.stop() + def tub_ready(self): self.log("tub_ready") self.my_furl = self.tub.registerReference(self) diff --git a/src/allmydata/test/check_memory.py b/src/allmydata/test/check_memory.py index a9f0de16..549a7b9b 100644 --- a/src/allmydata/test/check_memory.py +++ b/src/allmydata/test/check_memory.py @@ -3,7 +3,7 @@ import os, shutil from twisted.internet import defer, reactor, protocol, error -from twisted.application import service +from twisted.application import service, internet from allmydata import client, introducer_and_vdrive from allmydata.scripts import runner from foolscap.eventual import eventually, flushEventualQueue @@ -57,9 +57,10 @@ class SystemFramework: return s def make_introducer_and_vdrive(self): - introducer_and_vdrive_dir = os.path.join(self.basedir, "introducer_and_vdrive") - os.mkdir(introducer_and_vdrive_dir) - self.introducer_and_vdrive = self.add_service(introducer_and_vdrive.IntroducerAndVdrive(basedir=introducer_and_vdrive_dir)) + iv_basedir = os.path.join(self.basedir, "introducer_and_vdrive") + os.mkdir(iv_basedir) + iv = introducer_and_vdrive.IntroducerAndVdrive(basedir=iv_basedir) + self.introducer_and_vdrive = self.add_service(iv) d = self.introducer_and_vdrive.when_tub_ready() return d @@ -84,25 +85,33 @@ class SystemFramework: def touch_keepalive(self): f = open(self.keepalive_file, "w") - f.write("If the node notices this file at startup, it will poll and\n") - f.write("terminate as soon as the file goes away. This prevents\n") - f.write("leaving processes around if the test harness has an\n") - f.write("internal failure and neglects to kill off the node\n") - f.write("itself. The contents of this file are ignored.\n") + f.write("""\ +If the node notices this file at startup, it will poll every 5 seconds and +terminate if the file is more than 10 seconds old, or if it has been deleted. +If the test harness has an internal failure and neglects to kill off the node +itself, this helps to avoid leaving processes lying around. The contents of +this file are ignored. + """) f.close() def start_client(self): log.msg("MAKING CLIENT") clientdir = self.clientdir = os.path.join(self.basedir, "client") - config = {'basedir': clientdir} + config = {'basedir': clientdir, 'quiet': False} runner.create_client(config) log.msg("DONE MAKING CLIENT") f = open(os.path.join(clientdir, "introducer.furl"), "w") f.write(self.introducer_furl + "\n") f.close() - self.keepalive_file = os.path.join(clientdir, "suicide_prevention_hotline") - self.touch_keepalive() + f = open(os.path.join(clientdir, "vdrive.furl"), "w") + f.write(self.introducer_furl + "\n") + f.close() + self.keepalive_file = os.path.join(clientdir, + "suicide_prevention_hotline") # now start updating the mtime. + self.touch_keepalive() + ts = internet.TimerService(4.0, self.touch_keepalive) + ts.setServiceParent(self.sparent) pp = ClientWatcher() cmd = ["twistd", "-y", "client.tac"]