git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blobdiff - src/allmydata/stats.py
Slightly improve the error message when a stats.pickle file cannot be read.
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / stats.py
index 562aff0553113dd0d2a489134b6e9fcdd342a05e..7db323ba5ce68b2fe30101cb0dc47c1fc0188add 100644 (file)
@@ -2,21 +2,17 @@
 import os
 import pickle
 import pprint
-import sys
 import time
 from collections import deque
 
-from twisted.internet import reactor, defer
+from twisted.internet import reactor
 from twisted.application import service
 from twisted.application.internet import TimerService
 from zope.interface import implements
-import foolscap
-from foolscap.eventual import eventually
-from foolscap.logging.gatherer import get_local_ip_for
-from twisted.internet.error import ConnectionDone, ConnectionLost
-from foolscap import DeadReferenceError
+from foolscap.api import eventually, DeadReferenceError, Referenceable, Tub
 
 from allmydata.util import log
+from allmydata.util.encodingutil import quote_output
 from allmydata.interfaces import RIStatsProvider, RIStatsGatherer, IStatsProducer
 
 class LoadMonitor(service.MultiService):
@@ -75,7 +71,8 @@ class LoadMonitor(service.MultiService):
 
 class CPUUsageMonitor(service.MultiService):
     implements(IStatsProducer)
-    MINUTES = 15
+    HISTORY_LENGTH = 15
+    POLL_INTERVAL = 60
 
     def __init__(self):
         service.MultiService.__init__(self)
@@ -88,7 +85,7 @@ class CPUUsageMonitor(service.MultiService):
         eventually(self._set_initial_cpu)
         self.samples = []
         # we provide 1min, 5min, and 15min moving averages
-        TimerService(60, self.check).setServiceParent(self)
+        TimerService(self.POLL_INTERVAL, self.check).setServiceParent(self)
 
     def _set_initial_cpu(self):
         self.initial_cpu = time.clock()
@@ -97,7 +94,7 @@ class CPUUsageMonitor(service.MultiService):
         now_wall = time.time()
         now_cpu = time.clock()
         self.samples.append( (now_wall, now_cpu) )
-        while len(self.samples) > self.MINUTES+1:
+        while len(self.samples) > self.HISTORY_LENGTH+1:
             self.samples.pop(0)
 
     def _average_N_minutes(self, size):
@@ -124,31 +121,38 @@ class CPUUsageMonitor(service.MultiService):
         s["cpu_monitor.total"] = now_cpu - self.initial_cpu
         return s
 
-class StatsProvider(foolscap.Referenceable, service.MultiService):
+
+class StatsProvider(Referenceable, service.MultiService):
     implements(RIStatsProvider)
 
     def __init__(self, node, gatherer_furl):
         service.MultiService.__init__(self)
         self.node = node
-        self.gatherer_furl = gatherer_furl
+        self.gatherer_furl = gatherer_furl # might be None
 
         self.counters = {}
         self.stats_producers = []
 
-        self.load_monitor = LoadMonitor(self)
-        self.load_monitor.setServiceParent(self)
-        self.register_producer(self.load_monitor)
+        # only run the LoadMonitor (which submits a timer every second) if
+        # there is a gatherer who is going to be paying attention. Our stats
+        # are visible through HTTP even without a gatherer, so run the rest
+        # of the stats (including the once-per-minute CPUUsageMonitor)
+        if gatherer_furl:
+            self.load_monitor = LoadMonitor(self)
+            self.load_monitor.setServiceParent(self)
+            self.register_producer(self.load_monitor)
 
         self.cpu_monitor = CPUUsageMonitor()
         self.cpu_monitor.setServiceParent(self)
         self.register_producer(self.cpu_monitor)
 
     def startService(self):
-        if self.node:
+        if self.node and self.gatherer_furl:
             d = self.node.when_tub_ready()
             def connect(junk):
-                nickname = self.node.get_config('nickname')
-                self.node.tub.connectTo(self.gatherer_furl, self._connected, nickname)
+                nickname_utf8 = self.node.nickname.encode("utf-8")
+                self.node.tub.connectTo(self.gatherer_furl,
+                                        self._connected, nickname_utf8)
             d.addCallback(connect)
         service.MultiService.startService(self)
 
@@ -173,35 +177,24 @@ class StatsProvider(foolscap.Referenceable, service.MultiService):
     def _connected(self, gatherer, nickname):
         gatherer.callRemoteOnly('provide', self, nickname or '')
 
-class StatsGatherer(foolscap.Referenceable, service.MultiService):
+
+class StatsGatherer(Referenceable, service.MultiService):
     implements(RIStatsGatherer)
 
     poll_interval = 60
 
-    def __init__(self, tub, basedir):
+    def __init__(self, basedir):
         service.MultiService.__init__(self)
-        self.tub = tub
         self.basedir = basedir
 
         self.clients = {}
         self.nicknames = {}
 
-    def startService(self):
-        # the Tub must have a location set on it by now
-        service.MultiService.startService(self)
         self.timer = TimerService(self.poll_interval, self.poll)
         self.timer.setServiceParent(self)
-        self.registerGatherer()
-
-    def get_furl(self):
-        return self.my_furl
-
-    def registerGatherer(self):
-        furl_file = os.path.join(self.basedir, "stats_gatherer.furl")
-        self.my_furl = self.tub.registerReference(self, furlFile=furl_file)
 
     def get_tubid(self, rref):
-        return foolscap.SturdyRef(rref.tracker.getURL()).getTubRef().getTubID()
+        return rref.getRemoteTubID()
 
     def remote_provide(self, provider, nickname):
         tubid = self.get_tubid(provider)
@@ -225,7 +218,7 @@ class StatsGatherer(foolscap.Referenceable, service.MultiService):
         # this is called lazily, when a get_stats request fails
         del self.clients[tubid]
         del self.nicknames[tubid]
-        f.trap(DeadReferenceError, ConnectionDone, ConnectionLost)
+        f.trap(DeadReferenceError)
 
     def log_client_error(self, f, tubid):
         log.msg("StatsGatherer: error in get_stats(), peerid=%s" % tubid,
@@ -244,7 +237,7 @@ class StdOutStatsGatherer(StatsGatherer):
         StatsGatherer.remote_provide(self, provider, nickname)
 
     def announce_lost_client(self, tubid):
-        print 'disconnect "%s" [%s]:' % (self.nicknames[tubid], tubid)
+        print 'disconnect "%s" [%s]' % (self.nicknames[tubid], tubid)
 
     def got_stats(self, stats, tubid, nickname):
         print '"%s" [%s]:' % (nickname, tubid)
@@ -253,14 +246,20 @@ class StdOutStatsGatherer(StatsGatherer):
 class PickleStatsGatherer(StdOutStatsGatherer):
     # inherit from StdOutStatsGatherer for connect/disconnect notifications
 
-    def __init__(self, tub, basedir=".", verbose=True):
+    def __init__(self, basedir=".", verbose=True):
         self.verbose = verbose
-        StatsGatherer.__init__(self, tub, basedir)
+        StatsGatherer.__init__(self, basedir)
         self.picklefile = os.path.join(basedir, "stats.pickle")
 
         if os.path.exists(self.picklefile):
             f = open(self.picklefile, 'rb')
-            self.gathered_stats = pickle.load(f)
+            try:
+                self.gathered_stats = pickle.load(f)
+            except Exception:
+                print ("Error while attempting to load pickle file %s.\n"
+                       "You may need to restore this file from a backup, or delete it if no backup is available.\n" %
+                       quote_output(os.path.abspath(self.picklefile)))
+                raise
             f.close()
         else:
             self.gathered_stats = {}
@@ -281,51 +280,40 @@ class PickleStatsGatherer(StdOutStatsGatherer):
             os.unlink(self.picklefile)
         os.rename(tmp, self.picklefile)
 
-class GathererApp(object):
-    def __init__(self):
-        d = self.setup_tub()
-        d.addCallback(self._tub_ready)
-
-    def setup_tub(self):
-        self._tub = foolscap.Tub(certFile="stats_gatherer.pem")
-        self._tub.setOption("logLocalFailures", True)
-        self._tub.setOption("logRemoteFailures", True)
-        self._tub.startService()
-        portnumfile = "portnum"
+class StatsGathererService(service.MultiService):
+    furl_file = "stats_gatherer.furl"
+
+    def __init__(self, basedir=".", verbose=False):
+        service.MultiService.__init__(self)
+        self.basedir = basedir
+        self.tub = Tub(certFile=os.path.join(self.basedir,
+                                             "stats_gatherer.pem"))
+        self.tub.setServiceParent(self)
+        self.tub.setOption("logLocalFailures", True)
+        self.tub.setOption("logRemoteFailures", True)
+        self.tub.setOption("expose-remote-exception-types", False)
+
+        self.stats_gatherer = PickleStatsGatherer(self.basedir, verbose)
+        self.stats_gatherer.setServiceParent(self)
+
+        portnumfile = os.path.join(self.basedir, "portnum")
         try:
-            portnum = int(open(portnumfile, "r").read())
-        except (EnvironmentError, ValueError):
-            portnum = 0
-        self._tub.listenOn("tcp:%d" % portnum)
-        d = defer.maybeDeferred(get_local_ip_for)
-        d.addCallback(self._set_location)
-        d.addCallback(lambda res: self._tub)
-        return d
-
-    def _set_location(self, local_address):
-        if local_address is None:
-            local_addresses = ["127.0.0.1"]
-        else:
-            local_addresses = [local_address, "127.0.0.1"]
-        l = self._tub.getListeners()[0]
-        portnum = l.getPortnum()
-        portnumfile = "portnum"
-        open(portnumfile, "w").write("%d\n" % portnum)
-        local_addresses = [ "%s:%d" % (addr, portnum,)
-                            for addr in local_addresses ]
-        assert len(local_addresses) >= 1
-        location = ",".join(local_addresses)
-        self._tub.setLocation(location)
-
-    def _tub_ready(self, tub):
-        sg = PickleStatsGatherer(tub, ".")
-        sg.setServiceParent(tub)
-        sg.verbose = True
-        print '\nStatsGatherer: %s\n' % (sg.get_furl(),)
-
-def main(argv):
-    ga = GathererApp()
-    reactor.run()
-
-if __name__ == '__main__':
-    main(sys.argv)
+            portnum = open(portnumfile, "r").read()
+        except EnvironmentError:
+            portnum = None
+        self.listener = self.tub.listenOn(portnum or "tcp:0")
+        d = self.tub.setLocationAutomatically()
+        if portnum is None:
+            d.addCallback(self.save_portnum)
+        d.addCallback(self.tub_ready)
+        d.addErrback(log.err)
+
+    def save_portnum(self, junk):
+        portnum = self.listener.getPortnum()
+        portnumfile = os.path.join(self.basedir, 'portnum')
+        open(portnumfile, 'wb').write('%d\n' % (portnum,))
+
+    def tub_ready(self, ignored):
+        ff = os.path.join(self.basedir, self.furl_file)
+        self.gatherer_furl = self.tub.registerReference(self.stats_gatherer,
+                                                        furlFile=ff)