7 from collections import deque
9 from twisted.internet import reactor, defer
10 from twisted.application import service
11 from twisted.application.internet import TimerService
12 from zope.interface import implements
14 from foolscap.eventual import eventually
15 from foolscap.logging.gatherer import get_local_ip_for
16 from twisted.internet.error import ConnectionDone, ConnectionLost
17 from foolscap import DeadReferenceError
19 from allmydata.util import log
20 from allmydata.interfaces import RIStatsProvider, RIStatsGatherer, IStatsProducer
class LoadMonitor(service.MultiService):
    """Samples reactor scheduling delay as a crude measure of node load.

    Registered with StatsProvider as an IStatsProducer; its get_stats()
    output is merged into the node's stats dictionary.
    """
    implements(IStatsProducer)

    def __init__(self, provider, warn_if_delay_exceeds=1):
        # provider: the StatsProvider that owns this monitor
        # warn_if_delay_exceeds: delay (seconds) above which a warning is logged
        service.MultiService.__init__(self)
        self.provider = provider
        self.warn_if_delay_exceeds = warn_if_delay_exceeds

    def startService(self):
        # schedule the first sampling pass; the loop re-schedules itself below
        self.timer = reactor.callLater(self.loop_interval, self.loop)
        service.MultiService.startService(self)

    def stopService(self):
        return service.MultiService.stopService(self)

        # NOTE(review): the lines below look like the bodies of the periodic
        # loop() sampler and of get_stats(); their 'def' headers are missing
        # from this excerpt, and 'now', 'self.last', 'self.stats',
        # 'self.num_samples' and 'm_x' are presumably bound by elided lines --
        # confirm against the full file before editing.
        if self.last is not None:
            # how much later than requested did this timer actually fire?
            delay = now - self.last - self.loop_interval
            if delay > self.warn_if_delay_exceeds:
                log.msg(format='excessive reactor delay (%ss)', args=(delay,),
            # keep a bounded window of recent delay samples
            self.stats.append(delay)
        while len(self.stats) > self.num_samples:
        self.timer = reactor.callLater(self.loop_interval, self.loop)

        # get_stats() fragment: report average and maximum observed delay
        avg = sum(self.stats) / len(self.stats)
        return { 'load_monitor.avg_load': avg,
                 'load_monitor.max_load': m_x, }
class CPUUsageMonitor(service.MultiService):
    """Tracks this process's CPU usage and reports 1/5/15-minute averages.

    Registered with StatsProvider as an IStatsProducer.
    """
    implements(IStatsProducer)

    # NOTE(review): the 'def __init__(self):' header, plus the POLL_INTERVAL /
    # HISTORY_LENGTH class attributes and the self.samples setup that the
    # methods below rely on, are missing from this excerpt.
        service.MultiService.__init__(self)
        # we don't use time.clock() here, because the constructor is run by
        # the twistd parent process (as it loads the .tac file), whereas the
        # rest of the program will be run by the child process, after twistd
        # forks. Instead, set self.initial_cpu as soon as the reactor starts
        self.initial_cpu = 0.0 # just in case
        eventually(self._set_initial_cpu)
        # we provide 1min, 5min, and 15min moving averages
        TimerService(self.POLL_INTERVAL, self.check).setServiceParent(self)

    def _set_initial_cpu(self):
        # runs via eventually(), i.e. once the reactor is running in the
        # forked child process, so time.clock() now measures this process
        self.initial_cpu = time.clock()

    # NOTE(review): the 'def check(self):' header (the periodic sampler) is
    # missing from this excerpt.
        now_wall = time.time()
        now_cpu = time.clock()
        self.samples.append( (now_wall, now_cpu) )
        # bound the history; one extra sample is kept so N-window deltas work
        while len(self.samples) > self.HISTORY_LENGTH+1:

    def _average_N_minutes(self, size):
        # not enough samples yet to cover the requested window
        if len(self.samples) < size+1:
        # NOTE(review): 'first' is presumably -(size+1), bound by an elided
        # line -- confirm against the full file.
        elapsed_wall = self.samples[-1][0] - self.samples[first][0]
        elapsed_cpu = self.samples[-1][1] - self.samples[first][1]
        # fraction of wall-clock time spent on the CPU over the window
        fraction = elapsed_cpu / elapsed_wall

    # NOTE(review): the 'def get_stats(self):' header, the 's = {}' setup,
    # and the 'if avg is not None:' guards appear to be elided here.
        avg = self._average_N_minutes(1)
        s["cpu_monitor.1min_avg"] = avg
        avg = self._average_N_minutes(5)
        s["cpu_monitor.5min_avg"] = avg
        avg = self._average_N_minutes(15)
        s["cpu_monitor.15min_avg"] = avg
        now_cpu = time.clock()
        s["cpu_monitor.total"] = now_cpu - self.initial_cpu
class StatsProvider(foolscap.Referenceable, service.MultiService):
    """Collects stats from registered IStatsProducer objects and, when a
    gatherer FURL is configured, pushes them to the remote StatsGatherer
    over foolscap (RIStatsProvider)."""
    implements(RIStatsProvider)

    def __init__(self, node, gatherer_furl):
        # NOTE(review): 'self.node = node' and the self.counters dict setup
        # appear to be elided here; count() and startService() rely on them.
        service.MultiService.__init__(self)
        self.gatherer_furl = gatherer_furl # might be None

        self.stats_producers = []

        # only run the LoadMonitor (which submits a timer every second) if
        # there is a gatherer who is going to be paying attention. Our stats
        # are visible through HTTP even without a gatherer, so run the rest
        # of the stats (including the once-per-minute CPUUsageMonitor)
        self.load_monitor = LoadMonitor(self)
        self.load_monitor.setServiceParent(self)
        self.register_producer(self.load_monitor)

        self.cpu_monitor = CPUUsageMonitor()
        self.cpu_monitor.setServiceParent(self)
        self.register_producer(self.cpu_monitor)

    def startService(self):
        # once the Tub is ready, connect to the gatherer and offer ourselves
        if self.node and self.gatherer_furl:
            d = self.node.when_tub_ready()
            # NOTE(review): the 'def connect(ignored):' header for this
            # callback is missing from this excerpt.
                nickname_utf8 = self.node.nickname.encode("utf-8")
                self.node.tub.connectTo(self.gatherer_furl,
                                        self._connected, nickname_utf8)
            d.addCallback(connect)
        service.MultiService.startService(self)

    def count(self, name, delta=1):
        # accumulate a named counter, creating it on first use
        val = self.counters.setdefault(name, 0)
        self.counters[name] = val + delta

    def register_producer(self, stats_producer):
        # adapt to IStatsProducer so get_stats() can be called uniformly
        self.stats_producers.append(IStatsProducer(stats_producer))

    # NOTE(review): the 'def get_stats(self):' header, the 'stats = {}'
    # setup, and the final 'return ret' appear to be elided here.
        for sp in self.stats_producers:
            stats.update(sp.get_stats())
        ret = { 'counters': self.counters, 'stats': stats }
        log.msg(format='get_stats() -> %(stats)s', stats=ret, level=log.NOISY)

    def remote_get_stats(self):
        # RIStatsProvider entry point: same data as the local get_stats()
        return self.get_stats()

    def _connected(self, gatherer, nickname):
        # fire-and-forget: we do not wait for the gatherer's acknowledgement
        gatherer.callRemoteOnly('provide', self, nickname or '')
class StatsGatherer(foolscap.Referenceable, service.MultiService):
    """Abstract base gatherer: accepts 'provide' registrations from
    StatsProviders and periodically polls each registered client for its
    stats (RIStatsGatherer). Subclasses implement got_stats()."""
    implements(RIStatsGatherer)

    def __init__(self, tub, basedir):
        # NOTE(review): 'self.tub = tub' and the self.clients /
        # self.nicknames dict setup appear to be elided here; the methods
        # below rely on them.
        service.MultiService.__init__(self)
        self.basedir = basedir

    def startService(self):
        # the Tub must have a location set on it by now
        service.MultiService.startService(self)
        self.timer = TimerService(self.poll_interval, self.poll)
        self.timer.setServiceParent(self)
        self.registerGatherer()

    def registerGatherer(self):
        # persist our FURL so providers can be configured to find us again
        furl_file = os.path.join(self.basedir, "stats_gatherer.furl")
        self.my_furl = self.tub.registerReference(self, furlFile=furl_file)

    def get_tubid(self, rref):
        # extract the peer's tubid from its remote reference
        return foolscap.SturdyRef(rref.tracker.getURL()).getTubRef().getTubID()

    def remote_provide(self, provider, nickname):
        tubid = self.get_tubid(provider)
        if tubid == '<unauth>':
            print "WARNING: failed to get tubid for %s (%s)" % (provider, nickname)
            # don't add to clients to poll (polluting data) don't care about disconnect
            # NOTE(review): an early 'return' appears to be elided here --
            # without it the unauthenticated client falls through and is
            # registered anyway.
        self.clients[tubid] = provider
        self.nicknames[tubid] = nickname

    # NOTE(review): the 'def poll(self):' header (the TimerService callback)
    # is missing from this excerpt.
        for tubid,client in self.clients.items():
            nickname = self.nicknames.get(tubid)
            d = client.callRemote('get_stats')
            d.addCallbacks(self.got_stats, self.lost_client,
                           callbackArgs=(tubid, nickname),
                           errbackArgs=(tubid,))
            # lost_client() re-raises unexpected failures; log those here
            d.addErrback(self.log_client_error, tubid)

    def lost_client(self, f, tubid):
        # this is called lazily, when a get_stats request fails
        del self.clients[tubid]
        del self.nicknames[tubid]
        # swallow only ordinary-disconnect failures; anything else
        # propagates to log_client_error
        f.trap(DeadReferenceError, ConnectionDone, ConnectionLost)

    def log_client_error(self, f, tubid):
        log.msg("StatsGatherer: error in get_stats(), peerid=%s" % tubid,
                level=log.UNUSUAL, failure=f)

    def got_stats(self, stats, tubid, nickname):
        # subclasses decide what to do with each stats report
        raise NotImplementedError()
class StdOutStatsGatherer(StatsGatherer):
    """Gatherer that announces connects/disconnects and writes each stats
    report to stdout."""
    def remote_provide(self, provider, nickname):
        tubid = self.get_tubid(provider)
        # NOTE(review): an 'if self.verbose:' guard around this print
        # appears to be elided here.
        print 'connect "%s" [%s]' % (nickname, tubid)
        # announce when this provider goes away again
        provider.notifyOnDisconnect(self.announce_lost_client, tubid)
        StatsGatherer.remote_provide(self, provider, nickname)

    def announce_lost_client(self, tubid):
        print 'disconnect "%s" [%s]:' % (self.nicknames[tubid], tubid)

    def got_stats(self, stats, tubid, nickname):
        print '"%s" [%s]:' % (nickname, tubid)
        # NOTE(review): the loop that prints the individual stats entries
        # appears to be elided here.
class PickleStatsGatherer(StdOutStatsGatherer):
    """Gatherer that persists every stats report to <basedir>/stats.pickle."""
    # inherit from StdOutStatsGatherer for connect/disconnect notifications

    def __init__(self, tub, basedir=".", verbose=True):
        self.verbose = verbose
        StatsGatherer.__init__(self, tub, basedir)
        self.picklefile = os.path.join(basedir, "stats.pickle")

        # reload any previously-gathered stats so restarts accumulate
        if os.path.exists(self.picklefile):
            f = open(self.picklefile, 'rb')
            self.gathered_stats = pickle.load(f)
        # NOTE(review): the 'else:' header (and an f.close()) appear to be
        # elided here; start with an empty dict when no pickle exists yet.
            self.gathered_stats = {}

    def got_stats(self, stats, tubid, nickname):
        # record the freshest report per provider, stamped with receipt time
        s = self.gathered_stats.setdefault(tubid, {})
        s['timestamp'] = time.time()
        s['nickname'] = nickname
        # NOTE(review): storing the 'stats' payload itself and the call to
        # dump_pickle() appear to be elided here.

    def dump_pickle(self):
        # write to a temp file, then rename, so a crash mid-write cannot
        # corrupt the existing pickle
        tmp = "%s.tmp" % (self.picklefile,)
        # NOTE(review): 'f = open(tmp, "wb")' (and a later f.close()) appear
        # to be elided here.
        pickle.dump(self.gathered_stats, f)
        if os.path.exists(self.picklefile):
            os.unlink(self.picklefile)
        os.rename(tmp, self.picklefile)
class GathererApp(object):
    """Stand-alone driver: builds a foolscap Tub, chooses a listening port
    and advertised location, and attaches a PickleStatsGatherer to it."""
    # NOTE(review): the method header(s) for the lines below are missing
    # from this excerpt; this looks like the tail of __init__ plus a
    # tub-construction step, with 'd' bound by elided lines.
        d.addCallback(self._tub_ready)

        self._tub = foolscap.Tub(certFile="stats_gatherer.pem")
        self._tub.setOption("logLocalFailures", True)
        self._tub.setOption("logRemoteFailures", True)
        self._tub.startService()
        portnumfile = "portnum"
        # reuse a previously-recorded port when one exists
        # NOTE(review): the 'try:' header and the fallback portnum
        # assignment in the except branch appear to be elided here.
            portnum = int(open(portnumfile, "r").read())
        except (EnvironmentError, ValueError):
        self._tub.listenOn("tcp:%d" % portnum)
        # get_local_ip_for may return a value or a Deferred; normalize
        d = defer.maybeDeferred(get_local_ip_for)
        d.addCallback(self._set_location)
        d.addCallback(lambda res: self._tub)

    def _set_location(self, local_address):
        # advertise the detected address plus loopback; loopback alone when
        # detection failed
        if local_address is None:
            local_addresses = ["127.0.0.1"]
        # NOTE(review): the 'else:' header appears to be elided here.
            local_addresses = [local_address, "127.0.0.1"]
        l = self._tub.getListeners()[0]
        portnum = l.getPortnum()
        portnumfile = "portnum"
        # remember the allocated port so restarts keep the same FURL
        open(portnumfile, "w").write("%d\n" % portnum)
        local_addresses = [ "%s:%d" % (addr, portnum,)
                            for addr in local_addresses ]
        assert len(local_addresses) >= 1
        location = ",".join(local_addresses)
        self._tub.setLocation(location)

    def _tub_ready(self, tub):
        # attach the gatherer and announce its FURL on stdout
        sg = PickleStatsGatherer(tub, ".")
        sg.setServiceParent(tub)
        print '\nStatsGatherer: %s\n' % (sg.get_furl(),)
337 if __name__ == '__main__':