7 from collections import deque
9 from twisted.internet import reactor, defer
10 from twisted.application import service
11 from twisted.application.internet import TimerService
12 from zope.interface import implements
14 from foolscap.eventual import eventually
15 from foolscap.logging.gatherer import get_local_ip_for
16 from twisted.internet.error import ConnectionDone, ConnectionLost
17 from foolscap import DeadReferenceError
19 from allmydata.util import log
20 from allmydata.interfaces import RIStatsProvider, RIStatsGatherer, IStatsProducer
class LoadMonitor(service.MultiService):
    """Measures reactor scheduling delay ("load") by timing its own
    periodic callback, and reports avg/max figures via get_stats().

    NOTE(review): this excerpt is elided -- class constants (the
    loop_interval/num_samples referenced below) and parts of several
    method bodies are missing from view.
    """
    implements(IStatsProducer)

    def __init__(self, provider, warn_if_delay_exceeds=1):
        # provider: the StatsProvider that polls us
        # warn_if_delay_exceeds: delay (seconds) above which we log a warning
        service.MultiService.__init__(self)
        self.provider = provider
        self.warn_if_delay_exceeds = warn_if_delay_exceeds
        # NOTE(review): initialization of self.last / self.stats / self.timer
        # appears elided here; later code reads all three.

    def startService(self):
        # schedule the first measurement tick, then start child services
        self.timer = reactor.callLater(self.loop_interval, self.loop)
        service.MultiService.startService(self)

    def stopService(self):
        # NOTE(review): pending-timer cancellation appears elided here
        return service.MultiService.stopService(self)

    # --- loop() body below; the 'def loop(self):' line (and the code
    # computing 'now') is elided from this excerpt ---
        if self.last is not None:
            # how much later than scheduled did this tick fire?
            delay = now - self.last - self.loop_interval
            if delay > self.warn_if_delay_exceeds:
                log.msg(format='excessive reactor delay (%ss)', args=(delay,),
                # NOTE(review): trailing log.msg() arguments elided
                self.stats.append(delay)
            while len(self.stats) > self.num_samples:
        # reschedule ourselves to measure the next interval
        self.timer = reactor.callLater(self.loop_interval, self.loop)

    # --- get_stats() body below; its 'def' line and the computation of
    # 'm_x' (the max) are elided ---
        avg = sum(self.stats) / len(self.stats)
        return { 'load_monitor.avg_load': avg,
                 'load_monitor.max_load': m_x, }
class CPUUsageMonitor(service.MultiService):
    """Tracks this process's CPU usage and reports 1/5/15-minute moving
    averages plus a running total via get_stats().

    NOTE(review): elided excerpt -- the POLL_INTERVAL / HISTORY_LENGTH
    constants and several 'def' lines are missing from view.
    """
    implements(IStatsProducer)

    # --- __init__ body below; its 'def' line is elided ---
        service.MultiService.__init__(self)
        # we don't use time.clock() here, because the constructor is run by
        # the twistd parent process (as it loads the .tac file), whereas the
        # rest of the program will be run by the child process, after twistd
        # forks. Instead, set self.initial_cpu as soon as the reactor starts
        self.initial_cpu = 0.0 # just in case
        eventually(self._set_initial_cpu)
        # we provide 1min, 5min, and 15min moving averages
        TimerService(self.POLL_INTERVAL, self.check).setServiceParent(self)

    def _set_initial_cpu(self):
        # runs via eventually(), i.e. once the reactor is running in the
        # (possibly forked) child process -- see the comment above
        self.initial_cpu = time.clock()

    # --- check() body below; its 'def' line and the sample-trimming
    # statement inside the while loop are elided ---
        now_wall = time.time()
        now_cpu = time.clock()
        self.samples.append( (now_wall, now_cpu) )
        while len(self.samples) > self.HISTORY_LENGTH+1:

    def _average_N_minutes(self, size):
        # computes the fraction of CPU used over the trailing 'size'
        # samples; appears to exit early when there is not enough history
        if len(self.samples) < size+1:
        # NOTE(review): the early-exit body and the computation of
        # 'first' are elided here
        elapsed_wall = self.samples[-1][0] - self.samples[first][0]
        elapsed_cpu = self.samples[-1][1] - self.samples[first][1]
        fraction = elapsed_cpu / elapsed_wall

    # --- get_stats() body below; its 'def' line, the creation of the
    # 's' dict, and any guards between the averages are elided ---
        avg = self._average_N_minutes(1)
        s["cpu_monitor.1min_avg"] = avg
        avg = self._average_N_minutes(5)
        s["cpu_monitor.5min_avg"] = avg
        avg = self._average_N_minutes(15)
        s["cpu_monitor.15min_avg"] = avg
        now_cpu = time.clock()
        s["cpu_monitor.total"] = now_cpu - self.initial_cpu
class StatsProvider(foolscap.Referenceable, service.MultiService):
    """Node-side stats service: aggregates registered producers' stats
    and publishes them to a remote stats gatherer over foolscap.

    NOTE(review): elided excerpt -- the initialization of self.node and
    self.counters is missing from view, though later code uses both.
    """
    implements(RIStatsProvider)

    def __init__(self, node, gatherer_furl):
        # node: the local node (used for tub and config access)
        # gatherer_furl: FURL of the remote StatsGatherer to report to
        service.MultiService.__init__(self)
        self.gatherer_furl = gatherer_furl
        self.stats_producers = []
        # built-in producers: reactor-load and CPU-usage monitors
        self.load_monitor = LoadMonitor(self)
        self.load_monitor.setServiceParent(self)
        self.register_producer(self.load_monitor)
        self.cpu_monitor = CPUUsageMonitor()
        self.cpu_monitor.setServiceParent(self)
        self.register_producer(self.cpu_monitor)
    def startService(self):
        # once the Tub is ready, connect to the configured gatherer
        d = self.node.when_tub_ready()
        # NOTE(review): the 'def connect(res):' line appears elided here;
        # the next two lines are that callback's body
            nickname = self.node.get_config('nickname')
            self.node.tub.connectTo(self.gatherer_furl, self._connected, nickname)
        d.addCallback(connect)
        service.MultiService.startService(self)
156 def count(self, name, delta=1):
157 val = self.counters.setdefault(name, 0)
158 self.counters[name] = val + delta
160 def register_producer(self, stats_producer):
161 self.stats_producers.append(IStatsProducer(stats_producer))
    # --- get_stats() body below; its 'def' line, the creation of the
    # 'stats' dict, and the trailing 'return ret' are elided ---
        for sp in self.stats_producers:
            # merge each producer's key/value pairs into one flat dict
            stats.update(sp.get_stats())
        ret = { 'counters': self.counters, 'stats': stats }
        log.msg(format='get_stats() -> %(stats)s', stats=ret, level=log.NOISY)
    def remote_get_stats(self):
        # foolscap entry point (RIStatsProvider): delegate to get_stats()
        return self.get_stats()
    def _connected(self, gatherer, nickname):
        # connectTo callback: announce ourselves to the gatherer.
        # callRemoteOnly is fire-and-forget (returns no Deferred).
        gatherer.callRemoteOnly('provide', self, nickname or '')
class StatsGatherer(foolscap.Referenceable, service.MultiService):
    """Central service that periodically polls registered StatsProviders;
    subclasses decide what to do with each sample via got_stats().

    NOTE(review): elided excerpt -- poll_interval and the
    self.tub / self.clients / self.nicknames initialization are missing
    from view, though later code uses all of them.
    """
    implements(RIStatsGatherer)

    def __init__(self, tub, basedir):
        # basedir: directory where our own FURL file is persisted
        service.MultiService.__init__(self)
        self.basedir = basedir
190 def startService(self):
191 # the Tub must have a location set on it by now
192 service.MultiService.startService(self)
193 self.timer = TimerService(self.poll_interval, self.poll)
194 self.timer.setServiceParent(self)
195 self.registerGatherer()
200 def registerGatherer(self):
201 furl_file = os.path.join(self.basedir, "stats_gatherer.furl")
202 self.my_furl = self.tub.registerReference(self, furlFile=furl_file)
204 def get_tubid(self, rref):
205 return foolscap.SturdyRef(rref.tracker.getURL()).getTubRef().getTubID()
    def remote_provide(self, provider, nickname):
        # foolscap entry point (RIStatsGatherer): a provider announces
        # itself and is added to the polling set
        tubid = self.get_tubid(provider)
        if tubid == '<unauth>':
            print "WARNING: failed to get tubid for %s (%s)" % (provider, nickname)
            # don't add to clients to poll (polluting data) don't care about disconnect
            # NOTE(review): an early 'return' appears to be elided here
        self.clients[tubid] = provider
        self.nicknames[tubid] = nickname
    # --- poll() body below; its 'def' line is elided from this excerpt ---
        for tubid,client in self.clients.items():
            nickname = self.nicknames.get(tubid)
            # ask each connected provider for a fresh stats sample
            d = client.callRemote('get_stats')
            d.addCallbacks(self.got_stats, self.lost_client,
                           callbackArgs=(tubid, nickname),
                           errbackArgs=(tubid,))
            # anything lost_client() did not swallow gets logged
            d.addErrback(self.log_client_error, tubid)
225 def lost_client(self, f, tubid):
226 # this is called lazily, when a get_stats request fails
227 del self.clients[tubid]
228 del self.nicknames[tubid]
229 f.trap(DeadReferenceError, ConnectionDone, ConnectionLost)
231 def log_client_error(self, f, tubid):
232 log.msg("StatsGatherer: error in get_stats(), peerid=%s" % tubid,
233 level=log.UNUSUAL, failure=f)
    def got_stats(self, stats, tubid, nickname):
        # abstract hook: subclasses decide how to record each stats sample
        raise NotImplementedError()
class StdOutStatsGatherer(StatsGatherer):
    """Gatherer that announces connects/disconnects and stats on stdout.

    NOTE(review): elided excerpt -- a guard around the connect message
    (presumably verbosity) and the printing of the stats body appear to
    be missing from view.
    """
    def remote_provide(self, provider, nickname):
        tubid = self.get_tubid(provider)
        print 'connect "%s" [%s]' % (nickname, tubid)
        # get told when this provider goes away again
        provider.notifyOnDisconnect(self.announce_lost_client, tubid)
        StatsGatherer.remote_provide(self, provider, nickname)

    def announce_lost_client(self, tubid):
        print 'disconnect "%s" [%s]:' % (self.nicknames[tubid], tubid)

    def got_stats(self, stats, tubid, nickname):
        print '"%s" [%s]:' % (nickname, tubid)
        # NOTE(review): the code printing the stats themselves is elided
class PickleStatsGatherer(StdOutStatsGatherer):
    """Gatherer that accumulates every sample into basedir/stats.pickle.

    NOTE(review): elided excerpt -- the 'else:' of the load branch, the
    storing of the sample into 's', and the open/close of the temp file
    in dump_pickle() are missing from view.
    """
    # inherit from StdOutStatsGatherer for connect/disconnect notifications

    def __init__(self, tub, basedir=".", verbose=True):
        self.verbose = verbose
        StatsGatherer.__init__(self, tub, basedir)
        self.picklefile = os.path.join(basedir, "stats.pickle")
        # reload previously-gathered stats, if any, so restarts accumulate
        if os.path.exists(self.picklefile):
            f = open(self.picklefile, 'rb')
            self.gathered_stats = pickle.load(f)
            # NOTE(review): 'f.close()' and the 'else:' header appear
            # elided here; the next line is the else-branch body
            self.gathered_stats = {}

    def got_stats(self, stats, tubid, nickname):
        # one sub-dict per provider, keyed by tubid
        s = self.gathered_stats.setdefault(tubid, {})
        s['timestamp'] = time.time()
        s['nickname'] = nickname
        # NOTE(review): storing 'stats' into s and the dump_pickle()
        # call appear elided here

    def dump_pickle(self):
        # write to a temp file, then rename, for atomic-ish replacement
        tmp = "%s.tmp" % (self.picklefile,)
        # NOTE(review): 'f = open(tmp, ...)' appears elided here
        pickle.dump(self.gathered_stats, f)
        # unlink before rename: rename-over-existing fails on some platforms
        if os.path.exists(self.picklefile):
            os.unlink(self.picklefile)
        os.rename(tmp, self.picklefile)
class GathererApp(object):
    """Stand-alone driver: creates a Tub, listens on a (persisted) port,
    determines its advertised location, and attaches a
    PickleStatsGatherer to it.

    NOTE(review): heavily elided excerpt -- the 'def' lines for the
    constructor and the tub-setup method are missing from view.
    """
    # --- constructor body fragment; its 'def' line is elided ---
        d.addCallback(self._tub_ready)

    # --- tub-setup body below; its 'def' line is elided ---
        self._tub = foolscap.Tub(certFile="stats_gatherer.pem")
        self._tub.setOption("logLocalFailures", True)
        self._tub.setOption("logRemoteFailures", True)
        self._tub.startService()
        # reuse a previously-allocated port number if we saved one
        portnumfile = "portnum"
            portnum = int(open(portnumfile, "r").read())
        except (EnvironmentError, ValueError):
        # NOTE(review): the 'try:' line and the except-branch body
        # (defaulting portnum) are elided around here
        self._tub.listenOn("tcp:%d" % portnum)
        d = defer.maybeDeferred(get_local_ip_for)
        d.addCallback(self._set_location)
        d.addCallback(lambda res: self._tub)

    def _set_location(self, local_address):
        # advertise the detected local address plus loopback
        if local_address is None:
            local_addresses = ["127.0.0.1"]
        # NOTE(review): the 'else:' line is elided here
            local_addresses = [local_address, "127.0.0.1"]
        l = self._tub.getListeners()[0]
        portnum = l.getPortnum()
        # persist the allocated port so restarts reuse it
        portnumfile = "portnum"
        open(portnumfile, "w").write("%d\n" % portnum)
        local_addresses = [ "%s:%d" % (addr, portnum,)
                            for addr in local_addresses ]
        assert len(local_addresses) >= 1
        location = ",".join(local_addresses)
        self._tub.setLocation(location)

    def _tub_ready(self, tub):
        # attach the gatherer to the now-ready Tub and announce our FURL
        sg = PickleStatsGatherer(tub, ".")
        sg.setServiceParent(tub)
        # NOTE(review): one line is elided here
        print '\nStatsGatherer: %s\n' % (sg.get_furl(),)
331 if __name__ == '__main__':