From: Brian Warner Date: Mon, 14 Jul 2008 19:29:19 +0000 (-0700) Subject: misc/munin: add server_latency plugin X-Git-Url: https://git.rkrishnan.org/about.html?a=commitdiff_plain;h=b2f062ab827d75698ce51256320425aa545d5582;p=tahoe-lafs%2Ftahoe-lafs.git misc/munin: add server_latency plugin --- diff --git a/misc/munin/tahoe_server_latency_ b/misc/munin/tahoe_server_latency_ new file mode 100644 index 00000000..2a6a1ca3 --- /dev/null +++ b/misc/munin/tahoe_server_latency_ @@ -0,0 +1,85 @@ +#!/usr/bin/python + +# retrieve a latency statistic for a given operation and percentile from a +# set of storage servers. + +# the OPERATION value should come from the following list: +# allocate: allocate_buckets, first step to upload an immutable file +# write: write data to an immutable share +# close: finish writing to an immutable share +# cancel: abandon a partial immutable share +# get: get_buckets, first step to download an immutable file +# read: read data from an immutable share +# writev: slot_testv_and_readv_and_writev, modify/create a directory +# readv: read a directory (or mutable file) + +# the PERCENTILE value should come from the following list: +# 01_0: 1% +# 10_0: 10% +# 50_0: 50% (median) +# 90_0: 90% +# 99_0: 99% +# 99_9: 99.9% +# mean: + +# To use this, create a symlink from +# /etc/munin/plugins/tahoe_server_latency_OPERATION_PERCENTILE to this +# script. 
# For example:

#  ln -s /usr/share/doc/allmydata-tahoe/munin/tahoe_server_latency_ \
#     /etc/munin/plugins/tahoe_server_latency_allocate_99_9

# Also, you will need to put a list of node statistics URLs in the plugin's
# environment, by adding a stanza like the following to a file in
# /etc/munin/plugin-conf.d/, such as /etc/munin/plugin-conf.d/tahoe_latencies:
#
# [tahoe_server_latency*]
# env.url_storage1 http://localhost:9011/statistics?t=json
# env.url_storage2 http://localhost:9012/statistics?t=json
# env.url_storage3 http://localhost:9013/statistics?t=json
# env.url_storage4 http://localhost:9014/statistics?t=json

# of course, these URLs must match the webports you have configured into the
# storage nodes.

import os
import sys

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

try:
    import simplejson
except ImportError:
    # Fall back to the standard-library json module, which provides the
    # same loads() entry point used below.
    import json as simplejson

# Every symlink to this script must be named PREFIX + OPERATION_PERCENTILE.
PREFIX = "tahoe_server_latency_"

# Fixed part of the "config" output; per-node lines are appended to it.
CONFIG_HEADER = (
    "graph_title Tahoe Server Latency\n"
    "graph_vlabel seconds\n"
    "graph_category tahoe\n"
    "graph_info This graph shows server-side latency for storage operations\n"
)


def _node_urls(environ):
    """Return the sorted list of (nodename, url) pairs found in environ.

    Every variable whose name starts with "url_" contributes one pair; the
    nodename is the variable name with that "url_" prefix removed.
    """
    return sorted(
        (name[len("url_"):], url)
        for name, url in environ.items()
        if name.startswith("url_")
    )


def _config_text(node_urls):
    """Return the graph configuration text, one label/draw pair per node."""
    pieces = [CONFIG_HEADER]
    for nodename, _url in node_urls:
        pieces.append("%s.label %s\n" % (nodename, nodename))
        pieces.append("%s.draw LINE2\n" % (nodename,))
    return "".join(pieces).rstrip()


def _stat_key(script_name):
    """Translate the name this script was invoked as into a statistics key.

    script_name must look like PREFIX + "OPERATION_PERCENTILE"; the text
    after the first underscore following the operation is the percentile.
    Every percentile other than "mean" gets a "_percentile" suffix.

    Raises ValueError when the name does not have that shape.
    """
    if not script_name.startswith(PREFIX):
        raise ValueError(
            "plugin must be invoked as %sOPERATION_PERCENTILE, not %r"
            % (PREFIX, script_name))
    suffix = script_name[len(PREFIX):]
    if "_" not in suffix:
        raise ValueError(
            "plugin name %r is missing the _PERCENTILE part" % (script_name,))
    operation, percentile = suffix.split("_", 1)
    if percentile != "mean":
        percentile = percentile + "_percentile"
    return "storage_server.latencies.%s.%s" % (operation, percentile)


def _fetch_stats(url):
    """Download url and return its body decoded as JSON."""
    response = urlopen(url)
    try:
        return simplejson.loads(response.read())
    finally:
        # Release the connection even when reading or decoding fails.
        response.close()


def main(argv=None, environ=None):
    """Run the plugin and return the process exit status.

    argv defaults to sys.argv and environ to os.environ.  When the first
    argument is "config" the graph configuration is printed; otherwise one
    "NODENAME.value VALUE" line is printed per configured node, where VALUE
    is looked up under the "stats" member of that node's JSON document.

    Raises ValueError if, outside of "config" mode, the script was invoked
    under a name that does not encode an operation and a percentile.
    """
    if argv is None:
        argv = sys.argv
    if environ is None:
        environ = os.environ

    node_urls = _node_urls(environ)

    if len(argv) > 1 and argv[1] == "config":
        print(_config_text(node_urls))
        return 0

    # Work out the key once, before any network traffic, so a badly named
    # symlink is reported immediately instead of after the first download.
    key = _stat_key(os.path.basename(argv[0]))

    for nodename, url in node_urls:
        value = _fetch_stats(url)["stats"][key]
        print("%s.value %s" % (nodename, value))
    return 0


if __name__ == "__main__":
    sys.exit(main())