From: Brian Warner Date: Fri, 11 Jul 2008 22:21:13 +0000 (-0700) Subject: misc/munin: rename plugins to make munin happy: use underscores rather than hyphens... X-Git-Url: https://git.rkrishnan.org/%5B/%5D%20/reliability?a=commitdiff_plain;h=f3036702652d73104c4f1227284d22bb23c97d16;p=tahoe-lafs%2Ftahoe-lafs.git misc/munin: rename plugins to make munin happy: use underscores rather than hyphens, remove .py suffix --- diff --git a/misc/munin/tahoe-files.py b/misc/munin/tahoe-files.py deleted file mode 100644 index c6026de1..00000000 --- a/misc/munin/tahoe-files.py +++ /dev/null @@ -1,52 +0,0 @@ -#! /usr/bin/python - -# This is a munin plugin to track the number of files that each node's -# StorageServer is holding on behalf of other nodes. Each file that has been -# uploaded to the mesh (and has shares present on this node) will be counted -# here. When there are <= 100 nodes in the mesh, this count will equal the -# total number of files that are active in the entire mesh. When there are -# 200 nodes present in the mesh, it will represent about half of the total -# number. - -# Copy this plugin into /etc/munun/plugins/tahoe-files and then put -# the following in your /etc/munin/plugin-conf.d/foo file to let it know -# where to find the basedirectory for each node: -# -# [tahoe-files] -# env.basedir_NODE1 /path/to/node1 -# env.basedir_NODE2 /path/to/node2 -# env.basedir_NODE3 /path/to/node3 -# - -import os, sys - -nodedirs = [] -for k,v in os.environ.items(): - if k.startswith("basedir_"): - nodename = k[len("basedir_"):] - nodedirs.append( (nodename, v) ) -nodedirs.sort() - -configinfo = \ -"""graph_title Allmydata Tahoe Filecount -graph_vlabel files -graph_category tahoe -graph_info This graph shows the number of files hosted by this node's StorageServer -""" - -for nodename, basedir in nodedirs: - configinfo += "%s.label %s\n" % (nodename, nodename) - configinfo += "%s.draw LINE2\n" % (nodename,) - - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - -for nodename, basedir in nodedirs: - files = len(os.listdir(os.path.join(basedir, "storage", "shares"))) - if os.path.exists(os.path.join(basedir, "storage", "shares", "incoming")): - files -= 1 # the 'incoming' directory doesn't count - print "%s.value %d" % (nodename, files) - diff --git a/misc/munin/tahoe-helperstats-active.py b/misc/munin/tahoe-helperstats-active.py deleted file mode 100644 index 472c8c42..00000000 --- a/misc/munin/tahoe-helperstats-active.py +++ /dev/null @@ -1,25 +0,0 @@ -#! /usr/bin/python - -import os, sys -import urllib -import simplejson - -configinfo = """\ -graph_title Tahoe Helper Stats - Active Files -graph_vlabel bytes -graph_category tahoe -graph_info This graph shows the number of files being actively processed by the helper -fetched.label Active Files -fetched.draw LINE2 -""" - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - -url = os.environ["url"] - -data = simplejson.loads(urllib.urlopen(url).read()) -print "fetched.value %d" % data["chk_upload_helper.active_uploads"] - diff --git a/misc/munin/tahoe-helperstats-fetched.py b/misc/munin/tahoe-helperstats-fetched.py deleted file mode 100644 index c64101df..00000000 --- a/misc/munin/tahoe-helperstats-fetched.py +++ /dev/null @@ -1,26 +0,0 @@ -#! /usr/bin/python - -import os, sys -import urllib -import simplejson - -configinfo = """\ -graph_title Tahoe Helper Stats - Bytes Fetched -graph_vlabel bytes -graph_category tahoe -graph_info This graph shows the amount of data being fetched by the helper -fetched.label Bytes Fetched -fetched.type GAUGE -fetched.draw LINE1 -fetched.min 0 -""" - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - -url = os.environ["url"] - -data = simplejson.loads(urllib.urlopen(url).read()) -print "fetched.value %d" % data["chk_upload_helper.fetched_bytes"] diff --git a/misc/munin/tahoe-introstats.py b/misc/munin/tahoe-introstats.py deleted file mode 100644 index 5f506eb9..00000000 --- a/misc/munin/tahoe-introstats.py +++ /dev/null @@ -1,28 +0,0 @@ -#! /usr/bin/python - -import os, sys -import urllib -import simplejson - -configinfo = """\ -graph_title Tahoe Introducer Stats -graph_vlabel hosts -graph_category tahoe -graph_info This graph shows the number of hosts announcing and subscribing to various services -storage_server.label Storage Servers -storage_server.draw LINE1 -storage_client.label Clients -storage_client.draw LINE2 -""" - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - -url = os.environ["url"] - -data = simplejson.loads(urllib.urlopen(url).read()) -print "storage_server.value %d" % data["announcement_summary"]["storage"] -print "storage_client.value %d" % data["subscription_summary"]["storage"] - diff --git a/misc/munin/tahoe-nodememory.py b/misc/munin/tahoe-nodememory.py deleted file mode 100644 index 670e32e5..00000000 --- a/misc/munin/tahoe-nodememory.py +++ /dev/null @@ -1,71 +0,0 @@ -#! /usr/bin/python - -# This munin plugin isolates processes by looking for the 'pid' file created -# by 'allmydata start', then extracts the amount of memory they consume (both -# VmSize and VmRSS) from /proc - -import os, sys, re - -if 0: - # for testing - os.environ["nodememory_warner1"] = "run/warner1" - os.environ["nodememory_warner2"] = "run/warner2" - -nodedirs = [] -for k,v in os.environ.items(): - if k.startswith("nodememory_"): - nodename = k[len("nodememory_"):] - nodedirs.append((nodename, v)) -nodedirs.sort(lambda a,b: cmp(a[0],b[0])) - -pids = {} - -for node,nodedir in nodedirs: - pidfile = os.path.join(nodedir, "twistd.pid") - if os.path.exists(pidfile): - pid = int(open(pidfile,"r").read()) - pids[node] = pid - -fields = ["VmSize", "VmRSS"] - - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - configinfo = \ - """graph_title Memory Consumed by Nodes -graph_vlabel bytes -graph_category Tahoe -graph_info This graph shows the memory used by specific processes -""" - for nodename,nodedir in nodedirs: - for f in fields: - configinfo += "%s_%s.label %s used by %s\n" % (nodename, f, - f, nodename) - linetype = "LINE1" - if f == "VmSize": - linetype = "LINE2" - configinfo += "%s_%s.draw %s\n" % (nodename, f, linetype) - if f == "VmData": - configinfo += "%s_%s.graph no\n" % (nodename, f) - - print configinfo - sys.exit(0) - -nodestats = {} -for node,pid in pids.items(): - stats = {} - statusfile = "/proc/%s/status" % pid - if not os.path.exists(statusfile): - continue - for line in open(statusfile,"r").readlines(): - for f in fields: - if line.startswith(f + ":"): - m = re.search(r'(\d+)', line) - stats[f] = int(m.group(1)) - nodestats[node] = stats - -for node,stats in nodestats.items(): - for f,value in stats.items(): - # TODO: not sure if /proc/%d/status means 1000 or 1024 when it says - # 'kB' - print "%s_%s.value %d" % (node, f, 1024*value) diff --git a/misc/munin/tahoe-rootdir-space.py b/misc/munin/tahoe-rootdir-space.py deleted file mode 100644 index e3830fe4..00000000 --- a/misc/munin/tahoe-rootdir-space.py +++ /dev/null @@ -1,27 +0,0 @@ -#! /usr/bin/python - -import os, sys -import urllib -import simplejson - -configinfo = """\ -graph_title Tahoe Root Directory Size -graph_vlabel bytes -graph_category tahoe -graph_info This graph shows the amount of space consumed by all files reachable from a given directory -space.label Space -space.draw LINE2 -""" - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - -url = os.environ["url"] - -data = int(urllib.urlopen(url).read().strip()) -print "space.value %d" % data - - - diff --git a/misc/munin/tahoe-stats.py b/misc/munin/tahoe-stats.py deleted file mode 100644 index 57d18e78..00000000 --- a/misc/munin/tahoe-stats.py +++ /dev/null @@ -1,398 +0,0 @@ -#!/usr/bin/python - -import os -import pickle -import re -import sys -import time - -STAT_VALIDITY = 300 # 5min limit on reporting stats - -PLUGINS = { - 'tahoe_storage_consumed': - { 'statid': 'storage_server.consumed', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Storage Server Space Consumed', - 'graph_vlabel bytes', - 'graph_category tahoe_storage_server', - 'graph_info This graph shows space consumed', - 'graph_args --base 1024', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_storage_allocated': - { 'statid': 'storage_server.allocated', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Storage Server Space Allocated', - 'graph_vlabel bytes', - 'graph_category tahoe_storage_server', - 'graph_info This graph shows space allocated', - 'graph_args --base 1024', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_runtime_load_avg': - { 'statid': 'load_monitor.avg_load', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Runtime Load Average', - 'graph_vlabel load', - 'graph_category tahoe', - 'graph_info This graph shows average reactor delay', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_runtime_load_peak': - { 'statid': 'load_monitor.max_load', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Runtime Load Peak', - 'graph_vlabel load', - 'graph_category tahoe', - 'graph_info This graph shows peak reactor delay', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_storage_bytes_added': - { 'statid': 'storage_server.bytes_added', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Storage Server Bytes Added', - 'graph_vlabel bytes', - 'graph_category tahoe_storage_server', - 'graph_info This graph shows cummulative bytes added', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_storage_bytes_freed': - { 'statid': 'storage_server.bytes_freed', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Storage Server Bytes Removed', - 'graph_vlabel bytes', - 'graph_category tahoe_storage_server', - 'graph_info This graph shows cummulative bytes removed', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_helper_incoming_files': - { 'statid': 'chk_upload_helper.incoming_count', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming File Count', - 'graph_vlabel n files', - 'graph_category tahoe_helper', - 'graph_info This graph shows number of incoming files', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_incoming_filesize': - { 'statid': 'chk_upload_helper.incoming_size', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming File Size', - 'graph_vlabel bytes', - 'graph_category tahoe_helper', - 'graph_info This graph shows total size of incoming files', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_incoming_files_old': - { 'statid': 'chk_upload_helper.incoming_size_old', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming Old Files', - 'graph_vlabel bytes', - 'graph_category tahoe_helper', - 'graph_info This graph shows total size of old incoming files', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_helper_encoding_files': - { 'statid': 'chk_upload_helper.encoding_count', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding File Count', - 'graph_vlabel n files', - 'graph_category tahoe_helper', - 'graph_info This graph shows number of encoding files', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_encoding_filesize': - { 'statid': 'chk_upload_helper.encoding_size', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding File Size', - 'graph_vlabel bytes', - 'graph_category tahoe_helper', - 'graph_info This graph shows total size of encoding files', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_encoding_files_old': - { 'statid': 'chk_upload_helper.encoding_size_old', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding Old Files', - 'graph_vlabel bytes', - 'graph_category tahoe_helper', - 'graph_info This graph shows total size of old encoding files', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_helper_active_uploads': - { 'statid': 'chk_upload_helper.active_uploads', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Active Files', - 'graph_vlabel n files', - 'graph_category tahoe_helper', - 'graph_info This graph shows number of files actively being processed by the helper', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_helper_upload_requests': - { 'statid': 'chk_upload_helper.upload_requests', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Upload Requests', - 'graph_vlabel requests', - 'graph_category tahoe_helper', - 'graph_info This graph shows the number of upload requests arriving at the helper', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_upload_already_present': - { 'statid': 'chk_upload_helper.upload_already_present', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Uploads Already Present', - 'graph_vlabel requests', - 'graph_category tahoe_helper', - 'graph_info This graph shows the number of uploads whose files are already present in the grid', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_upload_need_upload': - { 'statid': 'chk_upload_helper.upload_need_upload', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Uploads Needing Upload', - 'graph_vlabel requests', - 'graph_category tahoe_helper', - 'graph_info This graph shows the number of uploads whose files are not already present in the grid', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_encoded_bytes': - { 'statid': 'chk_upload_helper.encoded_bytes', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoded Bytes', - 'graph_vlabel bytes', - 'graph_category tahoe_helper', - 'graph_info This graph shows the number of bytes encoded by the helper', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_fetched_bytes': - { 'statid': 'chk_upload_helper.fetched_bytes', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Fetched Bytes', - 'graph_vlabel bytes', - 'graph_category tahoe_helper', - 'graph_info This graph shows the number of bytes fetched by the helper', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_uploader_bytes_uploaded': - { 'statid': 'uploader.bytes_uploaded', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Uploader Bytes Uploaded', - 'graph_vlabel bytes', - 'graph_category tahoe_traffic', - 'graph_info This graph shows the number of bytes uploaded', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_uploader_files_uploaded': - { 'statid': 'uploader.files_uploaded', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Uploader Bytes Uploaded', - 'graph_vlabel files', - 'graph_category tahoe_traffic', - 'graph_info This graph shows the number of files uploaded', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_mutable_files_published': - { 'statid': 'mutable.files_published', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Mutable Files Published', - 'graph_vlabel files', - 'graph_category tahoe_traffic', - 'graph_info This graph shows the number of mutable files published', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_mutable_files_retrieved': - { 'statid': 'mutable.files_retrieved', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Mutable Files Retrieved', - 'graph_vlabel files', - 'graph_category tahoe_traffic', - 'graph_info This graph shows the number of files retrieved', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - } - -def smash_name(name): - return re.sub('[^a-zA-Z0-9]', '_', name) - -def open_stats(fname): - f = open(fname, 'rb') - stats = pickle.load(f) - f.close() - return stats - -def main(argv): - graph_name = os.path.basename(argv[0]) - if graph_name.endswith('.py'): - graph_name = graph_name[:-3] - - plugin_conf = PLUGINS.get(graph_name) - - for k,v in os.environ.items(): - if k.startswith('statsfile'): - stats_file = v - break - else: - raise RuntimeError("No 'statsfile' env var found") - - stats = open_stats(stats_file) - - now = time.time() - def output_nodes(output_section, check_time): - for tubid, nodestats in stats.items(): - if check_time and (now - nodestats.get('timestamp', 0)) > STAT_VALIDITY: - continue - name = smash_name("%s_%s" % (nodestats['nickname'], tubid[:4])) - #value = nodestats['stats'][plugin_conf['category']].get(plugin_conf['statid']) - category = plugin_conf['category'] - statid = plugin_conf['statid'] - value = nodestats['stats'][category].get(statid) - if value is not None: - args = { 'name': name, 'value': value } - print plugin_conf[output_section] % args - - if len(argv) > 1: - if sys.argv[1] == 'config': - print plugin_conf['configheader'] - output_nodes('graph_config', False) - sys.exit(0) - - output_nodes('graph_render', True) - -if __name__ == '__main__': - main(sys.argv) diff --git a/misc/munin/tahoe-storagespace.py b/misc/munin/tahoe-storagespace.py deleted file mode 100644 index ad4375b9..00000000 --- a/misc/munin/tahoe-storagespace.py +++ /dev/null @@ -1,58 +0,0 @@ -#! /usr/bin/python - -# This is a munin plugin to track the amount of disk space each node's -# StorageServer is consuming on behalf of other nodes. This is where the -# shares are kept. If there are N nodes present in the mesh, the total space -# consumed by the entire mesh will be about N times the space reported by -# this plugin. - -# Copy this plugin into /etc/munun/plugins/tahoe-storagespace and then put -# the following in your /etc/munin/plugin-conf.d/foo file to let it know -# where to find the basedirectory for each node: -# -# [tahoe-storagespace] -# env.basedir_NODE1 /path/to/node1 -# env.basedir_NODE2 /path/to/node2 -# env.basedir_NODE3 /path/to/node3 -# -# Allmydata-tahoe must be installed on the system where this plugin is used, -# since it imports a utility module from allmydata.utils . - -import os, sys -import commands - -nodedirs = [] -for k,v in os.environ.items(): - if k.startswith("basedir_"): - nodename = k[len("basedir_"):] - nodedirs.append( (nodename, v) ) -nodedirs.sort() - -seriesname = "storage" - -configinfo = \ -"""graph_title Allmydata Tahoe Shareholder Space -graph_vlabel bytes -graph_category tahoe -graph_info This graph shows the space consumed by this node's StorageServer -""" - -for nodename, basedir in nodedirs: - configinfo += "%s.label %s\n" % (nodename, nodename) - configinfo += "%s.draw LINE2\n" % (nodename,) - - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - -for nodename, basedir in nodedirs: - cmd = "du --bytes --summarize %s" % os.path.join(basedir, "storage") - rc,out = commands.getstatusoutput(cmd) - if rc != 0: - sys.exit(rc) - bytes, extra = out.split() - usage = int(bytes) - print "%s.value %d" % (nodename, usage) - diff --git a/misc/munin/tahoe_cpu_watcher b/misc/munin/tahoe_cpu_watcher new file mode 100644 index 00000000..1b1df335 --- /dev/null +++ b/misc/munin/tahoe_cpu_watcher @@ -0,0 +1,29 @@ +#! /usr/bin/python + +import os, sys, re +import urllib +import simplejson + +url = os.environ["url"] +current = simplejson.loads(urllib.urlopen(url).read()) + +configinfo = """\ +graph_title Tahoe CPU Usage +graph_vlabel CPU % +graph_category tahoe +graph_info This graph shows the 5min average of CPU usage for each process +""" +data = "" + +for (name, avg1, avg5, avg15) in current: + dataname = re.sub(r'[^\w]', '_', name) + configinfo += dataname + ".label " + name + "\n" + configinfo += dataname + ".draw LINE2\n" + if avg5 is not None: + data += dataname + ".value %.2f\n" % (100.0 * avg5) + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) +print data.rstrip() diff --git a/misc/munin/tahoe_cpu_watcher.py b/misc/munin/tahoe_cpu_watcher.py deleted file mode 100644 index 1b1df335..00000000 --- a/misc/munin/tahoe_cpu_watcher.py +++ /dev/null @@ -1,29 +0,0 @@ -#! /usr/bin/python - -import os, sys, re -import urllib -import simplejson - -url = os.environ["url"] -current = simplejson.loads(urllib.urlopen(url).read()) - -configinfo = """\ -graph_title Tahoe CPU Usage -graph_vlabel CPU % -graph_category tahoe -graph_info This graph shows the 5min average of CPU usage for each process -""" -data = "" - -for (name, avg1, avg5, avg15) in current: - dataname = re.sub(r'[^\w]', '_', name) - configinfo += dataname + ".label " + name + "\n" - configinfo += dataname + ".draw LINE2\n" - if avg5 is not None: - data += dataname + ".value %.2f\n" % (100.0 * avg5) - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) -print data.rstrip() diff --git a/misc/munin/tahoe_estimate_files b/misc/munin/tahoe_estimate_files new file mode 100644 index 00000000..4723cdad --- /dev/null +++ b/misc/munin/tahoe_estimate_files @@ -0,0 +1,49 @@ +#! /usr/bin/python + +import sys, os.path + +if len(sys.argv) > 1 and sys.argv[1] == "config": + print """\ +graph_title Tahoe File Estimate +graph_vlabel files +graph_category tahoe +graph_info This graph shows the estimated number of files and directories present in the grid +files.label files +files.draw LINE2""" + sys.exit(0) + +# Edit this to point at some subset of storage directories. +node_dirs = [os.path.expanduser("~amduser/prodnet/storage1"), + os.path.expanduser("~amduser/prodnet/storage2"), + os.path.expanduser("~amduser/prodnet/storage3"), + os.path.expanduser("~amduser/prodnet/storage4"), + ] + +sections = ["aa", "ab", "ac", "ad", "ae", "af", "ag", "ah", "ai", "aj"] +# and edit this to reflect your default encoding's "total_shares" value, and +# the total number of servers. +N = 10 +num_servers = 20 + +index_strings = set() +for base in node_dirs: + for section in sections: + sampledir = os.path.join(base, "storage", "shares", section) + indices = os.listdir(sampledir) + index_strings.update(indices) +unique_strings = len(index_strings) + +# the chance that any given file appears on any given server +chance = 1.0 * N / num_servers + +# the chance that the file does *not* appear on the servers that we're +# examining +no_chance = (1-chance) ** len(node_dirs) + +# if a file has a 25% chance of not appearing in our sample, then we need to +# raise our estimate by (1.25/1) +correction = 1+no_chance +#print "correction", correction + +files = unique_strings * (32*32/len(sections)) * correction +print "files.value %d" % int(files) diff --git a/misc/munin/tahoe_estimate_files.py b/misc/munin/tahoe_estimate_files.py deleted file mode 100644 index 4723cdad..00000000 --- a/misc/munin/tahoe_estimate_files.py +++ /dev/null @@ -1,49 +0,0 @@ -#! /usr/bin/python - -import sys, os.path - -if len(sys.argv) > 1 and sys.argv[1] == "config": - print """\ -graph_title Tahoe File Estimate -graph_vlabel files -graph_category tahoe -graph_info This graph shows the estimated number of files and directories present in the grid -files.label files -files.draw LINE2""" - sys.exit(0) - -# Edit this to point at some subset of storage directories. -node_dirs = [os.path.expanduser("~amduser/prodnet/storage1"), - os.path.expanduser("~amduser/prodnet/storage2"), - os.path.expanduser("~amduser/prodnet/storage3"), - os.path.expanduser("~amduser/prodnet/storage4"), - ] - -sections = ["aa", "ab", "ac", "ad", "ae", "af", "ag", "ah", "ai", "aj"] -# and edit this to reflect your default encoding's "total_shares" value, and -# the total number of servers. -N = 10 -num_servers = 20 - -index_strings = set() -for base in node_dirs: - for section in sections: - sampledir = os.path.join(base, "storage", "shares", section) - indices = os.listdir(sampledir) - index_strings.update(indices) -unique_strings = len(index_strings) - -# the chance that any given file appears on any given server -chance = 1.0 * N / num_servers - -# the chance that the file does *not* appear on the servers that we're -# examining -no_chance = (1-chance) ** len(node_dirs) - -# if a file has a 25% chance of not appearing in our sample, then we need to -# raise our estimate by (1.25/1) -correction = 1+no_chance -#print "correction", correction - -files = unique_strings * (32*32/len(sections)) * correction -print "files.value %d" % int(files) diff --git a/misc/munin/tahoe_files b/misc/munin/tahoe_files new file mode 100644 index 00000000..c6026de1 --- /dev/null +++ b/misc/munin/tahoe_files @@ -0,0 +1,52 @@ +#! /usr/bin/python + +# This is a munin plugin to track the number of files that each node's +# StorageServer is holding on behalf of other nodes. Each file that has been +# uploaded to the mesh (and has shares present on this node) will be counted +# here. When there are <= 100 nodes in the mesh, this count will equal the +# total number of files that are active in the entire mesh. When there are +# 200 nodes present in the mesh, it will represent about half of the total +# number. + +# Copy this plugin into /etc/munun/plugins/tahoe-files and then put +# the following in your /etc/munin/plugin-conf.d/foo file to let it know +# where to find the basedirectory for each node: +# +# [tahoe-files] +# env.basedir_NODE1 /path/to/node1 +# env.basedir_NODE2 /path/to/node2 +# env.basedir_NODE3 /path/to/node3 +# + +import os, sys + +nodedirs = [] +for k,v in os.environ.items(): + if k.startswith("basedir_"): + nodename = k[len("basedir_"):] + nodedirs.append( (nodename, v) ) +nodedirs.sort() + +configinfo = \ +"""graph_title Allmydata Tahoe Filecount +graph_vlabel files +graph_category tahoe +graph_info This graph shows the number of files hosted by this node's StorageServer +""" + +for nodename, basedir in nodedirs: + configinfo += "%s.label %s\n" % (nodename, nodename) + configinfo += "%s.draw LINE2\n" % (nodename,) + + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +for nodename, basedir in nodedirs: + files = len(os.listdir(os.path.join(basedir, "storage", "shares"))) + if os.path.exists(os.path.join(basedir, "storage", "shares", "incoming")): + files -= 1 # the 'incoming' directory doesn't count + print "%s.value %d" % (nodename, files) + diff --git a/misc/munin/tahoe_helperstats_active b/misc/munin/tahoe_helperstats_active new file mode 100644 index 00000000..472c8c42 --- /dev/null +++ b/misc/munin/tahoe_helperstats_active @@ -0,0 +1,25 @@ +#! /usr/bin/python + +import os, sys +import urllib +import simplejson + +configinfo = """\ +graph_title Tahoe Helper Stats - Active Files +graph_vlabel bytes +graph_category tahoe +graph_info This graph shows the number of files being actively processed by the helper +fetched.label Active Files +fetched.draw LINE2 +""" + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +url = os.environ["url"] + +data = simplejson.loads(urllib.urlopen(url).read()) +print "fetched.value %d" % data["chk_upload_helper.active_uploads"] + diff --git a/misc/munin/tahoe_helperstats_fetched b/misc/munin/tahoe_helperstats_fetched new file mode 100644 index 00000000..c64101df --- /dev/null +++ b/misc/munin/tahoe_helperstats_fetched @@ -0,0 +1,26 @@ +#! /usr/bin/python + +import os, sys +import urllib +import simplejson + +configinfo = """\ +graph_title Tahoe Helper Stats - Bytes Fetched +graph_vlabel bytes +graph_category tahoe +graph_info This graph shows the amount of data being fetched by the helper +fetched.label Bytes Fetched +fetched.type GAUGE +fetched.draw LINE1 +fetched.min 0 +""" + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +url = os.environ["url"] + +data = simplejson.loads(urllib.urlopen(url).read()) +print "fetched.value %d" % data["chk_upload_helper.fetched_bytes"] diff --git a/misc/munin/tahoe_introstats b/misc/munin/tahoe_introstats new file mode 100644 index 00000000..5f506eb9 --- /dev/null +++ b/misc/munin/tahoe_introstats @@ -0,0 +1,28 @@ +#! /usr/bin/python + +import os, sys +import urllib +import simplejson + +configinfo = """\ +graph_title Tahoe Introducer Stats +graph_vlabel hosts +graph_category tahoe +graph_info This graph shows the number of hosts announcing and subscribing to various services +storage_server.label Storage Servers +storage_server.draw LINE1 +storage_client.label Clients +storage_client.draw LINE2 +""" + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +url = os.environ["url"] + +data = simplejson.loads(urllib.urlopen(url).read()) +print "storage_server.value %d" % data["announcement_summary"]["storage"] +print "storage_client.value %d" % data["subscription_summary"]["storage"] + diff --git a/misc/munin/tahoe_nodememory b/misc/munin/tahoe_nodememory new file mode 100644 index 00000000..670e32e5 --- /dev/null +++ b/misc/munin/tahoe_nodememory @@ -0,0 +1,71 @@ +#! /usr/bin/python + +# This munin plugin isolates processes by looking for the 'pid' file created +# by 'allmydata start', then extracts the amount of memory they consume (both +# VmSize and VmRSS) from /proc + +import os, sys, re + +if 0: + # for testing + os.environ["nodememory_warner1"] = "run/warner1" + os.environ["nodememory_warner2"] = "run/warner2" + +nodedirs = [] +for k,v in os.environ.items(): + if k.startswith("nodememory_"): + nodename = k[len("nodememory_"):] + nodedirs.append((nodename, v)) +nodedirs.sort(lambda a,b: cmp(a[0],b[0])) + +pids = {} + +for node,nodedir in nodedirs: + pidfile = os.path.join(nodedir, "twistd.pid") + if os.path.exists(pidfile): + pid = int(open(pidfile,"r").read()) + pids[node] = pid + +fields = ["VmSize", "VmRSS"] + + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + configinfo = \ + """graph_title Memory Consumed by Nodes +graph_vlabel bytes +graph_category Tahoe +graph_info This graph shows the memory used by specific processes +""" + for nodename,nodedir in nodedirs: + for f in fields: + configinfo += "%s_%s.label %s used by %s\n" % (nodename, f, + f, nodename) + linetype = "LINE1" + if f == "VmSize": + linetype = "LINE2" + configinfo += "%s_%s.draw %s\n" % (nodename, f, linetype) + if f == "VmData": + configinfo += "%s_%s.graph no\n" % (nodename, f) + + print configinfo + sys.exit(0) + +nodestats = {} +for node,pid in pids.items(): + stats = {} + statusfile = "/proc/%s/status" % pid + if not os.path.exists(statusfile): + continue + for line in open(statusfile,"r").readlines(): + for f in fields: + if line.startswith(f + ":"): + m = re.search(r'(\d+)', line) + stats[f] = int(m.group(1)) + nodestats[node] = stats + +for node,stats in nodestats.items(): + for f,value in stats.items(): + # TODO: not sure if /proc/%d/status means 1000 or 1024 when it says + # 'kB' + print "%s_%s.value %d" % (node, f, 1024*value) diff --git a/misc/munin/tahoe_rootdir_space b/misc/munin/tahoe_rootdir_space new file mode 100644 index 00000000..e3830fe4 --- /dev/null +++ b/misc/munin/tahoe_rootdir_space @@ -0,0 +1,27 @@ +#! /usr/bin/python + +import os, sys +import urllib +import simplejson + +configinfo = """\ +graph_title Tahoe Root Directory Size +graph_vlabel bytes +graph_category tahoe +graph_info This graph shows the amount of space consumed by all files reachable from a given directory +space.label Space +space.draw LINE2 +""" + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +url = os.environ["url"] + +data = int(urllib.urlopen(url).read().strip()) +print "space.value %d" % data + + + diff --git a/misc/munin/tahoe_spacetime b/misc/munin/tahoe_spacetime new file mode 100644 index 00000000..75f58786 --- /dev/null +++ b/misc/munin/tahoe_spacetime @@ -0,0 +1,100 @@ +#! /usr/bin/python + +# copy .rrd files from a remote munin master host, sum the 'df' stats from a +# list of hosts, use them to estimate a rate-of-change for the past month, +# then extrapolate to guess how many weeks/months/years of storage space we +# have left, and output it to another munin graph + +import sys, os, time +import rrdtool + +MUNIN_HOST = "munin.allmydata.com" +PREFIX = "%s:/var/lib/munin/prodtahoe/" % MUNIN_HOST +FILES = [ "prodtahoe%d.allmydata.com-df-_dev_sd%s3-g.rrd" % (a,b) + for a in (1,2,3,4,5) + for b in ("a", "b", "c", "d") + ] +REMOTEFILES = [ PREFIX + f for f in FILES ] +LOCALFILES = ["/var/lib/munin/prodtahoe/" + f for f in FILES ] +WEBFILE = "/var/www/tahoe/spacetime.json" + + +def rsync_rrd(): + # copy the RRD files from your munin master host to a local one + cmd = "rsync %s rrds/" % (" ".join(REMOTEFILES)) + rc = os.system(cmd) + assert rc == 0, rc + +def format_time(t): + return time.strftime("%b %d %H:%M", time.localtime(t)) + +def predict_future(past_s): + + start_df = [] + end_df = [] + durations = [] + + for fn in LOCALFILES: + d = rrdtool.fetch(fn, "AVERAGE", "-s", "-"+past_s, "-e", "-1hr") + # ((start, end, step), (name1, name2, ...), [(data1, data2, ..), ...]) + (start_time, end_time ,step) = d[0] + #print format_time(start_time), " - ", format_time(end_time), step + names = d[1] + #for points in d[2]: + # point = points[0] + # print point + start_space = d[2][0][0] + # I don't know why, but the last few points are always bogus. Running + # 'rrdtool fetch' on the command line is usually ok.. I blame the python + # bindinds. + end_space = d[2][-4][0] + end_time = end_time - (4*step) + start_df.append(start_space) + end_df.append(end_space) + durations.append(end_time - start_time) + + avg_start_df = sum(start_df) / len(start_df) + avg_end_df = sum(end_df) / len(end_df) + avg_duration = sum(durations) / len(durations) + #print avg_start_df, avg_end_df, avg_duration + + rate = (avg_end_df - avg_start_df) / avg_duration + #print "Rate", rate, " %/s" + #print "measured over", avg_duration / 86400, "days" + remaining = 100 - avg_end_df + remaining_seconds = remaining / rate + #print "remaining seconds", remaining_seconds + remaining_days = remaining_seconds / 86400 + #print "remaining days", remaining_days + return remaining_days + +def write_to_file(samples): + # write a JSON-formatted dictionary + f = open(WEBFILE + ".tmp", "w") + f.write("{ ") + f.write(", ".join(['"%s": %s' % (k, samples[k]) + for k in sorted(samples.keys())])) + f.write("}\n") + f.close() + os.rename(WEBFILE + ".tmp", WEBFILE) + +if len(sys.argv) > 1 and sys.argv[1] == "config": + print """\ +graph_title Tahoe Remaining Space Predictor +graph_vlabel days remaining +graph_category tahoe +graph_info This graph shows the estimated number of days left until storage space is exhausted +days_2wk.label days left (2wk sample) +days_2wk.draw LINE2 +days_4wk.label days left (4wk sample) +days_4wk.draw LINE2""" + sys.exit(0) + +#rsync_rrd() +remaining_4wk = predict_future("4wk") +remaining_2wk = predict_future("2wk") +print "days_4wk.value", remaining_4wk +print "days_2wk.value", remaining_2wk +write_to_file({"remaining_2wk": remaining_2wk, + "remaining_4wk": remaining_4wk}) + diff --git a/misc/munin/tahoe_spacetime.py b/misc/munin/tahoe_spacetime.py deleted file mode 100644 index 75f58786..00000000 --- a/misc/munin/tahoe_spacetime.py +++ /dev/null @@ -1,100 +0,0 @@ -#! /usr/bin/python - -# copy .rrd files from a remote munin master host, sum the 'df' stats from a -# list of hosts, use them to estimate a rate-of-change for the past month, -# then extrapolate to guess how many weeks/months/years of storage space we -# have left, and output it to another munin graph - -import sys, os, time -import rrdtool - -MUNIN_HOST = "munin.allmydata.com" -PREFIX = "%s:/var/lib/munin/prodtahoe/" % MUNIN_HOST -FILES = [ "prodtahoe%d.allmydata.com-df-_dev_sd%s3-g.rrd" % (a,b) - for a in (1,2,3,4,5) - for b in ("a", "b", "c", "d") - ] -REMOTEFILES = [ PREFIX + f for f in FILES ] -LOCALFILES = ["/var/lib/munin/prodtahoe/" + f for f in FILES ] -WEBFILE = "/var/www/tahoe/spacetime.json" - - -def rsync_rrd(): - # copy the RRD files from your munin master host to a local one - cmd = "rsync %s rrds/" % (" ".join(REMOTEFILES)) - rc = os.system(cmd) - assert rc == 0, rc - -def format_time(t): - return time.strftime("%b %d %H:%M", time.localtime(t)) - -def predict_future(past_s): - - start_df = [] - end_df = [] - durations = [] - - for fn in LOCALFILES: - d = rrdtool.fetch(fn, "AVERAGE", "-s", "-"+past_s, "-e", "-1hr") - # ((start, end, step), (name1, name2, ...), [(data1, data2, ..), ...]) - (start_time, end_time ,step) = d[0] - #print format_time(start_time), " - ", format_time(end_time), step - names = d[1] - #for points in d[2]: - # point = points[0] - # print point - start_space = d[2][0][0] - # I don't know why, but the last few points are always bogus. Running - # 'rrdtool fetch' on the command line is usually ok.. I blame the python - # bindinds. - end_space = d[2][-4][0] - end_time = end_time - (4*step) - start_df.append(start_space) - end_df.append(end_space) - durations.append(end_time - start_time) - - avg_start_df = sum(start_df) / len(start_df) - avg_end_df = sum(end_df) / len(end_df) - avg_duration = sum(durations) / len(durations) - #print avg_start_df, avg_end_df, avg_duration - - rate = (avg_end_df - avg_start_df) / avg_duration - #print "Rate", rate, " %/s" - #print "measured over", avg_duration / 86400, "days" - remaining = 100 - avg_end_df - remaining_seconds = remaining / rate - #print "remaining seconds", remaining_seconds - remaining_days = remaining_seconds / 86400 - #print "remaining days", remaining_days - return remaining_days - -def write_to_file(samples): - # write a JSON-formatted dictionary - f = open(WEBFILE + ".tmp", "w") - f.write("{ ") - f.write(", ".join(['"%s": %s' % (k, samples[k]) - for k in sorted(samples.keys())])) - f.write("}\n") - f.close() - os.rename(WEBFILE + ".tmp", WEBFILE) - -if len(sys.argv) > 1 and sys.argv[1] == "config": - print """\ -graph_title Tahoe Remaining Space Predictor -graph_vlabel days remaining -graph_category tahoe -graph_info This graph shows the estimated number of days left until storage space is exhausted -days_2wk.label days left (2wk sample) -days_2wk.draw LINE2 -days_4wk.label days left (4wk sample) -days_4wk.draw LINE2""" - sys.exit(0) - -#rsync_rrd() -remaining_4wk = predict_future("4wk") -remaining_2wk = predict_future("2wk") -print "days_4wk.value", remaining_4wk -print "days_2wk.value", remaining_2wk -write_to_file({"remaining_2wk": remaining_2wk, - "remaining_4wk": remaining_4wk}) - diff --git a/misc/munin/tahoe_stats b/misc/munin/tahoe_stats new file mode 100644 index 00000000..57d18e78 --- /dev/null +++ b/misc/munin/tahoe_stats @@ -0,0 +1,398 @@ +#!/usr/bin/python + +import os +import pickle +import re +import sys +import time + +STAT_VALIDITY = 300 # 5min limit on reporting stats + +PLUGINS = { + 'tahoe_storage_consumed': + { 'statid': 'storage_server.consumed', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Storage Server Space Consumed', + 'graph_vlabel bytes', + 'graph_category tahoe_storage_server', + 'graph_info This graph shows space consumed', + 'graph_args --base 1024', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_storage_allocated': + { 'statid': 'storage_server.allocated', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Storage Server Space Allocated', + 'graph_vlabel bytes', + 'graph_category tahoe_storage_server', + 'graph_info This graph shows space allocated', + 'graph_args --base 1024', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_runtime_load_avg': + { 'statid': 'load_monitor.avg_load', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Runtime Load Average', + 'graph_vlabel load', + 'graph_category tahoe', + 'graph_info This graph shows average reactor delay', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_runtime_load_peak': + { 'statid': 'load_monitor.max_load', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Runtime Load Peak', + 'graph_vlabel load', + 'graph_category tahoe', + 'graph_info This graph shows peak reactor delay', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_storage_bytes_added': + { 'statid': 'storage_server.bytes_added', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Storage Server Bytes Added', + 'graph_vlabel bytes', + 'graph_category tahoe_storage_server', + 'graph_info This graph shows cummulative bytes added', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_storage_bytes_freed': + { 'statid': 'storage_server.bytes_freed', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Storage Server Bytes Removed', + 'graph_vlabel bytes', + 'graph_category tahoe_storage_server', + 'graph_info This graph shows cummulative bytes removed', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_helper_incoming_files': + { 'statid': 'chk_upload_helper.incoming_count', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming File Count', + 'graph_vlabel n files', + 'graph_category tahoe_helper', + 'graph_info This graph shows number of incoming files', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_incoming_filesize': + { 'statid': 'chk_upload_helper.incoming_size', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming File Size', + 'graph_vlabel bytes', + 'graph_category tahoe_helper', + 'graph_info This graph shows total size of incoming files', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_incoming_files_old': + { 'statid': 'chk_upload_helper.incoming_size_old', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming Old Files', + 'graph_vlabel bytes', + 'graph_category tahoe_helper', + 'graph_info This graph shows total size of old incoming files', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_helper_encoding_files': + { 'statid': 'chk_upload_helper.encoding_count', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding File Count', + 'graph_vlabel n files', + 'graph_category tahoe_helper', + 'graph_info This graph shows number of encoding files', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_encoding_filesize': + { 'statid': 'chk_upload_helper.encoding_size', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding File Size', + 'graph_vlabel bytes', + 'graph_category tahoe_helper', + 'graph_info This graph shows total size of encoding files', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_encoding_files_old': + { 'statid': 'chk_upload_helper.encoding_size_old', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding Old Files', + 'graph_vlabel bytes', + 'graph_category tahoe_helper', + 'graph_info This graph shows total size of old encoding files', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_helper_active_uploads': + { 'statid': 'chk_upload_helper.active_uploads', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Active Files', + 'graph_vlabel n files', + 'graph_category tahoe_helper', + 'graph_info This graph shows number of files actively being processed by the helper', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_helper_upload_requests': + { 'statid': 'chk_upload_helper.upload_requests', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Upload Requests', + 'graph_vlabel requests', + 'graph_category tahoe_helper', + 'graph_info This graph shows the number of upload requests arriving at the helper', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_upload_already_present': + { 'statid': 'chk_upload_helper.upload_already_present', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Uploads Already Present', + 'graph_vlabel requests', + 'graph_category tahoe_helper', + 'graph_info This graph shows the number of uploads whose files are already present in the grid', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_upload_need_upload': + { 'statid': 'chk_upload_helper.upload_need_upload', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Uploads Needing Upload', + 'graph_vlabel requests', + 'graph_category tahoe_helper', + 'graph_info This graph shows the number of uploads whose files are not already present in the grid', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_encoded_bytes': + { 'statid': 'chk_upload_helper.encoded_bytes', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoded Bytes', + 'graph_vlabel bytes', + 'graph_category tahoe_helper', + 'graph_info This graph shows the number of bytes encoded by the helper', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_fetched_bytes': + { 'statid': 'chk_upload_helper.fetched_bytes', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Fetched Bytes', + 'graph_vlabel bytes', + 'graph_category tahoe_helper', + 'graph_info This graph shows the number of bytes fetched by the helper', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_uploader_bytes_uploaded': + { 'statid': 'uploader.bytes_uploaded', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Uploader Bytes Uploaded', + 'graph_vlabel bytes', + 'graph_category tahoe_traffic', + 'graph_info This graph shows the number of bytes uploaded', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_uploader_files_uploaded': + { 'statid': 'uploader.files_uploaded', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Uploader Bytes Uploaded', + 'graph_vlabel files', + 'graph_category tahoe_traffic', + 'graph_info This graph shows the number of files uploaded', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_mutable_files_published': + { 'statid': 'mutable.files_published', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Mutable Files Published', + 'graph_vlabel files', + 'graph_category tahoe_traffic', + 'graph_info This graph shows the number of mutable files published', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_mutable_files_retrieved': + { 'statid': 'mutable.files_retrieved', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Mutable Files Retrieved', + 'graph_vlabel files', + 'graph_category tahoe_traffic', + 'graph_info This graph shows the number of files retrieved', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + } + +def smash_name(name): + return re.sub('[^a-zA-Z0-9]', '_', name) + +def open_stats(fname): + f = open(fname, 'rb') + stats = pickle.load(f) + f.close() + return stats + +def main(argv): + graph_name = os.path.basename(argv[0]) + if graph_name.endswith('.py'): + graph_name = graph_name[:-3] + + plugin_conf = PLUGINS.get(graph_name) + + for k,v in os.environ.items(): + if k.startswith('statsfile'): + stats_file = v + break + else: + raise RuntimeError("No 'statsfile' env var found") + + stats = open_stats(stats_file) + + now = time.time() + def output_nodes(output_section, check_time): + for tubid, nodestats in stats.items(): + if check_time and (now - nodestats.get('timestamp', 0)) > STAT_VALIDITY: + continue + name = smash_name("%s_%s" % (nodestats['nickname'], tubid[:4])) + #value = nodestats['stats'][plugin_conf['category']].get(plugin_conf['statid']) + category = plugin_conf['category'] + statid = plugin_conf['statid'] + value = nodestats['stats'][category].get(statid) + if value is not None: + args = { 'name': name, 'value': value } + print plugin_conf[output_section] % args + + if len(argv) > 1: + if sys.argv[1] == 'config': + print plugin_conf['configheader'] + output_nodes('graph_config', False) + sys.exit(0) + + output_nodes('graph_render', True) + +if __name__ == '__main__': + main(sys.argv) diff --git a/misc/munin/tahoe_storagespace b/misc/munin/tahoe_storagespace new file mode 100644 index 00000000..ad4375b9 --- /dev/null +++ b/misc/munin/tahoe_storagespace @@ -0,0 +1,58 @@ +#! /usr/bin/python + +# This is a munin plugin to track the amount of disk space each node's +# StorageServer is consuming on behalf of other nodes. This is where the +# shares are kept. If there are N nodes present in the mesh, the total space +# consumed by the entire mesh will be about N times the space reported by +# this plugin. + +# Copy this plugin into /etc/munun/plugins/tahoe-storagespace and then put +# the following in your /etc/munin/plugin-conf.d/foo file to let it know +# where to find the basedirectory for each node: +# +# [tahoe-storagespace] +# env.basedir_NODE1 /path/to/node1 +# env.basedir_NODE2 /path/to/node2 +# env.basedir_NODE3 /path/to/node3 +# +# Allmydata-tahoe must be installed on the system where this plugin is used, +# since it imports a utility module from allmydata.utils . + +import os, sys +import commands + +nodedirs = [] +for k,v in os.environ.items(): + if k.startswith("basedir_"): + nodename = k[len("basedir_"):] + nodedirs.append( (nodename, v) ) +nodedirs.sort() + +seriesname = "storage" + +configinfo = \ +"""graph_title Allmydata Tahoe Shareholder Space +graph_vlabel bytes +graph_category tahoe +graph_info This graph shows the space consumed by this node's StorageServer +""" + +for nodename, basedir in nodedirs: + configinfo += "%s.label %s\n" % (nodename, nodename) + configinfo += "%s.draw LINE2\n" % (nodename,) + + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +for nodename, basedir in nodedirs: + cmd = "du --bytes --summarize %s" % os.path.join(basedir, "storage") + rc,out = commands.getstatusoutput(cmd) + if rc != 0: + sys.exit(rc) + bytes, extra = out.split() + usage = int(bytes) + print "%s.value %d" % (nodename, usage) +