From: Brian Warner Date: Thu, 24 Apr 2008 18:28:35 +0000 (-0700) Subject: munin/tahoe_estimate_files.py: tool to estimate the total number of slots (mutable... X-Git-Tag: allmydata-tahoe-1.1.0~182 X-Git-Url: https://git.rkrishnan.org/pf/content/en/statistics?a=commitdiff_plain;h=d2a2b8a4ae099d8e44d290b5c856d145f91825c9;p=tahoe-lafs%2Ftahoe-lafs.git munin/tahoe_estimate_files.py: tool to estimate the total number of slots (mutable and immutable combined) in the grid, from a small sample --- diff --git a/misc/munin/tahoe_estimate_files.py b/misc/munin/tahoe_estimate_files.py new file mode 100644 index 00000000..4723cdad --- /dev/null +++ b/misc/munin/tahoe_estimate_files.py @@ -0,0 +1,49 @@ +#! /usr/bin/python + +import sys, os.path + +if len(sys.argv) > 1 and sys.argv[1] == "config": + print """\ +graph_title Tahoe File Estimate +graph_vlabel files +graph_category tahoe +graph_info This graph shows the estimated number of files and directories present in the grid +files.label files +files.draw LINE2""" + sys.exit(0) + +# Edit this to point at some subset of storage directories. +node_dirs = [os.path.expanduser("~amduser/prodnet/storage1"), + os.path.expanduser("~amduser/prodnet/storage2"), + os.path.expanduser("~amduser/prodnet/storage3"), + os.path.expanduser("~amduser/prodnet/storage4"), + ] + +sections = ["aa", "ab", "ac", "ad", "ae", "af", "ag", "ah", "ai", "aj"] +# and edit this to reflect your default encoding's "total_shares" value, and +# the total number of servers. +N = 10 +num_servers = 20 + +index_strings = set() +for base in node_dirs: + for section in sections: + sampledir = os.path.join(base, "storage", "shares", section) + indices = os.listdir(sampledir) + index_strings.update(indices) +unique_strings = len(index_strings) + +# the chance that any given file appears on any given server +chance = 1.0 * N / num_servers + +# the chance that the file does *not* appear on the servers that we're +# examining +no_chance = (1-chance) ** len(node_dirs) + +# if a file has a 25% chance of not appearing in our sample, then we need to +# raise our estimate by (1.25/1) +correction = 1+no_chance +#print "correction", correction + +files = unique_strings * (32*32/len(sections)) * correction +print "files.value %d" % int(files)