size-mutable-files (TODO): same, for current version of all mutable files
size-literal-files: same, for LIT files
size-directories: size of directories (includes size-literal-files)
+ size-files-histogram: list of (minsize, maxsize, count) buckets,
+ with a histogram of filesizes, 5dB/bucket,
+ for both literal and immutable files
largest-directory: number of children in the largest directory
largest-immutable-file: number of bytes in the largest CHK file
-import os, time
+import os, time, math
from zope.interface import implements
from twisted.internet import defer
from allmydata.mutable.node import MutableFileNode
from allmydata.interfaces import IMutableFileNode, IDirectoryNode,\
IURI, IFileNode, IMutableFileURI, IVerifierURI, IFilesystemNode
-from allmydata.util import hashutil
+from allmydata.util import hashutil, mathutil
from allmydata.util.hashutil import netstring
from allmydata.util.limiter import ConcurrencyLimiter
from allmydata.uri import NewDirectoryURI
elif IFileNode.providedBy(child): # CHK and LIT
stats.add("count-files")
size = child.get_size()
+ stats.histogram("size-files-histogram", size)
if child.get_uri().startswith("URI:LIT:"):
stats.add("count-literal-files")
stats.add("size-literal-files", size)
#"largest-mutable-file",
]:
self.stats[k] = 0
+ self.histograms = {}
+ for k in ["size-files-histogram"]:
+ self.histograms[k] = {} # maps (min,max) to count
+ self.buckets = [ (0,0), (1,3)]
+ self.root = math.sqrt(10)
def add(self, key, value=1):
self.stats[key] += value
def max(self, key, value):
self.stats[key] = max(self.stats[key], value)
+    def which_bucket(self, size):
+        # Return the (min,max) bucket boundaries such that
+        # min <= size <= max. The boundaries come from the fixed series
+        # (0,0), (1,3), (4,10), (11,31), (32,100), (101,316), (317,1000),
+        # etc: two buckets per decade (5dB each, since self.root is
+        # sqrt(10)).
+        assert size >= 0
+        i = 0
+        while True:
+            if i >= len(self.buckets):
+                # extend the list lazily, one bucket at a time, so
+                # arbitrarily large sizes are covered without
+                # precomputing the whole table
+                new_lower = self.buckets[i-1][1]+1
+                new_upper = int(mathutil.next_power_of_k(new_lower, self.root))
+                self.buckets.append( (new_lower, new_upper) )
+            maybe = self.buckets[i]
+            if maybe[0] <= size <= maybe[1]:
+                return maybe
+            i += 1
+
+    def histogram(self, key, size):
+        # Record one sample of 'size' in the named histogram: increment
+        # the count of whichever bucket covers it. Buckets with no
+        # samples never appear in self.histograms[key].
+        bucket = self.which_bucket(size)
+        h = self.histograms[key]
+        if bucket not in h:
+            h[bucket] = 0
+        h[bucket] += 1
+
     def get_results(self):
-        return self.stats
+        # Return a copy of the stats dict. Each histogram is flattened
+        # into a sorted list of (bucket_min, bucket_max, count) tuples,
+        # omitting empty buckets.
+        stats = self.stats.copy()
+        for key in self.histograms:
+            h = self.histograms[key]
+            out = [ (bucket[0], bucket[1], h[bucket]) for bucket in h ]
+            out.sort()
+            stats[key] = out
+        return stats
# use client.create_dirnode() to make one of these
d = self.client.create_empty_dirnode()
def _then(n):
+ # /
self.failUnless(n.is_mutable())
u = n.get_uri()
self.failUnless(u)
assert isinstance(ffu_v, str)
self.expected_manifest.append(ffu_v)
d.addCallback(lambda res: n.set_uri(u"child", fake_file_uri))
+ # /
+ # /child = mutable
d.addCallback(lambda res: n.create_empty_directory(u"subdir"))
+ # /
+ # /child = mutable
+ # /subdir = directory
def _created(subdir):
self.failUnless(isinstance(subdir, FakeDirectoryNode))
self.subdir = subdir
stats["size-directories"])
self.failUnless(stats["largest-directory"] > 500,
stats["largest-directory"])
+ self.failUnlessEqual(stats["size-files-histogram"], [])
d.addCallback(_check_deepstats)
def _add_subsubdir(res):
return d
+class DeepStats(unittest.TestCase):
+    def test_stats(self):
+        # Exercise the DeepStats accumulator directly: counters (add),
+        # max-tracking (max), and histogram bucketing (histogram).
+        ds = dirnode.DeepStats()
+        ds.add("count-files")
+        ds.add("size-immutable-files", 123)
+        ds.histogram("size-files-histogram", 123)
+        ds.max("largest-directory", 444)
+
+        s = ds.get_results()
+        self.failUnlessEqual(s["count-files"], 1)
+        self.failUnlessEqual(s["size-immutable-files"], 123)
+        self.failUnlessEqual(s["largest-directory"], 444)
+        # counters that were never touched must still be present, at zero
+        self.failUnlessEqual(s["count-literal-files"], 0)
+
+        ds.add("count-files")
+        ds.add("size-immutable-files", 321)
+        ds.histogram("size-files-histogram", 321)
+        # max() must not regress when handed a smaller value
+        ds.max("largest-directory", 2)
+
+        s = ds.get_results()
+        self.failUnlessEqual(s["count-files"], 2)
+        self.failUnlessEqual(s["size-immutable-files"], 444)
+        self.failUnlessEqual(s["largest-directory"], 444)
+        self.failUnlessEqual(s["count-literal-files"], 0)
+        # 123 lands in the (101,316) bucket, 321 in (317,1000)
+        self.failUnlessEqual(s["size-files-histogram"],
+                             [ (101, 316, 1), (317, 1000, 1) ])
+
+        # feed in every size from 1 to 1099 plus one huge outlier, and
+        # check the bucket population; empty buckets (e.g. between 3163
+        # and ~3.16e12) are omitted from the results
+        ds = dirnode.DeepStats()
+        for i in range(1, 1100):
+            ds.histogram("size-files-histogram", i)
+        ds.histogram("size-files-histogram", 4*1000*1000*1000*1000) # 4TB
+        s = ds.get_results()
+        self.failUnlessEqual(s["size-files-histogram"],
+                             [ (1, 3, 3),
+                               (4, 10, 7),
+                               (11, 31, 21),
+                               (32, 100, 69),
+                               (101, 316, 216),
+                               (317, 1000, 684),
+                               (1001, 3162, 99),
+                               (3162277660169L, 10000000000000L, 1),
+                               ])
+
netstring = hashutil.netstring
split_netstring = dirnode.split_netstring
# P/s2-rw/mydata992 (same as P/s2-rw/mydata992)
d1.addCallback(lambda manifest:
self.failUnlessEqual(len(manifest), 4))
+ d1.addCallback(lambda res: home.deep_stats())
+ def _check_stats(stats):
+ expected = {"count-immutable-files": 1,
+ "count-mutable-files": 0,
+ "count-literal-files": 1,
+ "count-files": 2,
+ "count-directories": 3,
+ "size-immutable-files": 112,
+ "size-literal-files": 23,
+ #"size-directories": 616, # varies
+ #"largest-directory": 616,
+ "largest-directory-children": 3,
+ "largest-immutable-file": 112,
+ }
+ for k,v in expected.iteritems():
+ self.failUnlessEqual(stats[k], v,
+ "stats[%s] was %s, not %s" %
+ (k, stats[k], v))
+ self.failUnless(stats["size-directories"] > 1300,
+ stats["size-directories"])
+ self.failUnless(stats["largest-directory"] > 800,
+ stats["largest-directory"])
+ self.failUnlessEqual(stats["size-files-histogram"],
+ [ (11, 31, 1), (101, 316, 1) ])
+ d1.addCallback(_check_stats)
return d1
d.addCallback(_got_home)
return d
self.failUnlessEqual(stats[k], v,
"stats[%s] was %s, not %s" %
(k, stats[k], v))
+ self.failUnlessEqual(stats["size-files-histogram"],
+ [ [11, 31, 3] ])
d.addCallback(_got)
return d