From: Zooko O'Whielacronx Date: Mon, 7 Jun 2010 05:16:18 +0000 (-0700) Subject: setup: organize misc/ scripts and tools and remove obsolete ones X-Git-Url: https://git.rkrishnan.org/Site/Content/Exhibitors/class-simplejson.JSONDecoder-index.html?a=commitdiff_plain;h=1fc6be28f46b3237635441c2ab3e2fbc616485ff;p=tahoe-lafs%2Ftahoe-lafs.git setup: organize misc/ scripts and tools and remove obsolete ones This is for ticket #1068. --- diff --git a/Makefile b/Makefile index edfd8681..0bd474b7 100644 --- a/Makefile +++ b/Makefile @@ -119,28 +119,22 @@ test-coverage: build src/allmydata/_version.py $(PYTHON) setup.py trial --reporter=bwverbose-coverage -s $(TEST) quicktest: - $(PYTHON) misc/run-with-pythonpath.py trial $(TRIALARGS) $(TEST) + $(PYTHON) misc/build_helpers/run-with-pythonpath.py trial $(TRIALARGS) $(TEST) # code-coverage: install the "coverage" package from PyPI, do "make # quicktest-coverage" to do a unit test run with coverage-gathering enabled, # then use "make coverate-output-text" for a brief report, or "make # coverage-output" for a pretty HTML report. Also see "make .coverage.el" and -# misc/coverage.el for emacs integration. +# misc/coding_helpers/coverage.el for emacs integration. 
quicktest-coverage: rm -f .coverage - $(PYTHON) misc/run-with-pythonpath.py trial --reporter=bwverbose-coverage $(TEST) + $(PYTHON) misc/build_helpers/run-with-pythonpath.py trial --reporter=bwverbose-coverage $(TEST) # on my laptop, "quicktest" takes 239s, "quicktest-coverage" takes 304s -COVERAGE_OMIT = --omit /System,/Library,/usr/lib,src/allmydata/test,support - -# this is like 'coverage report', but includes lines-uncovered -coverage-output-text: - $(PYTHON) misc/coverage2text.py - coverage-output: rm -rf coverage-html - coverage html -d coverage-html $(COVERAGE_OMIT) + coverage html -d coverage-html cp .coverage coverage-html/coverage.data @echo "now point your browser at coverage-html/index.html" @@ -160,7 +154,7 @@ coverage-output: .PHONY: repl test-darcs-boringfile test-clean clean find-trailing-spaces .coverage.el: .coverage - $(PYTHON) misc/coverage2el.py + $(PYTHON) misc/coding_helpers/coverage2el.py # 'upload-coverage' is meant to be run with an UPLOAD_TARGET=host:/dir setting ifdef UPLOAD_TARGET @@ -244,7 +238,7 @@ repl: test-darcs-boringfile: $(MAKE) - $(PYTHON) misc/test-darcs-boringfile.py + $(PYTHON) misc/build_helpers/test-darcs-boringfile.py test-clean: find . 
|grep -vEe "_darcs|allfiles.tmp|src/allmydata/_(version|auto_deps|appname).py" |sort >allfiles.tmp.old @@ -265,7 +259,7 @@ clean: rm -f bin/tahoe bin/tahoe-script.py find-trailing-spaces: - $(PYTHON) misc/find-trailing-spaces.py -r src + $(PYTHON) misc/coding_tools/find-trailing-spaces.py -r src # The test-desert-island target grabs the tahoe-deps tarball, unpacks it, # does a build, then asserts that the build did not try to download anything @@ -280,7 +274,7 @@ fetch-and-unpack-deps: test-desert-island: $(MAKE) fetch-and-unpack-deps $(MAKE) 2>&1 | tee make.out - $(PYTHON) misc/check-build.py make.out no-downloads + $(PYTHON) misc/build_helpers/check-build.py make.out no-downloads # TARBALL GENERATION @@ -297,7 +291,7 @@ upload-tarballs: # DEBIAN PACKAGING -VER=$(shell $(PYTHON) misc/get-version.py) +VER=$(shell $(PYTHON) misc/build_helpers/get-version.py) DEBCOMMENTS="'make deb' build" show-version: @@ -309,7 +303,7 @@ show-pp: .PHONY: deb-etch deb-lenny deb-sid .PHONY: deb-edgy deb-feisty deb-gutsy deb-hardy deb-intrepid deb-jaunty -# we use misc/$TAHOE_ARCH/debian +# we use misc/debian_helpers/$TAHOE_ARCH/debian deb-etch: # py2.4 $(MAKE) deb-ARCH ARCH=etch TAHOE_ARCH=etch @@ -352,7 +346,7 @@ endif setup-deb: is-known-debian-arch rm -f debian - ln -s misc/$(TAHOE_ARCH)/debian debian + ln -s misc/debian_helpers/$(TAHOE_ARCH)/debian debian chmod +x debian/rules # etch (current debian stable) has python-simplejson-1.3, which doesn't @@ -419,7 +413,7 @@ deb-jaunty-head: # new experimental debian-packaging-building target .PHONY: EXPERIMENTAL-deb EXPERIMENTAL-deb: is-known-debian-arch - $(PYTHON) misc/build-deb.py $(ARCH) + $(PYTHON) misc/build_helpers/build-deb.py $(ARCH) # These targets provide for windows native builds diff --git a/docs/logging.txt b/docs/logging.txt index d249d1e5..eb3190ef 100644 --- a/docs/logging.txt +++ b/docs/logging.txt @@ -198,7 +198,7 @@ but a few notes are worth stating here: * assign each severe (log.WEIRD or higher) event a unique 
message identifier, as the umid= argument to the log.msg() call. The - misc/make_umid script may be useful for this purpose. This will make it + misc/coding_tools/make_umid script may be useful for this purpose. This will make it easier to write a classification function for these messages. * use the parent= argument whenever the event is causally/temporally diff --git a/docs/stats.txt b/docs/stats.txt index 437313ad..8cb8dfb4 100644 --- a/docs/stats.txt +++ b/docs/stats.txt @@ -254,7 +254,7 @@ Other tools can be built to examine these stats and render them into something useful. For example, a tool could sum the "storage_server.disk_avail' values from all servers to compute a total-disk-available number for the entire grid (however, the "disk watcher" -daemon, in misc/spacetime/, is better suited for this specific task). +daemon, in misc/operations_helpers/spacetime/, is better suited for this specific task). == Using Munin To Graph Stats Values == diff --git a/mac/Makefile b/mac/Makefile index c15da210..47855fc7 100644 --- a/mac/Makefile +++ b/mac/Makefile @@ -21,6 +21,7 @@ diskimage: ifdef UPLOAD_DEST_FURLFILE # N.B.: xfer-client.py requires foolscap, so we hack up PYTHONPATH to improve # the chances of finding it (using the one that we just built) +# broken. To fix this just use flappclient. --Zooko 2010-06-06 upload: chmod a+r Allmydata-$(VERSION).dmg PYTHONPATH=$(PYTHONPATH):../support/lib/python2.5/site-packages \ diff --git a/misc/awesome_weird_stuff/boodlegrid.tac b/misc/awesome_weird_stuff/boodlegrid.tac new file mode 100644 index 00000000..d92c03f6 --- /dev/null +++ b/misc/awesome_weird_stuff/boodlegrid.tac @@ -0,0 +1,161 @@ +# -*- python -*- + +"""Monitor a Tahoe grid, by playing sounds in response to remote events. + +To install: + 1: install Boodler, from http://www.eblong.com/zarf/boodler/ + 2: run "boodler.py -l listen.Sounds". 
This will run a daemon + that listens on a network socket (31863 by default) and + accepts commands in the form of "sound bird/crow1.aiff\n" + 3: copy this file into a new directory, which we'll call $BASEDIR + 4: write one or more logport FURLs into files named *.furl or *.furls, one + per line. All logports from all such files will be used. + 5: launch this daemon with 'cd $BASEDIR && twistd -y boodlegrid.tac' + +""" + +import os, time +from zope.interface import implements +from twisted.application import service +from twisted.internet import protocol, reactor, defer +from foolscap import Tub, Referenceable +from foolscap.logging.interfaces import RILogObserver +from twisted.python import log + +class Listener: + + def __init__(self): + self.boodler = None # filled in when we connect to boodler + self.last = {} + + def sound(self, name, slot=None, max=0.100): + if not self.boodler: + return + now = time.time() + if slot is None: + slot = name + if now < self.last.get(slot, 0) + max: + return # too soon + self.last[slot] = now + self.boodler.write("sound %s\n" % name) + + def msg(self, m, furl): + #print "got it", m + message = m.get("message", m.get("format", "")) + format = m.get("format", "") + facility = m.get("facility", "") + + # messages emitted by the Introducer: client join/leave + if message.startswith("introducer: subscription[storage] request"): + print "new client" + self.sound("voice/hooray.aiff") + if message.startswith("introducer: unsubscribing"): + print "unsubscribe" + self.sound("electro/zaptrill-fade.aiff") + + # messages from the helper + if message == "file already found in grid": + print "already found" + self.sound("mech/ziplash-high.aiff") + #if message == "upload done": + if format == "plaintext_hash=%(plaintext_hash)s, SI=%(SI)s, size=%(size)d": + size = m.get("size") + print "upload done, size", size + self.sound("mech/ziplash-low.aiff") + if "fetching " in message: + # helper grabbing ciphertext from client + 
self.sound("voice/phoneme/sh.aiff", max=0.5) + + # messages from storage servers + if message.startswith("storage: slot_readv"): + #self.sound("voice/phoneme/r.aiff") + self.sound("percussion/wood-tap-hollow.aiff") + + # messages from webapi + if message.startswith("Retrieve") and "starting" in message: + self.sound("mech/metal-clack.aiff") + if message.startswith("Publish") and "starting" in message: + self.sound("mech/door-slam.aiff") + #self.sound("mech/metal-clash.aiff") + if ("web: %(clientip)s" in format + and m.get("method") == "POST" + and ("t=set_children" in m.get("uri", "") # FIXME: may give false-positives + or "t=set-children" in m.get("uri", ""))): + self.sound("mech/clock-clang.aiff") + + # generic messages + #if m['level'] < 20: + # self.sound("mech/keyboard-1.aiff") + if "_check_for_done but we're not running" in message: + pass + elif format == "excessive reactor delay (%ss)": + self.sound("animal/frog-cheep.aiff") + print "excessive delay %s: %s" % (m['args'][0], furl) + elif format == "excessive reactor delay (%(delay)ss)": + self.sound("animal/frog-cheep.aiff") + print "excessive delay %s: %s" % (m['delay'], furl) + elif facility == "foolscap.negotiation": + if (message == "got offer for an existing connection" + or "master told us to use a new connection" in message): + print "foolscap: got offer for an existing connection", message, furl + else: + #print "foolscap:", message + pass + elif m['level'] > 30: # SCARY or BAD + #self.sound("mech/alarm-bell.aiff") + self.sound("environ/thunder-tense.aiff") + print m, furl + elif m['level'] == 30: # WEIRD + self.sound("mech/glass-breaking.aiff") + print m, furl + elif m['level'] > 20: # UNUSUAL or INFREQUENT or CURIOUS + self.sound("mech/telephone-ring-old.aiff") + print m, furl + +class BoodleSender(protocol.Protocol): + def connectionMade(self): + print "connected to boodler" + self.factory.listener.boodler = self.transport + +class Bridge(Referenceable): + implements(RILogObserver) + + def 
__init__(self, furl, listener): + self.furl = furl + self.listener = listener + + def remote_msg(self, m): + d = defer.maybeDeferred(self.listener.msg, m, self.furl) + d.addErrback(log.err) + # never send errors to the remote side + +class Monitor(service.MultiService): + def __init__(self): + service.MultiService.__init__(self) + self.tub = Tub() + self.tub.setServiceParent(self) + self.listener = Listener() + self.targets = [] + for fn in os.listdir("."): + if fn.endswith(".furl") or fn.endswith(".furls"): + for i,line in enumerate(open(fn, "r").readlines()): + target = line.strip() + if target: + self.tub.connectTo(target, self._got_logpublisher, + fn, i, target) + + cf = protocol.ClientFactory() + cf.listener = self.listener + cf.protocol = BoodleSender + reactor.connectTCP("localhost", 31863, cf) + + def _got_logpublisher(self, publisher, fn, i, target): + print "connected to %s:%d, %s" % (fn, i, target) + b = Bridge(target, self.listener) + publisher.callRemote("subscribe_to_all", b) + + +m = Monitor() +application = service.Application("boodlegrid") +m.setServiceParent(application) + diff --git a/misc/boodlegrid.tac b/misc/boodlegrid.tac deleted file mode 100644 index d92c03f6..00000000 --- a/misc/boodlegrid.tac +++ /dev/null @@ -1,161 +0,0 @@ -# -*- python -*- - -"""Monitor a Tahoe grid, by playing sounds in response to remote events. - -To install: - 1: install Boodler, from http://www.eblong.com/zarf/boodler/ - 2: run "boodler.py -l listen.Sounds". This will run a daemon - that listens on a network socket (31863 by default) and - accepts commands in the form of "sound bird/crow1.aiff\n" - 3: copy this file into a new directory, which we'll call $BASEDIR - 4: write one or more logport FURLs into files named *.furl or *.furls, one - per line. All logports from all such files will be used. 
- 5: launch this daemon with 'cd $BASEDIR && twistd -y boodlegrid.tac' - -""" - -import os, time -from zope.interface import implements -from twisted.application import service -from twisted.internet import protocol, reactor, defer -from foolscap import Tub, Referenceable -from foolscap.logging.interfaces import RILogObserver -from twisted.python import log - -class Listener: - - def __init__(self): - self.boodler = None # filled in when we connect to boodler - self.last = {} - - def sound(self, name, slot=None, max=0.100): - if not self.boodler: - return - now = time.time() - if slot is None: - slot = name - if now < self.last.get(slot, 0) + max: - return # too soon - self.last[slot] = now - self.boodler.write("sound %s\n" % name) - - def msg(self, m, furl): - #print "got it", m - message = m.get("message", m.get("format", "")) - format = m.get("format", "") - facility = m.get("facility", "") - - # messages emitted by the Introducer: client join/leave - if message.startswith("introducer: subscription[storage] request"): - print "new client" - self.sound("voice/hooray.aiff") - if message.startswith("introducer: unsubscribing"): - print "unsubscribe" - self.sound("electro/zaptrill-fade.aiff") - - # messages from the helper - if message == "file already found in grid": - print "already found" - self.sound("mech/ziplash-high.aiff") - #if message == "upload done": - if format == "plaintext_hash=%(plaintext_hash)s, SI=%(SI)s, size=%(size)d": - size = m.get("size") - print "upload done, size", size - self.sound("mech/ziplash-low.aiff") - if "fetching " in message: - # helper grabbing ciphertext from client - self.sound("voice/phoneme/sh.aiff", max=0.5) - - # messages from storage servers - if message.startswith("storage: slot_readv"): - #self.sound("voice/phoneme/r.aiff") - self.sound("percussion/wood-tap-hollow.aiff") - - # messages from webapi - if message.startswith("Retrieve") and "starting" in message: - self.sound("mech/metal-clack.aiff") - if 
message.startswith("Publish") and "starting" in message: - self.sound("mech/door-slam.aiff") - #self.sound("mech/metal-clash.aiff") - if ("web: %(clientip)s" in format - and m.get("method") == "POST" - and ("t=set_children" in m.get("uri", "") # FIXME: may give false-positives - or "t=set-children" in m.get("uri", ""))): - self.sound("mech/clock-clang.aiff") - - # generic messages - #if m['level'] < 20: - # self.sound("mech/keyboard-1.aiff") - if "_check_for_done but we're not running" in message: - pass - elif format == "excessive reactor delay (%ss)": - self.sound("animal/frog-cheep.aiff") - print "excessive delay %s: %s" % (m['args'][0], furl) - elif format == "excessive reactor delay (%(delay)ss)": - self.sound("animal/frog-cheep.aiff") - print "excessive delay %s: %s" % (m['delay'], furl) - elif facility == "foolscap.negotiation": - if (message == "got offer for an existing connection" - or "master told us to use a new connection" in message): - print "foolscap: got offer for an existing connection", message, furl - else: - #print "foolscap:", message - pass - elif m['level'] > 30: # SCARY or BAD - #self.sound("mech/alarm-bell.aiff") - self.sound("environ/thunder-tense.aiff") - print m, furl - elif m['level'] == 30: # WEIRD - self.sound("mech/glass-breaking.aiff") - print m, furl - elif m['level'] > 20: # UNUSUAL or INFREQUENT or CURIOUS - self.sound("mech/telephone-ring-old.aiff") - print m, furl - -class BoodleSender(protocol.Protocol): - def connectionMade(self): - print "connected to boodler" - self.factory.listener.boodler = self.transport - -class Bridge(Referenceable): - implements(RILogObserver) - - def __init__(self, furl, listener): - self.furl = furl - self.listener = listener - - def remote_msg(self, m): - d = defer.maybeDeferred(self.listener.msg, m, self.furl) - d.addErrback(log.err) - # never send errors to the remote side - -class Monitor(service.MultiService): - def __init__(self): - service.MultiService.__init__(self) - self.tub = Tub() - 
self.tub.setServiceParent(self) - self.listener = Listener() - self.targets = [] - for fn in os.listdir("."): - if fn.endswith(".furl") or fn.endswith(".furls"): - for i,line in enumerate(open(fn, "r").readlines()): - target = line.strip() - if target: - self.tub.connectTo(target, self._got_logpublisher, - fn, i, target) - - cf = protocol.ClientFactory() - cf.listener = self.listener - cf.protocol = BoodleSender - reactor.connectTCP("localhost", 31863, cf) - - def _got_logpublisher(self, publisher, fn, i, target): - print "connected to %s:%d, %s" % (fn, i, target) - b = Bridge(target, self.listener) - publisher.callRemote("subscribe_to_all", b) - - -m = Monitor() -application = service.Application("boodlegrid") -m.setServiceParent(application) - diff --git a/misc/build-deb.py b/misc/build-deb.py deleted file mode 100644 index 4d133b0d..00000000 --- a/misc/build-deb.py +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/false # invoke this with a specific python - -import sys, shutil, os.path -from subprocess import Popen, PIPE - -PYTHON = sys.executable -ARCH = sys.argv[1] - -class SubprocessError(Exception): - pass - -def get_output(*cmd, **kwargs): - tolerate_stderr = kwargs.get("tolerate_stderr", False) - print " " + " ".join(cmd) - p = Popen(cmd, stdout=PIPE) - (out,err) = p.communicate() - rc = p.returncode - if rc != 0: - print >>sys.stderr, err - raise SubprocessError("command %s exited with rc=%s", (cmd, rc)) - if err and not tolerate_stderr: - print >>sys.stderr, "stderr:", err - raise SubprocessError("command emitted unexpected stderr") - print " =>", out, - return out - -def run(*cmd, **kwargs): - print " " + " ".join(cmd) -# if "stdin" in kwargs: -# stdin = kwargs.pop("stdin") -# p = Popen(cmd, stdin=PIPE, **kwargs) -# p.stdin.write(stdin) -# p.stdin.close() -# else: -# p = Popen(cmd, **kwargs) - p = Popen(cmd, **kwargs) - rc = p.wait() - if rc != 0: - raise SubprocessError("command %s exited with rc=%s", (cmd, rc)) - -# the very first time you run setup.py, it 
will download+build darcsver and -# whatnot, emitting noise to stdout. Run it once (and throw away that junk) -# to avoid treating that noise as the package name. -run(PYTHON, "setup.py", "darcsver") - -NAME = get_output(PYTHON, "setup.py", "--name").strip() -VERSION = get_output(PYTHON, "setup.py", "--version").strip() - -TARBALL = "%s-%s.tar.gz" % (NAME, VERSION) -DEBIAN_TARBALL = "%s_%s.orig.tar.gz" % (NAME, VERSION) -BUILDDIR = "build/debian/%s-%s" % (NAME, VERSION) - -run(PYTHON, "setup.py", "sdist", "--formats=gztar") -if os.path.exists("build/debian"): - shutil.rmtree("build/debian") -os.makedirs("build/debian") -shutil.copyfile("dist/%s" % TARBALL, "build/debian/%s" % DEBIAN_TARBALL) -run("tar", "xf", DEBIAN_TARBALL, cwd="build/debian") - -# now modify the tree for debian packaging. This is an algorithmic way of -# applying the debian .diff, which factors out some of the similarities -# between various debian/ubuntu releases. Everything we do after this point -# will show up in the generated .diff, and thus form the debian-specific part -# of the source package. -DEBDIR = os.path.join(BUILDDIR, "debian") -os.makedirs(DEBDIR) - -# The 'aliases' section in setup.cfg causes problems, so get rid of it. 
We -# could get rid of the whole file, but 1: find_links is still sort of useful, -# and 2: dpkg-buildpackage prefers to ignore file removal (as opposed to -# file-modification) - -#os.unlink(os.path.join(BUILDDIR, "setup.cfg")) -SETUPCFG = os.path.join(BUILDDIR, "setup.cfg") -lines = open(SETUPCFG, "r").readlines() -f = open(SETUPCFG, "w") -for l in lines: - if l.startswith("[aliases]"): - break - f.write(l) -f.close() - -for n in ["compat", "control", "copyright", "pycompat", "rules"]: - fn = "misc/debian/%s.%s" % (n, ARCH) - if not os.path.exists(fn): - fn = "misc/debian/%s" % n - assert os.path.exists(fn) - - shutil.copyfile(fn, os.path.join(DEBDIR, n)) - if n == "rules": - os.chmod(os.path.join(DEBDIR, n), 0755) # +x - -# We put "local package" on the first line of the changelog entry to suppress -# the lintian NMU warnings (since debchange's new entry's "author" will -# probably be different than the what the debian/control Maintainer: field -# says) - -DISTRIBUTION_MAP = {"sid": "unstable"} - -run("debchange", "--create", - "--package", NAME, - "--newversion", VERSION+"-1", - "--distribution", DISTRIBUTION_MAP.get(ARCH, ARCH), - "local package: 'make deb' build", cwd=BUILDDIR) - -# the package is ready to build. 'debuild' will produce the source package -# (.dsc+.diff.gz), then build the .deb and produce a .changes file ready for -# upload to an APT archive. The build log will go into a .build file. 
- -run("debuild", "-uc", "-us", cwd=BUILDDIR) diff --git a/misc/build_helpers/build-deb.py b/misc/build_helpers/build-deb.py new file mode 100644 index 00000000..4d133b0d --- /dev/null +++ b/misc/build_helpers/build-deb.py @@ -0,0 +1,110 @@ +#!/bin/false # invoke this with a specific python + +import sys, shutil, os.path +from subprocess import Popen, PIPE + +PYTHON = sys.executable +ARCH = sys.argv[1] + +class SubprocessError(Exception): + pass + +def get_output(*cmd, **kwargs): + tolerate_stderr = kwargs.get("tolerate_stderr", False) + print " " + " ".join(cmd) + p = Popen(cmd, stdout=PIPE) + (out,err) = p.communicate() + rc = p.returncode + if rc != 0: + print >>sys.stderr, err + raise SubprocessError("command %s exited with rc=%s", (cmd, rc)) + if err and not tolerate_stderr: + print >>sys.stderr, "stderr:", err + raise SubprocessError("command emitted unexpected stderr") + print " =>", out, + return out + +def run(*cmd, **kwargs): + print " " + " ".join(cmd) +# if "stdin" in kwargs: +# stdin = kwargs.pop("stdin") +# p = Popen(cmd, stdin=PIPE, **kwargs) +# p.stdin.write(stdin) +# p.stdin.close() +# else: +# p = Popen(cmd, **kwargs) + p = Popen(cmd, **kwargs) + rc = p.wait() + if rc != 0: + raise SubprocessError("command %s exited with rc=%s", (cmd, rc)) + +# the very first time you run setup.py, it will download+build darcsver and +# whatnot, emitting noise to stdout. Run it once (and throw away that junk) +# to avoid treating that noise as the package name. 
+run(PYTHON, "setup.py", "darcsver") + +NAME = get_output(PYTHON, "setup.py", "--name").strip() +VERSION = get_output(PYTHON, "setup.py", "--version").strip() + +TARBALL = "%s-%s.tar.gz" % (NAME, VERSION) +DEBIAN_TARBALL = "%s_%s.orig.tar.gz" % (NAME, VERSION) +BUILDDIR = "build/debian/%s-%s" % (NAME, VERSION) + +run(PYTHON, "setup.py", "sdist", "--formats=gztar") +if os.path.exists("build/debian"): + shutil.rmtree("build/debian") +os.makedirs("build/debian") +shutil.copyfile("dist/%s" % TARBALL, "build/debian/%s" % DEBIAN_TARBALL) +run("tar", "xf", DEBIAN_TARBALL, cwd="build/debian") + +# now modify the tree for debian packaging. This is an algorithmic way of +# applying the debian .diff, which factors out some of the similarities +# between various debian/ubuntu releases. Everything we do after this point +# will show up in the generated .diff, and thus form the debian-specific part +# of the source package. +DEBDIR = os.path.join(BUILDDIR, "debian") +os.makedirs(DEBDIR) + +# The 'aliases' section in setup.cfg causes problems, so get rid of it. 
We +# could get rid of the whole file, but 1: find_links is still sort of useful, +# and 2: dpkg-buildpackage prefers to ignore file removal (as opposed to +# file-modification) + +#os.unlink(os.path.join(BUILDDIR, "setup.cfg")) +SETUPCFG = os.path.join(BUILDDIR, "setup.cfg") +lines = open(SETUPCFG, "r").readlines() +f = open(SETUPCFG, "w") +for l in lines: + if l.startswith("[aliases]"): + break + f.write(l) +f.close() + +for n in ["compat", "control", "copyright", "pycompat", "rules"]: + fn = "misc/debian/%s.%s" % (n, ARCH) + if not os.path.exists(fn): + fn = "misc/debian/%s" % n + assert os.path.exists(fn) + + shutil.copyfile(fn, os.path.join(DEBDIR, n)) + if n == "rules": + os.chmod(os.path.join(DEBDIR, n), 0755) # +x + +# We put "local package" on the first line of the changelog entry to suppress +# the lintian NMU warnings (since debchange's new entry's "author" will +# probably be different than the what the debian/control Maintainer: field +# says) + +DISTRIBUTION_MAP = {"sid": "unstable"} + +run("debchange", "--create", + "--package", NAME, + "--newversion", VERSION+"-1", + "--distribution", DISTRIBUTION_MAP.get(ARCH, ARCH), + "local package: 'make deb' build", cwd=BUILDDIR) + +# the package is ready to build. 'debuild' will produce the source package +# (.dsc+.diff.gz), then build the .deb and produce a .changes file ready for +# upload to an APT archive. The build log will go into a .build file. + +run("debuild", "-uc", "-us", cwd=BUILDDIR) diff --git a/misc/build_helpers/check-build.py b/misc/build_helpers/check-build.py new file mode 100644 index 00000000..e293c6a1 --- /dev/null +++ b/misc/build_helpers/check-build.py @@ -0,0 +1,40 @@ +#! /usr/bin/env python + +# This helper script is used with the 'test-desert-island' Makefile target. 
+ +import sys + +good = True +build_out = sys.argv[1] +mode = sys.argv[2] + +print + +for line in open(build_out, "r"): + if mode == "no-downloads": + # when setup_requires= uses + # misc/dependencies/setuptools-0.6c8.egg, it causes a + # "Downloading: misc/dependencies/.." line to be emitted, + # which doesn't count as a network download. Lines that start + # with "Reading" indicate that it is fetching web pages in + # order to check for newer versions of packages. As long as it + # doesn't actually download any packages then it still passes + # this test. That is: it *would* have succeeded if you were on + # a Desert Island, an airplane with no network, behind a + # corporate firewall that disallows such connections, or if + # you had turned off your network prior to running "python + # setup.py build". A stronger requirement would be that it + # doesn't even try to check for new packages on remote hosts + # if it has all the packages that it needs locally, but we + # currently don't enforce that stronger requirement. + if line.startswith("Downloading http:"): + print line, + good = False +if good: + if mode == "no-downloads": + print "Good: build did not try to download any files" + sys.exit(0) +else: + if mode == "no-downloads": + print "Failed: build tried to download files" + sys.exit(1) diff --git a/misc/build_helpers/get-version.py b/misc/build_helpers/get-version.py new file mode 100644 index 00000000..a3ef5d11 --- /dev/null +++ b/misc/build_helpers/get-version.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +"""Determine the version number of the current tree. + +This should be run after 'setup.py darcsver'. It will emit a single line of text +to stdout, either of the form '0.2.0' if this is a release tree (i.e. no patches +have been added since the last release tag), or '0.2.0-34' (if 34 patches have +been added since the last release tag). If the tree does not have a well-formed +version number, this will emit 'unknown'. 
+ +The version string thus calculated should exactly match the version string +determined by setup.py (when it creates eggs and source tarballs) and also +the version available in the code image when you do: + + from allmydata import __version__ + +""" + +import os.path, re + +def get_version(): + VERSIONFILE = "src/allmydata/_version.py" + verstr = "unknown" + if os.path.exists(VERSIONFILE): + VSRE = re.compile("^verstr = ['\"]([^'\"]*)['\"]", re.M) + verstrline = open(VERSIONFILE, "rt").read() + mo = VSRE.search(verstrline) + if mo: + verstr = mo.group(1) + else: + raise ValueError("if version.py exists, it must be well-formed") + + return verstr + +if __name__ == '__main__': + verstr = get_version() + print verstr + diff --git a/misc/build_helpers/pyver.py b/misc/build_helpers/pyver.py new file mode 100644 index 00000000..d53db165 --- /dev/null +++ b/misc/build_helpers/pyver.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python + +import sys +print "python%d.%d" % (sys.version_info[:2]) diff --git a/misc/build_helpers/run-with-pythonpath.py b/misc/build_helpers/run-with-pythonpath.py new file mode 100644 index 00000000..9aee7fa6 --- /dev/null +++ b/misc/build_helpers/run-with-pythonpath.py @@ -0,0 +1,46 @@ +# -*- python -*- +# you must invoke this with an explicit python, from the tree root + +"""Run an arbitrary command with a PYTHONPATH that will include the Tahoe +code, including dependent libraries. 
Run this like: + + python misc/run-with-pythonpath.py python foo.py +or + python misc/run-with-pythonpath.py trial -r poll allmydata.test.test_util + +""" + +import os, sys, subprocess + +# figure out where support/lib/pythonX.X/site-packages is +# add it to os.environ["PYTHONPATH"] +# spawn the child process + + +def pylibdir(prefixdir): + pyver = "python%d.%d" % (sys.version_info[:2]) + if sys.platform == "win32": + return os.path.join(prefixdir, "Lib", "site-packages") + else: + return os.path.join(prefixdir, "lib", pyver, "site-packages") + +basedir = os.path.dirname(os.path.abspath(__file__)) +supportlib = pylibdir(os.path.abspath("support")) + +oldpp = os.environ.get("PYTHONPATH", "").split(os.pathsep) +if oldpp == [""]: + # grr silly split() behavior + oldpp = [] +newpp = os.pathsep.join(oldpp + [supportlib,]) +os.environ['PYTHONPATH'] = newpp + +from twisted.python.procutils import which +cmd = sys.argv[1] +if cmd and cmd[0] not in "/~.": + cmds = which(cmd) + if not cmds: + print >>sys.stderr, "'%s' not found on PATH" % (cmd,) + sys.exit(-1) + cmd = cmds[0] + +os.execve(cmd, sys.argv[1:], os.environ) diff --git a/misc/build_helpers/run_trial.py b/misc/build_helpers/run_trial.py new file mode 100644 index 00000000..4d06a5d0 --- /dev/null +++ b/misc/build_helpers/run_trial.py @@ -0,0 +1 @@ +from twisted.scripts.trial import run; run() \ No newline at end of file diff --git a/misc/build_helpers/show-tool-versions.py b/misc/build_helpers/show-tool-versions.py new file mode 100644 index 00000000..bec7e698 --- /dev/null +++ b/misc/build_helpers/show-tool-versions.py @@ -0,0 +1,96 @@ +#! 
/usr/bin/env python + +import locale, os, subprocess, sys + +def print_platform(): + try: + import platform + out = platform.platform() + print + print "platform:", out.replace("\n", " ") + except EnvironmentError, le: + sys.stderr.write("Got exception using 'platform': %s\n" % (le,)) + pass + +def print_python_ver(): + print "python:", sys.version.replace("\n", " "), + print ', maxunicode: ' + str(sys.maxunicode), + print ', stdout.encoding: ' + str(sys.stdout.encoding), + print ', stdin.encoding: ' + str(sys.stdin.encoding), + print ', filesystem.encoding: ' + str(sys.getfilesystemencoding()), + print ', locale.getpreferredencoding: ' + str(locale.getpreferredencoding()), + print ', os.path.supports_unicode_filenames: ' + str(os.path.supports_unicode_filenames), + print ', locale.defaultlocale: ' + str(locale.getdefaultlocale()), + print ', locale.locale: ' + str(locale.getlocale()) + +def print_cmd_ver(cmdlist, label=None): + try: + res = subprocess.Popen(cmdlist, stdin=open(os.devnull), + stdout=subprocess.PIPE).communicate()[0] + if label is None: + label = cmdlist[0] + print + print label + ': ' + res.replace("\n", " ") + except EnvironmentError, le: + sys.stderr.write("Got exception invoking '%s': %s\n" % (cmdlist[0], le,)) + pass + +def print_as_ver(): + if os.path.exists('a.out'): + print + print "WARNING: a file named a.out exists, and getting the version of the 'as' assembler writes to that filename, so I'm not attempting to get the version of 'as'." 
+ return + try: + res = subprocess.Popen(['as', '-version'], stdin=open(os.devnull), + stderr=subprocess.PIPE).communicate()[1] + print + print 'as: ' + res.replace("\n", " ") + os.remove('a.out') + except EnvironmentError, le: + sys.stderr.write("Got exception invoking '%s': %s\n" % ('as', le,)) + pass + +def print_setuptools_ver(): + try: + import pkg_resources + out = str(pkg_resources.require("setuptools")) + print + print "setuptools:", out.replace("\n", " ") + except (ImportError, EnvironmentError), le: + sys.stderr.write("Got exception using 'pkg_resources' to get the version of setuptools: %s\n" % (le,)) + pass + +def print_py_pkg_ver(pkgname): + try: + import pkg_resources + out = str(pkg_resources.require(pkgname)) + print + print pkgname + ': ' + out.replace("\n", " ") + except (ImportError, EnvironmentError), le: + sys.stderr.write("Got exception using 'pkg_resources' to get the version of %s: %s\n" % (pkgname, le,)) + pass + except pkg_resources.DistributionNotFound, le: + sys.stderr.write("pkg_resources reported no %s package installed: %s\n" % (pkgname, le,)) + pass + +print_platform() + +print_python_ver() + +print_cmd_ver(['buildbot', '--version']) +print_cmd_ver(['cl']) +print_cmd_ver(['gcc', '--version']) +print_cmd_ver(['g++', '--version']) +print_cmd_ver(['cryptest', 'V']) +print_cmd_ver(['darcs', '--version']) +print_cmd_ver(['darcs', '--exact-version'], label='darcs-exact-version') +print_cmd_ver(['7za']) + +print_as_ver() + +print_setuptools_ver() + +print_py_pkg_ver('coverage') +print_py_pkg_ver('trialcoverage') +print_py_pkg_ver('setuptools_trial') +print_py_pkg_ver('pyflakes') diff --git a/misc/build_helpers/sub-ver.py b/misc/build_helpers/sub-ver.py new file mode 100644 index 00000000..6a1392cb --- /dev/null +++ b/misc/build_helpers/sub-ver.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +from allmydata import __version__ as v + +import sys + +if len(sys.argv) == 1: + input = sys.stdin +elif len(sys.argv) == 2: + fname = sys.argv[1] + input 
= file(fname, 'rb') +else: + raise ValueError('must provide 0 or 1 argument (stdin, or filename)') + +vern = { + 'major': v.major or 0, + 'minor': v.minor or 0, + 'point': v.micro or 0, + 'micro': v.micro or 0, + 'revision' : v.revision or 0, + 'build': str(v), + } + +for line in input.readlines(): + print line % vern, + diff --git a/misc/build_helpers/test-darcs-boringfile.py b/misc/build_helpers/test-darcs-boringfile.py new file mode 100644 index 00000000..619a4e44 --- /dev/null +++ b/misc/build_helpers/test-darcs-boringfile.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python + +import sys +from subprocess import Popen, PIPE + +cmd = ["darcs", "whatsnew", "-l"] +p = Popen(cmd, stdout=PIPE) +output = p.communicate()[0] +print output +if output == "No changes!\n": + sys.exit(0) +sys.exit(1) + + diff --git a/misc/build_helpers/test_mac_diskimage.py b/misc/build_helpers/test_mac_diskimage.py new file mode 100644 index 00000000..7795dfd1 --- /dev/null +++ b/misc/build_helpers/test_mac_diskimage.py @@ -0,0 +1,78 @@ +# This script uses hdiutil to attach a dmg (whose name is derived from the +# appname and the version number passed in), asserts that it attached as +# expected, cd's into the mounted filesystem, executes "$appname +# --version-and-path", and checks whether the output of --version-and-path is +# right. + +# If all of the paths listed therein are loaded from within the current PWD +# then it exits with code 0. + +# If anything goes wrong then it exits with non-zero (failure). This is to +# check that the Mac OS "DMG" (disk image) package that gets built is correctly +# loading all of its packages from inside the image. 
+ +# Here is an example output from --version-and-path: + +# allmydata-tahoe: 1.4.1-r3916 (/home/zooko/playground/allmydata/tahoe/trunk/trunk/src), foolscap: 0.4.1 (/usr/local/lib/python2.6/dist-packages/foolscap-0.4.1-py2.6.egg), pycryptopp: 0.5.10 (/home/zooko/playground/allmydata/tahoe/trunk/trunk/support/lib/python2.6/site-packages/pycryptopp-0.5.10-py2.6-linux-x86_64.egg), zfec: 1.4.2 (/usr/local/lib/python2.6/dist-packages/zfec-1.4.2-py2.6-linux-x86_64.egg), Twisted: 8.2.0-r26987 (/usr/local/lib/python2.6/dist-packages/Twisted-8.2.0_r26987-py2.6-linux-x86_64.egg), Nevow: 0.9.32 (/home/zooko/playground/allmydata/tahoe/trunk/trunk/support/lib/python2.6/site-packages/Nevow-0.9.32-py2.6.egg), zope.interface: 3.4.0 (/usr/lib/python2.6/dist-packages), python: 2.6.2 (/usr/bin/python), platform: Linux-Ubuntu_9.04-x86_64-64bit_ELF (None), sqlite: 3.6.10 (unknown), simplejson: 2.0.1 (/usr/local/lib/python2.6/dist-packages/simplejson-2.0.1-py2.6-linux-x86_64.egg), argparse: 0.8.0 (/usr/local/lib/python2.6/dist-packages/argparse-0.8.0-py2.6.egg), pyOpenSSL: 0.7 (/home/zooko/playground/allmydata/tahoe/trunk/trunk/support/lib/python2.6/site-packages/pyOpenSSL-0.7-py2.6-linux-x86_64.egg), pyutil: 1.3.30 (/usr/local/lib/python2.6/dist-packages/pyutil-1.3.30-py2.6.egg), zbase32: 1.1.1 (/usr/local/lib/python2.6/dist-packages/zbase32-1.1.1-py2.6.egg), setuptools: 0.6c12dev (/home/zooko/playground/allmydata/tahoe/trunk/trunk/support/lib/python2.6/site-packages/setuptools-0.6c12dev.egg), pysqlite: 2.4.1 (/usr/lib/python2.6/sqlite3) + +import fcntl, os, re, subprocess, time + +def test_mac_diskimage(appname, version): + """ Return True on success, raise exception on failure. 
""" + assert isinstance(appname, basestring), appname + assert isinstance(version, basestring), version + DMGNAME='mac/'+appname+'-'+version+'.dmg' + + cmd = ['hdiutil', 'attach', DMGNAME] + attachit = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + rc = attachit.wait() + if rc != 0: + raise Exception("FAIL: hdiutil returned non-zero exit code: %r from command: %r" % (rc, cmd,)) + + stderrtxt = attachit.stderr.read() + if stderrtxt: + raise Exception("FAIL: hdiutil said something on stderr: %r" % (stderrtxt,)) + stdouttxt = attachit.stdout.read() + mo = re.search("^(/[^ ]+)\s+Apple_HFS\s+(/Volumes/.*)$", stdouttxt, re.UNICODE|re.MULTILINE) + if not mo: + raise Exception("FAIL: hdiutil said something on stdout that didn't match our expectations: %r" % (stdouttxt,)) + DEV=mo.group(1) + MOUNTPOINT=mo.group(2) + + callitpid = None + try: + basedir = MOUNTPOINT + '/' + appname + '.app/Contents/Resources' + + os.chdir(basedir) + + cmd = ['../MacOS/' + appname, '--version-and-path'] + callit = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + callitpid = callit.pid + assert callitpid + deadline = time.time() + 2 # If it takes longer than 2 seconds to do this then it fails. + while True: + rc = callit.poll() + if rc is not None: + break + if time.time() > deadline: + os.kill(callitpid, 15) + raise Exception("FAIL: it took longer than 2 seconds to invoke $appname --version-and-path. stdout: %s, stderr: %s" % (callit.stdout.read(), callit.stderr.read())) + time.sleep(0.05) + + if rc != 0: + raise Exception("FAIL: $appname --version-and-path returned non-zero exit code: %r" % (rc,)) + + stdouttxt = callit.stdout.read() + + PKG_VER_PATH_RE=re.compile("(\S+): (\S+) \((.+?)\), ", re.UNICODE) + + for mo in PKG_VER_PATH_RE.finditer(stdouttxt): + if not mo.group(3).startswith(basedir): + raise Exception("FAIL: found package not loaded from basedir (%s); package was: %s" % (basedir, mo.groups(),)) + + return True # success! 
+ finally: + if callitpid: + os.kill(callitpid, 9) + os.waitpid(callitpid, 0) + subprocess.call(['hdiutil', 'detach', '-Force', DEV]) diff --git a/misc/check-build.py b/misc/check-build.py deleted file mode 100644 index e293c6a1..00000000 --- a/misc/check-build.py +++ /dev/null @@ -1,40 +0,0 @@ -#! /usr/bin/env python - -# This helper script is used with the 'test-desert-island' Makefile target. - -import sys - -good = True -build_out = sys.argv[1] -mode = sys.argv[2] - -print - -for line in open(build_out, "r"): - if mode == "no-downloads": - # when setup_requires= uses - # misc/dependencies/setuptools-0.6c8.egg, it causes a - # "Downloading: misc/dependencies/.." line to be emitted, - # which doesn't count as a network download. Lines that start - # with "Reading" indicate that it is fetching web pages in - # order to check for newer versions of packages. As long as it - # doesn't actually download any packages then it still passes - # this test. That is: it *would* have succeeded if you were on - # a Desert Island, an airplane with no network, behind a - # corporate firewall that disallows such connections, or if - # you had turned off your network prior to running "python - # setup.py build". A stronger requirement would be that it - # doesn't even try to check for new packages on remote hosts - # if it has all the packages that it needs locally, but we - # currently don't enforce that stronger requirement. 
- if line.startswith("Downloading http:"): - print line, - good = False -if good: - if mode == "no-downloads": - print "Good: build did not try to download any files" - sys.exit(0) -else: - if mode == "no-downloads": - print "Failed: build tried to download files" - sys.exit(1) diff --git a/misc/coding_tools/coverage.el b/misc/coding_tools/coverage.el new file mode 100644 index 00000000..bad490fd --- /dev/null +++ b/misc/coding_tools/coverage.el @@ -0,0 +1,120 @@ + +(defvar coverage-annotation-file ".coverage.el") +(defvar coverage-annotations nil) + +(defun find-coverage-annotation-file () + (let ((dir (file-name-directory buffer-file-name)) + (olddir "/")) + (while (and (not (equal dir olddir)) + (not (file-regular-p (concat dir coverage-annotation-file)))) + (setq olddir dir + dir (file-name-directory (directory-file-name dir)))) + (and (not (equal dir olddir)) (concat dir coverage-annotation-file)) +)) + +(defun load-coverage-annotations () + (let* ((annotation-file (find-coverage-annotation-file)) + (coverage + (with-temp-buffer + (insert-file-contents annotation-file) + (let ((form (read (current-buffer)))) + (eval form))))) + (setq coverage-annotations coverage) + coverage + )) + +(defun coverage-unannotate () + (save-excursion + (dolist (ov (overlays-in (point-min) (point-max))) + (delete-overlay ov)) + (setq coverage-this-buffer-is-annotated nil) + (message "Removed annotations") +)) + +;; in emacs22, it will be possible to put the annotations in the fringe. Set +;; a display property for one of the characters in the line, using +;; (right-fringe BITMAP FACE), where BITMAP should probably be right-triangle +;; or so, and FACE should probably be '(:foreground "red"). We can also +;; create new bitmaps, with faces. To do tartans will require a lot of +;; bitmaps, and you've only got about 8 pixels to work with. + +;; unfortunately emacs21 gives us less control over the fringe. 
We can use +;; overlays to put letters on the left or right margins (in the text area, +;; overriding actual program text), and to modify the text being displayed +;; (by changing its background color, or adding a box around each word). + +(defun coverage-annotate (show-code) + (let ((allcoverage (load-coverage-annotations)) + (filename-key (expand-file-name buffer-file-truename)) + thiscoverage code-lines covered-lines uncovered-code-lines + ) + (while (and (not (gethash filename-key allcoverage nil)) + (string-match "/" filename-key)) + ;; eat everything up to and including the first slash, then look again + (setq filename-key (substring filename-key + (+ 1 (string-match "/" filename-key))))) + (setq thiscoverage (gethash filename-key allcoverage nil)) + (if thiscoverage + (progn + (setq coverage-this-buffer-is-annotated t) + (setq code-lines (nth 0 thiscoverage) + covered-lines (nth 1 thiscoverage) + uncovered-code-lines (nth 2 thiscoverage) + ) + + (save-excursion + (dolist (ov (overlays-in (point-min) (point-max))) + (delete-overlay ov)) + (if show-code + (dolist (line code-lines) + (goto-line line) + ;;(add-text-properties (point) (line-end-position) '(face bold) ) + (overlay-put (make-overlay (point) (line-end-position)) + ;'before-string "C" + ;'face '(background-color . 
"green") + 'face '(:background "dark green") + ) + )) + (dolist (line uncovered-code-lines) + (goto-line line) + (overlay-put (make-overlay (point) (line-end-position)) + ;'before-string "D" + ;'face '(:background "blue") + ;'face '(:underline "blue") + 'face '(:box "red") + ) + ) + (message "Added annotations") + ) + ) + (message "unable to find coverage for this file")) +)) + +(defun coverage-toggle-annotations (show-code) + (interactive "P") + (if coverage-this-buffer-is-annotated + (coverage-unannotate) + (coverage-annotate show-code)) +) + + +(setq coverage-this-buffer-is-annotated nil) +(make-variable-buffer-local 'coverage-this-buffer-is-annotated) + +(define-minor-mode coverage-annotation-minor-mode + "Minor mode to annotate code-coverage information" + nil + " CA" + '( + ("\C-c\C-a" . coverage-toggle-annotations) + ) + + () ; forms run on mode entry/exit +) + +(defun maybe-enable-coverage-mode () + (if (string-match "/src/allmydata/" (buffer-file-name)) + (coverage-annotation-minor-mode t) + )) + +(add-hook 'python-mode-hook 'maybe-enable-coverage-mode) diff --git a/misc/coding_tools/coverage2el.py b/misc/coding_tools/coverage2el.py new file mode 100644 index 00000000..ed94bd0f --- /dev/null +++ b/misc/coding_tools/coverage2el.py @@ -0,0 +1,45 @@ + +from coverage import coverage, summary + +class ElispReporter(summary.SummaryReporter): + def report(self): + self.find_code_units(None, ["/System", "/Library", "/usr/lib", + "support/lib", "src/allmydata/test"]) + + out = open(".coverage.el", "w") + out.write(""" +;; This is an elisp-readable form of the figleaf coverage data. It defines a +;; single top-level hash table in which the key is an asolute pathname, and +;; the value is a three-element list. The first element of this list is a +;; list of line numbers that represent actual code statements. The second is +;; a list of line numbers for lines which got used during the unit test. 
The +;; third is a list of line numbers for code lines that were not covered +;; (since 'code' and 'covered' start as sets, this last list is equal to +;; 'code - covered'). + + """) + out.write("(let ((results (make-hash-table :test 'equal)))\n") + for cu in self.code_units: + f = cu.filename + (fn, executable, missing, mf) = self.coverage.analysis(cu) + code_linenumbers = executable + uncovered_code = missing + covered_linenumbers = sorted(set(executable) - set(missing)) + out.write(" (puthash \"%s\" '((%s) (%s) (%s)) results)\n" + % (f, + " ".join([str(ln) for ln in sorted(code_linenumbers)]), + " ".join([str(ln) for ln in sorted(covered_linenumbers)]), + " ".join([str(ln) for ln in sorted(uncovered_code)]), + )) + out.write(" results)\n") + out.close() + +def main(): + c = coverage() + c.load() + ElispReporter(c).report() + +if __name__ == '__main__': + main() + + diff --git a/misc/coding_tools/find-trailing-spaces.py b/misc/coding_tools/find-trailing-spaces.py new file mode 100644 index 00000000..ad2cc583 --- /dev/null +++ b/misc/coding_tools/find-trailing-spaces.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python + +import os, sys + +from twisted.python import usage + +class Options(usage.Options): + optFlags = [ + ("recursive", "r", "Search for .py files recursively"), + ] + def parseArgs(self, *starting_points): + self.starting_points = starting_points + +found = [False] + +def check(fn): + f = open(fn, "r") + for i,line in enumerate(f.readlines()): + if line == "\n": + continue + if line[-1] == "\n": + line = line[:-1] + if line.rstrip() != line: + # the %s:%d:%d: lets emacs' compile-mode jump to those locations + print "%s:%d:%d: trailing whitespace" % (fn, i+1, len(line)+1) + found[0] = True + f.close() + +o = Options() +o.parseOptions() +if o['recursive']: + for starting_point in o.starting_points: + for root, dirs, files in os.walk(starting_point): + for fn in [f for f in files if f.endswith(".py")]: + fn = os.path.join(root, fn) + check(fn) +else: + for fn in 
o.starting_points: + check(fn) +if found[0]: + sys.exit(1) +sys.exit(0) diff --git a/misc/coding_tools/fixshebangs.py b/misc/coding_tools/fixshebangs.py new file mode 100644 index 00000000..02446490 --- /dev/null +++ b/misc/coding_tools/fixshebangs.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python + +from allmydata.util import fileutil + +import re, shutil, sys + +R=re.compile("^#! */usr/bin/python *$") +for fname in sys.argv[1:]: + inf = open(fname, "rU") + rntf = fileutil.ReopenableNamedTemporaryFile() + outf = open(rntf.name, "w") + first = True + for l in inf: + if first and R.search(l): + outf.write("#!/usr/bin/env python\n") + else: + outf.write(l) + first = False + outf.close() + + try: + shutil.move(rntf.name, fname) + except EnvironmentError: + # Couldn't atomically overwrite, so just hope that this process doesn't die + # and the target file doesn't get recreated in between the following two + # operations: + shutil.move(fname, fname + ".bak") + shutil.move(rntf.name, fname) + + fileutil.remove_if_possible(fname + ".bak") diff --git a/misc/coding_tools/make-canary-files.py b/misc/coding_tools/make-canary-files.py new file mode 100644 index 00000000..44f0348a --- /dev/null +++ b/misc/coding_tools/make-canary-files.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python + +""" +Given a list of nodeids and a 'convergence' file, create a bunch of files +that will (when encoded at k=1,N=1) be uploaded to specific nodeids. + +Run this as follows: + + make-canary-files.py -c PATH/TO/convergence -n PATH/TO/nodeids -k 1 -N 1 + +It will create a directory named 'canaries', with one file per nodeid named +'$NODEID-$NICKNAME.txt', that contains some random text. + +The 'nodeids' file should contain one base32 nodeid per line, followed by the +optional nickname, like: + +--- +5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo server12 +vb7vm2mneyid5jbyvcbk2wb5icdhwtun server13 +... 
+--- + +The resulting 'canaries/5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo-server12.txt' file +will, when uploaded with the given (convergence,k,N) pair, have its first +share placed on the 5yyq/server12 storage server. If N>1, the other shares +will be placed elsewhere, of course. + +This tool can be useful to construct a set of 'canary' files, which can then +be uploaded to storage servers, and later downloaded to test a grid's health. +If you are able to download the canary for server12 via some tahoe node X, +then the following properties are known to be true: + + node X is running, and has established a connection to server12 + server12 is running, and returning data for at least the given file + +Using k=1/N=1 creates a separate test for each server. The test process is +then to download the whole directory of files (perhaps with a t=deep-check +operation). + +Alternatively, you could upload with the usual k=3/N=10 and then move/delete +shares to put all N shares on a single server. + +Note that any changes to the nodeid list will affect the placement of shares. +Shares should be uploaded with the same nodeid list as this tool used when +constructing the files. + +Also note that this tool uses the Tahoe codebase, so it should be run on a +system where Tahoe is installed, or in a source tree with setup.py like this: + + setup.py run_with_pythonpath -p -c 'misc/make-canary-files.py ARGS..' 
+""" + +import os, sha +from twisted.python import usage +from allmydata.immutable import upload +from allmydata.util import base32 + +class Options(usage.Options): + optParameters = [ + ("convergence", "c", None, "path to NODEDIR/private/convergence"), + ("nodeids", "n", None, "path to file with one base32 nodeid per line"), + ("k", "k", 1, "number of necessary shares, defaults to 1", int), + ("N", "N", 1, "number of total shares, defaults to 1", int), + ] + optFlags = [ + ("verbose", "v", "Be noisy"), + ] + +opts = Options() +opts.parseOptions() + +verbose = bool(opts["verbose"]) + +nodes = {} +for line in open(opts["nodeids"], "r").readlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + pieces = line.split(None, 1) + if len(pieces) == 2: + nodeid_s, nickname = pieces + else: + nodeid_s = pieces[0] + nickname = None + nodeid = base32.a2b(nodeid_s) + nodes[nodeid] = nickname + +if opts["k"] != 3 or opts["N"] != 10: + print "note: using non-default k/N requires patching the Tahoe code" + print "src/allmydata/client.py line 55, DEFAULT_ENCODING_PARAMETERS" + +convergence_file = os.path.expanduser(opts["convergence"]) +convergence_s = open(convergence_file, "rb").read().strip() +convergence = base32.a2b(convergence_s) + +def get_permuted_peers(key): + results = [] + for nodeid in nodes: + permuted = sha.new(key + nodeid).digest() + results.append((permuted, nodeid)) + results.sort(lambda a,b: cmp(a[0], b[0])) + return [ r[1] for r in results ] + +def find_share_for_target(target): + target_s = base32.b2a(target) + prefix = "The first share of this file will be placed on " + target_s + "\n" + prefix += "This data is random: " + attempts = 0 + while True: + attempts += 1 + suffix = base32.b2a(os.urandom(10)) + if verbose: print " trying", suffix, + data = prefix + suffix + "\n" + assert len(data) > 55 # no LIT files + # now, what storage index will this get? 
+ u = upload.Data(data, convergence) + eu = upload.EncryptAnUploadable(u) + d = eu.get_storage_index() # this happens to run synchronously + def _got_si(si): + if verbose: print "SI", base32.b2a(si), + peerlist = get_permuted_peers(si) + if peerlist[0] == target: + # great! + if verbose: print " yay!" + fn = base32.b2a(target) + if nodes[target]: + nickname = nodes[target].replace("/", "_") + fn += "-" + nickname + fn += ".txt" + fn = os.path.join("canaries", fn) + open(fn, "w").write(data) + return True + # nope, must try again + if verbose: print " boo" + return False + d.addCallback(_got_si) + # get sneaky and look inside the Deferred for the synchronous result + if d.result: + return attempts + +os.mkdir("canaries") +attempts = [] +for target in nodes: + target_s = base32.b2a(target) + print "working on", target_s + attempts.append(find_share_for_target(target)) +print "done" +print "%d attempts total, avg %d per target, max %d" % \ + (sum(attempts), 1.0* sum(attempts) / len(nodes), max(attempts)) + + diff --git a/misc/coding_tools/make_umid b/misc/coding_tools/make_umid new file mode 100644 index 00000000..60aab23a --- /dev/null +++ b/misc/coding_tools/make_umid @@ -0,0 +1,55 @@ +#!/usr/bin/env python + +"""Create a short probably-unique string for use as a umid= argument in a +Foolscap log() call, to make it easier to locate the source code that +generated the message. The main text of the log message is frequently +unhelpful for this, and python doesn't make it cheap to compile in the +filename and line number of logging calls. + +Given a message-unique-ID like 'aXoWcA', make your logging call look like: + + log.msg('OMG badness', level=log.WEIRD, umid='aXoWcA') + +Then later, if this message actually occurs, you can grep your source tree +for aXoWcA to locate the code that caused it. + +Just stick to the convention that 'umid=' is reserved for this job. It is a +good idea to make all the logging statements that could provoke an Incident +(i.e. 
those at level=log.WEIRD or higher) have umid= arguments, to make it +easier to write classifier functions for the incident-gatherer. + +""" + +''' +The following elisp code may be useful: + + (defun insert-umid () + (interactive) + (insert ", umid=\"") + (call-process "make_umid" nil t) + (delete-char -1) + (insert "\"") + ) + (global-set-key (kbd "C-\`") 'insert-umid) +''' + +# ' # emacs gets confused by the odd number of single-quotes there + +import os, base64, sys + +def make_id(): + while True: + m = os.urandom(4) # this gives 6-character message ids + m = base64.b64encode(m) + if "/" in m or "+" in m: + continue + m = m.replace("=", "") + break + return m + +count = 1 +if len(sys.argv) > 1: + count = int(sys.argv[1]) +for i in range(count): + print make_id() + diff --git a/misc/count_dirs.py b/misc/count_dirs.py deleted file mode 100644 index 78412d33..00000000 --- a/misc/count_dirs.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python - -""" -This tool estimates how much space would be consumed by a filetree into which -a native directory was copied. - -One open question is how we should encode directories. One approach is to put -a block of data on a server, one per directory, which effectively contains a -dictionary that maps child names to targets (URIs for children which are -files, slotnames for children which are directories). To prevent the server -which hosts this data from either learning its contents or corrupting them, -we can add encryption and integrity checks to the data, at the cost of -storage overhead. - -This program is intended to estimate the size of these data blocks using -real-world filenames and directories. You point it at a real directory, and -it does a recursive walk of the filesystem, adding up the size of the -filetree data structures that would be required to represent it. - -MODES: - - A: no confidentiality or integrity checking. 
Directories are serialized - plaintext dictionaries which map file/subdir names to targets (either - URIs or slotnames). Each entry can be changed independently. - B1: child names and targets are encrypted. No integrity checks, so the - server can still corrupt the contents undetectably. Each entry can - still be changed independently. - B2: same security properties as B1, but the dictionary is serialized before - encryption. This reduces overhead at the cost of preventing independent - updates of entries (all entries must be updated at the same time, so - test-and-set operations are required to avoid data-losing races) - C1: like B1, but adding HMACs to each entry to guarantee data integrity - C2: like B2, but adding a single block-wide HMAC for data integrity - -""" - -import sys, os.path - -#URI:7jzbza6iwdsk5xbxsvdgjaugyrhetw64zpflp4gihmyh5krjblra====:a5qdejwbimu5b2wfke7xwexxlq======:gzeub5v42rjbgd7ccawnahu2evqd42lpdpzd447c6zkmdvjkpowq====:25:100:219889 -# that's a printable representation of two 32-byte hashes (storage index, URI -# extension block hash) and a 16-byte AES read-capability key, and some -# share-count and size information -URI_SIZE = 164 - -#pb://xextf3eap44o3wi27mf7ehiur6wvhzr6@207.7.153.180:56677,127.0.0.1:56677/zilcw5uz2yyyo=== -# that's a FURL which points at the slot. Modes that need to add a -# read-capability AES key will need more space. -SLOTNAME_SIZE = 90 - - -def slotsize(mode, numfiles, numdirs): - # URI_sizes is the total space taken up by the target (dict keys) strings - # for all of the targets that are files, instead of directories - target_sizes_for_files = numfiles * URI_SIZE - slotname_size = SLOTNAME_SIZE - if mode in ("B1", "B2", "C1", "C2"): - slotname_size += 16 - # slotname_sizes is the total space taken up by the target strings for - # all the targets that are directories, instead of files. 
These are - # bigger when the read+write-cap slotname is larger than the store-cap, - # which happens as soon as we seek to prevent the slot's host from - # reading or corrupting it. - target_sizes_for_subdirs = numdirs * slotname_size - - # now how much overhead is there for each entry? - per_slot, per_entry = 0, 0 - if mode == "B1": - per_entry = 16+12+12 - elif mode == "C1": - per_entry = 16+12+12 + 32+32 - elif mode == "B2": - per_slot = 12 - elif mode == "C2": - per_slot = 12+32 - num_entries = numfiles + numdirs - total = (target_sizes_for_files + - target_sizes_for_subdirs + - per_slot + - per_entry * num_entries - ) - return total - -MODES = ("A", "B1", "B2", "C1", "C2") - -def scan(root): - total = dict([(mode,0) for mode in MODES]) - num_files = 0 - num_dirs = 0 - for absroot, dirs, files in os.walk(root): - #print absroot - #print " %d files" % len(files) - #print " %d subdirs" % len(dirs) - num_files += len(files) - num_dirs += len(dirs) - stringsize = len(''.join(files) + ''.join(dirs)) - for mode in MODES: - total[mode] += slotsize(mode, len(files), len(dirs)) + stringsize - - print "%d directories" % num_dirs - print "%d files" % num_files - for mode in sorted(total.keys()): - print "%s: %d bytes" % (mode, total[mode]) - - -if __name__ == '__main__': - scan(sys.argv[1]) - -""" -260:warner@monolith% ./count_dirs.py ~ -70925 directories -457199 files -A: 90042361 bytes -B1: 112302121 bytes -B2: 92027061 bytes -C1: 146102057 bytes -C2: 94293461 bytes - -""" diff --git a/misc/coverage.el b/misc/coverage.el deleted file mode 100644 index bad490fd..00000000 --- a/misc/coverage.el +++ /dev/null @@ -1,120 +0,0 @@ - -(defvar coverage-annotation-file ".coverage.el") -(defvar coverage-annotations nil) - -(defun find-coverage-annotation-file () - (let ((dir (file-name-directory buffer-file-name)) - (olddir "/")) - (while (and (not (equal dir olddir)) - (not (file-regular-p (concat dir coverage-annotation-file)))) - (setq olddir dir - dir (file-name-directory 
(directory-file-name dir)))) - (and (not (equal dir olddir)) (concat dir coverage-annotation-file)) -)) - -(defun load-coverage-annotations () - (let* ((annotation-file (find-coverage-annotation-file)) - (coverage - (with-temp-buffer - (insert-file-contents annotation-file) - (let ((form (read (current-buffer)))) - (eval form))))) - (setq coverage-annotations coverage) - coverage - )) - -(defun coverage-unannotate () - (save-excursion - (dolist (ov (overlays-in (point-min) (point-max))) - (delete-overlay ov)) - (setq coverage-this-buffer-is-annotated nil) - (message "Removed annotations") -)) - -;; in emacs22, it will be possible to put the annotations in the fringe. Set -;; a display property for one of the characters in the line, using -;; (right-fringe BITMAP FACE), where BITMAP should probably be right-triangle -;; or so, and FACE should probably be '(:foreground "red"). We can also -;; create new bitmaps, with faces. To do tartans will require a lot of -;; bitmaps, and you've only got about 8 pixels to work with. - -;; unfortunately emacs21 gives us less control over the fringe. We can use -;; overlays to put letters on the left or right margins (in the text area, -;; overriding actual program text), and to modify the text being displayed -;; (by changing its background color, or adding a box around each word). 
- -(defun coverage-annotate (show-code) - (let ((allcoverage (load-coverage-annotations)) - (filename-key (expand-file-name buffer-file-truename)) - thiscoverage code-lines covered-lines uncovered-code-lines - ) - (while (and (not (gethash filename-key allcoverage nil)) - (string-match "/" filename-key)) - ;; eat everything up to and including the first slash, then look again - (setq filename-key (substring filename-key - (+ 1 (string-match "/" filename-key))))) - (setq thiscoverage (gethash filename-key allcoverage nil)) - (if thiscoverage - (progn - (setq coverage-this-buffer-is-annotated t) - (setq code-lines (nth 0 thiscoverage) - covered-lines (nth 1 thiscoverage) - uncovered-code-lines (nth 2 thiscoverage) - ) - - (save-excursion - (dolist (ov (overlays-in (point-min) (point-max))) - (delete-overlay ov)) - (if show-code - (dolist (line code-lines) - (goto-line line) - ;;(add-text-properties (point) (line-end-position) '(face bold) ) - (overlay-put (make-overlay (point) (line-end-position)) - ;'before-string "C" - ;'face '(background-color . "green") - 'face '(:background "dark green") - ) - )) - (dolist (line uncovered-code-lines) - (goto-line line) - (overlay-put (make-overlay (point) (line-end-position)) - ;'before-string "D" - ;'face '(:background "blue") - ;'face '(:underline "blue") - 'face '(:box "red") - ) - ) - (message "Added annotations") - ) - ) - (message "unable to find coverage for this file")) -)) - -(defun coverage-toggle-annotations (show-code) - (interactive "P") - (if coverage-this-buffer-is-annotated - (coverage-unannotate) - (coverage-annotate show-code)) -) - - -(setq coverage-this-buffer-is-annotated nil) -(make-variable-buffer-local 'coverage-this-buffer-is-annotated) - -(define-minor-mode coverage-annotation-minor-mode - "Minor mode to annotate code-coverage information" - nil - " CA" - '( - ("\C-c\C-a" . 
coverage-toggle-annotations) - ) - - () ; forms run on mode entry/exit -) - -(defun maybe-enable-coverage-mode () - (if (string-match "/src/allmydata/" (buffer-file-name)) - (coverage-annotation-minor-mode t) - )) - -(add-hook 'python-mode-hook 'maybe-enable-coverage-mode) diff --git a/misc/coverage2el.py b/misc/coverage2el.py deleted file mode 100644 index ed94bd0f..00000000 --- a/misc/coverage2el.py +++ /dev/null @@ -1,45 +0,0 @@ - -from coverage import coverage, summary - -class ElispReporter(summary.SummaryReporter): - def report(self): - self.find_code_units(None, ["/System", "/Library", "/usr/lib", - "support/lib", "src/allmydata/test"]) - - out = open(".coverage.el", "w") - out.write(""" -;; This is an elisp-readable form of the figleaf coverage data. It defines a -;; single top-level hash table in which the key is an asolute pathname, and -;; the value is a three-element list. The first element of this list is a -;; list of line numbers that represent actual code statements. The second is -;; a list of line numbers for lines which got used during the unit test. The -;; third is a list of line numbers for code lines that were not covered -;; (since 'code' and 'covered' start as sets, this last list is equal to -;; 'code - covered'). 
- - """) - out.write("(let ((results (make-hash-table :test 'equal)))\n") - for cu in self.code_units: - f = cu.filename - (fn, executable, missing, mf) = self.coverage.analysis(cu) - code_linenumbers = executable - uncovered_code = missing - covered_linenumbers = sorted(set(executable) - set(missing)) - out.write(" (puthash \"%s\" '((%s) (%s) (%s)) results)\n" - % (f, - " ".join([str(ln) for ln in sorted(code_linenumbers)]), - " ".join([str(ln) for ln in sorted(covered_linenumbers)]), - " ".join([str(ln) for ln in sorted(uncovered_code)]), - )) - out.write(" results)\n") - out.close() - -def main(): - c = coverage() - c.load() - ElispReporter(c).report() - -if __name__ == '__main__': - main() - - diff --git a/misc/coverage2text.py b/misc/coverage2text.py deleted file mode 100644 index f91e25b5..00000000 --- a/misc/coverage2text.py +++ /dev/null @@ -1,116 +0,0 @@ - -import sys -from coverage import coverage -from coverage.results import Numbers -from coverage.summary import SummaryReporter -from twisted.python import usage - -# this is an adaptation of the code behind "coverage report", modified to -# display+sortby "lines uncovered", which (IMHO) is more important of a -# metric than lines covered or percentage covered. Concentrating on the files -# with the most uncovered lines encourages getting the tree and test suite -# into a state that provides full line-coverage on all files. - -# much of this code was adapted from coverage/summary.py in the 'coverage' -# distribution, and is used under their BSD license. 
- -class Options(usage.Options): - optParameters = [ - ("sortby", "s", "uncovered", "how to sort: uncovered, covered, name"), - ] - -class MyReporter(SummaryReporter): - def report(self, outfile=None, sortby="uncovered"): - self.find_code_units(None, ["/System", "/Library", "/usr/lib", - "support/lib", "src/allmydata/test"]) - - # Prepare the formatting strings - max_name = max([len(cu.name) for cu in self.code_units] + [5]) - fmt_name = "%%- %ds " % max_name - fmt_err = "%s %s: %s\n" - header1 = (fmt_name % "" ) + " Statements " - header2 = (fmt_name % "Name") + " Uncovered Covered" - fmt_coverage = fmt_name + "%9d %7d " - if self.branches: - header1 += " Branches " - header2 += " Found Excutd" - fmt_coverage += " %6d %6d" - header1 += " Percent" - header2 += " Covered" - fmt_coverage += " %7d%%" - if self.show_missing: - header1 += " " - header2 += " Missing" - fmt_coverage += " %s" - rule = "-" * len(header1) + "\n" - header1 += "\n" - header2 += "\n" - fmt_coverage += "\n" - - if not outfile: - outfile = sys.stdout - - # Write the header - outfile.write(header1) - outfile.write(header2) - outfile.write(rule) - - total = Numbers() - total_uncovered = 0 - - lines = [] - for cu in self.code_units: - try: - analysis = self.coverage._analyze(cu) - nums = analysis.numbers - uncovered = nums.n_statements - nums.n_executed - total_uncovered += uncovered - args = (cu.name, uncovered, nums.n_executed) - if self.branches: - args += (nums.n_branches, nums.n_executed_branches) - args += (nums.pc_covered,) - if self.show_missing: - args += (analysis.missing_formatted(),) - if sortby == "covered": - sortkey = nums.pc_covered - elif sortby == "uncovered": - sortkey = uncovered - else: - sortkey = cu.name - lines.append((sortkey, fmt_coverage % args)) - total += nums - except KeyboardInterrupt: # pragma: no cover - raise - except: - if not self.ignore_errors: - typ, msg = sys.exc_info()[:2] - outfile.write(fmt_err % (cu.name, typ.__name__, msg)) - lines.sort() - if sortby in 
("uncovered", "covered"): - lines.reverse() - for sortkey,line in lines: - outfile.write(line) - - if total.n_files > 1: - outfile.write(rule) - args = ("TOTAL", total_uncovered, total.n_executed) - if self.branches: - args += (total.n_branches, total.n_executed_branches) - args += (total.pc_covered,) - if self.show_missing: - args += ("",) - outfile.write(fmt_coverage % args) - -def report(o): - c = coverage() - c.load() - r = MyReporter(c, show_missing=False, ignore_errors=False) - r.report(sortby=o['sortby']) - -if __name__ == '__main__': - o = Options() - o.parseOptions() - report(o) - - - diff --git a/misc/cpu-watcher-poll.py b/misc/cpu-watcher-poll.py deleted file mode 100644 index 68ac4b46..00000000 --- a/misc/cpu-watcher-poll.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python - -from foolscap import Tub, eventual -from twisted.internet import reactor -import sys -import pprint - -def oops(f): - print "ERROR" - print f - -def fetch(furl): - t = Tub() - t.startService() - d = t.getReference(furl) - d.addCallback(lambda rref: rref.callRemote("get_averages")) - d.addCallback(pprint.pprint) - return d - -d = eventual.fireEventually(sys.argv[1]) -d.addCallback(fetch) -d.addErrback(oops) -d.addBoth(lambda res: reactor.stop()) -reactor.run() diff --git a/misc/cpu-watcher-subscribe.py b/misc/cpu-watcher-subscribe.py deleted file mode 100644 index 4c560e2c..00000000 --- a/misc/cpu-watcher-subscribe.py +++ /dev/null @@ -1,49 +0,0 @@ -# -*- python -*- - -from twisted.internet import reactor -import sys - -import os.path, pprint -from twisted.application import service -from twisted.python import log -from foolscap import Tub, Referenceable, RemoteInterface -from foolscap.schema import ListOf, TupleOf -from zope.interface import implements - -Averages = ListOf( TupleOf(str, float, float, float) ) -class RICPUWatcherSubscriber(RemoteInterface): - def averages(averages=Averages): - return None - -class CPUWatcherSubscriber(service.MultiService, Referenceable): - 
implements(RICPUWatcherSubscriber) - def __init__(self, furlthing): - service.MultiService.__init__(self) - if furlthing.startswith("pb://"): - furl = furlthing - else: - furlfile = os.path.expanduser(furlthing) - if os.path.isdir(furlfile): - furlfile = os.path.join(furlfile, "watcher.furl") - furl = open(furlfile, "r").read().strip() - tub = Tub() - tub.setServiceParent(self) - tub.connectTo(furl, self.connected) - - def connected(self, rref): - print "subscribing" - d = rref.callRemote("get_averages") - d.addCallback(self.remote_averages) - d.addErrback(log.err) - - d = rref.callRemote("subscribe", self) - d.addErrback(log.err) - - def remote_averages(self, averages): - pprint.pprint(averages) - - -c = CPUWatcherSubscriber(sys.argv[1]) -c.startService() -reactor.run() - diff --git a/misc/cpu-watcher.tac b/misc/cpu-watcher.tac deleted file mode 100644 index 12f29324..00000000 --- a/misc/cpu-watcher.tac +++ /dev/null @@ -1,231 +0,0 @@ -# -*- python -*- - -""" -# run this tool on a linux box in its own directory, with a file named -# 'pids.txt' describing which processes to watch. It will follow CPU usage of -# the given processes, and compute 1/5/15-minute moving averages for each -# process. These averages can be retrieved from a foolscap connection -# (published at ./watcher.furl), or through an HTTP query (using ./webport). - -# Each line of pids.txt describes a single process. Blank lines and ones that -# begin with '#' are ignored. Each line is either "PID" or "PID NAME" (space -# separated). PID is either a numeric process ID, a pathname to a file that -# contains a process ID, or a pathname to a directory that contains a -# twistd.pid file (which contains a process ID). NAME is an arbitrary string -# that will be used to describe the process to watcher.furl subscribers, and -# defaults to PID if not provided. 
-""" - -# TODO: -# built-in graphs on web interface - - - -import pickle, os.path, time, pprint -from twisted.application import internet, service, strports -from twisted.web import server, resource, http -from twisted.python import log -import simplejson -from foolscap import Tub, Referenceable, RemoteInterface, eventual -from foolscap.schema import ListOf, TupleOf -from zope.interface import implements - -def read_cpu_times(pid): - data = open("/proc/%d/stat" % pid, "r").read() - data = data.split() - times = data[13:17] - # the values in /proc/%d/stat are in ticks, I think. My system has - # CONFIG_HZ_1000=y in /proc/config.gz but nevertheless the numbers in - # 'stat' appear to be 10ms each. - HZ = 100 - userspace_seconds = int(times[0]) * 1.0 / HZ - system_seconds = int(times[1]) * 1.0 / HZ - child_userspace_seconds = int(times[2]) * 1.0 / HZ - child_system_seconds = int(times[3]) * 1.0 / HZ - return (userspace_seconds, system_seconds) - - -def read_pids_txt(): - processes = [] - for line in open("pids.txt", "r").readlines(): - line = line.strip() - if not line or line[0] == "#": - continue - parts = line.split() - pidthing = parts[0] - if len(parts) > 1: - name = parts[1] - else: - name = pidthing - pid = None - try: - pid = int(pidthing) - except ValueError: - pidfile = os.path.expanduser(pidthing) - if os.path.isdir(pidfile): - pidfile = os.path.join(pidfile, "twistd.pid") - try: - pid = int(open(pidfile, "r").read().strip()) - except EnvironmentError: - pass - if pid is not None: - processes.append( (pid, name) ) - return processes - -Averages = ListOf( TupleOf(str, float, float, float) ) -class RICPUWatcherSubscriber(RemoteInterface): - def averages(averages=Averages): - return None - -class RICPUWatcher(RemoteInterface): - def get_averages(): - """Return a list of rows, one for each process I am watching. Each - row is (name, 1-min-avg, 5-min-avg, 15-min-avg), where 'name' is a - string, and the averages are floats from 0.0 to 1.0 . 
Each average is - the percentage of the CPU that this process has used: the change in - CPU time divided by the change in wallclock time. - """ - return Averages - - def subscribe(observer=RICPUWatcherSubscriber): - """Arrange for the given observer to get an 'averages' message every - time the averages are updated. This message will contain a single - argument, the same list of tuples that get_averages() returns.""" - return None - -class CPUWatcher(service.MultiService, resource.Resource, Referenceable): - implements(RICPUWatcher) - POLL_INTERVAL = 30 # seconds - HISTORY_LIMIT = 15 * 60 # 15min - AVERAGES = (1*60, 5*60, 15*60) # 1min, 5min, 15min - - def __init__(self): - service.MultiService.__init__(self) - resource.Resource.__init__(self) - try: - self.history = pickle.load(open("history.pickle", "rb")) - except: - self.history = {} - self.current = [] - self.observers = set() - ts = internet.TimerService(self.POLL_INTERVAL, self.poll) - ts.setServiceParent(self) - - def startService(self): - service.MultiService.startService(self) - - try: - desired_webport = open("webport", "r").read().strip() - except EnvironmentError: - desired_webport = None - webport = desired_webport or "tcp:0" - root = self - serv = strports.service(webport, server.Site(root)) - serv.setServiceParent(self) - if not desired_webport: - got_port = serv._port.getHost().port - open("webport", "w").write("tcp:%d\n" % got_port) - - self.tub = Tub(certFile="watcher.pem") - self.tub.setServiceParent(self) - try: - desired_tubport = open("tubport", "r").read().strip() - except EnvironmentError: - desired_tubport = None - tubport = desired_tubport or "tcp:0" - l = self.tub.listenOn(tubport) - if not desired_tubport: - got_port = l.getPortnum() - open("tubport", "w").write("tcp:%d\n" % got_port) - d = self.tub.setLocationAutomatically() - d.addCallback(self._tub_ready) - d.addErrback(log.err) - - def _tub_ready(self, res): - self.tub.registerReference(self, furlFile="watcher.furl") - - - def 
getChild(self, path, req): - if path == "": - return self - return resource.Resource.getChild(self, path, req) - - def render(self, req): - t = req.args.get("t", ["html"])[0] - ctype = "text/plain" - data = "" - if t == "html": - data = "# name, 1min, 5min, 15min\n" - data += pprint.pformat(self.current) + "\n" - elif t == "json": - #data = str(self.current) + "\n" # isn't that convenient? almost. - data = simplejson.dumps(self.current, indent=True) - else: - req.setResponseCode(http.BAD_REQUEST) - data = "Unknown t= %s\n" % t - req.setHeader("content-type", ctype) - return data - - def remote_get_averages(self): - return self.current - def remote_subscribe(self, observer): - self.observers.add(observer) - - def notify(self, observer): - d = observer.callRemote("averages", self.current) - def _error(f): - log.msg("observer error, removing them") - log.msg(f) - self.observers.discard(observer) - d.addErrback(_error) - - def poll(self): - max_history = self.HISTORY_LIMIT / self.POLL_INTERVAL - current = [] - try: - processes = read_pids_txt() - except: - log.err() - return - for (pid, name) in processes: - if pid not in self.history: - self.history[pid] = [] - now = time.time() - try: - (user_seconds, sys_seconds) = read_cpu_times(pid) - self.history[pid].append( (now, user_seconds, sys_seconds) ) - while len(self.history[pid]) > max_history+1: - self.history[pid].pop(0) - except: - log.msg("error reading process %s (%s), ignoring" % (pid, name)) - log.err() - try: - pickle.dump(self.history, open("history.pickle.tmp", "wb")) - os.rename("history.pickle.tmp", "history.pickle") - except: - pass - for (pid, name) in processes: - row = [name] - for avg in self.AVERAGES: - row.append(self._average_N(pid, avg)) - current.append(tuple(row)) - self.current = current - print current - for ob in self.observers: - eventual.eventually(self.notify, ob) - - def _average_N(self, pid, seconds): - num_samples = seconds / self.POLL_INTERVAL - samples = self.history[pid] - if 
len(samples) < num_samples+1: - return None - first = -num_samples-1 - elapsed_wall = samples[-1][0] - samples[first][0] - elapsed_user = samples[-1][1] - samples[first][1] - elapsed_sys = samples[-1][2] - samples[first][2] - if elapsed_wall == 0.0: - return 0.0 - return (elapsed_user+elapsed_sys) / elapsed_wall - -application = service.Application("cpu-watcher") -CPUWatcher().setServiceParent(application) diff --git a/misc/debian/rules.sid b/misc/debian/rules.sid index 3b7bf2be..6c65b746 100644 --- a/misc/debian/rules.sid +++ b/misc/debian/rules.sid @@ -15,7 +15,7 @@ DEBNAME := $(firstword $(DEB_PACKAGES)) STAGING_DIR := $(CURDIR)/debian/$(DEBNAME) DEB_INSTALL_DOCS_ALL := COPYING.TGPPL.html CREDITS NEWS README.txt relnotes.txt \ - docs misc/spacetime misc/cpu-watcher.tac + docs misc/operations_helpers/spacetime misc/operations_helpers/cpu-watcher.tac DEB_COMPRESS_EXCLUDE := .tac install/$(DEBNAME):: diff --git a/misc/debian_helpers/etch/debian/changelog b/misc/debian_helpers/etch/debian/changelog new file mode 100644 index 00000000..9f4e959e --- /dev/null +++ b/misc/debian_helpers/etch/debian/changelog @@ -0,0 +1,5 @@ +allmydata-tahoe (0.0.1) unstable; urgency=low + + * experimental packaging + + -- Brian Warner Mon, 4 Dec 2006 23:34:07 -0800 diff --git a/misc/debian_helpers/etch/debian/compat b/misc/debian_helpers/etch/debian/compat new file mode 100644 index 00000000..b8626c4c --- /dev/null +++ b/misc/debian_helpers/etch/debian/compat @@ -0,0 +1 @@ +4 diff --git a/misc/debian_helpers/etch/debian/control b/misc/debian_helpers/etch/debian/control new file mode 100644 index 00000000..7d03a506 --- /dev/null +++ b/misc/debian_helpers/etch/debian/control @@ -0,0 +1,16 @@ +Source: allmydata-tahoe +Section: python +Priority: optional +Maintainer: Brian Warner +Build-Depends: debhelper (>= 5.0.37.1), cdbs (>= 0.4.41), python-central (>= 0.4.10), python-setuptools, python, python-all-dev +Build-Depends-Indep: python-twisted +XS-Python-Version: 2.4,2.5,2.6 
+Standards-Version: 3.7.2 + +Package: allmydata-tahoe +Architecture: all +Depends: ${python:Depends}, python-twisted, python-foolscap (>= 0.4.1), python-pyopenssl, python-nevow, python-simplejson (>= 1.4), python-zfec (>= 1.1), python-pycryptopp (>= 0.5.15), python-setuptools, python-sqlite2 +Recommends: +XB-Python-Version: 2.4,2.5,2.6 +Description: A secure distributed filesystem + Allmydata Tahoe diff --git a/misc/debian_helpers/etch/debian/copyright b/misc/debian_helpers/etch/debian/copyright new file mode 100644 index 00000000..1dce8a20 --- /dev/null +++ b/misc/debian_helpers/etch/debian/copyright @@ -0,0 +1,19 @@ +This package was debianized by Brian Warner + +The upstream source of this project is http://allmydata.org . + +Copyright (c) 2006-2009 +AllMyData, Inc. + +You may use this package under the GNU General Public License, version 2 or, at +your option, any later version. + +You may use this package under the Transitive Grace Period Public Licence, +version 1 or, at your option, any later version. The Transitive Grace Period +Public Licence has requirements similar to the GPL except that it allows you to +wait for up to twelve months after you redistribute a derived work before +releasing the source code of your derived work. See the file COPYING.TGPPL.html +for the terms of the Transitive Grace Period Public Licence, version 1. + +(You may choose to use this package under the terms of either licence, at your +option.) diff --git a/misc/debian_helpers/etch/debian/pycompat b/misc/debian_helpers/etch/debian/pycompat new file mode 100644 index 00000000..0cfbf088 --- /dev/null +++ b/misc/debian_helpers/etch/debian/pycompat @@ -0,0 +1 @@ +2 diff --git a/misc/debian_helpers/etch/debian/rules b/misc/debian_helpers/etch/debian/rules new file mode 100644 index 00000000..9c07c564 --- /dev/null +++ b/misc/debian_helpers/etch/debian/rules @@ -0,0 +1,48 @@ +#! /usr/bin/make -f +# Uncomment this to turn on verbose mode. 
+#export DH_VERBOSE=1 + +DEB_PYTHON_SYSTEM=pycentral + +include /usr/share/cdbs/1/rules/debhelper.mk +include /usr/share/cdbs/1/class/python-distutils.mk + +# this ought to be the name of the package that we're building, which is +# different on each tahoe branch. debian/control is the master: whatever +# package is listed in there will be built. +DEBNAME := $(firstword $(DEB_PACKAGES)) + +STAGING_DIR := $(CURDIR)/debian/$(DEBNAME) + +DEB_INSTALL_DOCS_ALL := COPYING.GPL COPYING.TGPPL.html CREDITS \ + NEWS README.txt relnotes.txt docs misc/operations_helpers/spacetime misc/operations_helpers/cpu-watcher.tac +DEB_COMPRESS_EXCLUDE := .tac + + +# we overwrite the setuptools-generated /usr/bin/tahoe (located in +# support/bin/tahoe after a 'make build') with a different version, because +# the setuptools form (using "entry points") insists upon .egg-info -visible +# forms of dependent packages to be installed. For a debian package, we rely +# upon the dependencies that are declared in debian/control . +# +# To make sure the #! line matches the version of python that we're using for +# this build, we copy it from the setuptools-generated /usr/bin/tahoe, then +# add other code to the generated file. 
+ +install/$(DEBNAME):: + mkdir -pm755 $(STAGING_DIR) + python setup.py install --root=$(STAGING_DIR) + + head -1 $(STAGING_DIR)/usr/bin/tahoe >$(STAGING_DIR)/usr/bin/tahoe.new + echo "from allmydata.scripts import runner" >>$(STAGING_DIR)/usr/bin/tahoe.new + echo "runner.run()" >>$(STAGING_DIR)/usr/bin/tahoe.new + chmod +x $(STAGING_DIR)/usr/bin/tahoe.new + mv $(STAGING_DIR)/usr/bin/tahoe.new $(STAGING_DIR)/usr/bin/tahoe + + dh_install misc/operations_helpers/munin/* usr/share/$(DEBNAME)/munin + chmod +x $(STAGING_DIR)/usr/share/$(DEBNAME)/munin/* + + dh_pycentral + +clean:: + -rm -rf build diff --git a/misc/debian_helpers/lenny/debian/changelog b/misc/debian_helpers/lenny/debian/changelog new file mode 100644 index 00000000..9f4e959e --- /dev/null +++ b/misc/debian_helpers/lenny/debian/changelog @@ -0,0 +1,5 @@ +allmydata-tahoe (0.0.1) unstable; urgency=low + + * experimental packaging + + -- Brian Warner Mon, 4 Dec 2006 23:34:07 -0800 diff --git a/misc/debian_helpers/lenny/debian/compat b/misc/debian_helpers/lenny/debian/compat new file mode 100644 index 00000000..b8626c4c --- /dev/null +++ b/misc/debian_helpers/lenny/debian/compat @@ -0,0 +1 @@ +4 diff --git a/misc/debian_helpers/lenny/debian/control b/misc/debian_helpers/lenny/debian/control new file mode 100644 index 00000000..1eb610de --- /dev/null +++ b/misc/debian_helpers/lenny/debian/control @@ -0,0 +1,16 @@ +Source: allmydata-tahoe +Section: python +Priority: optional +Maintainer: Brian Warner +Build-Depends: debhelper (>= 5.0.37.2), cdbs (>= 0.4.43), python-central (>= 0.5), python-setuptools, python, python-dev +Build-Depends-Indep: python-twisted +XS-Python-Version: 2.4,2.5,2.6 +Standards-Version: 3.7.2 + +Package: allmydata-tahoe +Architecture: all +Depends: ${python:Depends}, python-twisted, python-foolscap (>= 0.4.1), python-pyopenssl, python-nevow, python-simplejson (>= 1.4), python-zfec (>= 1.1), python-pycryptopp (>= 0.5.15), python-setuptools +Recommends: +XB-Python-Version: 2.4,2.5,2.6 
+Description: A secure distributed filesystem + Allmydata Tahoe diff --git a/misc/debian_helpers/lenny/debian/copyright b/misc/debian_helpers/lenny/debian/copyright new file mode 100644 index 00000000..1dce8a20 --- /dev/null +++ b/misc/debian_helpers/lenny/debian/copyright @@ -0,0 +1,19 @@ +This package was debianized by Brian Warner + +The upstream source of this project is http://allmydata.org . + +Copyright (c) 2006-2009 +AllMyData, Inc. + +You may use this package under the GNU General Public License, version 2 or, at +your option, any later version. + +You may use this package under the Transitive Grace Period Public Licence, +version 1 or, at your option, any later version. The Transitive Grace Period +Public Licence has requirements similar to the GPL except that it allows you to +wait for up to twelve months after you redistribute a derived work before +releasing the source code of your derived work. See the file COPYING.TGPPL.html +for the terms of the Transitive Grace Period Public Licence, version 1. + +(You may choose to use this package under the terms of either licence, at your +option.) diff --git a/misc/debian_helpers/lenny/debian/pycompat b/misc/debian_helpers/lenny/debian/pycompat new file mode 100644 index 00000000..0cfbf088 --- /dev/null +++ b/misc/debian_helpers/lenny/debian/pycompat @@ -0,0 +1 @@ +2 diff --git a/misc/debian_helpers/lenny/debian/rules b/misc/debian_helpers/lenny/debian/rules new file mode 100644 index 00000000..9c07c564 --- /dev/null +++ b/misc/debian_helpers/lenny/debian/rules @@ -0,0 +1,48 @@ +#! /usr/bin/make -f +# Uncomment this to turn on verbose mode. +#export DH_VERBOSE=1 + +DEB_PYTHON_SYSTEM=pycentral + +include /usr/share/cdbs/1/rules/debhelper.mk +include /usr/share/cdbs/1/class/python-distutils.mk + +# this ought to be the name of the package that we're building, which is +# different on each tahoe branch. debian/control is the master: whatever +# package is listed in there will be built. 
+DEBNAME := $(firstword $(DEB_PACKAGES)) + +STAGING_DIR := $(CURDIR)/debian/$(DEBNAME) + +DEB_INSTALL_DOCS_ALL := COPYING.GPL COPYING.TGPPL.html CREDITS \ + NEWS README.txt relnotes.txt docs misc/operations_helpers/spacetime misc/operations_helpers/cpu-watcher.tac +DEB_COMPRESS_EXCLUDE := .tac + + +# we overwrite the setuptools-generated /usr/bin/tahoe (located in +# support/bin/tahoe after a 'make build') with a different version, because +# the setuptools form (using "entry points") insists upon .egg-info -visible +# forms of dependent packages to be installed. For a debian package, we rely +# upon the dependencies that are declared in debian/control . +# +# To make sure the #! line matches the version of python that we're using for +# this build, we copy it from the setuptools-generated /usr/bin/tahoe, then +# add other code to the generated file. + +install/$(DEBNAME):: + mkdir -pm755 $(STAGING_DIR) + python setup.py install --root=$(STAGING_DIR) + + head -1 $(STAGING_DIR)/usr/bin/tahoe >$(STAGING_DIR)/usr/bin/tahoe.new + echo "from allmydata.scripts import runner" >>$(STAGING_DIR)/usr/bin/tahoe.new + echo "runner.run()" >>$(STAGING_DIR)/usr/bin/tahoe.new + chmod +x $(STAGING_DIR)/usr/bin/tahoe.new + mv $(STAGING_DIR)/usr/bin/tahoe.new $(STAGING_DIR)/usr/bin/tahoe + + dh_install misc/operations_helpers/munin/* usr/share/$(DEBNAME)/munin + chmod +x $(STAGING_DIR)/usr/share/$(DEBNAME)/munin/* + + dh_pycentral + +clean:: + -rm -rf build diff --git a/misc/debian_helpers/sid/debian/changelog b/misc/debian_helpers/sid/debian/changelog new file mode 100644 index 00000000..9f4e959e --- /dev/null +++ b/misc/debian_helpers/sid/debian/changelog @@ -0,0 +1,5 @@ +allmydata-tahoe (0.0.1) unstable; urgency=low + + * experimental packaging + + -- Brian Warner Mon, 4 Dec 2006 23:34:07 -0800 diff --git a/misc/debian_helpers/sid/debian/compat b/misc/debian_helpers/sid/debian/compat new file mode 100644 index 00000000..b8626c4c --- /dev/null +++ 
b/misc/debian_helpers/sid/debian/compat @@ -0,0 +1 @@ +4 diff --git a/misc/debian_helpers/sid/debian/control b/misc/debian_helpers/sid/debian/control new file mode 100644 index 00000000..e37fbde1 --- /dev/null +++ b/misc/debian_helpers/sid/debian/control @@ -0,0 +1,16 @@ +Source: allmydata-tahoe +Section: python +Priority: optional +Maintainer: Brian Warner +Build-Depends: debhelper (>= 5.0.37.2), cdbs (>= 0.4.43), python-central (>= 0.5), python-setuptools, python, python-dev +Build-Depends-Indep: python-twisted-core +XS-Python-Version: 2.4,2.5,2.6 +Standards-Version: 3.7.2 + +Package: allmydata-tahoe +Architecture: all +Depends: ${python:Depends}, python-twisted-core, python-twisted-names, python-twisted-web, python-foolscap (>= 0.4.1), python-pyopenssl, python-nevow, python-simplejson (>= 1.4), python-zfec (>= 1.1), python-pycryptopp (>= 0.5.15), python-setuptools +Recommends: +XB-Python-Version: 2.4,2.5,2.6 +Description: A secure distributed filesystem + Allmydata Tahoe diff --git a/misc/debian_helpers/sid/debian/copyright b/misc/debian_helpers/sid/debian/copyright new file mode 100644 index 00000000..1dce8a20 --- /dev/null +++ b/misc/debian_helpers/sid/debian/copyright @@ -0,0 +1,19 @@ +This package was debianized by Brian Warner + +The upstream source of this project is http://allmydata.org . + +Copyright (c) 2006-2009 +AllMyData, Inc. + +You may use this package under the GNU General Public License, version 2 or, at +your option, any later version. + +You may use this package under the Transitive Grace Period Public Licence, +version 1 or, at your option, any later version. The Transitive Grace Period +Public Licence has requirements similar to the GPL except that it allows you to +wait for up to twelve months after you redistribute a derived work before +releasing the source code of your derived work. See the file COPYING.TGPPL.html +for the terms of the Transitive Grace Period Public Licence, version 1. 
+ +(You may choose to use this package under the terms of either licence, at your +option.) diff --git a/misc/debian_helpers/sid/debian/pycompat b/misc/debian_helpers/sid/debian/pycompat new file mode 100644 index 00000000..0cfbf088 --- /dev/null +++ b/misc/debian_helpers/sid/debian/pycompat @@ -0,0 +1 @@ +2 diff --git a/misc/debian_helpers/sid/debian/rules b/misc/debian_helpers/sid/debian/rules new file mode 100644 index 00000000..9c07c564 --- /dev/null +++ b/misc/debian_helpers/sid/debian/rules @@ -0,0 +1,48 @@ +#! /usr/bin/make -f +# Uncomment this to turn on verbose mode. +#export DH_VERBOSE=1 + +DEB_PYTHON_SYSTEM=pycentral + +include /usr/share/cdbs/1/rules/debhelper.mk +include /usr/share/cdbs/1/class/python-distutils.mk + +# this ought to be the name of the package that we're building, which is +# different on each tahoe branch. debian/control is the master: whatever +# package is listed in there will be built. +DEBNAME := $(firstword $(DEB_PACKAGES)) + +STAGING_DIR := $(CURDIR)/debian/$(DEBNAME) + +DEB_INSTALL_DOCS_ALL := COPYING.GPL COPYING.TGPPL.html CREDITS \ + NEWS README.txt relnotes.txt docs misc/operations_helpers/spacetime misc/operations_helpers/cpu-watcher.tac +DEB_COMPRESS_EXCLUDE := .tac + + +# we overwrite the setuptools-generated /usr/bin/tahoe (located in +# support/bin/tahoe after a 'make build') with a different version, because +# the setuptools form (using "entry points") insists upon .egg-info -visible +# forms of dependent packages to be installed. For a debian package, we rely +# upon the dependencies that are declared in debian/control . +# +# To make sure the #! line matches the version of python that we're using for +# this build, we copy it from the setuptools-generated /usr/bin/tahoe, then +# add other code to the generated file. 
+ +install/$(DEBNAME):: + mkdir -pm755 $(STAGING_DIR) + python setup.py install --root=$(STAGING_DIR) + + head -1 $(STAGING_DIR)/usr/bin/tahoe >$(STAGING_DIR)/usr/bin/tahoe.new + echo "from allmydata.scripts import runner" >>$(STAGING_DIR)/usr/bin/tahoe.new + echo "runner.run()" >>$(STAGING_DIR)/usr/bin/tahoe.new + chmod +x $(STAGING_DIR)/usr/bin/tahoe.new + mv $(STAGING_DIR)/usr/bin/tahoe.new $(STAGING_DIR)/usr/bin/tahoe + + dh_install misc/operations_helpers/munin/* usr/share/$(DEBNAME)/munin + chmod +x $(STAGING_DIR)/usr/share/$(DEBNAME)/munin/* + + dh_pycentral + +clean:: + -rm -rf build diff --git a/misc/delete-old-helper.sh b/misc/delete-old-helper.sh deleted file mode 100644 index 1a85fcdd..00000000 --- a/misc/delete-old-helper.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh - -find helper/CHK_encoding -mtime +7 -print0 |xargs -0 rm -find helper/CHK_incoming -mtime +14 -print0 |xargs -0 rm diff --git a/misc/etch/debian/changelog b/misc/etch/debian/changelog deleted file mode 100644 index 9f4e959e..00000000 --- a/misc/etch/debian/changelog +++ /dev/null @@ -1,5 +0,0 @@ -allmydata-tahoe (0.0.1) unstable; urgency=low - - * experimental packaging - - -- Brian Warner Mon, 4 Dec 2006 23:34:07 -0800 diff --git a/misc/etch/debian/compat b/misc/etch/debian/compat deleted file mode 100644 index b8626c4c..00000000 --- a/misc/etch/debian/compat +++ /dev/null @@ -1 +0,0 @@ -4 diff --git a/misc/etch/debian/control b/misc/etch/debian/control deleted file mode 100644 index 7d03a506..00000000 --- a/misc/etch/debian/control +++ /dev/null @@ -1,16 +0,0 @@ -Source: allmydata-tahoe -Section: python -Priority: optional -Maintainer: Brian Warner -Build-Depends: debhelper (>= 5.0.37.1), cdbs (>= 0.4.41), python-central (>= 0.4.10), python-setuptools, python, python-all-dev -Build-Depends-Indep: python-twisted -XS-Python-Version: 2.4,2.5,2.6 -Standards-Version: 3.7.2 - -Package: allmydata-tahoe -Architecture: all -Depends: ${python:Depends}, python-twisted, python-foolscap (>= 0.4.1), 
python-pyopenssl, python-nevow, python-simplejson (>= 1.4), python-zfec (>= 1.1), python-pycryptopp (>= 0.5.15), python-setuptools, python-sqlite2 -Recommends: -XB-Python-Version: 2.4,2.5,2.6 -Description: A secure distributed filesystem - Allmydata Tahoe diff --git a/misc/etch/debian/copyright b/misc/etch/debian/copyright deleted file mode 100644 index 1dce8a20..00000000 --- a/misc/etch/debian/copyright +++ /dev/null @@ -1,19 +0,0 @@ -This package was debianized by Brian Warner - -The upstream source of this project is http://allmydata.org . - -Copyright (c) 2006-2009 -AllMyData, Inc. - -You may use this package under the GNU General Public License, version 2 or, at -your option, any later version. - -You may use this package under the Transitive Grace Period Public Licence, -version 1 or, at your option, any later version. The Transitive Grace Period -Public Licence has requirements similar to the GPL except that it allows you to -wait for up to twelve months after you redistribute a derived work before -releasing the source code of your derived work. See the file COPYING.TGPPL.html -for the terms of the Transitive Grace Period Public Licence, version 1. - -(You may choose to use this package under the terms of either licence, at your -option.) diff --git a/misc/etch/debian/pycompat b/misc/etch/debian/pycompat deleted file mode 100644 index 0cfbf088..00000000 --- a/misc/etch/debian/pycompat +++ /dev/null @@ -1 +0,0 @@ -2 diff --git a/misc/etch/debian/rules b/misc/etch/debian/rules deleted file mode 100644 index 58f7bf29..00000000 --- a/misc/etch/debian/rules +++ /dev/null @@ -1,48 +0,0 @@ -#! /usr/bin/make -f -# Uncomment this to turn on verbose mode. -#export DH_VERBOSE=1 - -DEB_PYTHON_SYSTEM=pycentral - -include /usr/share/cdbs/1/rules/debhelper.mk -include /usr/share/cdbs/1/class/python-distutils.mk - -# this ought to be the name of the package that we're building, which is -# different on each tahoe branch. 
debian/control is the master: whatever -# package is listed in there will be built. -DEBNAME := $(firstword $(DEB_PACKAGES)) - -STAGING_DIR := $(CURDIR)/debian/$(DEBNAME) - -DEB_INSTALL_DOCS_ALL := COPYING.GPL COPYING.TGPPL.html CREDITS \ - NEWS README.txt relnotes.txt docs misc/spacetime misc/cpu-watcher.tac -DEB_COMPRESS_EXCLUDE := .tac - - -# we overwrite the setuptools-generated /usr/bin/tahoe (located in -# support/bin/tahoe after a 'make build') with a different version, because -# the setuptools form (using "entry points") insists upon .egg-info -visible -# forms of dependent packages to be installed. For a debian package, we rely -# upon the dependencies that are declared in debian/control . -# -# To make sure the #! line matches the version of python that we're using for -# this build, we copy it from the setuptools-generated /usr/bin/tahoe, then -# add other code to the generated file. - -install/$(DEBNAME):: - mkdir -pm755 $(STAGING_DIR) - python setup.py install --root=$(STAGING_DIR) - - head -1 $(STAGING_DIR)/usr/bin/tahoe >$(STAGING_DIR)/usr/bin/tahoe.new - echo "from allmydata.scripts import runner" >>$(STAGING_DIR)/usr/bin/tahoe.new - echo "runner.run()" >>$(STAGING_DIR)/usr/bin/tahoe.new - chmod +x $(STAGING_DIR)/usr/bin/tahoe.new - mv $(STAGING_DIR)/usr/bin/tahoe.new $(STAGING_DIR)/usr/bin/tahoe - - dh_install misc/munin/* usr/share/$(DEBNAME)/munin - chmod +x $(STAGING_DIR)/usr/share/$(DEBNAME)/munin/* - - dh_pycentral - -clean:: - -rm -rf build diff --git a/misc/figleaf.excludes b/misc/figleaf.excludes deleted file mode 100644 index 0eacb7a0..00000000 --- a/misc/figleaf.excludes +++ /dev/null @@ -1,5 +0,0 @@ -/test/ -/foolscap/ -/zfec/ -/allmydata/Crypto/ -/simplejson/ diff --git a/misc/figleaf2el.py b/misc/figleaf2el.py deleted file mode 100644 index 0e7e051b..00000000 --- a/misc/figleaf2el.py +++ /dev/null @@ -1,87 +0,0 @@ -#! 
/usr/bin/env python - -import os, sys, pickle - -def longest_common_prefix(elements): - if not elements: - return "" - prefix = elements[0] - for e in elements: - prefix = longest_common_prefix_2(prefix, e) - return prefix -def longest_common_prefix_2(a, b): - maxlen = min(len(a), len(b)) - for i in range(maxlen, 0, -1): - if a[:i] == b[:i]: - return a[:i] - return "" - -## def write_el(r2): -## filenames = sorted(r2.keys()) -## out = open(".figleaf.el", "w") -## out.write("(setq figleaf-results '(\n") -## for f in filenames: -## linenumbers = r2[f] -## out.write(' ("%s" (%s))\n' % (f, " ".join([str(ln) -## for ln in linenumbers]))) -## out.write(" ))\n") -## out.close() - -def write_el(r2, source): - filenames = sorted(r2.keys()) - out = open(".figleaf.el", "w") - out.write(""" -;; This is an elisp-readable form of the figleaf coverage data. It defines a -;; single top-level hash table in which the load-path-relative filename (like -;; allmydata/download.py) is the key, and the value is a three-element list. -;; The first element of this list is a list of line numbers that represent -;; actual code. The second is a list of line numbers for lines which got used -;; during the unit test. The third is a list of line numbers for code lines -;; that were not covered (since 'code' and 'covered' start as sets, this last -;; list is equal to 'code - covered'). 
- -""") - out.write("(let ((results (make-hash-table :test 'equal)))\n") - for f in filenames: - covered_linenumbers = r2[f] - code_linenumbers = source[f] - uncovered_code = code_linenumbers - covered_linenumbers - out.write(" (puthash \"%s\" '((%s) (%s) (%s)) results)\n" - % (f, - " ".join([str(ln) for ln in sorted(code_linenumbers)]), - " ".join([str(ln) for ln in sorted(covered_linenumbers)]), - " ".join([str(ln) for ln in sorted(uncovered_code)]), - )) - out.write(" results)\n") - out.close() - -import figleaf - -def examine_source(filename): - f = open(filename, "r") - lines = figleaf.get_lines(f) - f.close() - return lines - -def main(): - results = pickle.load(open(sys.argv[1], "rb")) - import_prefix = os.path.abspath(sys.argv[2]) - if not import_prefix.endswith("/"): - import_prefix = import_prefix + "/" - plen = len(import_prefix) - - r2 = {} - source = {} - filenames = sorted(results.keys()) - here = os.getcwd() - for f in filenames: - if f.startswith(import_prefix): - short = f[plen:] - r2[short] = results[f] - source[short] = examine_source(f) - write_el(r2, source) - -if __name__ == '__main__': - main() - - diff --git a/misc/find-share-anomalies.py b/misc/find-share-anomalies.py deleted file mode 100644 index 76fe3419..00000000 --- a/misc/find-share-anomalies.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python - -# feed this the results of 'tahoe catalog-shares' for all servers - -import sys - -chk_encodings = {} -sdmf_encodings = {} -sdmf_versions = {} - -for catalog in sys.argv[1:]: - for line in open(catalog, "r").readlines(): - line = line.strip() - pieces = line.split() - if pieces[0] == "CHK": - ftype, si, kN, size, ueb_hash, expiration, filename = pieces - if si not in chk_encodings: - chk_encodings[si] = (set(), set()) - chk_encodings[si][0].add( (si, kN) ) - chk_encodings[si][1].add( line ) - if pieces[0] == "SDMF": - ftype, si, kN, size, ver, expiration, filename = pieces - if si not in sdmf_encodings: - sdmf_encodings[si] = (set(), 
set()) - sdmf_encodings[si][0].add( (si, kN) ) - sdmf_encodings[si][1].add( line ) - if si not in sdmf_versions: - sdmf_versions[si] = (set(), set()) - sdmf_versions[si][0].add( ver ) - sdmf_versions[si][1].add( line ) - -chk_multiple_encodings = [(si,lines) - for si,(encodings,lines) in chk_encodings.items() - if len(encodings) > 1] -chk_multiple_encodings.sort() -sdmf_multiple_encodings = [(si,lines) - for si,(encodings,lines) in sdmf_encodings.items() - if len(encodings) > 1 - ] -sdmf_multiple_encodings.sort() -sdmf_multiple_versions = [(si,lines) - for si,(versions,lines) in sdmf_versions.items() - if len(versions) > 1] -sdmf_multiple_versions.sort() - -if chk_multiple_encodings: - print - print "CHK multiple encodings:" - for (si,lines) in chk_multiple_encodings: - print " " + si - for line in sorted(lines): - print " " + line -if sdmf_multiple_encodings: - print - print "SDMF multiple encodings:" - for (si,lines) in sdmf_multiple_encodings: - print " " + si - for line in sorted(lines): - print " " + line -if sdmf_multiple_versions: - print - print "SDMF multiple versions:" - for (si,lines) in sdmf_multiple_versions: - print " " + si - for line in sorted(lines): - print " " + line diff --git a/misc/find-trailing-spaces.py b/misc/find-trailing-spaces.py deleted file mode 100644 index ad2cc583..00000000 --- a/misc/find-trailing-spaces.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python - -import os, sys - -from twisted.python import usage - -class Options(usage.Options): - optFlags = [ - ("recursive", "r", "Search for .py files recursively"), - ] - def parseArgs(self, *starting_points): - self.starting_points = starting_points - -found = [False] - -def check(fn): - f = open(fn, "r") - for i,line in enumerate(f.readlines()): - if line == "\n": - continue - if line[-1] == "\n": - line = line[:-1] - if line.rstrip() != line: - # the %s:%d:%d: lets emacs' compile-mode jump to those locations - print "%s:%d:%d: trailing whitespace" % (fn, i+1, len(line)+1) - 
found[0] = True - f.close() - -o = Options() -o.parseOptions() -if o['recursive']: - for starting_point in o.starting_points: - for root, dirs, files in os.walk(starting_point): - for fn in [f for f in files if f.endswith(".py")]: - fn = os.path.join(root, fn) - check(fn) -else: - for fn in o.starting_points: - check(fn) -if found[0]: - sys.exit(1) -sys.exit(0) diff --git a/misc/find_trial.py b/misc/find_trial.py deleted file mode 100644 index 39c1553f..00000000 --- a/misc/find_trial.py +++ /dev/null @@ -1,37 +0,0 @@ -import sys - -import os, sys -from twisted.python.procutils import which - -def find_exe(exename): - """ - Look for something named exename or exename + ".py". - - This is a kludge. - - @return: a list containing one element which is the quoted path to the - exename (if it is thought to be executable), or else the first element - being quoted sys.executable and the second element being the quoted path - to the exename + ".py", or else return False if one can't be found - """ - exes = which(exename) - exe = exes and exes[0] - if not exe: - exe = os.path.join(sys.prefix, 'scripts', exename + '.py') - if os.path.exists(exe): - path, ext = os.path.splitext(exe) - if ext.lower() in [".exe", ".bat",]: - cmd = ['"' + exe + '"',] - else: - cmd = ['"' + sys.executable + '"', '"' + exe + '"',] - return cmd - else: - return False - - -if __name__ == "__main__": - cmd = find_exe("trial") - if cmd: - print " ".join(cmd).replace("\\", "/") - else: - sys.exit(1) diff --git a/misc/fixshebangs.py b/misc/fixshebangs.py deleted file mode 100644 index 02446490..00000000 --- a/misc/fixshebangs.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python - -from allmydata.util import fileutil - -import re, shutil, sys - -R=re.compile("^#! 
*/usr/bin/python *$") -for fname in sys.argv[1:]: - inf = open(fname, "rU") - rntf = fileutil.ReopenableNamedTemporaryFile() - outf = open(rntf.name, "w") - first = True - for l in inf: - if first and R.search(l): - outf.write("#!/usr/bin/env python\n") - else: - outf.write(l) - first = False - outf.close() - - try: - shutil.move(rntf.name, fname) - except EnvironmentError: - # Couldn't atomically overwrite, so just hope that this process doesn't die - # and the target file doesn't get recreated in between the following two - # operations: - shutil.move(fname, fname + ".bak") - shutil.move(rntf.name, fname) - - fileutil.remove_if_possible(fname + ".bak") diff --git a/misc/get-version.py b/misc/get-version.py deleted file mode 100644 index a3ef5d11..00000000 --- a/misc/get-version.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python - -"""Determine the version number of the current tree. - -This should be run after 'setup.py darcsver'. It will emit a single line of text -to stdout, either of the form '0.2.0' if this is a release tree (i.e. no patches -have been added since the last release tag), or '0.2.0-34' (if 34 patches have -been added since the last release tag). If the tree does not have a well-formed -version number, this will emit 'unknown'. 
- -The version string thus calculated should exactly match the version string -determined by setup.py (when it creates eggs and source tarballs) and also -the version available in the code image when you do: - - from allmydata import __version__ - -""" - -import os.path, re - -def get_version(): - VERSIONFILE = "src/allmydata/_version.py" - verstr = "unknown" - if os.path.exists(VERSIONFILE): - VSRE = re.compile("^verstr = ['\"]([^'\"]*)['\"]", re.M) - verstrline = open(VERSIONFILE, "rt").read() - mo = VSRE.search(verstrline) - if mo: - verstr = mo.group(1) - else: - raise ValueError("if version.py exists, it must be well-formed") - - return verstr - -if __name__ == '__main__': - verstr = get_version() - print verstr - diff --git a/misc/getmem.py b/misc/getmem.py deleted file mode 100644 index 8ddc3ed7..00000000 --- a/misc/getmem.py +++ /dev/null @@ -1,18 +0,0 @@ -#! /usr/bin/env python - -from foolscap import Tub -from foolscap.eventual import eventually -import sys -from twisted.internet import reactor - -def go(): - t = Tub() - d = t.getReference(sys.argv[1]) - d.addCallback(lambda rref: rref.callRemote("get_memory_usage")) - def _got(res): - print res - reactor.stop() - d.addCallback(_got) - -eventually(go) -reactor.run() diff --git a/misc/lenny/debian/changelog b/misc/lenny/debian/changelog deleted file mode 100644 index 9f4e959e..00000000 --- a/misc/lenny/debian/changelog +++ /dev/null @@ -1,5 +0,0 @@ -allmydata-tahoe (0.0.1) unstable; urgency=low - - * experimental packaging - - -- Brian Warner Mon, 4 Dec 2006 23:34:07 -0800 diff --git a/misc/lenny/debian/compat b/misc/lenny/debian/compat deleted file mode 100644 index b8626c4c..00000000 --- a/misc/lenny/debian/compat +++ /dev/null @@ -1 +0,0 @@ -4 diff --git a/misc/lenny/debian/control b/misc/lenny/debian/control deleted file mode 100644 index 1eb610de..00000000 --- a/misc/lenny/debian/control +++ /dev/null @@ -1,16 +0,0 @@ -Source: allmydata-tahoe -Section: python -Priority: optional -Maintainer: Brian 
Warner -Build-Depends: debhelper (>= 5.0.37.2), cdbs (>= 0.4.43), python-central (>= 0.5), python-setuptools, python, python-dev -Build-Depends-Indep: python-twisted -XS-Python-Version: 2.4,2.5,2.6 -Standards-Version: 3.7.2 - -Package: allmydata-tahoe -Architecture: all -Depends: ${python:Depends}, python-twisted, python-foolscap (>= 0.4.1), python-pyopenssl, python-nevow, python-simplejson (>= 1.4), python-zfec (>= 1.1), python-pycryptopp (>= 0.5.15), python-setuptools -Recommends: -XB-Python-Version: 2.4,2.5,2.6 -Description: A secure distributed filesystem - Allmydata Tahoe diff --git a/misc/lenny/debian/copyright b/misc/lenny/debian/copyright deleted file mode 100644 index 1dce8a20..00000000 --- a/misc/lenny/debian/copyright +++ /dev/null @@ -1,19 +0,0 @@ -This package was debianized by Brian Warner - -The upstream source of this project is http://allmydata.org . - -Copyright (c) 2006-2009 -AllMyData, Inc. - -You may use this package under the GNU General Public License, version 2 or, at -your option, any later version. - -You may use this package under the Transitive Grace Period Public Licence, -version 1 or, at your option, any later version. The Transitive Grace Period -Public Licence has requirements similar to the GPL except that it allows you to -wait for up to twelve months after you redistribute a derived work before -releasing the source code of your derived work. See the file COPYING.TGPPL.html -for the terms of the Transitive Grace Period Public Licence, version 1. - -(You may choose to use this package under the terms of either licence, at your -option.) diff --git a/misc/lenny/debian/pycompat b/misc/lenny/debian/pycompat deleted file mode 100644 index 0cfbf088..00000000 --- a/misc/lenny/debian/pycompat +++ /dev/null @@ -1 +0,0 @@ -2 diff --git a/misc/lenny/debian/rules b/misc/lenny/debian/rules deleted file mode 100644 index 58f7bf29..00000000 --- a/misc/lenny/debian/rules +++ /dev/null @@ -1,48 +0,0 @@ -#! 
/usr/bin/make -f -# Uncomment this to turn on verbose mode. -#export DH_VERBOSE=1 - -DEB_PYTHON_SYSTEM=pycentral - -include /usr/share/cdbs/1/rules/debhelper.mk -include /usr/share/cdbs/1/class/python-distutils.mk - -# this ought to be the name of the package that we're building, which is -# different on each tahoe branch. debian/control is the master: whatever -# package is listed in there will be built. -DEBNAME := $(firstword $(DEB_PACKAGES)) - -STAGING_DIR := $(CURDIR)/debian/$(DEBNAME) - -DEB_INSTALL_DOCS_ALL := COPYING.GPL COPYING.TGPPL.html CREDITS \ - NEWS README.txt relnotes.txt docs misc/spacetime misc/cpu-watcher.tac -DEB_COMPRESS_EXCLUDE := .tac - - -# we overwrite the setuptools-generated /usr/bin/tahoe (located in -# support/bin/tahoe after a 'make build') with a different version, because -# the setuptools form (using "entry points") insists upon .egg-info -visible -# forms of dependent packages to be installed. For a debian package, we rely -# upon the dependencies that are declared in debian/control . -# -# To make sure the #! line matches the version of python that we're using for -# this build, we copy it from the setuptools-generated /usr/bin/tahoe, then -# add other code to the generated file. 
- -install/$(DEBNAME):: - mkdir -pm755 $(STAGING_DIR) - python setup.py install --root=$(STAGING_DIR) - - head -1 $(STAGING_DIR)/usr/bin/tahoe >$(STAGING_DIR)/usr/bin/tahoe.new - echo "from allmydata.scripts import runner" >>$(STAGING_DIR)/usr/bin/tahoe.new - echo "runner.run()" >>$(STAGING_DIR)/usr/bin/tahoe.new - chmod +x $(STAGING_DIR)/usr/bin/tahoe.new - mv $(STAGING_DIR)/usr/bin/tahoe.new $(STAGING_DIR)/usr/bin/tahoe - - dh_install misc/munin/* usr/share/$(DEBNAME)/munin - chmod +x $(STAGING_DIR)/usr/share/$(DEBNAME)/munin/* - - dh_pycentral - -clean:: - -rm -rf build diff --git a/misc/logtool.py b/misc/logtool.py deleted file mode 100644 index 9f0beeb5..00000000 --- a/misc/logtool.py +++ /dev/null @@ -1,201 +0,0 @@ -#!/usr/bin/env python - -import os.path, time, pickle -import foolscap -from foolscap import RemoteInterface -from foolscap.eventual import fireEventually -from foolscap.schema import DictOf, Any -from twisted.internet import reactor, defer -from zope.interface import implements -from twisted.python import usage -#from twisted.python import log -#import sys -#log.startLogging(sys.stderr) - -class Options(usage.Options): - longdesc = """ - logtool tail FURL : follow logs of the target node - logtool gather : run as a daemon, record all logs to the current directory - logtool dump FILE : dump the logs recorded by 'logtool gather' - """ - - def parseArgs(self, mode, *args): - self.mode = mode - if mode == "tail": - target = args[0] - if target.startswith("pb:"): - self.target_furl = target - elif os.path.isfile(target): - self.target_furl = open(target, "r").read().strip() - elif os.path.isdir(target): - fn = os.path.join(target, "logport.furl") - self.target_furl = open(fn, "r").read().strip() - else: - raise ValueError("Can't use tail target: %s" % target) - elif mode == "dump": - self.dumpfile = args[0] - - -class RILogObserver(RemoteInterface): - def msg(logmsg=DictOf(str, Any())): - return None -class RISubscription(RemoteInterface): - pass - 
-class RILogPublisher(RemoteInterface): - def get_versions(): - return DictOf(str, str) - def subscribe_to_all(observer=RILogObserver): - return RISubscription - def unsubscribe(subscription=Any()): - # I don't know how to get the constraint right: unsubscribe() should - # accept return value of subscribe_to_all() - return None - -class RILogGatherer(RemoteInterface): - def logport(nodeid=str, logport=RILogPublisher): - return None - -class LogPrinter(foolscap.Referenceable): - implements(RILogObserver) - - def remote_msg(self, d): - print d - -class LogTail: - - def start(self, target_furl): - print "Connecting.." - d = defer.maybeDeferred(self.setup_tub) - d.addCallback(self._tub_ready, target_furl) - return d - - def setup_tub(self): - self._tub = foolscap.Tub() - self._tub.startService() - - def _tub_ready(self, res, target_furl): - d = self._tub.getReference(target_furl) - d.addCallback(self._got_logpublisher) - return d - - def _got_logpublisher(self, publisher): - print "Connected" - lp = LogPrinter() - d = publisher.callRemote("subscribe_to_all", lp) - return d - - def remote_msg(self, d): - print d - -class LogSaver(foolscap.Referenceable): - implements(RILogObserver) - def __init__(self, nodeid, savefile): - self.nodeid = nodeid - self.f = savefile - - def remote_msg(self, d): - e = {"from": self.nodeid, - "rx_time": time.time(), - "d": d, - } - pickle.dump(e, self.f) - - def disconnected(self): - del self.f - from allmydata.util.idlib import shortnodeid_b2a - print "LOGPORT CLOSED", shortnodeid_b2a(self.nodeid) - -class LogGatherer(foolscap.Referenceable): - implements(RILogGatherer) - - def start(self, res): - self._savefile = open("logs.pickle", "ab", 0) - d = self.setup_tub() - d.addCallback(self._tub_ready) - return d - - def setup_tub(self): - from allmydata.util import iputil - self._tub = foolscap.Tub(certFile="gatherer.pem") - self._tub.startService() - portnumfile = "portnum" - try: - portnum = int(open(portnumfile, "r").read()) - except 
(EnvironmentError, ValueError): - portnum = 0 - self._tub.listenOn("tcp:%d" % portnum) - d = defer.maybeDeferred(iputil.get_local_addresses_async) - d.addCallback(self._set_location) - return d - - def _set_location(self, local_addresses): - l = self._tub.getListeners()[0] - portnum = l.getPortnum() - portnumfile = "portnum" - open(portnumfile, "w").write("%d\n" % portnum) - local_addresses = [ "%s:%d" % (addr, portnum,) - for addr in local_addresses ] - location = ",".join(local_addresses) - self._tub.setLocation(location) - - def _tub_ready(self, res): - me = self._tub.registerReference(self, furlFile="log_gatherer.furl") - print "Gatherer waiting at:", me - - def remote_logport(self, nodeid, publisher): - from allmydata.util.idlib import shortnodeid_b2a - short = shortnodeid_b2a(nodeid) - print "GOT LOGPORT", short - ls = LogSaver(nodeid, self._savefile) - publisher.callRemote("subscribe_to_all", ls) - publisher.notifyOnDisconnect(ls.disconnected) - -class LogDumper: - def start(self, options): - from allmydata.util.idlib import shortnodeid_b2a - fn = options.dumpfile - f = open(fn, "rb") - while True: - try: - e = pickle.load(f) - short = shortnodeid_b2a(e['from']) - when = e['rx_time'] - print "%s %r: %r" % (short, when, e['d']) - except EOFError: - break - -class LogTool: - - def run(self, options): - mode = options.mode - if mode == "tail": - lt = LogTail() - d = fireEventually(options.target_furl) - d.addCallback(lt.start) - d.addErrback(self._error) - print "starting.." - reactor.run() - elif mode == "gather": - lg = LogGatherer() - d = fireEventually() - d.addCallback(lg.start) - d.addErrback(self._error) - print "starting.." 
- reactor.run() - elif mode == "dump": - ld = LogDumper() - ld.start(options) - else: - print "unknown mode '%s'" % mode - raise NotImplementedError - - def _error(self, f): - print "ERROR", f - reactor.stop() - -if __name__ == '__main__': - o = Options() - o.parseOptions() - lt = LogTool() - lt.run(o) diff --git a/misc/make-canary-files.py b/misc/make-canary-files.py deleted file mode 100644 index 44f0348a..00000000 --- a/misc/make-canary-files.py +++ /dev/null @@ -1,151 +0,0 @@ -#!/usr/bin/env python - -""" -Given a list of nodeids and a 'convergence' file, create a bunch of files -that will (when encoded at k=1,N=1) be uploaded to specific nodeids. - -Run this as follows: - - make-canary-files.py -c PATH/TO/convergence -n PATH/TO/nodeids -k 1 -N 1 - -It will create a directory named 'canaries', with one file per nodeid named -'$NODEID-$NICKNAME.txt', that contains some random text. - -The 'nodeids' file should contain one base32 nodeid per line, followed by the -optional nickname, like: - ---- -5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo server12 -vb7vm2mneyid5jbyvcbk2wb5icdhwtun server13 -... ---- - -The resulting 'canaries/5yyqu2hbvbh3rgtsgxrmmg4g77b6p3yo-server12.txt' file -will, when uploaded with the given (convergence,k,N) pair, have its first -share placed on the 5yyq/server12 storage server. If N>1, the other shares -will be placed elsewhere, of course. - -This tool can be useful to construct a set of 'canary' files, which can then -be uploaded to storage servers, and later downloaded to test a grid's health. -If you are able to download the canary for server12 via some tahoe node X, -then the following properties are known to be true: - - node X is running, and has established a connection to server12 - server12 is running, and returning data for at least the given file - -Using k=1/N=1 creates a separate test for each server. The test process is -then to download the whole directory of files (perhaps with a t=deep-check -operation). 
- -Alternatively, you could upload with the usual k=3/N=10 and then move/delete -shares to put all N shares on a single server. - -Note that any changes to the nodeid list will affect the placement of shares. -Shares should be uploaded with the same nodeid list as this tool used when -constructing the files. - -Also note that this tool uses the Tahoe codebase, so it should be run on a -system where Tahoe is installed, or in a source tree with setup.py like this: - - setup.py run_with_pythonpath -p -c 'misc/make-canary-files.py ARGS..' -""" - -import os, sha -from twisted.python import usage -from allmydata.immutable import upload -from allmydata.util import base32 - -class Options(usage.Options): - optParameters = [ - ("convergence", "c", None, "path to NODEDIR/private/convergence"), - ("nodeids", "n", None, "path to file with one base32 nodeid per line"), - ("k", "k", 1, "number of necessary shares, defaults to 1", int), - ("N", "N", 1, "number of total shares, defaults to 1", int), - ] - optFlags = [ - ("verbose", "v", "Be noisy"), - ] - -opts = Options() -opts.parseOptions() - -verbose = bool(opts["verbose"]) - -nodes = {} -for line in open(opts["nodeids"], "r").readlines(): - line = line.strip() - if not line or line.startswith("#"): - continue - pieces = line.split(None, 1) - if len(pieces) == 2: - nodeid_s, nickname = pieces - else: - nodeid_s = pieces[0] - nickname = None - nodeid = base32.a2b(nodeid_s) - nodes[nodeid] = nickname - -if opts["k"] != 3 or opts["N"] != 10: - print "note: using non-default k/N requires patching the Tahoe code" - print "src/allmydata/client.py line 55, DEFAULT_ENCODING_PARAMETERS" - -convergence_file = os.path.expanduser(opts["convergence"]) -convergence_s = open(convergence_file, "rb").read().strip() -convergence = base32.a2b(convergence_s) - -def get_permuted_peers(key): - results = [] - for nodeid in nodes: - permuted = sha.new(key + nodeid).digest() - results.append((permuted, nodeid)) - results.sort(lambda a,b: cmp(a[0], 
b[0])) - return [ r[1] for r in results ] - -def find_share_for_target(target): - target_s = base32.b2a(target) - prefix = "The first share of this file will be placed on " + target_s + "\n" - prefix += "This data is random: " - attempts = 0 - while True: - attempts += 1 - suffix = base32.b2a(os.urandom(10)) - if verbose: print " trying", suffix, - data = prefix + suffix + "\n" - assert len(data) > 55 # no LIT files - # now, what storage index will this get? - u = upload.Data(data, convergence) - eu = upload.EncryptAnUploadable(u) - d = eu.get_storage_index() # this happens to run synchronously - def _got_si(si): - if verbose: print "SI", base32.b2a(si), - peerlist = get_permuted_peers(si) - if peerlist[0] == target: - # great! - if verbose: print " yay!" - fn = base32.b2a(target) - if nodes[target]: - nickname = nodes[target].replace("/", "_") - fn += "-" + nickname - fn += ".txt" - fn = os.path.join("canaries", fn) - open(fn, "w").write(data) - return True - # nope, must try again - if verbose: print " boo" - return False - d.addCallback(_got_si) - # get sneaky and look inside the Deferred for the synchronous result - if d.result: - return attempts - -os.mkdir("canaries") -attempts = [] -for target in nodes: - target_s = base32.b2a(target) - print "working on", target_s - attempts.append(find_share_for_target(target)) -print "done" -print "%d attempts total, avg %d per target, max %d" % \ - (sum(attempts), 1.0* sum(attempts) / len(nodes), max(attempts)) - - diff --git a/misc/make_umid b/misc/make_umid deleted file mode 100644 index 60aab23a..00000000 --- a/misc/make_umid +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -"""Create a short probably-unique string for use as a umid= argument in a -Foolscap log() call, to make it easier to locate the source code that -generated the message. The main text of the log message is frequently -unhelpful for this, and python doesn't make it cheap to compile in the -filename and line number of logging calls. 
- -Given a message-unique-ID like 'aXoWcA', make your logging call look like: - - log.msg('OMG badness', level=log.WEIRD, umid='aXoWcA') - -Then later, if this message actually occurs, you can grep your source tree -for aXoWcA to locate the code that caused it. - -Just stick to the convention that 'umid=' is reserved for this job. It is a -good idea to make all the logging statements that could provoke an Incident -(i.e. those at level=log.WEIRD or higher) have umid= arguments, to make it -easier to write classifier functions for the incident-gatherer. - -""" - -''' -The following elisp code may be useful: - - (defun insert-umid () - (interactive) - (insert ", umid=\"") - (call-process "make_umid" nil t) - (delete-char -1) - (insert "\"") - ) - (global-set-key (kbd "C-\`") 'insert-umid) -''' - -# ' # emacs gets confused by the odd number of single-quotes there - -import os, base64, sys - -def make_id(): - while True: - m = os.urandom(4) # this gives 6-character message ids - m = base64.b64encode(m) - if "/" in m or "+" in m: - continue - m = m.replace("=", "") - break - return m - -count = 1 -if len(sys.argv) > 1: - count = int(sys.argv[1]) -for i in range(count): - print make_id() - diff --git a/misc/munin/tahoe-conf b/misc/munin/tahoe-conf deleted file mode 100644 index f85c0431..00000000 --- a/misc/munin/tahoe-conf +++ /dev/null @@ -1,22 +0,0 @@ -# put a copy of this file in /etc/munin/plugin-conf.d/tahoe-conf to let these -# plugins know where the node's base directories are. Modify the lines below -# to match your nodes. 
- -[tahoe-files] -env.basedir_bs5c1 /home/amduser/tahoe/bs5c1 -env.basedir_bs5c2 /home/amduser/tahoe/bs5c2 -env.basedir_bs5c3 /home/amduser/tahoe/bs5c3 -env.basedir_bs5c4 /home/amduser/tahoe/bs5c4 - -[tahoe-sharesperfile] -env.basedir_bs5c1 /home/amduser/tahoe/bs5c1 -env.basedir_bs5c2 /home/amduser/tahoe/bs5c2 -env.basedir_bs5c3 /home/amduser/tahoe/bs5c3 -env.basedir_bs5c4 /home/amduser/tahoe/bs5c4 - -[tahoe-storagespace] -env.basedir_bs5c1 /home/amduser/tahoe/bs5c1 -env.basedir_bs5c2 /home/amduser/tahoe/bs5c2 -env.basedir_bs5c3 /home/amduser/tahoe/bs5c3 -env.basedir_bs5c4 /home/amduser/tahoe/bs5c4 - diff --git a/misc/munin/tahoe-stats.plugin-conf b/misc/munin/tahoe-stats.plugin-conf deleted file mode 100644 index 2084c65f..00000000 --- a/misc/munin/tahoe-stats.plugin-conf +++ /dev/null @@ -1,12 +0,0 @@ -[tahoe_storage_allocated] -env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle -[tahoe_storage_consumed] -env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle -[tahoe_runtime_load_avg] -env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle -[tahoe_runtime_load_peak] -env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle -[tahoe_storage_bytes_added] -env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle -[tahoe_storage_bytes_freed] -env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle diff --git a/misc/munin/tahoe_cpu_watcher b/misc/munin/tahoe_cpu_watcher deleted file mode 100644 index bd349cb2..00000000 --- a/misc/munin/tahoe_cpu_watcher +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python - -import os, sys, re -import urllib -import simplejson - -url = os.environ["url"] -current = simplejson.loads(urllib.urlopen(url).read()) - -configinfo = """\ -graph_title Tahoe CPU Usage -graph_vlabel CPU % -graph_category tahoe -graph_info This graph shows the 5min average of CPU usage for each process -""" -data = "" - -for (name, avg1, avg5, avg15) in current: - dataname = re.sub(r'[^\w]', '_', name) - 
configinfo += dataname + ".label " + name + "\n" - configinfo += dataname + ".draw LINE2\n" - if avg5 is not None: - data += dataname + ".value %.2f\n" % (100.0 * avg5) - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) -print data.rstrip() diff --git a/misc/munin/tahoe_diskleft b/misc/munin/tahoe_diskleft deleted file mode 100644 index fc568bbe..00000000 --- a/misc/munin/tahoe_diskleft +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python - -# This is a munin plugin which pulls data from the server in -# misc/spacetime/diskwatcher.tac . It produces a graph of how much free space -# is left on all disks across the grid. The plugin should be configured with -# env_url= pointing at the diskwatcher.tac webport. - -import os, sys, urllib, simplejson - -if len(sys.argv) > 1 and sys.argv[1] == "config": - print """\ -graph_title Tahoe Remaining Disk Space -graph_vlabel bytes remaining -graph_category tahoe -graph_info This graph shows the total amount of disk space left available in the grid -disk_left.label disk left -disk_left.draw LINE1""" - sys.exit(0) - -url = os.environ["url"] -data = simplejson.load(urllib.urlopen(url))["available"] -print "disk_left.value", data diff --git a/misc/munin/tahoe_disktotal b/misc/munin/tahoe_disktotal deleted file mode 100644 index a9fb04e5..00000000 --- a/misc/munin/tahoe_disktotal +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python - -# This is a munin plugin which pulls data from the server in -# misc/spacetime/diskwatcher.tac . It produces a graph of how much space is -# present on all disks across the grid, and how much space is actually being -# used. The plugin should be configured with env_url= pointing at the -# diskwatcher.tac webport. 
- -import os, sys, urllib, simplejson - -if len(sys.argv) > 1 and sys.argv[1] == "config": - print """\ -graph_title Tahoe Total Disk Space -graph_vlabel bytes -graph_category tahoe -graph_info This graph shows the total amount of disk space present in the grid, and how much of it is currently being used. -disk_total.label disk total -disk_total.draw LINE2 -disk_used.label disk used -disk_used.draw LINE1""" - sys.exit(0) - -url = os.environ["url"] -data = simplejson.load(urllib.urlopen(url)) -print "disk_total.value", data["total"] -print "disk_used.value", data["used"] diff --git a/misc/munin/tahoe_diskusage b/misc/munin/tahoe_diskusage deleted file mode 100644 index 9810220f..00000000 --- a/misc/munin/tahoe_diskusage +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python - -# This is a munin plugin which pulls data from the server in -# misc/spacetime/diskwatcher.tac . It produces a graph of how much disk space -# is being used per unit time. The plugin should be configured with env_url= -# pointing at the diskwatcher.tac webport. 
- -import os, sys, urllib, simplejson - -if len(sys.argv) > 1 and sys.argv[1] == "config": - print """\ -graph_title Tahoe Disk Usage Measurement -graph_vlabel bytes per second -graph_category tahoe -graph_info This graph shows the estimated disk usage per unit time, totalled across all storage servers -graph_args --lower-limit 0 --rigid -rate_1hr.label (one hour sample) -rate_1hr.draw LINE1 -rate_1day.label (one day sample) -rate_1day.draw LINE1 -rate_2wk.label (two week sample) -rate_2wk.draw LINE2 -rate_4wk.label (four week sample) -rate_4wk.draw LINE2""" - sys.exit(0) - -url = os.environ["url"] -timespans = simplejson.load(urllib.urlopen(url))["rates"] - -data = dict([(name, growth) - for (name, timespan, growth, timeleft) in timespans]) -# growth is in bytes per second -if "1hr" in data: - print "rate_1hr.value", data["1hr"] -if "1day" in data: - print "rate_1day.value", data["1day"] -if "2wk" in data: - print "rate_2wk.value", data["2wk"] -if "4wk" in data: - print "rate_4wk.value", data["4wk"] diff --git a/misc/munin/tahoe_diskused b/misc/munin/tahoe_diskused deleted file mode 100644 index 50de687e..00000000 --- a/misc/munin/tahoe_diskused +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python - -# This is a munin plugin which pulls data from the server in -# misc/spacetime/diskwatcher.tac . It produces a graph of how much space is -# used on all disks across the grid. The plugin should be configured with -# env_url= pointing at the diskwatcher.tac webport. 
- -import os, sys, urllib, simplejson - -if len(sys.argv) > 1 and sys.argv[1] == "config": - print """\ -graph_title Tahoe Total Disk Space Used -graph_vlabel bytes used -graph_category tahoe -graph_info This graph shows the total amount of disk space used across the grid -disk_used.label disk used -disk_used.draw LINE1""" - sys.exit(0) - -url = os.environ["url"] -data = simplejson.load(urllib.urlopen(url))["used"] -print "disk_used.value", data diff --git a/misc/munin/tahoe_doomsday b/misc/munin/tahoe_doomsday deleted file mode 100644 index c8ab1c0a..00000000 --- a/misc/munin/tahoe_doomsday +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/env python - -# This is a munin plugin which pulls data from the server in -# misc/spacetime/diskwatcher.tac . It produces a graph of how much time is -# left before the grid fills up. The plugin should be configured with -# env_url= pointing at the diskwatcher.tac webport. - -import os, sys, urllib, simplejson - -if len(sys.argv) > 1 and sys.argv[1] == "config": - print """\ -graph_title Tahoe Remaining Time Predictor -graph_vlabel days remaining -graph_category tahoe -graph_info This graph shows the estimated number of days left until storage space is exhausted -days_1hr.label days left (one hour sample) -days_1hr.draw LINE1 -days_1day.label days left (one day sample) -days_1day.draw LINE1 -days_2wk.label days left (two week sample) -days_2wk.draw LINE2 -days_4wk.label days left (four week sample) -days_4wk.draw LINE2""" - sys.exit(0) - -url = os.environ["url"] -timespans = simplejson.load(urllib.urlopen(url))["rates"] - -data = dict([(name, timeleft) - for (name, timespan, growth, timeleft) in timespans - if timeleft]) -# timeleft is in seconds -DAY = 24*60*60 -if "1hr" in data: - print "days_1hr.value", data["1hr"]/DAY -if "1day" in data: - print "days_1day.value", data["1day"]/DAY -if "2wk" in data: - print "days_2wk.value", data["2wk"]/DAY -if "4wk" in data: - print "days_4wk.value", data["4wk"]/DAY diff --git 
a/misc/munin/tahoe_estimate_files b/misc/munin/tahoe_estimate_files deleted file mode 100644 index 249565e4..00000000 --- a/misc/munin/tahoe_estimate_files +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python - -import sys, os.path - -if len(sys.argv) > 1 and sys.argv[1] == "config": - print """\ -graph_title Tahoe File Estimate -graph_vlabel files -graph_category tahoe -graph_info This graph shows the estimated number of files and directories present in the grid -files.label files -files.draw LINE2""" - sys.exit(0) - -# Edit this to point at some subset of storage directories. -node_dirs = [os.path.expanduser("~amduser/prodnet/storage1"), - os.path.expanduser("~amduser/prodnet/storage2"), - os.path.expanduser("~amduser/prodnet/storage3"), - os.path.expanduser("~amduser/prodnet/storage4"), - ] - -sections = ["aa", "ab", "ac", "ad", "ae", "af", "ag", "ah", "ai", "aj"] -# and edit this to reflect your default encoding's "total_shares" value, and -# the total number of servers. -N = 10 -num_servers = 20 - -index_strings = set() -for base in node_dirs: - for section in sections: - sampledir = os.path.join(base, "storage", "shares", section) - indices = os.listdir(sampledir) - index_strings.update(indices) -unique_strings = len(index_strings) - -# the chance that any given file appears on any given server -chance = 1.0 * N / num_servers - -# the chance that the file does *not* appear on the servers that we're -# examining -no_chance = (1-chance) ** len(node_dirs) - -# if a file has a 25% chance of not appearing in our sample, then we need to -# raise our estimate by (1.25/1) -correction = 1+no_chance -#print "correction", correction - -files = unique_strings * (32*32/len(sections)) * correction -print "files.value %d" % int(files) diff --git a/misc/munin/tahoe_files b/misc/munin/tahoe_files deleted file mode 100644 index e68b59da..00000000 --- a/misc/munin/tahoe_files +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python - -# This is a munin plugin to track the number 
of files that each node's -# StorageServer is holding on behalf of other nodes. Each file that has been -# uploaded to the mesh (and has shares present on this node) will be counted -# here. When there are <= 100 nodes in the mesh, this count will equal the -# total number of files that are active in the entire mesh. When there are -# 200 nodes present in the mesh, it will represent about half of the total -# number. - -# Copy this plugin into /etc/munun/plugins/tahoe-files and then put -# the following in your /etc/munin/plugin-conf.d/foo file to let it know -# where to find the basedirectory for each node: -# -# [tahoe-files] -# env.basedir_NODE1 /path/to/node1 -# env.basedir_NODE2 /path/to/node2 -# env.basedir_NODE3 /path/to/node3 -# - -import os, sys - -nodedirs = [] -for k,v in os.environ.items(): - if k.startswith("basedir_"): - nodename = k[len("basedir_"):] - nodedirs.append( (nodename, v) ) -nodedirs.sort() - -configinfo = \ -"""graph_title Allmydata Tahoe Filecount -graph_vlabel files -graph_category tahoe -graph_info This graph shows the number of files hosted by this node's StorageServer -""" - -for nodename, basedir in nodedirs: - configinfo += "%s.label %s\n" % (nodename, nodename) - configinfo += "%s.draw LINE2\n" % (nodename,) - - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - -for nodename, basedir in nodedirs: - files = len(os.listdir(os.path.join(basedir, "storage", "shares"))) - if os.path.exists(os.path.join(basedir, "storage", "shares", "incoming")): - files -= 1 # the 'incoming' directory doesn't count - print "%s.value %d" % (nodename, files) - diff --git a/misc/munin/tahoe_helperstats_active b/misc/munin/tahoe_helperstats_active deleted file mode 100644 index 3265e5f0..00000000 --- a/misc/munin/tahoe_helperstats_active +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python - -import os, sys -import urllib -import simplejson - -configinfo = """\ -graph_title Tahoe Helper Stats - Active Files 
-graph_vlabel bytes -graph_category tahoe -graph_info This graph shows the number of files being actively processed by the helper -fetched.label Active Files -fetched.draw LINE2 -""" - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - -url = os.environ["url"] - -data = simplejson.loads(urllib.urlopen(url).read()) -print "fetched.value %d" % data["chk_upload_helper.active_uploads"] - diff --git a/misc/munin/tahoe_helperstats_fetched b/misc/munin/tahoe_helperstats_fetched deleted file mode 100644 index 1f807beb..00000000 --- a/misc/munin/tahoe_helperstats_fetched +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python - -import os, sys -import urllib -import simplejson - -configinfo = """\ -graph_title Tahoe Helper Stats - Bytes Fetched -graph_vlabel bytes -graph_category tahoe -graph_info This graph shows the amount of data being fetched by the helper -fetched.label Bytes Fetched -fetched.type GAUGE -fetched.draw LINE1 -fetched.min 0 -""" - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - -url = os.environ["url"] - -data = simplejson.loads(urllib.urlopen(url).read()) -print "fetched.value %d" % data["chk_upload_helper.fetched_bytes"] diff --git a/misc/munin/tahoe_introstats b/misc/munin/tahoe_introstats deleted file mode 100644 index 556762fb..00000000 --- a/misc/munin/tahoe_introstats +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python - -import os, sys -import urllib -import simplejson - -configinfo = """\ -graph_title Tahoe Introducer Stats -graph_vlabel hosts -graph_category tahoe -graph_info This graph shows the number of hosts announcing and subscribing to various services -storage_server.label Storage Servers -storage_server.draw LINE1 -storage_hosts.label Distinct Storage Hosts -storage_hosts.draw LINE1 -storage_client.label Clients -storage_client.draw LINE2 -""" - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - 
-url = os.environ["url"] - -data = simplejson.loads(urllib.urlopen(url).read()) -print "storage_server.value %d" % data["announcement_summary"]["storage"] -print "storage_hosts.value %d" % data["announcement_distinct_hosts"]["storage"] -print "storage_client.value %d" % data["subscription_summary"]["storage"] - diff --git a/misc/munin/tahoe_nodememory b/misc/munin/tahoe_nodememory deleted file mode 100644 index fd3f8b0a..00000000 --- a/misc/munin/tahoe_nodememory +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python - -# This munin plugin isolates processes by looking for the 'pid' file created -# by 'allmydata start', then extracts the amount of memory they consume (both -# VmSize and VmRSS) from /proc - -import os, sys, re - -if 0: - # for testing - os.environ["nodememory_warner1"] = "run/warner1" - os.environ["nodememory_warner2"] = "run/warner2" - -nodedirs = [] -for k,v in os.environ.items(): - if k.startswith("nodememory_"): - nodename = k[len("nodememory_"):] - nodedirs.append((nodename, v)) -nodedirs.sort(lambda a,b: cmp(a[0],b[0])) - -pids = {} - -for node,nodedir in nodedirs: - pidfile = os.path.join(nodedir, "twistd.pid") - if os.path.exists(pidfile): - pid = int(open(pidfile,"r").read()) - pids[node] = pid - -fields = ["VmSize", "VmRSS"] - - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - configinfo = \ - """graph_title Memory Consumed by Nodes -graph_vlabel bytes -graph_category Tahoe -graph_info This graph shows the memory used by specific processes -""" - for nodename,nodedir in nodedirs: - for f in fields: - configinfo += "%s_%s.label %s used by %s\n" % (nodename, f, - f, nodename) - linetype = "LINE1" - if f == "VmSize": - linetype = "LINE2" - configinfo += "%s_%s.draw %s\n" % (nodename, f, linetype) - if f == "VmData": - configinfo += "%s_%s.graph no\n" % (nodename, f) - - print configinfo - sys.exit(0) - -nodestats = {} -for node,pid in pids.items(): - stats = {} - statusfile = "/proc/%s/status" % pid - if not os.path.exists(statusfile): - 
continue - for line in open(statusfile,"r").readlines(): - for f in fields: - if line.startswith(f + ":"): - m = re.search(r'(\d+)', line) - stats[f] = int(m.group(1)) - nodestats[node] = stats - -for node,stats in nodestats.items(): - for f,value in stats.items(): - # TODO: not sure if /proc/%d/status means 1000 or 1024 when it says - # 'kB' - print "%s_%s.value %d" % (node, f, 1024*value) diff --git a/misc/munin/tahoe_overhead b/misc/munin/tahoe_overhead deleted file mode 100644 index 266fe939..00000000 --- a/misc/munin/tahoe_overhead +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python - -# This is a munin plugin which pulls total-used data from the server in -# misc/spacetime/diskwatcher.tac, and a total-deep-size number from custom -# PHP database-querying scripts on a different server. It produces a graph of -# how much garbage/overhead is present in the grid: the ratio of total-used -# over (total-deep-size*N/k), expressed as a percentage. No overhead would be -# 0, using twice as much space as we'd prefer would be 100. This is the -# percentage which could be saved if we made GC work perfectly and reduced -# other forms of overhead to zero. This script assumes 3-of-10. - -# A second graph is produced with how much of the total-deep-size number -# would be saved if we removed data from inactive accounts. This is also on a -# percentage scale. - -# A separate number (without a graph) is produced with the "effective -# expansion factor". If there were no overhead, with 3-of-10, this would be -# 3.33 . 
- -# Overhead is caused by the following problems (in order of size): -# uncollected garbage: files that are no longer referenced but not yet deleted -# inactive accounts: files that are referenced by cancelled accounts -# share storage overhead: bucket directories -# filesystem overhead: 4kB minimum block sizes -# share overhead: hashes, pubkeys, lease information - -# This plugin should be configured with env_diskwatcher_url= pointing at the -# diskwatcher.tac webport, and env_deepsize_url= pointing at the PHP script. - -import os, sys, urllib, simplejson - -if len(sys.argv) > 1 and sys.argv[1] == "config": - print """\ -graph_title Tahoe Overhead Calculator -graph_vlabel Percentage -graph_category tahoe -graph_info This graph shows the estimated amount of storage overhead (ratio of actual disk usage to ideal disk usage). The 'overhead' number is how much space we could save if we implemented GC, and the 'inactive' number is how much additional space we could save if we could delete data for cancelled accounts. 
-overhead.label disk usage overhead -overhead.draw LINE2 -inactive.label inactive account usage -inactive.draw LINE1 -effective_expansion.label Effective Expansion Factor -effective_expansion.graph no""" - sys.exit(0) - -diskwatcher_url = os.environ["diskwatcher_url"] -total = simplejson.load(urllib.urlopen(diskwatcher_url))["used"] -deepsize_url = os.environ["deepsize_url"] -deepsize = simplejson.load(urllib.urlopen(deepsize_url)) -k = 3; N = 10 -expansion = float(N) / k - -ideal = expansion * deepsize["all"] -overhead = (total - ideal) / ideal -if overhead > 0: - # until all the storage-servers come online, this number will be nonsense - print "overhead.value %f" % (100.0 * overhead) - - # same for this one - effective_expansion = total / deepsize["all"] - print "effective_expansion.value %f" % effective_expansion - -# this value remains valid, though -inactive_savings = (deepsize["all"] - deepsize["active"]) / deepsize["active"] -print "inactive.value %f" % (100.0 * inactive_savings) diff --git a/misc/munin/tahoe_rootdir_space b/misc/munin/tahoe_rootdir_space deleted file mode 100644 index c9acb3cb..00000000 --- a/misc/munin/tahoe_rootdir_space +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python - -import os, sys -import urllib -import simplejson - -configinfo = """\ -graph_title Tahoe Root Directory Size -graph_vlabel bytes -graph_category tahoe -graph_info This graph shows the amount of space consumed by all files reachable from a given directory -space.label Space -space.draw LINE2 -""" - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - -url = os.environ["url"] - -data = int(urllib.urlopen(url).read().strip()) -print "space.value %d" % data - - - diff --git a/misc/munin/tahoe_server_latency_ b/misc/munin/tahoe_server_latency_ deleted file mode 100644 index a63f902c..00000000 --- a/misc/munin/tahoe_server_latency_ +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python - -# retrieve a latency statistic for a 
given operation and percentile from a -# set of storage servers. - -# the OPERATION value should come from the following list: -# allocate: allocate_buckets, first step to upload an immutable file -# write: write data to an immutable share -# close: finish writing to an immutable share -# cancel: abandon a partial immutable share -# get: get_buckets, first step to download an immutable file -# read: read data from an immutable share -# writev: slot_testv_and_readv_and_writev, modify/create a directory -# readv: read a directory (or mutable file) - -# the PERCENTILE value should come from the following list: -# 01_0: 1% -# 10_0: 10% -# 50_0: 50% (median) -# 90_0: 90% -# 99_0: 99% -# 99_9: 99.9% -# mean: - -# To use this, create a symlink from -# /etc/munin/plugins/tahoe_server_latency_OPERATION_PERCENTILE to this -# script. For example: - -# ln -s /usr/share/doc/allmydata-tahoe/munin/tahoe_server_latency_ \ -# /etc/munin/plugins/tahoe_server_latency_allocate_99_9 - -# Also, you will need to put a list of node statistics URLs in the plugin's -# environment, by adding a stanza like the following to a file in -# /etc/munin/plugin-conf.d/, such as /etc/munin/plugin-conf.d/tahoe_latencies: -# -# [tahoe_server_latency*] -# env.url_storage1 http://localhost:9011/statistics?t=json -# env.url_storage2 http://localhost:9012/statistics?t=json -# env.url_storage3 http://localhost:9013/statistics?t=json -# env.url_storage4 http://localhost:9014/statistics?t=json - -# of course, these URLs must match the webports you have configured into the -# storage nodes. 
- -import os, sys -import urllib -import simplejson - -node_urls = [] -for k,v in os.environ.items(): - if k.startswith("url_"): - nodename = k[len("url_"):] - node_urls.append( (nodename, v) ) -node_urls.sort() - -my_name = os.path.basename(sys.argv[0]) -PREFIX = "tahoe_server_latency_" -assert my_name.startswith(PREFIX) -my_name = my_name[len(PREFIX):] -(operation, percentile) = my_name.split("_", 1) -if percentile == "mean": - what = "mean" -else: - what = percentile.replace("_", ".") + "th percentile" - -configinfo = \ -"""graph_title Tahoe Server '%(operation)s' Latency (%(what)s) -graph_vlabel seconds -graph_category tahoe -graph_info This graph shows how long '%(operation)s' operations took on the storage server, the %(what)s delay between message receipt and response generation, calculated over the last thousand operations. -""" % {'operation': operation, - 'what': what} - -for nodename, url in node_urls: - configinfo += "%s.label %s\n" % (nodename, nodename) - configinfo += "%s.draw LINE2\n" % (nodename,) - - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - -for nodename, url in node_urls: - data = simplejson.loads(urllib.urlopen(url).read()) - if percentile == "mean": - p_key = "mean" - else: - p_key = percentile + "_percentile" - key = "storage_server.latencies.%s.%s" % (operation, p_key) - value = data["stats"][key] - print "%s.value %s" % (nodename, value) - diff --git a/misc/munin/tahoe_server_operations_ b/misc/munin/tahoe_server_operations_ deleted file mode 100644 index 3f624757..00000000 --- a/misc/munin/tahoe_server_operations_ +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python - -# graph operations-per-second from a set of storage servers. 
- -# the OPERATION value should come from the following list: -# allocate: allocate_buckets, first step to upload an immutable file -# write: write data to an immutable share -# close: finish writing to an immutable share -# cancel: abandon a partial immutable share -# get: get_buckets, first step to download an immutable file -# read: read data from an immutable share -# writev: slot_testv_and_readv_and_writev, modify/create a directory -# readv: read a directory (or mutable file) - -# To use this, create a symlink from -# /etc/munin/plugins/tahoe_server_operations_OPERATION to this script. For -# example: - -# ln -s /usr/share/doc/allmydata-tahoe/munin/tahoe_server_operations_ \ -# /etc/munin/plugins/tahoe_server_operations_allocate - -# Also, you will need to put a list of node statistics URLs in the plugin's -# environment, by adding a stanza like the following to a file in -# /etc/munin/plugin-conf.d/, such as /etc/munin/plugin-conf.d/tahoe_operations: -# -# [tahoe_server_operations*] -# env.url_storage1 http://localhost:9011/statistics?t=json -# env.url_storage2 http://localhost:9012/statistics?t=json -# env.url_storage3 http://localhost:9013/statistics?t=json -# env.url_storage4 http://localhost:9014/statistics?t=json - -# of course, these URLs must match the webports you have configured into the -# storage nodes. 
- -import os, sys -import urllib -import simplejson - -node_urls = [] -for k,v in os.environ.items(): - if k.startswith("url_"): - nodename = k[len("url_"):] - node_urls.append( (nodename, v) ) -node_urls.sort() - -my_name = os.path.basename(sys.argv[0]) -PREFIX = "tahoe_server_operations_" -assert my_name.startswith(PREFIX) -operation = my_name[len(PREFIX):] - -configinfo = \ -"""graph_title Tahoe Server '%(operation)s' Operations -graph_vlabel ops per second -graph_category tahoe -graph_info This graph shows how many '%(operation)s' operations take place on the storage server -""" % {'operation': operation} - -for nodename, url in node_urls: - configinfo += "%s.label %s\n" % (nodename, nodename) - configinfo += "%s.type DERIVE\n" % (nodename,) - configinfo += "%s.min 0\n" % (nodename,) - configinfo += "%s.draw LINE2\n" % (nodename,) - - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - -for nodename, url in node_urls: - data = simplejson.loads(urllib.urlopen(url).read()) - key = "storage_server.%s" % operation - value = data["counters"][key] - print "%s.value %s" % (nodename, value) - diff --git a/misc/munin/tahoe_spacetime b/misc/munin/tahoe_spacetime deleted file mode 100644 index 520a5fab..00000000 --- a/misc/munin/tahoe_spacetime +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env python - -# copy .rrd files from a remote munin master host, sum the 'df' stats from a -# list of hosts, use them to estimate a rate-of-change for the past month, -# then extrapolate to guess how many weeks/months/years of storage space we -# have left, and output it to another munin graph - -import sys, os, time -import rrdtool - -MUNIN_HOST = "munin.allmydata.com" -PREFIX = "%s:/var/lib/munin/prodtahoe/" % MUNIN_HOST -FILES = [ "prodtahoe%d.allmydata.com-df-_dev_sd%s3-g.rrd" % (a,b) - for a in (1,2,3,4,5) - for b in ("a", "b", "c", "d") - ] -REMOTEFILES = [ PREFIX + f for f in FILES ] -LOCALFILES = ["/var/lib/munin/prodtahoe/" + f for f 
in FILES ] -WEBFILE = "/var/www/tahoe/spacetime.json" - - -def rsync_rrd(): - # copy the RRD files from your munin master host to a local one - cmd = "rsync %s rrds/" % (" ".join(REMOTEFILES)) - rc = os.system(cmd) - assert rc == 0, rc - -def format_time(t): - return time.strftime("%b %d %H:%M", time.localtime(t)) - -def predict_future(past_s): - - start_df = [] - end_df = [] - durations = [] - - for fn in LOCALFILES: - d = rrdtool.fetch(fn, "AVERAGE", "-s", "-"+past_s, "-e", "-1hr") - # ((start, end, step), (name1, name2, ...), [(data1, data2, ..), ...]) - (start_time, end_time ,step) = d[0] - #print format_time(start_time), " - ", format_time(end_time), step - names = d[1] - #for points in d[2]: - # point = points[0] - # print point - start_space = d[2][0][0] - if start_space is None: - return None - # I don't know why, but the last few points are always bogus. Running - # 'rrdtool fetch' on the command line is usually ok.. I blame the python - # bindinds. - end_space = d[2][-4][0] - if end_space is None: - return None - end_time = end_time - (4*step) - start_df.append(start_space) - end_df.append(end_space) - durations.append(end_time - start_time) - - avg_start_df = sum(start_df) / len(start_df) - avg_end_df = sum(end_df) / len(end_df) - avg_duration = sum(durations) / len(durations) - #print avg_start_df, avg_end_df, avg_duration - - rate = (avg_end_df - avg_start_df) / avg_duration - #print "Rate", rate, " %/s" - #print "measured over", avg_duration / 86400, "days" - remaining = 100 - avg_end_df - remaining_seconds = remaining / rate - #print "remaining seconds", remaining_seconds - remaining_days = remaining_seconds / 86400 - #print "remaining days", remaining_days - return remaining_days - -def write_to_file(samples): - # write a JSON-formatted dictionary - f = open(WEBFILE + ".tmp", "w") - f.write("{ ") - f.write(", ".join(['"%s": %s' % (k, samples[k]) - for k in sorted(samples.keys())])) - f.write("}\n") - f.close() - os.rename(WEBFILE + ".tmp", WEBFILE) 
- -if len(sys.argv) > 1 and sys.argv[1] == "config": - print """\ -graph_title Tahoe Remaining Space Predictor -graph_vlabel days remaining -graph_category tahoe -graph_info This graph shows the estimated number of days left until storage space is exhausted -days_2wk.label days left (2wk sample) -days_2wk.draw LINE2 -days_4wk.label days left (4wk sample) -days_4wk.draw LINE2""" - sys.exit(0) - -#rsync_rrd() -samples = {} -remaining_4wk = predict_future("4wk") -if remaining_4wk is not None: - print "days_4wk.value", remaining_4wk - samples["remaining_4wk"] = remaining_4wk -remaining_2wk = predict_future("2wk") -if remaining_2wk is not None: - print "days_2wk.value", remaining_2wk - samples["remaining_2wk"] = remaining_2wk -write_to_file(samples) - diff --git a/misc/munin/tahoe_stats b/misc/munin/tahoe_stats deleted file mode 100644 index bd03aaea..00000000 --- a/misc/munin/tahoe_stats +++ /dev/null @@ -1,474 +0,0 @@ -#!/usr/bin/env python - -import os -import pickle -import re -import sys -import time - -STAT_VALIDITY = 300 # 5min limit on reporting stats - -PLUGINS = { - # LOAD AVERAGE - 'tahoe_runtime_load_avg': - { 'statid': 'load_monitor.avg_load', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Runtime Load Average', - 'graph_vlabel load', - 'graph_category tahoe', - 'graph_info This graph shows average reactor delay', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_runtime_load_peak': - { 'statid': 'load_monitor.max_load', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Runtime Load Peak', - 'graph_vlabel load', - 'graph_category tahoe', - 'graph_info This graph shows peak reactor delay', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - # STORAGE ALLOCATION (BYTES) - 
'tahoe_storage_consumed': - { 'statid': 'storage_server.consumed', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Storage Server Space Consumed', - 'graph_vlabel bytes', - 'graph_category tahoe_storage_server', - 'graph_info This graph shows space consumed', - 'graph_args --base 1024', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_storage_allocated': - { 'statid': 'storage_server.allocated', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Storage Server Space Allocated', - 'graph_vlabel bytes', - 'graph_category tahoe_storage_server', - 'graph_info This graph shows space allocated', - 'graph_args --base 1024', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_storage_bytes_added': - { 'statid': 'storage_server.bytes_added', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Storage Server Bytes Added', - 'graph_vlabel bytes', - 'graph_category tahoe_storage_server', - 'graph_info This graph shows cummulative bytes added', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_storage_bytes_freed': - { 'statid': 'storage_server.bytes_freed', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Storage Server Bytes Removed', - 'graph_vlabel bytes', - 'graph_category tahoe_storage_server', - 'graph_info This graph shows cummulative bytes removed', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_storage_operations_allocate': - { 'statid': 'storage_server.allocate', - 'category': 'counters', - 
'configheader': '\n'.join(['graph_title Tahoe Storage Server Allocate_Bucket Operations', - 'graph_vlabel operations per second', - 'graph_category tahoe_storage_server', - 'graph_info This graph shows how many allocate_buckets operations occured per second. Each immutable file upload causes one such operation per server.', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_storage_operations_get': - { 'statid': 'storage_server.get', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Storage Server get_bucket Operations', - 'graph_vlabel operations per second', - 'graph_category tahoe_storage_server', - 'graph_info This graph shows how many get_bucket operations occured per second. Each immutable file download/check causes one such operation per server.', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_storage_operations_writev': - { 'statid': 'storage_server.writev', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Storage Server writev Operations', - 'graph_vlabel operations per second', - 'graph_category tahoe_storage_server', - 'graph_info This graph shows how many writev operations occured per second. 
Each mutable file / dirnode write causes one such operation per server.', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_storage_operations_readv': - { 'statid': 'storage_server.readv', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Storage Server readv Operations', - 'graph_vlabel operations per second', - 'graph_category tahoe_storage_server', - 'graph_info This graph shows how many readv operations occured per second. Each dirnode read causes one such operation per server.', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - - # HELPER - 'tahoe_helper_incoming_files': - { 'statid': 'chk_upload_helper.incoming_count', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming File Count', - 'graph_vlabel n files', - 'graph_category tahoe_helper', - 'graph_info This graph shows number of incoming files', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_incoming_filesize': - { 'statid': 'chk_upload_helper.incoming_size', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming File Size', - 'graph_vlabel bytes', - 'graph_category tahoe_helper', - 'graph_info This graph shows total size of incoming files', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_incoming_files_old': - { 'statid': 'chk_upload_helper.incoming_size_old', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title 
Tahoe Upload Helper Incoming Old Files', - 'graph_vlabel bytes', - 'graph_category tahoe_helper', - 'graph_info This graph shows total size of old incoming files', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_helper_encoding_files': - { 'statid': 'chk_upload_helper.encoding_count', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding File Count', - 'graph_vlabel n files', - 'graph_category tahoe_helper', - 'graph_info This graph shows number of encoding files', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_encoding_filesize': - { 'statid': 'chk_upload_helper.encoding_size', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding File Size', - 'graph_vlabel bytes', - 'graph_category tahoe_helper', - 'graph_info This graph shows total size of encoding files', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_encoding_files_old': - { 'statid': 'chk_upload_helper.encoding_size_old', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding Old Files', - 'graph_vlabel bytes', - 'graph_category tahoe_helper', - 'graph_info This graph shows total size of old encoding files', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_helper_active_uploads': - { 'statid': 'chk_upload_helper.active_uploads', - 'category': 'stats', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Active Files', - 'graph_vlabel n files', - 'graph_category tahoe_helper', - 
'graph_info This graph shows number of files actively being processed by the helper', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - 'tahoe_helper_upload_requests': - { 'statid': 'chk_upload_helper.upload_requests', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Upload Requests', - 'graph_vlabel requests', - 'graph_category tahoe_helper', - 'graph_info This graph shows the number of upload requests arriving at the helper', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_upload_already_present': - { 'statid': 'chk_upload_helper.upload_already_present', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Uploads Already Present', - 'graph_vlabel requests', - 'graph_category tahoe_helper', - 'graph_info This graph shows the number of uploads whose files are already present in the grid', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_upload_need_upload': - { 'statid': 'chk_upload_helper.upload_need_upload', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Uploads Needing Upload', - 'graph_vlabel requests', - 'graph_category tahoe_helper', - 'graph_info This graph shows the number of uploads whose files are not already present in the grid', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_encoded_bytes': - { 'statid': 
'chk_upload_helper.encoded_bytes', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoded Bytes', - 'graph_vlabel bytes', - 'graph_category tahoe_helper', - 'graph_info This graph shows the number of bytes encoded by the helper', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_helper_fetched_bytes': - { 'statid': 'chk_upload_helper.fetched_bytes', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Fetched Bytes', - 'graph_vlabel bytes', - 'graph_category tahoe_helper', - 'graph_info This graph shows the number of bytes fetched by the helper', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - # WEBAPI - 'tahoe_uploader_bytes_uploaded': - { 'statid': 'uploader.bytes_uploaded', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Uploader Bytes Uploaded', - 'graph_vlabel bytes', - 'graph_category tahoe_traffic', - 'graph_info This graph shows the number of bytes uploaded', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_uploader_files_uploaded': - { 'statid': 'uploader.files_uploaded', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Uploader Bytes Uploaded', - 'graph_vlabel files', - 'graph_category tahoe_traffic', - 'graph_info This graph shows the number of files uploaded', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value 
%(value)s', - ]), - }, - 'tahoe_mutable_files_published': - { 'statid': 'mutable.files_published', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Mutable Files Published', - 'graph_vlabel files', - 'graph_category tahoe_traffic', - 'graph_info This graph shows the number of mutable files published', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - 'tahoe_mutable_files_retrieved': - { 'statid': 'mutable.files_retrieved', - 'category': 'counters', - 'configheader': '\n'.join(['graph_title Tahoe Mutable Files Retrieved', - 'graph_vlabel files', - 'graph_category tahoe_traffic', - 'graph_info This graph shows the number of files retrieved', - ]), - 'graph_config': '\n'.join(['%(name)s.label %(name)s', - '%(name)s.type DERIVE', - '%(name)s.min 0', - '%(name)s.draw LINE1', - ]), - 'graph_render': '\n'.join(['%(name)s.value %(value)s', - ]), - }, - - } - -def smash_name(name): - return re.sub('[^a-zA-Z0-9]', '_', name) - -def open_stats(fname): - f = open(fname, 'rb') - stats = pickle.load(f) - f.close() - return stats - -def main(argv): - graph_name = os.path.basename(argv[0]) - if graph_name.endswith('.py'): - graph_name = graph_name[:-3] - - plugin_conf = PLUGINS.get(graph_name) - - for k,v in os.environ.items(): - if k.startswith('statsfile'): - stats_file = v - break - else: - raise RuntimeError("No 'statsfile' env var found") - - stats = open_stats(stats_file) - - now = time.time() - def output_nodes(output_section, check_time): - for tubid, nodestats in stats.items(): - if check_time and (now - nodestats.get('timestamp', 0)) > STAT_VALIDITY: - continue - name = smash_name("%s_%s" % (nodestats['nickname'], tubid[:4])) - #value = nodestats['stats'][plugin_conf['category']].get(plugin_conf['statid']) - category = plugin_conf['category'] - statid = plugin_conf['statid'] - value = 
nodestats['stats'][category].get(statid) - if value is not None: - args = { 'name': name, 'value': value } - print plugin_conf[output_section] % args - - if len(argv) > 1: - if sys.argv[1] == 'config': - print plugin_conf['configheader'] - output_nodes('graph_config', False) - sys.exit(0) - - output_nodes('graph_render', True) - -if __name__ == '__main__': - main(sys.argv) diff --git a/misc/munin/tahoe_storagespace b/misc/munin/tahoe_storagespace deleted file mode 100644 index f6edc314..00000000 --- a/misc/munin/tahoe_storagespace +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python - -# This is a munin plugin to track the amount of disk space each node's -# StorageServer is consuming on behalf of other nodes. This is where the -# shares are kept. If there are N nodes present in the mesh, the total space -# consumed by the entire mesh will be about N times the space reported by -# this plugin. - -# Copy this plugin into /etc/munun/plugins/tahoe_storagespace and then put -# the following in your /etc/munin/plugin-conf.d/foo file to let it know -# where to find the basedirectory for each node: -# -# [tahoe_storagespace] -# env.basedir_NODE1 /path/to/node1 -# env.basedir_NODE2 /path/to/node2 -# env.basedir_NODE3 /path/to/node3 -# -# Allmydata-tahoe must be installed on the system where this plugin is used, -# since it imports a utility module from allmydata.utils . 
- -import os, sys -import commands - -nodedirs = [] -for k,v in os.environ.items(): - if k.startswith("basedir_"): - nodename = k[len("basedir_"):] - nodedirs.append( (nodename, v) ) -nodedirs.sort() - -seriesname = "storage" - -configinfo = \ -"""graph_title Allmydata Tahoe Shareholder Space -graph_vlabel bytes -graph_category tahoe -graph_info This graph shows the space consumed by this node's StorageServer -""" - -for nodename, basedir in nodedirs: - configinfo += "%s.label %s\n" % (nodename, nodename) - configinfo += "%s.draw LINE2\n" % (nodename,) - - -if len(sys.argv) > 1: - if sys.argv[1] == "config": - print configinfo.rstrip() - sys.exit(0) - -for nodename, basedir in nodedirs: - cmd = "du --bytes --summarize %s" % os.path.join(basedir, "storage") - rc,out = commands.getstatusoutput(cmd) - if rc != 0: - sys.exit(rc) - bytes, extra = out.split() - usage = int(bytes) - print "%s.value %d" % (nodename, usage) - diff --git a/misc/operations_helpers/cpu-watcher-poll.py b/misc/operations_helpers/cpu-watcher-poll.py new file mode 100644 index 00000000..68ac4b46 --- /dev/null +++ b/misc/operations_helpers/cpu-watcher-poll.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python + +from foolscap import Tub, eventual +from twisted.internet import reactor +import sys +import pprint + +def oops(f): + print "ERROR" + print f + +def fetch(furl): + t = Tub() + t.startService() + d = t.getReference(furl) + d.addCallback(lambda rref: rref.callRemote("get_averages")) + d.addCallback(pprint.pprint) + return d + +d = eventual.fireEventually(sys.argv[1]) +d.addCallback(fetch) +d.addErrback(oops) +d.addBoth(lambda res: reactor.stop()) +reactor.run() diff --git a/misc/operations_helpers/cpu-watcher-subscribe.py b/misc/operations_helpers/cpu-watcher-subscribe.py new file mode 100644 index 00000000..4c560e2c --- /dev/null +++ b/misc/operations_helpers/cpu-watcher-subscribe.py @@ -0,0 +1,49 @@ +# -*- python -*- + +from twisted.internet import reactor +import sys + +import os.path, pprint +from 
twisted.application import service +from twisted.python import log +from foolscap import Tub, Referenceable, RemoteInterface +from foolscap.schema import ListOf, TupleOf +from zope.interface import implements + +Averages = ListOf( TupleOf(str, float, float, float) ) +class RICPUWatcherSubscriber(RemoteInterface): + def averages(averages=Averages): + return None + +class CPUWatcherSubscriber(service.MultiService, Referenceable): + implements(RICPUWatcherSubscriber) + def __init__(self, furlthing): + service.MultiService.__init__(self) + if furlthing.startswith("pb://"): + furl = furlthing + else: + furlfile = os.path.expanduser(furlthing) + if os.path.isdir(furlfile): + furlfile = os.path.join(furlfile, "watcher.furl") + furl = open(furlfile, "r").read().strip() + tub = Tub() + tub.setServiceParent(self) + tub.connectTo(furl, self.connected) + + def connected(self, rref): + print "subscribing" + d = rref.callRemote("get_averages") + d.addCallback(self.remote_averages) + d.addErrback(log.err) + + d = rref.callRemote("subscribe", self) + d.addErrback(log.err) + + def remote_averages(self, averages): + pprint.pprint(averages) + + +c = CPUWatcherSubscriber(sys.argv[1]) +c.startService() +reactor.run() + diff --git a/misc/operations_helpers/cpu-watcher.tac b/misc/operations_helpers/cpu-watcher.tac new file mode 100644 index 00000000..12f29324 --- /dev/null +++ b/misc/operations_helpers/cpu-watcher.tac @@ -0,0 +1,231 @@ +# -*- python -*- + +""" +# run this tool on a linux box in its own directory, with a file named +# 'pids.txt' describing which processes to watch. It will follow CPU usage of +# the given processes, and compute 1/5/15-minute moving averages for each +# process. These averages can be retrieved from a foolscap connection +# (published at ./watcher.furl), or through an HTTP query (using ./webport). + +# Each line of pids.txt describes a single process. Blank lines and ones that +# begin with '#' are ignored. 
Each line is either "PID" or "PID NAME" (space +# separated). PID is either a numeric process ID, a pathname to a file that +# contains a process ID, or a pathname to a directory that contains a +# twistd.pid file (which contains a process ID). NAME is an arbitrary string +# that will be used to describe the process to watcher.furl subscribers, and +# defaults to PID if not provided. +""" + +# TODO: +# built-in graphs on web interface + + + +import pickle, os.path, time, pprint +from twisted.application import internet, service, strports +from twisted.web import server, resource, http +from twisted.python import log +import simplejson +from foolscap import Tub, Referenceable, RemoteInterface, eventual +from foolscap.schema import ListOf, TupleOf +from zope.interface import implements + +def read_cpu_times(pid): + data = open("/proc/%d/stat" % pid, "r").read() + data = data.split() + times = data[13:17] + # the values in /proc/%d/stat are in ticks, I think. My system has + # CONFIG_HZ_1000=y in /proc/config.gz but nevertheless the numbers in + # 'stat' appear to be 10ms each. 
+ HZ = 100 + userspace_seconds = int(times[0]) * 1.0 / HZ + system_seconds = int(times[1]) * 1.0 / HZ + child_userspace_seconds = int(times[2]) * 1.0 / HZ + child_system_seconds = int(times[3]) * 1.0 / HZ + return (userspace_seconds, system_seconds) + + +def read_pids_txt(): + processes = [] + for line in open("pids.txt", "r").readlines(): + line = line.strip() + if not line or line[0] == "#": + continue + parts = line.split() + pidthing = parts[0] + if len(parts) > 1: + name = parts[1] + else: + name = pidthing + pid = None + try: + pid = int(pidthing) + except ValueError: + pidfile = os.path.expanduser(pidthing) + if os.path.isdir(pidfile): + pidfile = os.path.join(pidfile, "twistd.pid") + try: + pid = int(open(pidfile, "r").read().strip()) + except EnvironmentError: + pass + if pid is not None: + processes.append( (pid, name) ) + return processes + +Averages = ListOf( TupleOf(str, float, float, float) ) +class RICPUWatcherSubscriber(RemoteInterface): + def averages(averages=Averages): + return None + +class RICPUWatcher(RemoteInterface): + def get_averages(): + """Return a list of rows, one for each process I am watching. Each + row is (name, 1-min-avg, 5-min-avg, 15-min-avg), where 'name' is a + string, and the averages are floats from 0.0 to 1.0 . Each average is + the percentage of the CPU that this process has used: the change in + CPU time divided by the change in wallclock time. + """ + return Averages + + def subscribe(observer=RICPUWatcherSubscriber): + """Arrange for the given observer to get an 'averages' message every + time the averages are updated. 
This message will contain a single + argument, the same list of tuples that get_averages() returns.""" + return None + +class CPUWatcher(service.MultiService, resource.Resource, Referenceable): + implements(RICPUWatcher) + POLL_INTERVAL = 30 # seconds + HISTORY_LIMIT = 15 * 60 # 15min + AVERAGES = (1*60, 5*60, 15*60) # 1min, 5min, 15min + + def __init__(self): + service.MultiService.__init__(self) + resource.Resource.__init__(self) + try: + self.history = pickle.load(open("history.pickle", "rb")) + except: + self.history = {} + self.current = [] + self.observers = set() + ts = internet.TimerService(self.POLL_INTERVAL, self.poll) + ts.setServiceParent(self) + + def startService(self): + service.MultiService.startService(self) + + try: + desired_webport = open("webport", "r").read().strip() + except EnvironmentError: + desired_webport = None + webport = desired_webport or "tcp:0" + root = self + serv = strports.service(webport, server.Site(root)) + serv.setServiceParent(self) + if not desired_webport: + got_port = serv._port.getHost().port + open("webport", "w").write("tcp:%d\n" % got_port) + + self.tub = Tub(certFile="watcher.pem") + self.tub.setServiceParent(self) + try: + desired_tubport = open("tubport", "r").read().strip() + except EnvironmentError: + desired_tubport = None + tubport = desired_tubport or "tcp:0" + l = self.tub.listenOn(tubport) + if not desired_tubport: + got_port = l.getPortnum() + open("tubport", "w").write("tcp:%d\n" % got_port) + d = self.tub.setLocationAutomatically() + d.addCallback(self._tub_ready) + d.addErrback(log.err) + + def _tub_ready(self, res): + self.tub.registerReference(self, furlFile="watcher.furl") + + + def getChild(self, path, req): + if path == "": + return self + return resource.Resource.getChild(self, path, req) + + def render(self, req): + t = req.args.get("t", ["html"])[0] + ctype = "text/plain" + data = "" + if t == "html": + data = "# name, 1min, 5min, 15min\n" + data += pprint.pformat(self.current) + "\n" + elif t 
== "json": + #data = str(self.current) + "\n" # isn't that convenient? almost. + data = simplejson.dumps(self.current, indent=True) + else: + req.setResponseCode(http.BAD_REQUEST) + data = "Unknown t= %s\n" % t + req.setHeader("content-type", ctype) + return data + + def remote_get_averages(self): + return self.current + def remote_subscribe(self, observer): + self.observers.add(observer) + + def notify(self, observer): + d = observer.callRemote("averages", self.current) + def _error(f): + log.msg("observer error, removing them") + log.msg(f) + self.observers.discard(observer) + d.addErrback(_error) + + def poll(self): + max_history = self.HISTORY_LIMIT / self.POLL_INTERVAL + current = [] + try: + processes = read_pids_txt() + except: + log.err() + return + for (pid, name) in processes: + if pid not in self.history: + self.history[pid] = [] + now = time.time() + try: + (user_seconds, sys_seconds) = read_cpu_times(pid) + self.history[pid].append( (now, user_seconds, sys_seconds) ) + while len(self.history[pid]) > max_history+1: + self.history[pid].pop(0) + except: + log.msg("error reading process %s (%s), ignoring" % (pid, name)) + log.err() + try: + pickle.dump(self.history, open("history.pickle.tmp", "wb")) + os.rename("history.pickle.tmp", "history.pickle") + except: + pass + for (pid, name) in processes: + row = [name] + for avg in self.AVERAGES: + row.append(self._average_N(pid, avg)) + current.append(tuple(row)) + self.current = current + print current + for ob in self.observers: + eventual.eventually(self.notify, ob) + + def _average_N(self, pid, seconds): + num_samples = seconds / self.POLL_INTERVAL + samples = self.history[pid] + if len(samples) < num_samples+1: + return None + first = -num_samples-1 + elapsed_wall = samples[-1][0] - samples[first][0] + elapsed_user = samples[-1][1] - samples[first][1] + elapsed_sys = samples[-1][2] - samples[first][2] + if elapsed_wall == 0.0: + return 0.0 + return (elapsed_user+elapsed_sys) / elapsed_wall + +application = 
service.Application("cpu-watcher") +CPUWatcher().setServiceParent(application) diff --git a/misc/operations_helpers/find-share-anomalies.py b/misc/operations_helpers/find-share-anomalies.py new file mode 100644 index 00000000..76fe3419 --- /dev/null +++ b/misc/operations_helpers/find-share-anomalies.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python + +# feed this the results of 'tahoe catalog-shares' for all servers + +import sys + +chk_encodings = {} +sdmf_encodings = {} +sdmf_versions = {} + +for catalog in sys.argv[1:]: + for line in open(catalog, "r").readlines(): + line = line.strip() + pieces = line.split() + if pieces[0] == "CHK": + ftype, si, kN, size, ueb_hash, expiration, filename = pieces + if si not in chk_encodings: + chk_encodings[si] = (set(), set()) + chk_encodings[si][0].add( (si, kN) ) + chk_encodings[si][1].add( line ) + if pieces[0] == "SDMF": + ftype, si, kN, size, ver, expiration, filename = pieces + if si not in sdmf_encodings: + sdmf_encodings[si] = (set(), set()) + sdmf_encodings[si][0].add( (si, kN) ) + sdmf_encodings[si][1].add( line ) + if si not in sdmf_versions: + sdmf_versions[si] = (set(), set()) + sdmf_versions[si][0].add( ver ) + sdmf_versions[si][1].add( line ) + +chk_multiple_encodings = [(si,lines) + for si,(encodings,lines) in chk_encodings.items() + if len(encodings) > 1] +chk_multiple_encodings.sort() +sdmf_multiple_encodings = [(si,lines) + for si,(encodings,lines) in sdmf_encodings.items() + if len(encodings) > 1 + ] +sdmf_multiple_encodings.sort() +sdmf_multiple_versions = [(si,lines) + for si,(versions,lines) in sdmf_versions.items() + if len(versions) > 1] +sdmf_multiple_versions.sort() + +if chk_multiple_encodings: + print + print "CHK multiple encodings:" + for (si,lines) in chk_multiple_encodings: + print " " + si + for line in sorted(lines): + print " " + line +if sdmf_multiple_encodings: + print + print "SDMF multiple encodings:" + for (si,lines) in sdmf_multiple_encodings: + print " " + si + for line in sorted(lines): + 
print " " + line +if sdmf_multiple_versions: + print + print "SDMF multiple versions:" + for (si,lines) in sdmf_multiple_versions: + print " " + si + for line in sorted(lines): + print " " + line diff --git a/misc/operations_helpers/getmem.py b/misc/operations_helpers/getmem.py new file mode 100644 index 00000000..8ddc3ed7 --- /dev/null +++ b/misc/operations_helpers/getmem.py @@ -0,0 +1,18 @@ +#! /usr/bin/env python + +from foolscap import Tub +from foolscap.eventual import eventually +import sys +from twisted.internet import reactor + +def go(): + t = Tub() + d = t.getReference(sys.argv[1]) + d.addCallback(lambda rref: rref.callRemote("get_memory_usage")) + def _got(res): + print res + reactor.stop() + d.addCallback(_got) + +eventually(go) +reactor.run() diff --git a/misc/operations_helpers/munin/tahoe-conf b/misc/operations_helpers/munin/tahoe-conf new file mode 100644 index 00000000..f85c0431 --- /dev/null +++ b/misc/operations_helpers/munin/tahoe-conf @@ -0,0 +1,22 @@ +# put a copy of this file in /etc/munin/plugin-conf.d/tahoe-conf to let these +# plugins know where the node's base directories are. Modify the lines below +# to match your nodes. 
+ +[tahoe-files] +env.basedir_bs5c1 /home/amduser/tahoe/bs5c1 +env.basedir_bs5c2 /home/amduser/tahoe/bs5c2 +env.basedir_bs5c3 /home/amduser/tahoe/bs5c3 +env.basedir_bs5c4 /home/amduser/tahoe/bs5c4 + +[tahoe-sharesperfile] +env.basedir_bs5c1 /home/amduser/tahoe/bs5c1 +env.basedir_bs5c2 /home/amduser/tahoe/bs5c2 +env.basedir_bs5c3 /home/amduser/tahoe/bs5c3 +env.basedir_bs5c4 /home/amduser/tahoe/bs5c4 + +[tahoe-storagespace] +env.basedir_bs5c1 /home/amduser/tahoe/bs5c1 +env.basedir_bs5c2 /home/amduser/tahoe/bs5c2 +env.basedir_bs5c3 /home/amduser/tahoe/bs5c3 +env.basedir_bs5c4 /home/amduser/tahoe/bs5c4 + diff --git a/misc/operations_helpers/munin/tahoe-stats.plugin-conf b/misc/operations_helpers/munin/tahoe-stats.plugin-conf new file mode 100644 index 00000000..2084c65f --- /dev/null +++ b/misc/operations_helpers/munin/tahoe-stats.plugin-conf @@ -0,0 +1,12 @@ +[tahoe_storage_allocated] +env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle +[tahoe_storage_consumed] +env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle +[tahoe_runtime_load_avg] +env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle +[tahoe_runtime_load_peak] +env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle +[tahoe_storage_bytes_added] +env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle +[tahoe_storage_bytes_freed] +env.statsfile /home/robk/trees/tahoe/stats_gatherer/stats.pickle diff --git a/misc/operations_helpers/munin/tahoe_cpu_watcher b/misc/operations_helpers/munin/tahoe_cpu_watcher new file mode 100644 index 00000000..bd349cb2 --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_cpu_watcher @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +import os, sys, re +import urllib +import simplejson + +url = os.environ["url"] +current = simplejson.loads(urllib.urlopen(url).read()) + +configinfo = """\ +graph_title Tahoe CPU Usage +graph_vlabel CPU % +graph_category tahoe +graph_info This graph shows the 5min average of CPU usage for each process 
+""" +data = "" + +for (name, avg1, avg5, avg15) in current: + dataname = re.sub(r'[^\w]', '_', name) + configinfo += dataname + ".label " + name + "\n" + configinfo += dataname + ".draw LINE2\n" + if avg5 is not None: + data += dataname + ".value %.2f\n" % (100.0 * avg5) + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) +print data.rstrip() diff --git a/misc/operations_helpers/munin/tahoe_diskleft b/misc/operations_helpers/munin/tahoe_diskleft new file mode 100644 index 00000000..913fa792 --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_diskleft @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +# This is a munin plugin which pulls data from the server in +# misc/operations_helpers/spacetime/diskwatcher.tac . It produces a graph of how much free space +# is left on all disks across the grid. The plugin should be configured with +# env_url= pointing at the diskwatcher.tac webport. + +import os, sys, urllib, simplejson + +if len(sys.argv) > 1 and sys.argv[1] == "config": + print """\ +graph_title Tahoe Remaining Disk Space +graph_vlabel bytes remaining +graph_category tahoe +graph_info This graph shows the total amount of disk space left available in the grid +disk_left.label disk left +disk_left.draw LINE1""" + sys.exit(0) + +url = os.environ["url"] +data = simplejson.load(urllib.urlopen(url))["available"] +print "disk_left.value", data diff --git a/misc/operations_helpers/munin/tahoe_disktotal b/misc/operations_helpers/munin/tahoe_disktotal new file mode 100644 index 00000000..354ccc8c --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_disktotal @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +# This is a munin plugin which pulls data from the server in +# misc/operations_helpers/spacetime/diskwatcher.tac . It produces a graph of how much space is +# present on all disks across the grid, and how much space is actually being +# used. 
The plugin should be configured with env_url= pointing at the +# diskwatcher.tac webport. + +import os, sys, urllib, simplejson + +if len(sys.argv) > 1 and sys.argv[1] == "config": + print """\ +graph_title Tahoe Total Disk Space +graph_vlabel bytes +graph_category tahoe +graph_info This graph shows the total amount of disk space present in the grid, and how much of it is currently being used. +disk_total.label disk total +disk_total.draw LINE2 +disk_used.label disk used +disk_used.draw LINE1""" + sys.exit(0) + +url = os.environ["url"] +data = simplejson.load(urllib.urlopen(url)) +print "disk_total.value", data["total"] +print "disk_used.value", data["used"] diff --git a/misc/operations_helpers/munin/tahoe_diskusage b/misc/operations_helpers/munin/tahoe_diskusage new file mode 100644 index 00000000..d54ae375 --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_diskusage @@ -0,0 +1,40 @@ +#!/usr/bin/env python + +# This is a munin plugin which pulls data from the server in +# misc/operations_helpers/spacetime/diskwatcher.tac . It produces a graph of how much disk space +# is being used per unit time. The plugin should be configured with env_url= +# pointing at the diskwatcher.tac webport. 
+ +import os, sys, urllib, simplejson + +if len(sys.argv) > 1 and sys.argv[1] == "config": + print """\ +graph_title Tahoe Disk Usage Measurement +graph_vlabel bytes per second +graph_category tahoe +graph_info This graph shows the estimated disk usage per unit time, totalled across all storage servers +graph_args --lower-limit 0 --rigid +rate_1hr.label (one hour sample) +rate_1hr.draw LINE1 +rate_1day.label (one day sample) +rate_1day.draw LINE1 +rate_2wk.label (two week sample) +rate_2wk.draw LINE2 +rate_4wk.label (four week sample) +rate_4wk.draw LINE2""" + sys.exit(0) + +url = os.environ["url"] +timespans = simplejson.load(urllib.urlopen(url))["rates"] + +data = dict([(name, growth) + for (name, timespan, growth, timeleft) in timespans]) +# growth is in bytes per second +if "1hr" in data: + print "rate_1hr.value", data["1hr"] +if "1day" in data: + print "rate_1day.value", data["1day"] +if "2wk" in data: + print "rate_2wk.value", data["2wk"] +if "4wk" in data: + print "rate_4wk.value", data["4wk"] diff --git a/misc/operations_helpers/munin/tahoe_diskused b/misc/operations_helpers/munin/tahoe_diskused new file mode 100644 index 00000000..41c0b24a --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_diskused @@ -0,0 +1,22 @@ +#!/usr/bin/env python + +# This is a munin plugin which pulls data from the server in +# misc/operations_helpers/spacetime/diskwatcher.tac . It produces a graph of how much space is +# used on all disks across the grid. The plugin should be configured with +# env_url= pointing at the diskwatcher.tac webport. 
+ +import os, sys, urllib, simplejson + +if len(sys.argv) > 1 and sys.argv[1] == "config": + print """\ +graph_title Tahoe Total Disk Space Used +graph_vlabel bytes used +graph_category tahoe +graph_info This graph shows the total amount of disk space used across the grid +disk_used.label disk used +disk_used.draw LINE1""" + sys.exit(0) + +url = os.environ["url"] +data = simplejson.load(urllib.urlopen(url))["used"] +print "disk_used.value", data diff --git a/misc/operations_helpers/munin/tahoe_doomsday b/misc/operations_helpers/munin/tahoe_doomsday new file mode 100644 index 00000000..63513a13 --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_doomsday @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +# This is a munin plugin which pulls data from the server in +# misc/operations_helpers/spacetime/diskwatcher.tac . It produces a graph of how much time is +# left before the grid fills up. The plugin should be configured with +# env_url= pointing at the diskwatcher.tac webport. + +import os, sys, urllib, simplejson + +if len(sys.argv) > 1 and sys.argv[1] == "config": + print """\ +graph_title Tahoe Remaining Time Predictor +graph_vlabel days remaining +graph_category tahoe +graph_info This graph shows the estimated number of days left until storage space is exhausted +days_1hr.label days left (one hour sample) +days_1hr.draw LINE1 +days_1day.label days left (one day sample) +days_1day.draw LINE1 +days_2wk.label days left (two week sample) +days_2wk.draw LINE2 +days_4wk.label days left (four week sample) +days_4wk.draw LINE2""" + sys.exit(0) + +url = os.environ["url"] +timespans = simplejson.load(urllib.urlopen(url))["rates"] + +data = dict([(name, timeleft) + for (name, timespan, growth, timeleft) in timespans + if timeleft]) +# timeleft is in seconds +DAY = 24*60*60 +if "1hr" in data: + print "days_1hr.value", data["1hr"]/DAY +if "1day" in data: + print "days_1day.value", data["1day"]/DAY +if "2wk" in data: + print "days_2wk.value", data["2wk"]/DAY +if "4wk" in data: 
+ print "days_4wk.value", data["4wk"]/DAY diff --git a/misc/operations_helpers/munin/tahoe_estimate_files b/misc/operations_helpers/munin/tahoe_estimate_files new file mode 100644 index 00000000..249565e4 --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_estimate_files @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +import sys, os.path + +if len(sys.argv) > 1 and sys.argv[1] == "config": + print """\ +graph_title Tahoe File Estimate +graph_vlabel files +graph_category tahoe +graph_info This graph shows the estimated number of files and directories present in the grid +files.label files +files.draw LINE2""" + sys.exit(0) + +# Edit this to point at some subset of storage directories. +node_dirs = [os.path.expanduser("~amduser/prodnet/storage1"), + os.path.expanduser("~amduser/prodnet/storage2"), + os.path.expanduser("~amduser/prodnet/storage3"), + os.path.expanduser("~amduser/prodnet/storage4"), + ] + +sections = ["aa", "ab", "ac", "ad", "ae", "af", "ag", "ah", "ai", "aj"] +# and edit this to reflect your default encoding's "total_shares" value, and +# the total number of servers. 
+N = 10 +num_servers = 20 + +index_strings = set() +for base in node_dirs: + for section in sections: + sampledir = os.path.join(base, "storage", "shares", section) + indices = os.listdir(sampledir) + index_strings.update(indices) +unique_strings = len(index_strings) + +# the chance that any given file appears on any given server +chance = 1.0 * N / num_servers + +# the chance that the file does *not* appear on the servers that we're +# examining +no_chance = (1-chance) ** len(node_dirs) + +# if a file has a 25% chance of not appearing in our sample, then we need to +# raise our estimate by (1.25/1) +correction = 1+no_chance +#print "correction", correction + +files = unique_strings * (32*32/len(sections)) * correction +print "files.value %d" % int(files) diff --git a/misc/operations_helpers/munin/tahoe_files b/misc/operations_helpers/munin/tahoe_files new file mode 100644 index 00000000..e68b59da --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_files @@ -0,0 +1,52 @@ +#!/usr/bin/env python + +# This is a munin plugin to track the number of files that each node's +# StorageServer is holding on behalf of other nodes. Each file that has been +# uploaded to the mesh (and has shares present on this node) will be counted +# here. When there are <= 100 nodes in the mesh, this count will equal the +# total number of files that are active in the entire mesh. When there are +# 200 nodes present in the mesh, it will represent about half of the total +# number. 
+ +# Copy this plugin into /etc/munin/plugins/tahoe-files and then put +# the following in your /etc/munin/plugin-conf.d/foo file to let it know +# where to find the basedirectory for each node: +# +# [tahoe-files] +# env.basedir_NODE1 /path/to/node1 +# env.basedir_NODE2 /path/to/node2 +# env.basedir_NODE3 /path/to/node3 +# + +import os, sys + +nodedirs = [] +for k,v in os.environ.items(): + if k.startswith("basedir_"): + nodename = k[len("basedir_"):] + nodedirs.append( (nodename, v) ) +nodedirs.sort() + +configinfo = \ +"""graph_title Allmydata Tahoe Filecount +graph_vlabel files +graph_category tahoe +graph_info This graph shows the number of files hosted by this node's StorageServer +""" + +for nodename, basedir in nodedirs: + configinfo += "%s.label %s\n" % (nodename, nodename) + configinfo += "%s.draw LINE2\n" % (nodename,) + + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +for nodename, basedir in nodedirs: + files = len(os.listdir(os.path.join(basedir, "storage", "shares"))) + if os.path.exists(os.path.join(basedir, "storage", "shares", "incoming")): + files -= 1 # the 'incoming' directory doesn't count + print "%s.value %d" % (nodename, files) + diff --git a/misc/operations_helpers/munin/tahoe_helperstats_active b/misc/operations_helpers/munin/tahoe_helperstats_active new file mode 100644 index 00000000..3265e5f0 --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_helperstats_active @@ -0,0 +1,25 @@ +#!/usr/bin/env python + +import os, sys +import urllib +import simplejson + +configinfo = """\ +graph_title Tahoe Helper Stats - Active Files +graph_vlabel bytes +graph_category tahoe +graph_info This graph shows the number of files being actively processed by the helper +fetched.label Active Files +fetched.draw LINE2 +""" + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +url = os.environ["url"] + +data = simplejson.loads(urllib.urlopen(url).read()) +print 
"fetched.value %d" % data["chk_upload_helper.active_uploads"] + diff --git a/misc/operations_helpers/munin/tahoe_helperstats_fetched b/misc/operations_helpers/munin/tahoe_helperstats_fetched new file mode 100644 index 00000000..1f807beb --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_helperstats_fetched @@ -0,0 +1,26 @@ +#!/usr/bin/env python + +import os, sys +import urllib +import simplejson + +configinfo = """\ +graph_title Tahoe Helper Stats - Bytes Fetched +graph_vlabel bytes +graph_category tahoe +graph_info This graph shows the amount of data being fetched by the helper +fetched.label Bytes Fetched +fetched.type GAUGE +fetched.draw LINE1 +fetched.min 0 +""" + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +url = os.environ["url"] + +data = simplejson.loads(urllib.urlopen(url).read()) +print "fetched.value %d" % data["chk_upload_helper.fetched_bytes"] diff --git a/misc/operations_helpers/munin/tahoe_introstats b/misc/operations_helpers/munin/tahoe_introstats new file mode 100644 index 00000000..556762fb --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_introstats @@ -0,0 +1,31 @@ +#!/usr/bin/env python + +import os, sys +import urllib +import simplejson + +configinfo = """\ +graph_title Tahoe Introducer Stats +graph_vlabel hosts +graph_category tahoe +graph_info This graph shows the number of hosts announcing and subscribing to various services +storage_server.label Storage Servers +storage_server.draw LINE1 +storage_hosts.label Distinct Storage Hosts +storage_hosts.draw LINE1 +storage_client.label Clients +storage_client.draw LINE2 +""" + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +url = os.environ["url"] + +data = simplejson.loads(urllib.urlopen(url).read()) +print "storage_server.value %d" % data["announcement_summary"]["storage"] +print "storage_hosts.value %d" % data["announcement_distinct_hosts"]["storage"] +print "storage_client.value 
%d" % data["subscription_summary"]["storage"] + diff --git a/misc/operations_helpers/munin/tahoe_nodememory b/misc/operations_helpers/munin/tahoe_nodememory new file mode 100644 index 00000000..fd3f8b0a --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_nodememory @@ -0,0 +1,71 @@ +#!/usr/bin/env python + +# This munin plugin isolates processes by looking for the 'pid' file created +# by 'allmydata start', then extracts the amount of memory they consume (both +# VmSize and VmRSS) from /proc + +import os, sys, re + +if 0: + # for testing + os.environ["nodememory_warner1"] = "run/warner1" + os.environ["nodememory_warner2"] = "run/warner2" + +nodedirs = [] +for k,v in os.environ.items(): + if k.startswith("nodememory_"): + nodename = k[len("nodememory_"):] + nodedirs.append((nodename, v)) +nodedirs.sort(lambda a,b: cmp(a[0],b[0])) + +pids = {} + +for node,nodedir in nodedirs: + pidfile = os.path.join(nodedir, "twistd.pid") + if os.path.exists(pidfile): + pid = int(open(pidfile,"r").read()) + pids[node] = pid + +fields = ["VmSize", "VmRSS"] + + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + configinfo = \ + """graph_title Memory Consumed by Nodes +graph_vlabel bytes +graph_category Tahoe +graph_info This graph shows the memory used by specific processes +""" + for nodename,nodedir in nodedirs: + for f in fields: + configinfo += "%s_%s.label %s used by %s\n" % (nodename, f, + f, nodename) + linetype = "LINE1" + if f == "VmSize": + linetype = "LINE2" + configinfo += "%s_%s.draw %s\n" % (nodename, f, linetype) + if f == "VmData": + configinfo += "%s_%s.graph no\n" % (nodename, f) + + print configinfo + sys.exit(0) + +nodestats = {} +for node,pid in pids.items(): + stats = {} + statusfile = "/proc/%s/status" % pid + if not os.path.exists(statusfile): + continue + for line in open(statusfile,"r").readlines(): + for f in fields: + if line.startswith(f + ":"): + m = re.search(r'(\d+)', line) + stats[f] = int(m.group(1)) + nodestats[node] = stats + +for node,stats 
in nodestats.items():
+    for f,value in stats.items():
+        # TODO: not sure if /proc/%d/status means 1000 or 1024 when it says
+        # 'kB'
+        print "%s_%s.value %d" % (node, f, 1024*value)
diff --git a/misc/operations_helpers/munin/tahoe_overhead b/misc/operations_helpers/munin/tahoe_overhead
new file mode 100644
index 00000000..d8225f1f
--- /dev/null
+++ b/misc/operations_helpers/munin/tahoe_overhead
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+
+# This is a munin plugin which pulls total-used data from the server in
+# misc/operations_helpers/spacetime/diskwatcher.tac, and a total-deep-size number from custom
+# PHP database-querying scripts on a different server. It produces a graph of
+# how much garbage/overhead is present in the grid: the ratio of total-used
+# over (total-deep-size*N/k), expressed as a percentage. No overhead would be
+# 0, using twice as much space as we'd prefer would be 100. This is the
+# percentage which could be saved if we made GC work perfectly and reduced
+# other forms of overhead to zero. This script assumes 3-of-10.
+
+# A second graph is produced with how much of the total-deep-size number
+# would be saved if we removed data from inactive accounts. This is also on a
+# percentage scale.
+
+# A separate number (without a graph) is produced with the "effective
+# expansion factor". If there were no overhead, with 3-of-10, this would be
+# 3.33 .
+
+# Overhead is caused by the following problems (in order of size):
+#  uncollected garbage: files that are no longer referenced but not yet deleted
+#  inactive accounts: files that are referenced by cancelled accounts
+#  share storage overhead: bucket directories
+#  filesystem overhead: 4kB minimum block sizes
+#  share overhead: hashes, pubkeys, lease information
+
+# This plugin should be configured with env.diskwatcher_url pointing at the
+# diskwatcher.tac webport, and env.deepsize_url pointing at the PHP script.
+
+import os, sys, urllib, simplejson
+
+if len(sys.argv) > 1 and sys.argv[1] == "config":
+    print """\
+graph_title Tahoe Overhead Calculator
+graph_vlabel Percentage
+graph_category tahoe
+graph_info This graph shows the estimated amount of storage overhead (ratio of actual disk usage to ideal disk usage). The 'overhead' number is how much space we could save if we implemented GC, and the 'inactive' number is how much additional space we could save if we could delete data for cancelled accounts.
+overhead.label disk usage overhead
+overhead.draw LINE2
+inactive.label inactive account usage
+inactive.draw LINE1
+effective_expansion.label Effective Expansion Factor
+effective_expansion.graph no"""
+    sys.exit(0)
+
+diskwatcher_url = os.environ["diskwatcher_url"]
+total = simplejson.load(urllib.urlopen(diskwatcher_url))["used"]
+deepsize_url = os.environ["deepsize_url"]
+deepsize = simplejson.load(urllib.urlopen(deepsize_url))
+k = 3; N = 10
+expansion = float(N) / k
+
+ideal = expansion * deepsize["all"]
+overhead = (total - ideal) / ideal
+if overhead > 0:
+    # until all the storage-servers come online, this number will be nonsense
+    print "overhead.value %f" % (100.0 * overhead)
+
+    # same for this one. float() forces true division: under Python 2,
+    # int/int would truncate the factor (e.g. 3.33 -> 3).
+    effective_expansion = float(total) / deepsize["all"]
+    print "effective_expansion.value %f" % effective_expansion
+
+# this value remains valid, though. float() again avoids Python 2 integer
+# division, which would otherwise round the ratio down to a whole number.
+inactive_savings = float(deepsize["all"] - deepsize["active"]) / deepsize["active"]
+print "inactive.value %f" % (100.0 * inactive_savings)
diff --git a/misc/operations_helpers/munin/tahoe_rootdir_space b/misc/operations_helpers/munin/tahoe_rootdir_space
new file mode 100644
index 00000000..c9acb3cb
--- /dev/null
+++ b/misc/operations_helpers/munin/tahoe_rootdir_space
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+import os, sys
+import urllib
+import simplejson
+
+configinfo = """\
+graph_title Tahoe Root Directory Size
+graph_vlabel bytes
+graph_category tahoe
+graph_info This graph shows the amount of space consumed by all
files reachable from a given directory +space.label Space +space.draw LINE2 +""" + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +url = os.environ["url"] + +data = int(urllib.urlopen(url).read().strip()) +print "space.value %d" % data + + + diff --git a/misc/operations_helpers/munin/tahoe_server_latency_ b/misc/operations_helpers/munin/tahoe_server_latency_ new file mode 100644 index 00000000..a63f902c --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_server_latency_ @@ -0,0 +1,93 @@ +#!/usr/bin/env python + +# retrieve a latency statistic for a given operation and percentile from a +# set of storage servers. + +# the OPERATION value should come from the following list: +# allocate: allocate_buckets, first step to upload an immutable file +# write: write data to an immutable share +# close: finish writing to an immutable share +# cancel: abandon a partial immutable share +# get: get_buckets, first step to download an immutable file +# read: read data from an immutable share +# writev: slot_testv_and_readv_and_writev, modify/create a directory +# readv: read a directory (or mutable file) + +# the PERCENTILE value should come from the following list: +# 01_0: 1% +# 10_0: 10% +# 50_0: 50% (median) +# 90_0: 90% +# 99_0: 99% +# 99_9: 99.9% +# mean: + +# To use this, create a symlink from +# /etc/munin/plugins/tahoe_server_latency_OPERATION_PERCENTILE to this +# script. 
For example: + +# ln -s /usr/share/doc/allmydata-tahoe/munin/tahoe_server_latency_ \ +# /etc/munin/plugins/tahoe_server_latency_allocate_99_9 + +# Also, you will need to put a list of node statistics URLs in the plugin's +# environment, by adding a stanza like the following to a file in +# /etc/munin/plugin-conf.d/, such as /etc/munin/plugin-conf.d/tahoe_latencies: +# +# [tahoe_server_latency*] +# env.url_storage1 http://localhost:9011/statistics?t=json +# env.url_storage2 http://localhost:9012/statistics?t=json +# env.url_storage3 http://localhost:9013/statistics?t=json +# env.url_storage4 http://localhost:9014/statistics?t=json + +# of course, these URLs must match the webports you have configured into the +# storage nodes. + +import os, sys +import urllib +import simplejson + +node_urls = [] +for k,v in os.environ.items(): + if k.startswith("url_"): + nodename = k[len("url_"):] + node_urls.append( (nodename, v) ) +node_urls.sort() + +my_name = os.path.basename(sys.argv[0]) +PREFIX = "tahoe_server_latency_" +assert my_name.startswith(PREFIX) +my_name = my_name[len(PREFIX):] +(operation, percentile) = my_name.split("_", 1) +if percentile == "mean": + what = "mean" +else: + what = percentile.replace("_", ".") + "th percentile" + +configinfo = \ +"""graph_title Tahoe Server '%(operation)s' Latency (%(what)s) +graph_vlabel seconds +graph_category tahoe +graph_info This graph shows how long '%(operation)s' operations took on the storage server, the %(what)s delay between message receipt and response generation, calculated over the last thousand operations. 
+""" % {'operation': operation, + 'what': what} + +for nodename, url in node_urls: + configinfo += "%s.label %s\n" % (nodename, nodename) + configinfo += "%s.draw LINE2\n" % (nodename,) + + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +for nodename, url in node_urls: + data = simplejson.loads(urllib.urlopen(url).read()) + if percentile == "mean": + p_key = "mean" + else: + p_key = percentile + "_percentile" + key = "storage_server.latencies.%s.%s" % (operation, p_key) + value = data["stats"][key] + print "%s.value %s" % (nodename, value) + diff --git a/misc/operations_helpers/munin/tahoe_server_operations_ b/misc/operations_helpers/munin/tahoe_server_operations_ new file mode 100644 index 00000000..3f624757 --- /dev/null +++ b/misc/operations_helpers/munin/tahoe_server_operations_ @@ -0,0 +1,75 @@ +#!/usr/bin/env python + +# graph operations-per-second from a set of storage servers. + +# the OPERATION value should come from the following list: +# allocate: allocate_buckets, first step to upload an immutable file +# write: write data to an immutable share +# close: finish writing to an immutable share +# cancel: abandon a partial immutable share +# get: get_buckets, first step to download an immutable file +# read: read data from an immutable share +# writev: slot_testv_and_readv_and_writev, modify/create a directory +# readv: read a directory (or mutable file) + +# To use this, create a symlink from +# /etc/munin/plugins/tahoe_server_operations_OPERATION to this script. 
For +# example: + +# ln -s /usr/share/doc/allmydata-tahoe/munin/tahoe_server_operations_ \ +# /etc/munin/plugins/tahoe_server_operations_allocate + +# Also, you will need to put a list of node statistics URLs in the plugin's +# environment, by adding a stanza like the following to a file in +# /etc/munin/plugin-conf.d/, such as /etc/munin/plugin-conf.d/tahoe_operations: +# +# [tahoe_server_operations*] +# env.url_storage1 http://localhost:9011/statistics?t=json +# env.url_storage2 http://localhost:9012/statistics?t=json +# env.url_storage3 http://localhost:9013/statistics?t=json +# env.url_storage4 http://localhost:9014/statistics?t=json + +# of course, these URLs must match the webports you have configured into the +# storage nodes. + +import os, sys +import urllib +import simplejson + +node_urls = [] +for k,v in os.environ.items(): + if k.startswith("url_"): + nodename = k[len("url_"):] + node_urls.append( (nodename, v) ) +node_urls.sort() + +my_name = os.path.basename(sys.argv[0]) +PREFIX = "tahoe_server_operations_" +assert my_name.startswith(PREFIX) +operation = my_name[len(PREFIX):] + +configinfo = \ +"""graph_title Tahoe Server '%(operation)s' Operations +graph_vlabel ops per second +graph_category tahoe +graph_info This graph shows how many '%(operation)s' operations take place on the storage server +""" % {'operation': operation} + +for nodename, url in node_urls: + configinfo += "%s.label %s\n" % (nodename, nodename) + configinfo += "%s.type DERIVE\n" % (nodename,) + configinfo += "%s.min 0\n" % (nodename,) + configinfo += "%s.draw LINE2\n" % (nodename,) + + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +for nodename, url in node_urls: + data = simplejson.loads(urllib.urlopen(url).read()) + key = "storage_server.%s" % operation + value = data["counters"][key] + print "%s.value %s" % (nodename, value) + diff --git a/misc/operations_helpers/munin/tahoe_spacetime 
b/misc/operations_helpers/munin/tahoe_spacetime
new file mode 100644
index 00000000..520a5fab
--- /dev/null
+++ b/misc/operations_helpers/munin/tahoe_spacetime
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+
+# copy .rrd files from a remote munin master host, sum the 'df' stats from a
+# list of hosts, use them to estimate a rate-of-change for the past month,
+# then extrapolate to guess how many weeks/months/years of storage space we
+# have left, and output it to another munin graph
+
+import sys, os, time
+import rrdtool
+
+MUNIN_HOST = "munin.allmydata.com"
+PREFIX = "%s:/var/lib/munin/prodtahoe/" % MUNIN_HOST
+FILES = [ "prodtahoe%d.allmydata.com-df-_dev_sd%s3-g.rrd" % (a,b)
+          for a in (1,2,3,4,5)
+          for b in ("a", "b", "c", "d")
+          ]
+REMOTEFILES = [ PREFIX + f for f in FILES ]
+LOCALFILES = ["/var/lib/munin/prodtahoe/" + f for f in FILES ]
+WEBFILE = "/var/www/tahoe/spacetime.json"
+
+
+def rsync_rrd():
+    # copy the RRD files from your munin master host to a local one
+    cmd = "rsync %s rrds/" % (" ".join(REMOTEFILES))
+    rc = os.system(cmd)
+    assert rc == 0, rc
+
+def format_time(t):
+    return time.strftime("%b %d %H:%M", time.localtime(t))
+
+def predict_future(past_s):
+    # Estimate days until the disks fill up, by linear extrapolation of the
+    # averaged 'df' percentage over the past 'past_s' (an rrdtool time
+    # offset such as "2wk"). Returns None when no estimate can be made.
+
+    start_df = []
+    end_df = []
+    durations = []
+
+    for fn in LOCALFILES:
+        d = rrdtool.fetch(fn, "AVERAGE", "-s", "-"+past_s, "-e", "-1hr")
+        # ((start, end, step), (name1, name2, ...), [(data1, data2, ..), ...])
+        (start_time, end_time ,step) = d[0]
+        #print format_time(start_time), " - ", format_time(end_time), step
+        #for points in d[2]:
+        #    point = points[0]
+        #    print point
+        start_space = d[2][0][0]
+        if start_space is None:
+            return None
+        # I don't know why, but the last few points are always bogus. Running
+        # 'rrdtool fetch' on the command line is usually ok.. I blame the python
+        # bindings.
+        end_space = d[2][-4][0]
+        if end_space is None:
+            return None
+        end_time = end_time - (4*step)
+        start_df.append(start_space)
+        end_df.append(end_space)
+        durations.append(end_time - start_time)
+
+    avg_start_df = sum(start_df) / len(start_df)
+    avg_end_df = sum(end_df) / len(end_df)
+    avg_duration = sum(durations) / len(durations)
+    #print avg_start_df, avg_end_df, avg_duration
+
+    rate = (avg_end_df - avg_start_df) / avg_duration
+    #print "Rate", rate, " %/s"
+    #print "measured over", avg_duration / 86400, "days"
+    if rate <= 0:
+        # usage is flat or shrinking: there is no finite exhaustion date
+        # (and rate == 0 would raise ZeroDivisionError below)
+        return None
+    remaining = 100 - avg_end_df
+    remaining_seconds = remaining / rate
+    #print "remaining seconds", remaining_seconds
+    remaining_days = remaining_seconds / 86400
+    #print "remaining days", remaining_days
+    return remaining_days
+
+def write_to_file(samples):
+    # write a JSON-formatted dictionary
+    f = open(WEBFILE + ".tmp", "w")
+    f.write("{ ")
+    f.write(", ".join(['"%s": %s' % (k, samples[k])
+                       for k in sorted(samples.keys())]))
+    f.write("}\n")
+    f.close()
+    os.rename(WEBFILE + ".tmp", WEBFILE)
+
+if len(sys.argv) > 1 and sys.argv[1] == "config":
+    print """\
+graph_title Tahoe Remaining Space Predictor
+graph_vlabel days remaining
+graph_category tahoe
+graph_info This graph shows the estimated number of days left until storage space is exhausted
+days_2wk.label days left (2wk sample)
+days_2wk.draw LINE2
+days_4wk.label days left (4wk sample)
+days_4wk.draw LINE2"""
+    sys.exit(0)
+
+#rsync_rrd()
+samples = {}
+remaining_4wk = predict_future("4wk")
+if remaining_4wk is not None:
+    print "days_4wk.value", remaining_4wk
+    samples["remaining_4wk"] = remaining_4wk
+remaining_2wk = predict_future("2wk")
+if remaining_2wk is not None:
+    print "days_2wk.value", remaining_2wk
+    samples["remaining_2wk"] = remaining_2wk
+write_to_file(samples)
+
diff --git a/misc/operations_helpers/munin/tahoe_stats b/misc/operations_helpers/munin/tahoe_stats
new file mode 100644
index 00000000..bd03aaea
--- /dev/null
+++ b/misc/operations_helpers/munin/tahoe_stats
@@ -0,0
+1,474 @@ +#!/usr/bin/env python + +import os +import pickle +import re +import sys +import time + +STAT_VALIDITY = 300 # 5min limit on reporting stats + +PLUGINS = { + # LOAD AVERAGE + 'tahoe_runtime_load_avg': + { 'statid': 'load_monitor.avg_load', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Runtime Load Average', + 'graph_vlabel load', + 'graph_category tahoe', + 'graph_info This graph shows average reactor delay', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_runtime_load_peak': + { 'statid': 'load_monitor.max_load', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Runtime Load Peak', + 'graph_vlabel load', + 'graph_category tahoe', + 'graph_info This graph shows peak reactor delay', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + # STORAGE ALLOCATION (BYTES) + 'tahoe_storage_consumed': + { 'statid': 'storage_server.consumed', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Storage Server Space Consumed', + 'graph_vlabel bytes', + 'graph_category tahoe_storage_server', + 'graph_info This graph shows space consumed', + 'graph_args --base 1024', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_storage_allocated': + { 'statid': 'storage_server.allocated', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Storage Server Space Allocated', + 'graph_vlabel bytes', + 'graph_category tahoe_storage_server', + 'graph_info This graph shows space allocated', + 'graph_args --base 1024', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value 
%(value)s', + ]), + }, + + 'tahoe_storage_bytes_added': + { 'statid': 'storage_server.bytes_added', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Storage Server Bytes Added', + 'graph_vlabel bytes', + 'graph_category tahoe_storage_server', + 'graph_info This graph shows cummulative bytes added', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_storage_bytes_freed': + { 'statid': 'storage_server.bytes_freed', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Storage Server Bytes Removed', + 'graph_vlabel bytes', + 'graph_category tahoe_storage_server', + 'graph_info This graph shows cummulative bytes removed', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_storage_operations_allocate': + { 'statid': 'storage_server.allocate', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Storage Server Allocate_Bucket Operations', + 'graph_vlabel operations per second', + 'graph_category tahoe_storage_server', + 'graph_info This graph shows how many allocate_buckets operations occured per second. Each immutable file upload causes one such operation per server.', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_storage_operations_get': + { 'statid': 'storage_server.get', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Storage Server get_bucket Operations', + 'graph_vlabel operations per second', + 'graph_category tahoe_storage_server', + 'graph_info This graph shows how many get_bucket operations occured per second. 
Each immutable file download/check causes one such operation per server.', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_storage_operations_writev': + { 'statid': 'storage_server.writev', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Storage Server writev Operations', + 'graph_vlabel operations per second', + 'graph_category tahoe_storage_server', + 'graph_info This graph shows how many writev operations occured per second. Each mutable file / dirnode write causes one such operation per server.', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_storage_operations_readv': + { 'statid': 'storage_server.readv', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Storage Server readv Operations', + 'graph_vlabel operations per second', + 'graph_category tahoe_storage_server', + 'graph_info This graph shows how many readv operations occured per second. 
Each dirnode read causes one such operation per server.', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + + # HELPER + 'tahoe_helper_incoming_files': + { 'statid': 'chk_upload_helper.incoming_count', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming File Count', + 'graph_vlabel n files', + 'graph_category tahoe_helper', + 'graph_info This graph shows number of incoming files', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_incoming_filesize': + { 'statid': 'chk_upload_helper.incoming_size', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming File Size', + 'graph_vlabel bytes', + 'graph_category tahoe_helper', + 'graph_info This graph shows total size of incoming files', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_incoming_files_old': + { 'statid': 'chk_upload_helper.incoming_size_old', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Incoming Old Files', + 'graph_vlabel bytes', + 'graph_category tahoe_helper', + 'graph_info This graph shows total size of old incoming files', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_helper_encoding_files': + { 'statid': 'chk_upload_helper.encoding_count', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding File Count', + 'graph_vlabel n files', + 'graph_category tahoe_helper', + 'graph_info This graph shows number of encoding 
files', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_encoding_filesize': + { 'statid': 'chk_upload_helper.encoding_size', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding File Size', + 'graph_vlabel bytes', + 'graph_category tahoe_helper', + 'graph_info This graph shows total size of encoding files', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_encoding_files_old': + { 'statid': 'chk_upload_helper.encoding_size_old', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoding Old Files', + 'graph_vlabel bytes', + 'graph_category tahoe_helper', + 'graph_info This graph shows total size of old encoding files', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_helper_active_uploads': + { 'statid': 'chk_upload_helper.active_uploads', + 'category': 'stats', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Active Files', + 'graph_vlabel n files', + 'graph_category tahoe_helper', + 'graph_info This graph shows number of files actively being processed by the helper', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + 'tahoe_helper_upload_requests': + { 'statid': 'chk_upload_helper.upload_requests', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Upload Requests', + 'graph_vlabel requests', + 'graph_category tahoe_helper', + 'graph_info This graph shows the number of upload requests arriving at the helper', + ]), + 'graph_config': 
'\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_upload_already_present': + { 'statid': 'chk_upload_helper.upload_already_present', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Uploads Already Present', + 'graph_vlabel requests', + 'graph_category tahoe_helper', + 'graph_info This graph shows the number of uploads whose files are already present in the grid', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_upload_need_upload': + { 'statid': 'chk_upload_helper.upload_need_upload', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Uploads Needing Upload', + 'graph_vlabel requests', + 'graph_category tahoe_helper', + 'graph_info This graph shows the number of uploads whose files are not already present in the grid', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_encoded_bytes': + { 'statid': 'chk_upload_helper.encoded_bytes', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Upload Helper Encoded Bytes', + 'graph_vlabel bytes', + 'graph_category tahoe_helper', + 'graph_info This graph shows the number of bytes encoded by the helper', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_helper_fetched_bytes': + { 'statid': 'chk_upload_helper.fetched_bytes', + 'category': 'counters', + 'configheader': 
'\n'.join(['graph_title Tahoe Upload Helper Fetched Bytes', + 'graph_vlabel bytes', + 'graph_category tahoe_helper', + 'graph_info This graph shows the number of bytes fetched by the helper', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + # WEBAPI + 'tahoe_uploader_bytes_uploaded': + { 'statid': 'uploader.bytes_uploaded', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Uploader Bytes Uploaded', + 'graph_vlabel bytes', + 'graph_category tahoe_traffic', + 'graph_info This graph shows the number of bytes uploaded', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_uploader_files_uploaded': + { 'statid': 'uploader.files_uploaded', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Uploader Bytes Uploaded', + 'graph_vlabel files', + 'graph_category tahoe_traffic', + 'graph_info This graph shows the number of files uploaded', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_mutable_files_published': + { 'statid': 'mutable.files_published', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Mutable Files Published', + 'graph_vlabel files', + 'graph_category tahoe_traffic', + 'graph_info This graph shows the number of mutable files published', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + 'tahoe_mutable_files_retrieved': + { 'statid': 
'mutable.files_retrieved', + 'category': 'counters', + 'configheader': '\n'.join(['graph_title Tahoe Mutable Files Retrieved', + 'graph_vlabel files', + 'graph_category tahoe_traffic', + 'graph_info This graph shows the number of files retrieved', + ]), + 'graph_config': '\n'.join(['%(name)s.label %(name)s', + '%(name)s.type DERIVE', + '%(name)s.min 0', + '%(name)s.draw LINE1', + ]), + 'graph_render': '\n'.join(['%(name)s.value %(value)s', + ]), + }, + + } + +def smash_name(name): + return re.sub('[^a-zA-Z0-9]', '_', name) + +def open_stats(fname): + f = open(fname, 'rb') + stats = pickle.load(f) + f.close() + return stats + +def main(argv): + graph_name = os.path.basename(argv[0]) + if graph_name.endswith('.py'): + graph_name = graph_name[:-3] + + plugin_conf = PLUGINS.get(graph_name) + + for k,v in os.environ.items(): + if k.startswith('statsfile'): + stats_file = v + break + else: + raise RuntimeError("No 'statsfile' env var found") + + stats = open_stats(stats_file) + + now = time.time() + def output_nodes(output_section, check_time): + for tubid, nodestats in stats.items(): + if check_time and (now - nodestats.get('timestamp', 0)) > STAT_VALIDITY: + continue + name = smash_name("%s_%s" % (nodestats['nickname'], tubid[:4])) + #value = nodestats['stats'][plugin_conf['category']].get(plugin_conf['statid']) + category = plugin_conf['category'] + statid = plugin_conf['statid'] + value = nodestats['stats'][category].get(statid) + if value is not None: + args = { 'name': name, 'value': value } + print plugin_conf[output_section] % args + + if len(argv) > 1: + if sys.argv[1] == 'config': + print plugin_conf['configheader'] + output_nodes('graph_config', False) + sys.exit(0) + + output_nodes('graph_render', True) + +if __name__ == '__main__': + main(sys.argv) diff --git a/misc/operations_helpers/munin/tahoe_storagespace b/misc/operations_helpers/munin/tahoe_storagespace new file mode 100644 index 00000000..f6edc314 --- /dev/null +++ 
b/misc/operations_helpers/munin/tahoe_storagespace @@ -0,0 +1,58 @@ +#!/usr/bin/env python + +# This is a munin plugin to track the amount of disk space each node's +# StorageServer is consuming on behalf of other nodes. This is where the +# shares are kept. If there are N nodes present in the mesh, the total space +# consumed by the entire mesh will be about N times the space reported by +# this plugin. + +# Copy this plugin into /etc/munin/plugins/tahoe_storagespace and then put +# the following in your /etc/munin/plugin-conf.d/foo file to let it know +# where to find the basedirectory for each node: +# +# [tahoe_storagespace] +# env.basedir_NODE1 /path/to/node1 +# env.basedir_NODE2 /path/to/node2 +# env.basedir_NODE3 /path/to/node3 +# +# Allmydata-tahoe must be installed on the system where this plugin is used, +# since it imports a utility module from allmydata.utils . + +import os, sys +import commands + +nodedirs = [] +for k,v in os.environ.items(): + if k.startswith("basedir_"): + nodename = k[len("basedir_"):] + nodedirs.append( (nodename, v) ) +nodedirs.sort() + +seriesname = "storage" + +configinfo = \ +"""graph_title Allmydata Tahoe Shareholder Space +graph_vlabel bytes +graph_category tahoe +graph_info This graph shows the space consumed by this node's StorageServer +""" + +for nodename, basedir in nodedirs: + configinfo += "%s.label %s\n" % (nodename, nodename) + configinfo += "%s.draw LINE2\n" % (nodename,) + + +if len(sys.argv) > 1: + if sys.argv[1] == "config": + print configinfo.rstrip() + sys.exit(0) + +for nodename, basedir in nodedirs: + cmd = "du --bytes --summarize %s" % os.path.join(basedir, "storage") + rc,out = commands.getstatusoutput(cmd) + if rc != 0: + sys.exit(rc) + bytes, extra = out.split() + usage = int(bytes) + print "%s.value %d" % (nodename, usage) + diff --git a/misc/operations_helpers/spacetime/diskwatcher.py b/misc/operations_helpers/spacetime/diskwatcher.py new file mode 100644 index 00000000..05a68ac9 --- /dev/null +++ 
b/misc/operations_helpers/spacetime/diskwatcher.py @@ -0,0 +1,32 @@ + +from axiom.item import Item +from axiom.attributes import text, integer, timestamp + + +class Sample(Item): + # we didn't originally set typeName, so it was generated from the + # fully-qualified classname ("diskwatcher.Sample"), then Axiom + # automatically lowercases and un-dot-ifies it to get + # "diskwatcher_sample". Now we explicitly provide a name. + typeName = "diskwatcher_sample" + + # version 2 added the 'total' field + schemaVersion = 2 + + url = text(indexed=True) + when = timestamp(indexed=True) + total = integer() + used = integer() + avail = integer() + +def upgradeSample1to2(old): + total = 0 + return old.upgradeVersion("diskwatcher_sample", 1, 2, + url=old.url, + when=old.when, + total=0, + used=old.used, + avail=old.avail) + +from axiom.upgrade import registerUpgrader +registerUpgrader(upgradeSample1to2, "diskwatcher_sample", 1, 2) diff --git a/misc/operations_helpers/spacetime/diskwatcher.tac b/misc/operations_helpers/spacetime/diskwatcher.tac new file mode 100644 index 00000000..112b8781 --- /dev/null +++ b/misc/operations_helpers/spacetime/diskwatcher.tac @@ -0,0 +1,385 @@ +# -*- python -*- + +""" +Run this tool with twistd in its own directory, with a file named 'urls.txt' +describing which nodes to query. Make sure to copy diskwatcher.py into the +same directory. It will request disk-usage numbers from the nodes once per +hour (or slower), and store them in a local database. It will compute +usage-per-unit time values over several time ranges and make them available +through an HTTP query (using ./webport). It will also provide an estimate of +how much time is left before the grid's storage is exhausted. + +There are munin plugins (named tahoe_doomsday and tahoe_diskusage) to graph +the values this tool computes. + +Each line of urls.txt points to a single node. 
Each node should have its own +dedicated disk: if multiple nodes share a disk, only list one of them in +urls.txt (otherwise that space will be double-counted, confusing the +results). Each line should be in the form: + + http://host:webport/statistics?t=json + +""" + +# TODO: +# built-in graphs on web interface + + +import os.path, urllib, time +from datetime import timedelta +from twisted.application import internet, service, strports +from twisted.web import server, resource, http, client +from twisted.internet import defer +from twisted.python import log +import simplejson +from axiom.attributes import AND +from axiom.store import Store +from epsilon import extime +from diskwatcher import Sample + +#from axiom.item import Item +#from axiom.attributes import text, integer, timestamp + +#class Sample(Item): +# url = text() +# when = timestamp() +# used = integer() +# avail = integer() + +#s = Store("history.axiom") +#ns = Store("new-history.axiom") +#for sa in s.query(Sample): +# diskwatcher.Sample(store=ns, +# url=sa.url, when=sa.when, used=sa.used, avail=sa.avail) +#print "done" + +HOUR = 3600 +DAY = 24*3600 +WEEK = 7*DAY +MONTH = 30*DAY +YEAR = 365*DAY + +class DiskWatcher(service.MultiService, resource.Resource): + POLL_INTERVAL = 1*HOUR + AVERAGES = {#"60s": 60, + #"5m": 5*60, + #"30m": 30*60, + "1hr": 1*HOUR, + "1day": 1*DAY, + "2wk": 2*WEEK, + "4wk": 4*WEEK, + } + + def __init__(self): + assert os.path.exists("diskwatcher.tac") # run from the right directory + self.growth_cache = {} + service.MultiService.__init__(self) + resource.Resource.__init__(self) + self.store = Store("history.axiom") + self.store.whenFullyUpgraded().addCallback(self._upgrade_complete) + service.IService(self.store).setServiceParent(self) # let upgrader run + ts = internet.TimerService(self.POLL_INTERVAL, self.poll) + ts.setServiceParent(self) + + def _upgrade_complete(self, ignored): + print "Axiom store upgrade complete" + + def startService(self): + 
service.MultiService.startService(self) + + try: + desired_webport = open("webport", "r").read().strip() + except EnvironmentError: + desired_webport = None + webport = desired_webport or "tcp:0" + root = self + serv = strports.service(webport, server.Site(root)) + serv.setServiceParent(self) + if not desired_webport: + got_port = serv._port.getHost().port + open("webport", "w").write("tcp:%d\n" % got_port) + + + def get_urls(self): + for url in open("urls.txt","r").readlines(): + if "#" in url: + url = url[:url.find("#")] + url = url.strip() + if not url: + continue + yield url + + def poll(self): + log.msg("polling..") + #return self.poll_synchronous() + return self.poll_asynchronous() + + def poll_asynchronous(self): + # this didn't actually seem to work any better than poll_synchronous: + # logs are more noisy, and I got frequent DNS failures. But with a + # lot of servers to query, this is probably the better way to go. A + # significant advantage of this approach is that we can use a + # timeout= argument to tolerate hanging servers. 
+ dl = [] + for url in self.get_urls(): + when = extime.Time() + d = client.getPage(url, timeout=60) + d.addCallback(self.got_response, when, url) + dl.append(d) + d = defer.DeferredList(dl) + def _done(res): + fetched = len([1 for (success, value) in res if success]) + log.msg("fetched %d of %d" % (fetched, len(dl))) + d.addCallback(_done) + return d + + def poll_synchronous(self): + attempts = 0 + fetched = 0 + for url in self.get_urls(): + attempts += 1 + try: + when = extime.Time() + # if a server accepts the connection and then hangs, this + # will block forever + data_json = urllib.urlopen(url).read() + self.got_response(data_json, when, url) + fetched += 1 + except: + log.msg("error while fetching: %s" % url) + log.err() + log.msg("fetched %d of %d" % (fetched, attempts)) + + def got_response(self, data_json, when, url): + data = simplejson.loads(data_json) + total = data[u"stats"][u"storage_server.disk_total"] + used = data[u"stats"][u"storage_server.disk_used"] + avail = data[u"stats"][u"storage_server.disk_avail"] + print "%s : total=%s, used=%s, avail=%s" % (url, + total, used, avail) + Sample(store=self.store, + url=unicode(url), when=when, total=total, used=used, avail=avail) + + def calculate_growth_timeleft(self): + timespans = [] + total_avail_space = self.find_total_available_space() + pairs = [ (timespan,name) + for name,timespan in self.AVERAGES.items() ] + pairs.sort() + for (timespan,name) in pairs: + growth = self.growth(timespan) + print name, total_avail_space, growth + if growth is not None: + timeleft = None + if growth > 0: + timeleft = total_avail_space / growth + timespans.append( (name, timespan, growth, timeleft) ) + return timespans + + def find_total_space(self): + # this returns the sum of disk-total stats for all servers that 1) + # are listed in urls.txt and 2) have responded recently. 
+ now = extime.Time() + recent = now - timedelta(seconds=2*self.POLL_INTERVAL) + total_space = 0 + for url in self.get_urls(): + url = unicode(url) + latest = list(self.store.query(Sample, + AND(Sample.url == url, + Sample.when > recent), + sort=Sample.when.descending, + limit=1)) + if latest: + total_space += latest[0].total + return total_space + + def find_total_available_space(self): + # this returns the sum of disk-avail stats for all servers that 1) + # are listed in urls.txt and 2) have responded recently. + now = extime.Time() + recent = now - timedelta(seconds=2*self.POLL_INTERVAL) + total_avail_space = 0 + for url in self.get_urls(): + url = unicode(url) + latest = list(self.store.query(Sample, + AND(Sample.url == url, + Sample.when > recent), + sort=Sample.when.descending, + limit=1)) + if latest: + total_avail_space += latest[0].avail + return total_avail_space + + def find_total_used_space(self): + # this returns the sum of disk-used stats for all servers that 1) are + # listed in urls.txt and 2) have responded recently. + now = extime.Time() + recent = now - timedelta(seconds=2*self.POLL_INTERVAL) + total_used_space = 0 + for url in self.get_urls(): + url = unicode(url) + latest = list(self.store.query(Sample, + AND(Sample.url == url, + Sample.when > recent), + sort=Sample.when.descending, + limit=1)) + if latest: + total_used_space += latest[0].used + return total_used_space + + + def growth(self, timespan): + """Calculate the bytes-per-second growth of the total disk-used stat, + over a period of TIMESPAN seconds (i.e. between the most recent + sample and the latest one that's at least TIMESPAN seconds ago), + summed over all nodes which 1) are listed in urls.txt, 2) have + responded recently, and 3) have a response at least as old as + TIMESPAN. 
If there are no nodes which meet these criteria, we'll + return None; this is likely to happen for the longer timespans (4wk) + until the gatherer has been running and collecting data for that + long.""" + + # a note about workload: for our oldest storage servers, as of + # 25-Jan-2009, the first DB query here takes about 40ms per server + # URL (some take as little as 10ms). There are about 110 servers, and + # two queries each, so the growth() function takes about 7s to run + # for each timespan. We track 4 timespans, and find_total_*_space() + # takes about 2.3s to run, so calculate_growth_timeleft() takes about + # 27s. Each HTTP query thus takes 27s, and we have six munin plugins + # which perform HTTP queries every 5 minutes. By adding growth_cache(), + # I hope to reduce this: the first HTTP query will still take 27s, + # but the subsequent five should be about 2.3s each. + + # we're allowed to cache this value for 3 minutes + if timespan in self.growth_cache: + (when, value) = self.growth_cache[timespan] + if time.time() - when < 3*60: + return value + + td = timedelta(seconds=timespan) + now = extime.Time() + then = now - td + recent = now - timedelta(seconds=2*self.POLL_INTERVAL) + + total_growth = 0.0 + num_nodes = 0 + + for url in self.get_urls(): + url = unicode(url) + latest = list(self.store.query(Sample, + AND(Sample.url == url, + Sample.when > recent), + sort=Sample.when.descending, + limit=1)) + if not latest: + #print "no latest sample from", url + continue # skip this node + latest = latest[0] + old = list(self.store.query(Sample, + AND(Sample.url == url, + Sample.when < then), + sort=Sample.when.descending, + limit=1)) + if not old: + #print "no old sample from", url + continue # skip this node + old = old[0] + duration = latest.when.asPOSIXTimestamp() - old.when.asPOSIXTimestamp() + if not duration: + print "only one sample from", url + continue + + rate = float(latest.used - old.used) / duration + #print url, rate + total_growth += rate + 
num_nodes += 1 + + if not num_nodes: + return None + self.growth_cache[timespan] = (time.time(), total_growth) + return total_growth + + def getChild(self, path, req): + if path == "": + return self + return resource.Resource.getChild(self, path, req) + + def abbreviate_time(self, s): + def _plural(count, unit): + count = int(count) + if count == 1: + return "%d %s" % (count, unit) + return "%d %ss" % (count, unit) + if s is None: + return "unknown" + if s < 120: + return _plural(s, "second") + if s < 3*HOUR: + return _plural(s/60, "minute") + if s < 2*DAY: + return _plural(s/HOUR, "hour") + if s < 2*MONTH: + return _plural(s/DAY, "day") + if s < 4*YEAR: + return _plural(s/MONTH, "month") + return _plural(s/YEAR, "year") + + def abbreviate_space2(self, s, SI=True): + if s is None: + return "unknown" + if SI: + U = 1000.0 + isuffix = "B" + else: + U = 1024.0 + isuffix = "iB" + def r(count, suffix): + return "%.2f %s%s" % (count, suffix, isuffix) + + if s < 1024: # 1000-1023 get emitted as bytes, even in SI mode + return r(s, "") + if s < U*U: + return r(s/U, "k") + if s < U*U*U: + return r(s/(U*U), "M") + if s < U*U*U*U: + return r(s/(U*U*U), "G") + if s < U*U*U*U*U: + return r(s/(U*U*U*U), "T") + return r(s/(U*U*U*U*U), "P") + + def abbreviate_space(self, s): + return "(%s, %s)" % (self.abbreviate_space2(s, True), + self.abbreviate_space2(s, False)) + + def render(self, req): + t = req.args.get("t", ["html"])[0] + ctype = "text/plain" + data = "" + if t == "html": + data = "" + for (name, timespan, growth, timeleft) in self.calculate_growth_timeleft(): + data += "%f bytes per second (%sps), %s remaining (over %s)\n" % \ + (growth, self.abbreviate_space2(growth, True), + self.abbreviate_time(timeleft), name) + used = self.find_total_used_space() + data += "total used: %d bytes %s\n" % (used, + self.abbreviate_space(used)) + total = self.find_total_space() + data += "total space: %d bytes %s\n" % (total, + self.abbreviate_space(total)) + elif t == "json": + current = 
{"rates": self.calculate_growth_timeleft(), + "total": self.find_total_space(), + "used": self.find_total_used_space(), + "available": self.find_total_available_space(), + } + data = simplejson.dumps(current, indent=True) + else: + req.setResponseCode(http.BAD_REQUEST) + data = "Unknown t= %s\n" % t + req.setHeader("content-type", ctype) + return data + +application = service.Application("disk-watcher") +DiskWatcher().setServiceParent(application) diff --git a/misc/pyver.py b/misc/pyver.py deleted file mode 100644 index d53db165..00000000 --- a/misc/pyver.py +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env python - -import sys -print "python%d.%d" % (sys.version_info[:2]) diff --git a/misc/ringsim.py b/misc/ringsim.py deleted file mode 100644 index 46480c44..00000000 --- a/misc/ringsim.py +++ /dev/null @@ -1,239 +0,0 @@ -#! /usr/bin/python - -# used to discuss ticket #302: "stop permuting peerlist?" - -import time -import math -from hashlib import sha1, md5, sha256 -myhash = md5 -# md5: 1520 "uploads" per second -# sha1: 1350 ups -# sha256: 930 ups -from itertools import count -from twisted.python import usage - -def abbreviate_space(s, SI=True): - if s is None: - return "unknown" - if SI: - U = 1000.0 - isuffix = "B" - else: - U = 1024.0 - isuffix = "iB" - def r(count, suffix): - return "%.2f %s%s" % (count, suffix, isuffix) - - if s < 1024: # 1000-1023 get emitted as bytes, even in SI mode - return "%d B" % s - if s < U*U: - return r(s/U, "k") - if s < U*U*U: - return r(s/(U*U), "M") - if s < U*U*U*U: - return r(s/(U*U*U), "G") - if s < U*U*U*U*U: - return r(s/(U*U*U*U), "T") - return r(s/(U*U*U*U*U), "P") - -def make_up_a_file_size(seed): - h = int(myhash(seed).hexdigest(),16) - max=2**31 - if 1: # exponential distribution - e = 8 + (h % (31-8)) - return 2 ** e - # uniform distribution - return h % max # avg 1GB - -sizes = [make_up_a_file_size(str(i)) for i in range(10000)] -avg_filesize = sum(sizes)/len(sizes) -print "average file size:", 
abbreviate_space(avg_filesize) - -SERVER_CAPACITY = 10**12 - -class Server: - def __init__(self, nodeid, capacity): - self.nodeid = nodeid - self.used = 0 - self.capacity = capacity - self.numshares = 0 - self.full_at_tick = None - - def upload(self, sharesize): - if self.used + sharesize < self.capacity: - self.used += sharesize - self.numshares += 1 - return True - return False - - def __repr__(self): - if self.full_at_tick is not None: - return "<%s %s full at %d>" % (self.__class__.__name__, self.nodeid, self.full_at_tick) - else: - return "<%s %s>" % (self.__class__.__name__, self.nodeid) - -class Ring: - SHOW_MINMAX = False - def __init__(self, numservers, seed, permute): - self.servers = [] - for i in range(numservers): - nodeid = myhash(str(seed)+str(i)).hexdigest() - capacity = SERVER_CAPACITY - s = Server(nodeid, capacity) - self.servers.append(s) - self.servers.sort(key=lambda s: s.nodeid) - self.permute = permute - #self.list_servers() - - def list_servers(self): - for i in range(len(self.servers)): - s = self.servers[i] - next_s = self.servers[(i+1)%len(self.servers)] - diff = "%032x" % (int(next_s.nodeid,16) - int(s.nodeid,16)) - s.next_diff = diff - prev_s = self.servers[(i-1)%len(self.servers)] - diff = "%032x" % (int(s.nodeid,16) - int(prev_s.nodeid,16)) - s.prev_diff = diff - print s, s.prev_diff - - print "sorted by delta" - for s in sorted(self.servers, key=lambda s:s.prev_diff): - print s, s.prev_diff - - def servers_for_si(self, si): - if self.permute: - def sortkey(s): - return myhash(s.nodeid+si).digest() - return sorted(self.servers, key=sortkey) - for i in range(len(self.servers)): - if self.servers[i].nodeid >= si: - return self.servers[i:] + self.servers[:i] - return list(self.servers) - - def show_servers(self, picked): - bits = [] - for s in self.servers: - if s in picked: - bits.append("1") - else: - bits.append("0") - #d = [s in picked and "1" or "0" for s in self.servers] - return "".join(bits) - - def dump_usage(self, numfiles, 
avg_space_per_file): - print "uploaded", numfiles - # avg_space_per_file measures expected grid-wide ciphertext per file - used = list(reversed(sorted([s.used for s in self.servers]))) - # used is actual per-server ciphertext - usedpf = [1.0*u/numfiles for u in used] - # usedpf is actual per-server-per-file ciphertext - #print "min/max usage: %s/%s" % (abbreviate_space(used[-1]), - # abbreviate_space(used[0])) - avg_usage_per_file = avg_space_per_file/len(self.servers) - # avg_usage_per_file is expected per-server-per-file ciphertext - spreadpf = usedpf[0] - usedpf[-1] - average_usagepf = sum(usedpf) / len(usedpf) - variance = sum([(u-average_usagepf)**2 for u in usedpf])/(len(usedpf)-1) - std_deviation = math.sqrt(variance) - sd_of_total = std_deviation / avg_usage_per_file - - print "min/max/(exp) usage-pf-ps %s/%s/(%s):" % ( - abbreviate_space(usedpf[-1]), - abbreviate_space(usedpf[0]), - abbreviate_space(avg_usage_per_file) ), - print "spread-pf: %s (%.2f%%)" % ( - abbreviate_space(spreadpf), 100.0*spreadpf/avg_usage_per_file), - #print "average_usage:", abbreviate_space(average_usagepf) - print "stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation), - 100.0*sd_of_total) - if self.SHOW_MINMAX: - s2 = sorted(self.servers, key=lambda s: s.used) - print "least:", s2[0].nodeid - print "most:", s2[-1].nodeid - - -class Options(usage.Options): - optParameters = [ - ("k", "k", 3, "required shares", int), - ("N", "N", 10, "total shares", int), - ("servers", None, 100, "number of servers", int), - ("seed", None, None, "seed to use for creating ring"), - ("fileseed", None, "blah", "seed to use for creating files"), - ("permute", "p", 1, "1 to permute, 0 to use flat ring", int), - ] - def postOptions(self): - assert self["seed"] - - -def do_run(ring, opts): - avg_space_per_file = avg_filesize * opts["N"] / opts["k"] - fileseed = opts["fileseed"] - start = time.time() - all_servers_have_room = True - no_files_have_wrapped = True - for filenum in count(0): - #used = 
list(reversed(sorted([s.used for s in ring.servers]))) - #used = [s.used for s in ring.servers] - #print used - si = myhash(fileseed+str(filenum)).hexdigest() - filesize = make_up_a_file_size(si) - sharesize = filesize / opts["k"] - if filenum%4000==0 and filenum > 1: - ring.dump_usage(filenum, avg_space_per_file) - servers = ring.servers_for_si(si) - #print ring.show_servers(servers[:opts["N"]]) - remaining_shares = opts["N"] - index = 0 - server_was_full = False - file_was_wrapped = False - remaining_servers = set(servers) - while remaining_shares: - if index >= len(servers): - index = 0 - file_was_wrapped = True - s = servers[index] - accepted = s.upload(sharesize) - if not accepted: - server_was_full = True - remaining_servers.discard(s) - if not remaining_servers: - print "-- GRID IS FULL" - ring.dump_usage(filenum, avg_space_per_file) - return filenum - index += 1 - continue - remaining_shares -= 1 - index += 1 - # file is done being uploaded - - if server_was_full and all_servers_have_room: - all_servers_have_room = False - print "-- FIRST SERVER FULL" - ring.dump_usage(filenum, avg_space_per_file) - if file_was_wrapped and no_files_have_wrapped: - no_files_have_wrapped = False - print "-- FIRST FILE WRAPPED" - ring.dump_usage(filenum, avg_space_per_file) - - -def do_ring(opts): - total_capacity = opts["servers"]*SERVER_CAPACITY - avg_space_per_file = avg_filesize * opts["N"] / opts["k"] - avg_files = total_capacity / avg_space_per_file - print "expected number of uploads:", avg_files - if opts["permute"]: - print " PERMUTED" - else: - print " LINEAR" - seed = opts["seed"] - - ring = Ring(opts["servers"], seed, opts["permute"]) - num_files = do_run(ring, opts) - -def run(opts): - do_ring(opts) - -if __name__ == "__main__": - opts = Options() - opts.parseOptions() - run(opts) diff --git a/misc/run-with-pythonpath.py b/misc/run-with-pythonpath.py deleted file mode 100644 index 9aee7fa6..00000000 --- a/misc/run-with-pythonpath.py +++ /dev/null @@ -1,46 +0,0 @@ 
-# -*- python -*- -# you must invoke this with an explicit python, from the tree root - -"""Run an arbitrary command with a PYTHONPATH that will include the Tahoe -code, including dependent libraries. Run this like: - - python misc/run-with-pythonpath.py python foo.py -or - python misc/run-with-pythonpath.py trial -r poll allmydata.test.test_util - -""" - -import os, sys, subprocess - -# figure out where support/lib/pythonX.X/site-packages is -# add it to os.environ["PYTHONPATH"] -# spawn the child process - - -def pylibdir(prefixdir): - pyver = "python%d.%d" % (sys.version_info[:2]) - if sys.platform == "win32": - return os.path.join(prefixdir, "Lib", "site-packages") - else: - return os.path.join(prefixdir, "lib", pyver, "site-packages") - -basedir = os.path.dirname(os.path.abspath(__file__)) -supportlib = pylibdir(os.path.abspath("support")) - -oldpp = os.environ.get("PYTHONPATH", "").split(os.pathsep) -if oldpp == [""]: - # grr silly split() behavior - oldpp = [] -newpp = os.pathsep.join(oldpp + [supportlib,]) -os.environ['PYTHONPATH'] = newpp - -from twisted.python.procutils import which -cmd = sys.argv[1] -if cmd and cmd[0] not in "/~.": - cmds = which(cmd) - if not cmds: - print >>sys.stderr, "'%s' not found on PATH" % (cmd,) - sys.exit(-1) - cmd = cmds[0] - -os.execve(cmd, sys.argv[1:], os.environ) diff --git a/misc/run_trial.py b/misc/run_trial.py deleted file mode 100644 index 4d06a5d0..00000000 --- a/misc/run_trial.py +++ /dev/null @@ -1 +0,0 @@ -from twisted.scripts.trial import run; run() \ No newline at end of file diff --git a/misc/show-tool-versions.py b/misc/show-tool-versions.py deleted file mode 100644 index d1c98b26..00000000 --- a/misc/show-tool-versions.py +++ /dev/null @@ -1,95 +0,0 @@ -#! 
/usr/bin/env python - -import locale, os, subprocess, sys - -def print_platform(): - try: - import platform - out = platform.platform() - print - print "platform:", out.replace("\n", " ") - except EnvironmentError, le: - sys.stderr.write("Got exception using 'platform': %s\n" % (le,)) - pass - -def print_python_ver(): - print "python:", sys.version.replace("\n", " "), - print ', maxunicode: ' + str(sys.maxunicode), - print ', stdout.encoding: ' + str(sys.stdout.encoding), - print ', stdin.encoding: ' + str(sys.stdin.encoding), - print ', filesystem.encoding: ' + str(sys.getfilesystemencoding()), - print ', locale.getpreferredencoding: ' + str(locale.getpreferredencoding()), - print ', os.path.supports_unicode_filenames: ' + str(os.path.supports_unicode_filenames), - print ', locale.defaultlocale: ' + str(locale.getdefaultlocale()), - print ', locale.locale: ' + str(locale.getlocale()) - -def print_cmd_ver(cmdlist, label=None): - try: - res = subprocess.Popen(cmdlist, stdin=open(os.devnull), - stdout=subprocess.PIPE).communicate()[0] - if label is None: - label = cmdlist[0] - print - print label + ': ' + res.replace("\n", " ") - except EnvironmentError, le: - sys.stderr.write("Got exception invoking '%s': %s\n" % (cmdlist[0], le,)) - pass - -def print_as_ver(): - if os.path.exists('a.out'): - print - print "WARNING: a file named a.out exists, and getting the version of the 'as' assembler writes to that filename, so I'm not attempting to get the version of 'as'." 
- return - try: - res = subprocess.Popen(['as', '-version'], stdin=open(os.devnull), - stderr=subprocess.PIPE).communicate()[1] - print - print 'as: ' + res.replace("\n", " ") - os.remove('a.out') - except EnvironmentError, le: - sys.stderr.write("Got exception invoking '%s': %s\n" % ('as', le,)) - pass - -def print_setuptools_ver(): - try: - import pkg_resources - out = str(pkg_resources.require("setuptools")) - print - print "setuptools:", out.replace("\n", " ") - except (ImportError, EnvironmentError), le: - sys.stderr.write("Got exception using 'pkg_resources' to get the version of setuptools: %s\n" % (le,)) - pass - -def print_py_pkg_ver(pkgname): - try: - import pkg_resources - out = str(pkg_resources.require(pkgname)) - print - print pkgname + ': ' + out.replace("\n", " ") - except (ImportError, EnvironmentError), le: - sys.stderr.write("Got exception using 'pkg_resources' to get the version of %s: %s\n" % (pkgname, le,)) - pass - except pkg_resources.DistributionNotFound, le: - sys.stderr.write("pkg_resources reported no %s package installed: %s\n" % (pkgname, le,)) - pass - -print_platform() - -print_python_ver() - -print_cmd_ver(['buildbot', '--version']) -print_cmd_ver(['cl']) -print_cmd_ver(['gcc', '--version']) -print_cmd_ver(['g++', '--version']) -print_cmd_ver(['cryptest', 'V']) -print_cmd_ver(['darcs', '--version']) -print_cmd_ver(['darcs', '--exact-version'], label='darcs-exact-version') -print_cmd_ver(['7za']) - -print_as_ver() - -print_setuptools_ver() - -print_py_pkg_ver('coverage') -print_py_pkg_ver('trialcoverage') -print_py_pkg_ver('setuptools_trial') diff --git a/misc/sid/debian/changelog b/misc/sid/debian/changelog deleted file mode 100644 index 9f4e959e..00000000 --- a/misc/sid/debian/changelog +++ /dev/null @@ -1,5 +0,0 @@ -allmydata-tahoe (0.0.1) unstable; urgency=low - - * experimental packaging - - -- Brian Warner Mon, 4 Dec 2006 23:34:07 -0800 diff --git a/misc/sid/debian/compat b/misc/sid/debian/compat deleted file mode 100644 index 
b8626c4c..00000000 --- a/misc/sid/debian/compat +++ /dev/null @@ -1 +0,0 @@ -4 diff --git a/misc/sid/debian/control b/misc/sid/debian/control deleted file mode 100644 index e37fbde1..00000000 --- a/misc/sid/debian/control +++ /dev/null @@ -1,16 +0,0 @@ -Source: allmydata-tahoe -Section: python -Priority: optional -Maintainer: Brian Warner -Build-Depends: debhelper (>= 5.0.37.2), cdbs (>= 0.4.43), python-central (>= 0.5), python-setuptools, python, python-dev -Build-Depends-Indep: python-twisted-core -XS-Python-Version: 2.4,2.5,2.6 -Standards-Version: 3.7.2 - -Package: allmydata-tahoe -Architecture: all -Depends: ${python:Depends}, python-twisted-core, python-twisted-names, python-twisted-web, python-foolscap (>= 0.4.1), python-pyopenssl, python-nevow, python-simplejson (>= 1.4), python-zfec (>= 1.1), python-pycryptopp (>= 0.5.15), python-setuptools -Recommends: -XB-Python-Version: 2.4,2.5,2.6 -Description: A secure distributed filesystem - Allmydata Tahoe diff --git a/misc/sid/debian/copyright b/misc/sid/debian/copyright deleted file mode 100644 index 1dce8a20..00000000 --- a/misc/sid/debian/copyright +++ /dev/null @@ -1,19 +0,0 @@ -This package was debianized by Brian Warner - -The upstream source of this project is http://allmydata.org . - -Copyright (c) 2006-2009 -AllMyData, Inc. - -You may use this package under the GNU General Public License, version 2 or, at -your option, any later version. - -You may use this package under the Transitive Grace Period Public Licence, -version 1 or, at your option, any later version. The Transitive Grace Period -Public Licence has requirements similar to the GPL except that it allows you to -wait for up to twelve months after you redistribute a derived work before -releasing the source code of your derived work. See the file COPYING.TGPPL.html -for the terms of the Transitive Grace Period Public Licence, version 1. - -(You may choose to use this package under the terms of either licence, at your -option.) 
diff --git a/misc/sid/debian/pycompat b/misc/sid/debian/pycompat deleted file mode 100644 index 0cfbf088..00000000 --- a/misc/sid/debian/pycompat +++ /dev/null @@ -1 +0,0 @@ -2 diff --git a/misc/sid/debian/rules b/misc/sid/debian/rules deleted file mode 100644 index 58f7bf29..00000000 --- a/misc/sid/debian/rules +++ /dev/null @@ -1,48 +0,0 @@ -#! /usr/bin/make -f -# Uncomment this to turn on verbose mode. -#export DH_VERBOSE=1 - -DEB_PYTHON_SYSTEM=pycentral - -include /usr/share/cdbs/1/rules/debhelper.mk -include /usr/share/cdbs/1/class/python-distutils.mk - -# this ought to be the name of the package that we're building, which is -# different on each tahoe branch. debian/control is the master: whatever -# package is listed in there will be built. -DEBNAME := $(firstword $(DEB_PACKAGES)) - -STAGING_DIR := $(CURDIR)/debian/$(DEBNAME) - -DEB_INSTALL_DOCS_ALL := COPYING.GPL COPYING.TGPPL.html CREDITS \ - NEWS README.txt relnotes.txt docs misc/spacetime misc/cpu-watcher.tac -DEB_COMPRESS_EXCLUDE := .tac - - -# we overwrite the setuptools-generated /usr/bin/tahoe (located in -# support/bin/tahoe after a 'make build') with a different version, because -# the setuptools form (using "entry points") insists upon .egg-info -visible -# forms of dependent packages to be installed. For a debian package, we rely -# upon the dependencies that are declared in debian/control . -# -# To make sure the #! line matches the version of python that we're using for -# this build, we copy it from the setuptools-generated /usr/bin/tahoe, then -# add other code to the generated file. 
- -install/$(DEBNAME):: - mkdir -pm755 $(STAGING_DIR) - python setup.py install --root=$(STAGING_DIR) - - head -1 $(STAGING_DIR)/usr/bin/tahoe >$(STAGING_DIR)/usr/bin/tahoe.new - echo "from allmydata.scripts import runner" >>$(STAGING_DIR)/usr/bin/tahoe.new - echo "runner.run()" >>$(STAGING_DIR)/usr/bin/tahoe.new - chmod +x $(STAGING_DIR)/usr/bin/tahoe.new - mv $(STAGING_DIR)/usr/bin/tahoe.new $(STAGING_DIR)/usr/bin/tahoe - - dh_install misc/munin/* usr/share/$(DEBNAME)/munin - chmod +x $(STAGING_DIR)/usr/share/$(DEBNAME)/munin/* - - dh_pycentral - -clean:: - -rm -rf build diff --git a/misc/simulate_load.py b/misc/simulate_load.py deleted file mode 100644 index 50ff45b9..00000000 --- a/misc/simulate_load.py +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env python - -# WARNING. There is a bug in this script so that it does not simulate the actual Tahoe Two server selection algorithm that it was intended to simulate. See http://allmydata.org/trac/tahoe-lafs/ticket/302 (stop permuting peerlist, use SI as offset into ring instead?) 
- -import random - -SERVER_CAPACITY = 10**12 - -class Server: - def __init__(self): - self.si = random.randrange(0, 2**31) - self.used = 0 - self.max = SERVER_CAPACITY - self.full_at_tick = None - - def __repr__(self): - if self.full_at_tick is not None: - return "<%s %s full at %d>" % (self.__class__.__name__, self.si, self.full_at_tick) - else: - return "<%s %s>" % (self.__class__.__name__, self.si) - -SERVERS = 4 -K = 3 -N = 10 - -def make_up_a_file_size(): - return (2 ** random.randrange(8, 31)) - -def go(permutedpeerlist): - servers = [ Server() for x in range(SERVERS) ] - servers.sort(cmp=lambda x,y: cmp(x.si, y.si)) - - doubled_up_shares = 0 - tick = 0 - fullservers = 0 - while True: - nextsharesize = make_up_a_file_size() / K - if permutedpeerlist: - random.shuffle(servers) - else: - # rotate a random number - rot = random.randrange(0, len(servers)) - servers = servers[rot:] + servers[:rot] - - i = 0 - wrapped = False - sharestoput = N - while sharestoput: - server = servers[i] - if server.used + nextsharesize < server.max: - server.used += nextsharesize - sharestoput -= 1 - if wrapped: - doubled_up_shares += 1 - else: - if server.full_at_tick is None: - server.full_at_tick = tick - fullservers += 1 - if fullservers == len(servers): - # print "Couldn't place share -- all servers full. Stopping." - return (servers, doubled_up_shares) - - i += 1 - if i == len(servers): - wrapped = True - i = 0 - - tick += 1 - -def div_ceil(n, d): - """ - The smallest integer k such that k*d >= n. - """ - return (n/d) + (n%d != 0) - -DESIRED_COLUMNS = 70 - -START_FILES = 137000 -STOP_FILES = 144000 - -def test(permutedpeerlist, iters): - # The i'th element of the filledat list is how many servers got full when the i'th file was uploaded. 
- filledat = [] - for test in range(iters): - (servers, doubled_up_shares) = go(permutedpeerlist) - print "doubled_up_shares: ", doubled_up_shares - for server in servers: - fidx = server.full_at_tick - filledat.extend([0]*(fidx-len(filledat)+1)) - filledat[fidx] += 1 - - startfiles = 0 - while filledat[startfiles] == 0: - startfiles += 1 - filespercolumn = div_ceil(len(filledat) - startfiles, (DESIRED_COLUMNS - 3)) - - # to make comparisons between runs line up: - # startfiles = START_FILES - # filespercolumn = div_ceil(STOP_FILES - startfiles, (DESIRED_COLUMNS - 3)) - - # The i'th element of the compressedfilledat list is how many servers got full when the filespercolumn files starting at startfiles + i were uploaded. - compressedfilledat = [] - idx = startfiles - while idx < len(filledat): - compressedfilledat.append(0) - for i in range(filespercolumn): - compressedfilledat[-1] += filledat[idx] - idx += 1 - if idx >= len(filledat): - break - - # The i'th element of the fullat list is how many servers were full by the tick numbered startfiles + i * filespercolumn (on average). - fullat = [0] * len(compressedfilledat) - for idx, num in enumerate(compressedfilledat): - for fidx in range(idx, len(fullat)): - fullat[fidx] += num - - for idx in range(len(fullat)): - fullat[idx] = fullat[idx] / float(iters) - - # Now print it out as an ascii art graph. 
- import sys - for serversfull in range(40, 0, -1): - sys.stdout.write("%2d " % serversfull) - for numfull in fullat: - if int(numfull) == serversfull: - sys.stdout.write("*") - else: - sys.stdout.write(" ") - sys.stdout.write("\n") - - sys.stdout.write(" ^-- servers full\n") - idx = 0 - while idx < len(fullat): - nextmark = "%d--^ " % (startfiles + idx * filespercolumn) - sys.stdout.write(nextmark) - idx += len(nextmark) - - sys.stdout.write("\nfiles uploaded --> \n") - - - -if __name__ == "__main__": - import sys - iters = 16 - for arg in sys.argv: - if arg.startswith("--iters="): - iters = int(arg[8:]) - if "--permute" in sys.argv: - print "doing permuted peerlist, iterations: %d" % iters - test(True, iters) - else: - print "doing simple ring, iterations: %d" % iters - test(False, iters) diff --git a/misc/simulator.py b/misc/simulator.py deleted file mode 100644 index adf674de..00000000 --- a/misc/simulator.py +++ /dev/null @@ -1,292 +0,0 @@ -#! /usr/bin/env python - -import sha as shamodule -import os, random - -from pkg_resources import require -require('PyRRD') -from pyrrd import graph -from pyrrd.rrd import DataSource, RRD, RRA - - -def sha(s): - return shamodule.new(s).digest() - -def randomid(): - return os.urandom(20) - -class Node: - def __init__(self, nid, introducer, simulator): - self.nid = nid - self.introducer = introducer - self.simulator = simulator - self.shares = {} - self.capacity = random.randrange(1000) - self.utilization = 0 - self.files = [] - - def permute_peers(self, fileid): - permuted = [(sha(fileid+n.nid),n) - for n in self.introducer.get_all_nodes()] - permuted.sort() - return permuted - - def publish_file(self, fileid, size, numshares=100): - sharesize = 4 * size / numshares - permuted = self.permute_peers(fileid) - last_givento = None - tried = 0 - givento = [] - while numshares and permuted: - pid,node = permuted.pop(0) - tried += 1 - last_givento = pid - if node.accept_share(fileid, sharesize): - givento.append((pid,node)) - 
numshares -= 1 - if numshares: - # couldn't push, should delete - for pid,node in givento: - node.delete_share(fileid) - return False - self.files.append((fileid, numshares)) - self.introducer.please_preserve(fileid, size, tried, last_givento) - return (True, tried) - - def accept_share(self, fileid, sharesize): - accept = False - if self.utilization < self.capacity: - # we have room! yay! - self.shares[fileid] = sharesize - self.utilization += sharesize - return True - if self.decide(sharesize): - # we don't, but we'll make room - self.make_space(sharesize) - self.shares[fileid] = sharesize - self.utilization += sharesize - return True - else: - # we're full, try elsewhere - return False - - def decide(self, sharesize): - if sharesize > self.capacity: - return False - return False - return random.random() > 0.5 - - def make_space(self, sharesize): - assert sharesize <= self.capacity - while self.capacity - self.utilization < sharesize: - victim = random.choice(self.shares.keys()) - self.simulator.lost_data(self.shares[victim]) - self.delete_share(victim) - - def delete_share(self, fileid): - if fileid in self.shares: - self.utilization -= self.shares[fileid] - del self.shares[fileid] - return True - return False - - def retrieve_file(self): - if not self.files: - return - fileid,numshares = random.choice(self.files) - needed = numshares / 4 - peers = [] - for pid,node in self.permute_peers(fileid): - if random.random() > self.simulator.P_NODEAVAIL: - continue # node isn't available right now - if node.has_share(fileid): - peers.append(node) - if len(peers) >= needed: - return True - return False - - def delete_file(self): - if not self.files: - return False - which = random.choice(self.files) - self.files.remove(which) - fileid,numshares = which - self.introducer.delete(fileid) - return True - -class Introducer: - def __init__(self, simulator): - self.living_files = {} - self.utilization = 0 # total size of all active files - self.simulator = simulator - 
self.simulator.stamp_utilization(self.utilization) - - def get_all_nodes(self): - return self.all_nodes - - def please_preserve(self, fileid, size, tried, last_givento): - self.living_files[fileid] = (size, tried, last_givento) - self.utilization += size - self.simulator.stamp_utilization(self.utilization) - - def please_delete(self, fileid): - self.delete(fileid) - - def permute_peers(self, fileid): - permuted = [(sha(fileid+n.nid),n) - for n in self.get_all_nodes()] - permuted.sort() - return permuted - - def delete(self, fileid): - permuted = self.permute_peers(fileid) - size, tried, last_givento = self.living_files[fileid] - pid = "" - while tried and pid < last_givento: - pid,node = permuted.pop(0) - had_it = node.delete_share(fileid) - if had_it: - tried -= 1 - self.utilization -= size - self.simulator.stamp_utilization(self.utilization) - del self.living_files[fileid] - -class Simulator: - NUM_NODES = 1000 - EVENTS = ["ADDFILE", "DELFILE", "ADDNODE", "DELNODE"] - RATE_ADDFILE = 1.0 / 10 - RATE_DELFILE = 1.0 / 20 - RATE_ADDNODE = 1.0 / 3000 - RATE_DELNODE = 1.0 / 4000 - P_NODEAVAIL = 1.0 - - def __init__(self): - self.time = 1164783600 # small numbers of seconds since the epoch confuse rrdtool - self.prevstamptime = int(self.time) - - ds = DataSource(ds_name='utilizationds', ds_type='GAUGE', heartbeat=1) - rra = RRA(cf='AVERAGE', xff=0.1, steps=1, rows=1200) - self.rrd = RRD("/tmp/utilization.rrd", ds=[ds], rra=[rra], start=self.time) - self.rrd.create() - - self.introducer = q = Introducer(self) - self.all_nodes = [Node(randomid(), q, self) - for i in range(self.NUM_NODES)] - q.all_nodes = self.all_nodes - self.next = [] - self.schedule_events() - self.verbose = False - - self.added_files = 0 - self.added_data = 0 - self.deleted_files = 0 - self.published_files = [] - self.failed_files = 0 - self.lost_data_bytes = 0 # bytes deleted to make room for new shares - - def stamp_utilization(self, utilization): - if int(self.time) > (self.prevstamptime+1): - 
self.rrd.bufferValue(self.time, utilization) - self.prevstamptime = int(self.time) - - def write_graph(self): - self.rrd.update() - self.rrd = None - import gc - gc.collect() - - def1 = graph.DataDefinition(vname="a", rrdfile='/tmp/utilization.rrd', ds_name='utilizationds') - area1 = graph.Area(value="a", color="#990033", legend='utilizationlegend') - g = graph.Graph('/tmp/utilization.png', imgformat='PNG', width=540, height=100, vertical_label='utilizationverticallabel', title='utilizationtitle', lower_limit=0) - g.data.append(def1) - g.data.append(area1) - g.write() - - def add_file(self): - size = random.randrange(1000) - n = random.choice(self.all_nodes) - if self.verbose: - print "add_file(size=%d, from node %s)" % (size, n) - fileid = randomid() - able = n.publish_file(fileid, size) - if able: - able, tried = able - self.added_files += 1 - self.added_data += size - self.published_files.append(tried) - else: - self.failed_files += 1 - - def lost_data(self, size): - self.lost_data_bytes += size - - def delete_file(self): - all_nodes = self.all_nodes[:] - random.shuffle(all_nodes) - for n in all_nodes: - if n.delete_file(): - self.deleted_files += 1 - return - print "no files to delete" - - def _add_event(self, etype): - rate = getattr(self, "RATE_" + etype) - next = self.time + random.expovariate(rate) - self.next.append((next, etype)) - self.next.sort() - - def schedule_events(self): - types = set([e[1] for e in self.next]) - for etype in self.EVENTS: - if not etype in types: - self._add_event(etype) - - def do_event(self): - time, etype = self.next.pop(0) - assert time > self.time - current_time = self.time - self.time = time - self._add_event(etype) - if etype == "ADDFILE": - self.add_file() - elif etype == "DELFILE": - self.delete_file() - elif etype == "ADDNODE": - pass - #self.add_node() - elif etype == "DELNODE": - #self.del_node() - pass - # self.print_stats(current_time, etype) - - def print_stats_header(self): - print "time: added failed lost 
avg_tried" - - def print_stats(self, time, etype): - if not self.published_files: - avg_tried = "NONE" - else: - avg_tried = sum(self.published_files) / len(self.published_files) - print time, etype, self.added_data, self.failed_files, self.lost_data_bytes, avg_tried, len(self.introducer.living_files), self.introducer.utilization - -global s -s = None - -def main(): -# rrdtool.create("foo.rrd", -# "--step 10", -# "DS:files-added:DERIVE::0:1000", -# "RRA:AVERAGE:1:1:1200", -# ) - global s - s = Simulator() - # s.print_stats_header() - for i in range(1000): - s.do_event() - print "%d files added, %d files deleted" % (s.added_files, s.deleted_files) - return s - -if __name__ == '__main__': - main() - - diff --git a/misc/simulators/count_dirs.py b/misc/simulators/count_dirs.py new file mode 100644 index 00000000..78412d33 --- /dev/null +++ b/misc/simulators/count_dirs.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python + +""" +This tool estimates how much space would be consumed by a filetree into which +a native directory was copied. + +One open question is how we should encode directories. One approach is to put +a block of data on a server, one per directory, which effectively contains a +dictionary that maps child names to targets (URIs for children which are +files, slotnames for children which are directories). To prevent the server +which hosts this data from either learning its contents or corrupting them, +we can add encryption and integrity checks to the data, at the cost of +storage overhead. + +This program is intended to estimate the size of these data blocks using +real-world filenames and directories. You point it at a real directory, and +it does a recursive walk of the filesystem, adding up the size of the +filetree data structures that would be required to represent it. + +MODES: + + A: no confidentiality or integrity checking. Directories are serialized + plaintext dictionaries which map file/subdir names to targets (either + URIs or slotnames). 
Each entry can be changed independently. + B1: child names and targets are encrypted. No integrity checks, so the + server can still corrupt the contents undetectably. Each entry can + still be changed independently. + B2: same security properties as B1, but the dictionary is serialized before + encryption. This reduces overhead at the cost of preventing independent + updates of entries (all entries must be updated at the same time, so + test-and-set operations are required to avoid data-losing races) + C1: like B1, but adding HMACs to each entry to guarantee data integrity + C2: like B2, but adding a single block-wide HMAC for data integrity + +""" + +import sys, os.path + +#URI:7jzbza6iwdsk5xbxsvdgjaugyrhetw64zpflp4gihmyh5krjblra====:a5qdejwbimu5b2wfke7xwexxlq======:gzeub5v42rjbgd7ccawnahu2evqd42lpdpzd447c6zkmdvjkpowq====:25:100:219889 +# that's a printable representation of two 32-byte hashes (storage index, URI +# extension block hash) and a 16-byte AES read-capability key, and some +# share-count and size information +URI_SIZE = 164 + +#pb://xextf3eap44o3wi27mf7ehiur6wvhzr6@207.7.153.180:56677,127.0.0.1:56677/zilcw5uz2yyyo=== +# that's a FURL which points at the slot. Modes that need to add a +# read-capability AES key will need more space. +SLOTNAME_SIZE = 90 + + +def slotsize(mode, numfiles, numdirs): + # URI_sizes is the total space taken up by the target (dict keys) strings + # for all of the targets that are files, instead of directories + target_sizes_for_files = numfiles * URI_SIZE + slotname_size = SLOTNAME_SIZE + if mode in ("B1", "B2", "C1", "C2"): + slotname_size += 16 + # slotname_sizes is the total space taken up by the target strings for + # all the targets that are directories, instead of files. These are + # bigger when the read+write-cap slotname is larger than the store-cap, + # which happens as soon as we seek to prevent the slot's host from + # reading or corrupting it. 
+ target_sizes_for_subdirs = numdirs * slotname_size + + # now how much overhead is there for each entry? + per_slot, per_entry = 0, 0 + if mode == "B1": + per_entry = 16+12+12 + elif mode == "C1": + per_entry = 16+12+12 + 32+32 + elif mode == "B2": + per_slot = 12 + elif mode == "C2": + per_slot = 12+32 + num_entries = numfiles + numdirs + total = (target_sizes_for_files + + target_sizes_for_subdirs + + per_slot + + per_entry * num_entries + ) + return total + +MODES = ("A", "B1", "B2", "C1", "C2") + +def scan(root): + total = dict([(mode,0) for mode in MODES]) + num_files = 0 + num_dirs = 0 + for absroot, dirs, files in os.walk(root): + #print absroot + #print " %d files" % len(files) + #print " %d subdirs" % len(dirs) + num_files += len(files) + num_dirs += len(dirs) + stringsize = len(''.join(files) + ''.join(dirs)) + for mode in MODES: + total[mode] += slotsize(mode, len(files), len(dirs)) + stringsize + + print "%d directories" % num_dirs + print "%d files" % num_files + for mode in sorted(total.keys()): + print "%s: %d bytes" % (mode, total[mode]) + + +if __name__ == '__main__': + scan(sys.argv[1]) + +""" +260:warner@monolith% ./count_dirs.py ~ +70925 directories +457199 files +A: 90042361 bytes +B1: 112302121 bytes +B2: 92027061 bytes +C1: 146102057 bytes +C2: 94293461 bytes + +""" diff --git a/misc/simulators/ringsim.py b/misc/simulators/ringsim.py new file mode 100644 index 00000000..46480c44 --- /dev/null +++ b/misc/simulators/ringsim.py @@ -0,0 +1,239 @@ +#! /usr/bin/python + +# used to discuss ticket #302: "stop permuting peerlist?" 
+ +import time +import math +from hashlib import sha1, md5, sha256 +myhash = md5 +# md5: 1520 "uploads" per second +# sha1: 1350 ups +# sha256: 930 ups +from itertools import count +from twisted.python import usage + +def abbreviate_space(s, SI=True): + if s is None: + return "unknown" + if SI: + U = 1000.0 + isuffix = "B" + else: + U = 1024.0 + isuffix = "iB" + def r(count, suffix): + return "%.2f %s%s" % (count, suffix, isuffix) + + if s < 1024: # 1000-1023 get emitted as bytes, even in SI mode + return "%d B" % s + if s < U*U: + return r(s/U, "k") + if s < U*U*U: + return r(s/(U*U), "M") + if s < U*U*U*U: + return r(s/(U*U*U), "G") + if s < U*U*U*U*U: + return r(s/(U*U*U*U), "T") + return r(s/(U*U*U*U*U), "P") + +def make_up_a_file_size(seed): + h = int(myhash(seed).hexdigest(),16) + max=2**31 + if 1: # exponential distribution + e = 8 + (h % (31-8)) + return 2 ** e + # uniform distribution + return h % max # avg 1GB + +sizes = [make_up_a_file_size(str(i)) for i in range(10000)] +avg_filesize = sum(sizes)/len(sizes) +print "average file size:", abbreviate_space(avg_filesize) + +SERVER_CAPACITY = 10**12 + +class Server: + def __init__(self, nodeid, capacity): + self.nodeid = nodeid + self.used = 0 + self.capacity = capacity + self.numshares = 0 + self.full_at_tick = None + + def upload(self, sharesize): + if self.used + sharesize < self.capacity: + self.used += sharesize + self.numshares += 1 + return True + return False + + def __repr__(self): + if self.full_at_tick is not None: + return "<%s %s full at %d>" % (self.__class__.__name__, self.nodeid, self.full_at_tick) + else: + return "<%s %s>" % (self.__class__.__name__, self.nodeid) + +class Ring: + SHOW_MINMAX = False + def __init__(self, numservers, seed, permute): + self.servers = [] + for i in range(numservers): + nodeid = myhash(str(seed)+str(i)).hexdigest() + capacity = SERVER_CAPACITY + s = Server(nodeid, capacity) + self.servers.append(s) + self.servers.sort(key=lambda s: s.nodeid) + self.permute = 
permute + #self.list_servers() + + def list_servers(self): + for i in range(len(self.servers)): + s = self.servers[i] + next_s = self.servers[(i+1)%len(self.servers)] + diff = "%032x" % (int(next_s.nodeid,16) - int(s.nodeid,16)) + s.next_diff = diff + prev_s = self.servers[(i-1)%len(self.servers)] + diff = "%032x" % (int(s.nodeid,16) - int(prev_s.nodeid,16)) + s.prev_diff = diff + print s, s.prev_diff + + print "sorted by delta" + for s in sorted(self.servers, key=lambda s:s.prev_diff): + print s, s.prev_diff + + def servers_for_si(self, si): + if self.permute: + def sortkey(s): + return myhash(s.nodeid+si).digest() + return sorted(self.servers, key=sortkey) + for i in range(len(self.servers)): + if self.servers[i].nodeid >= si: + return self.servers[i:] + self.servers[:i] + return list(self.servers) + + def show_servers(self, picked): + bits = [] + for s in self.servers: + if s in picked: + bits.append("1") + else: + bits.append("0") + #d = [s in picked and "1" or "0" for s in self.servers] + return "".join(bits) + + def dump_usage(self, numfiles, avg_space_per_file): + print "uploaded", numfiles + # avg_space_per_file measures expected grid-wide ciphertext per file + used = list(reversed(sorted([s.used for s in self.servers]))) + # used is actual per-server ciphertext + usedpf = [1.0*u/numfiles for u in used] + # usedpf is actual per-server-per-file ciphertext + #print "min/max usage: %s/%s" % (abbreviate_space(used[-1]), + # abbreviate_space(used[0])) + avg_usage_per_file = avg_space_per_file/len(self.servers) + # avg_usage_per_file is expected per-server-per-file ciphertext + spreadpf = usedpf[0] - usedpf[-1] + average_usagepf = sum(usedpf) / len(usedpf) + variance = sum([(u-average_usagepf)**2 for u in usedpf])/(len(usedpf)-1) + std_deviation = math.sqrt(variance) + sd_of_total = std_deviation / avg_usage_per_file + + print "min/max/(exp) usage-pf-ps %s/%s/(%s):" % ( + abbreviate_space(usedpf[-1]), + abbreviate_space(usedpf[0]), + 
abbreviate_space(avg_usage_per_file) ), + print "spread-pf: %s (%.2f%%)" % ( + abbreviate_space(spreadpf), 100.0*spreadpf/avg_usage_per_file), + #print "average_usage:", abbreviate_space(average_usagepf) + print "stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation), + 100.0*sd_of_total) + if self.SHOW_MINMAX: + s2 = sorted(self.servers, key=lambda s: s.used) + print "least:", s2[0].nodeid + print "most:", s2[-1].nodeid + + +class Options(usage.Options): + optParameters = [ + ("k", "k", 3, "required shares", int), + ("N", "N", 10, "total shares", int), + ("servers", None, 100, "number of servers", int), + ("seed", None, None, "seed to use for creating ring"), + ("fileseed", None, "blah", "seed to use for creating files"), + ("permute", "p", 1, "1 to permute, 0 to use flat ring", int), + ] + def postOptions(self): + assert self["seed"] + + +def do_run(ring, opts): + avg_space_per_file = avg_filesize * opts["N"] / opts["k"] + fileseed = opts["fileseed"] + start = time.time() + all_servers_have_room = True + no_files_have_wrapped = True + for filenum in count(0): + #used = list(reversed(sorted([s.used for s in ring.servers]))) + #used = [s.used for s in ring.servers] + #print used + si = myhash(fileseed+str(filenum)).hexdigest() + filesize = make_up_a_file_size(si) + sharesize = filesize / opts["k"] + if filenum%4000==0 and filenum > 1: + ring.dump_usage(filenum, avg_space_per_file) + servers = ring.servers_for_si(si) + #print ring.show_servers(servers[:opts["N"]]) + remaining_shares = opts["N"] + index = 0 + server_was_full = False + file_was_wrapped = False + remaining_servers = set(servers) + while remaining_shares: + if index >= len(servers): + index = 0 + file_was_wrapped = True + s = servers[index] + accepted = s.upload(sharesize) + if not accepted: + server_was_full = True + remaining_servers.discard(s) + if not remaining_servers: + print "-- GRID IS FULL" + ring.dump_usage(filenum, avg_space_per_file) + return filenum + index += 1 + continue + 
remaining_shares -= 1 + index += 1 + # file is done being uploaded + + if server_was_full and all_servers_have_room: + all_servers_have_room = False + print "-- FIRST SERVER FULL" + ring.dump_usage(filenum, avg_space_per_file) + if file_was_wrapped and no_files_have_wrapped: + no_files_have_wrapped = False + print "-- FIRST FILE WRAPPED" + ring.dump_usage(filenum, avg_space_per_file) + + +def do_ring(opts): + total_capacity = opts["servers"]*SERVER_CAPACITY + avg_space_per_file = avg_filesize * opts["N"] / opts["k"] + avg_files = total_capacity / avg_space_per_file + print "expected number of uploads:", avg_files + if opts["permute"]: + print " PERMUTED" + else: + print " LINEAR" + seed = opts["seed"] + + ring = Ring(opts["servers"], seed, opts["permute"]) + num_files = do_run(ring, opts) + +def run(opts): + do_ring(opts) + +if __name__ == "__main__": + opts = Options() + opts.parseOptions() + run(opts) diff --git a/misc/simulators/simulate_load.py b/misc/simulators/simulate_load.py new file mode 100644 index 00000000..50ff45b9 --- /dev/null +++ b/misc/simulators/simulate_load.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python + +# WARNING. There is a bug in this script so that it does not simulate the actual Tahoe Two server selection algorithm that it was intended to simulate. See http://allmydata.org/trac/tahoe-lafs/ticket/302 (stop permuting peerlist, use SI as offset into ring instead?) 
+ +import random + +SERVER_CAPACITY = 10**12 + +class Server: + def __init__(self): + self.si = random.randrange(0, 2**31) + self.used = 0 + self.max = SERVER_CAPACITY + self.full_at_tick = None + + def __repr__(self): + if self.full_at_tick is not None: + return "<%s %s full at %d>" % (self.__class__.__name__, self.si, self.full_at_tick) + else: + return "<%s %s>" % (self.__class__.__name__, self.si) + +SERVERS = 4 +K = 3 +N = 10 + +def make_up_a_file_size(): + return (2 ** random.randrange(8, 31)) + +def go(permutedpeerlist): + servers = [ Server() for x in range(SERVERS) ] + servers.sort(cmp=lambda x,y: cmp(x.si, y.si)) + + doubled_up_shares = 0 + tick = 0 + fullservers = 0 + while True: + nextsharesize = make_up_a_file_size() / K + if permutedpeerlist: + random.shuffle(servers) + else: + # rotate a random number + rot = random.randrange(0, len(servers)) + servers = servers[rot:] + servers[:rot] + + i = 0 + wrapped = False + sharestoput = N + while sharestoput: + server = servers[i] + if server.used + nextsharesize < server.max: + server.used += nextsharesize + sharestoput -= 1 + if wrapped: + doubled_up_shares += 1 + else: + if server.full_at_tick is None: + server.full_at_tick = tick + fullservers += 1 + if fullservers == len(servers): + # print "Couldn't place share -- all servers full. Stopping." + return (servers, doubled_up_shares) + + i += 1 + if i == len(servers): + wrapped = True + i = 0 + + tick += 1 + +def div_ceil(n, d): + """ + The smallest integer k such that k*d >= n. + """ + return (n/d) + (n%d != 0) + +DESIRED_COLUMNS = 70 + +START_FILES = 137000 +STOP_FILES = 144000 + +def test(permutedpeerlist, iters): + # The i'th element of the filledat list is how many servers got full when the i'th file was uploaded. 
+ filledat = [] + for test in range(iters): + (servers, doubled_up_shares) = go(permutedpeerlist) + print "doubled_up_shares: ", doubled_up_shares + for server in servers: + fidx = server.full_at_tick + filledat.extend([0]*(fidx-len(filledat)+1)) + filledat[fidx] += 1 + + startfiles = 0 + while filledat[startfiles] == 0: + startfiles += 1 + filespercolumn = div_ceil(len(filledat) - startfiles, (DESIRED_COLUMNS - 3)) + + # to make comparisons between runs line up: + # startfiles = START_FILES + # filespercolumn = div_ceil(STOP_FILES - startfiles, (DESIRED_COLUMNS - 3)) + + # The i'th element of the compressedfilledat list is how many servers got full when the filespercolumn files starting at startfiles + i were uploaded. + compressedfilledat = [] + idx = startfiles + while idx < len(filledat): + compressedfilledat.append(0) + for i in range(filespercolumn): + compressedfilledat[-1] += filledat[idx] + idx += 1 + if idx >= len(filledat): + break + + # The i'th element of the fullat list is how many servers were full by the tick numbered startfiles + i * filespercolumn (on average). + fullat = [0] * len(compressedfilledat) + for idx, num in enumerate(compressedfilledat): + for fidx in range(idx, len(fullat)): + fullat[fidx] += num + + for idx in range(len(fullat)): + fullat[idx] = fullat[idx] / float(iters) + + # Now print it out as an ascii art graph. 
+ import sys + for serversfull in range(40, 0, -1): + sys.stdout.write("%2d " % serversfull) + for numfull in fullat: + if int(numfull) == serversfull: + sys.stdout.write("*") + else: + sys.stdout.write(" ") + sys.stdout.write("\n") + + sys.stdout.write(" ^-- servers full\n") + idx = 0 + while idx < len(fullat): + nextmark = "%d--^ " % (startfiles + idx * filespercolumn) + sys.stdout.write(nextmark) + idx += len(nextmark) + + sys.stdout.write("\nfiles uploaded --> \n") + + + +if __name__ == "__main__": + import sys + iters = 16 + for arg in sys.argv: + if arg.startswith("--iters="): + iters = int(arg[8:]) + if "--permute" in sys.argv: + print "doing permuted peerlist, iterations: %d" % iters + test(True, iters) + else: + print "doing simple ring, iterations: %d" % iters + test(False, iters) diff --git a/misc/simulators/simulator.py b/misc/simulators/simulator.py new file mode 100644 index 00000000..adf674de --- /dev/null +++ b/misc/simulators/simulator.py @@ -0,0 +1,292 @@ +#! /usr/bin/env python + +import sha as shamodule +import os, random + +from pkg_resources import require +require('PyRRD') +from pyrrd import graph +from pyrrd.rrd import DataSource, RRD, RRA + + +def sha(s): + return shamodule.new(s).digest() + +def randomid(): + return os.urandom(20) + +class Node: + def __init__(self, nid, introducer, simulator): + self.nid = nid + self.introducer = introducer + self.simulator = simulator + self.shares = {} + self.capacity = random.randrange(1000) + self.utilization = 0 + self.files = [] + + def permute_peers(self, fileid): + permuted = [(sha(fileid+n.nid),n) + for n in self.introducer.get_all_nodes()] + permuted.sort() + return permuted + + def publish_file(self, fileid, size, numshares=100): + sharesize = 4 * size / numshares + permuted = self.permute_peers(fileid) + last_givento = None + tried = 0 + givento = [] + while numshares and permuted: + pid,node = permuted.pop(0) + tried += 1 + last_givento = pid + if node.accept_share(fileid, sharesize): + 
givento.append((pid,node)) + numshares -= 1 + if numshares: + # couldn't push, should delete + for pid,node in givento: + node.delete_share(fileid) + return False + self.files.append((fileid, numshares)) + self.introducer.please_preserve(fileid, size, tried, last_givento) + return (True, tried) + + def accept_share(self, fileid, sharesize): + accept = False + if self.utilization < self.capacity: + # we have room! yay! + self.shares[fileid] = sharesize + self.utilization += sharesize + return True + if self.decide(sharesize): + # we don't, but we'll make room + self.make_space(sharesize) + self.shares[fileid] = sharesize + self.utilization += sharesize + return True + else: + # we're full, try elsewhere + return False + + def decide(self, sharesize): + if sharesize > self.capacity: + return False + return False + return random.random() > 0.5 + + def make_space(self, sharesize): + assert sharesize <= self.capacity + while self.capacity - self.utilization < sharesize: + victim = random.choice(self.shares.keys()) + self.simulator.lost_data(self.shares[victim]) + self.delete_share(victim) + + def delete_share(self, fileid): + if fileid in self.shares: + self.utilization -= self.shares[fileid] + del self.shares[fileid] + return True + return False + + def retrieve_file(self): + if not self.files: + return + fileid,numshares = random.choice(self.files) + needed = numshares / 4 + peers = [] + for pid,node in self.permute_peers(fileid): + if random.random() > self.simulator.P_NODEAVAIL: + continue # node isn't available right now + if node.has_share(fileid): + peers.append(node) + if len(peers) >= needed: + return True + return False + + def delete_file(self): + if not self.files: + return False + which = random.choice(self.files) + self.files.remove(which) + fileid,numshares = which + self.introducer.delete(fileid) + return True + +class Introducer: + def __init__(self, simulator): + self.living_files = {} + self.utilization = 0 # total size of all active files + 
self.simulator = simulator + self.simulator.stamp_utilization(self.utilization) + + def get_all_nodes(self): + return self.all_nodes + + def please_preserve(self, fileid, size, tried, last_givento): + self.living_files[fileid] = (size, tried, last_givento) + self.utilization += size + self.simulator.stamp_utilization(self.utilization) + + def please_delete(self, fileid): + self.delete(fileid) + + def permute_peers(self, fileid): + permuted = [(sha(fileid+n.nid),n) + for n in self.get_all_nodes()] + permuted.sort() + return permuted + + def delete(self, fileid): + permuted = self.permute_peers(fileid) + size, tried, last_givento = self.living_files[fileid] + pid = "" + while tried and pid < last_givento: + pid,node = permuted.pop(0) + had_it = node.delete_share(fileid) + if had_it: + tried -= 1 + self.utilization -= size + self.simulator.stamp_utilization(self.utilization) + del self.living_files[fileid] + +class Simulator: + NUM_NODES = 1000 + EVENTS = ["ADDFILE", "DELFILE", "ADDNODE", "DELNODE"] + RATE_ADDFILE = 1.0 / 10 + RATE_DELFILE = 1.0 / 20 + RATE_ADDNODE = 1.0 / 3000 + RATE_DELNODE = 1.0 / 4000 + P_NODEAVAIL = 1.0 + + def __init__(self): + self.time = 1164783600 # small numbers of seconds since the epoch confuse rrdtool + self.prevstamptime = int(self.time) + + ds = DataSource(ds_name='utilizationds', ds_type='GAUGE', heartbeat=1) + rra = RRA(cf='AVERAGE', xff=0.1, steps=1, rows=1200) + self.rrd = RRD("/tmp/utilization.rrd", ds=[ds], rra=[rra], start=self.time) + self.rrd.create() + + self.introducer = q = Introducer(self) + self.all_nodes = [Node(randomid(), q, self) + for i in range(self.NUM_NODES)] + q.all_nodes = self.all_nodes + self.next = [] + self.schedule_events() + self.verbose = False + + self.added_files = 0 + self.added_data = 0 + self.deleted_files = 0 + self.published_files = [] + self.failed_files = 0 + self.lost_data_bytes = 0 # bytes deleted to make room for new shares + + def stamp_utilization(self, utilization): + if int(self.time) > 
(self.prevstamptime+1): + self.rrd.bufferValue(self.time, utilization) + self.prevstamptime = int(self.time) + + def write_graph(self): + self.rrd.update() + self.rrd = None + import gc + gc.collect() + + def1 = graph.DataDefinition(vname="a", rrdfile='/tmp/utilization.rrd', ds_name='utilizationds') + area1 = graph.Area(value="a", color="#990033", legend='utilizationlegend') + g = graph.Graph('/tmp/utilization.png', imgformat='PNG', width=540, height=100, vertical_label='utilizationverticallabel', title='utilizationtitle', lower_limit=0) + g.data.append(def1) + g.data.append(area1) + g.write() + + def add_file(self): + size = random.randrange(1000) + n = random.choice(self.all_nodes) + if self.verbose: + print "add_file(size=%d, from node %s)" % (size, n) + fileid = randomid() + able = n.publish_file(fileid, size) + if able: + able, tried = able + self.added_files += 1 + self.added_data += size + self.published_files.append(tried) + else: + self.failed_files += 1 + + def lost_data(self, size): + self.lost_data_bytes += size + + def delete_file(self): + all_nodes = self.all_nodes[:] + random.shuffle(all_nodes) + for n in all_nodes: + if n.delete_file(): + self.deleted_files += 1 + return + print "no files to delete" + + def _add_event(self, etype): + rate = getattr(self, "RATE_" + etype) + next = self.time + random.expovariate(rate) + self.next.append((next, etype)) + self.next.sort() + + def schedule_events(self): + types = set([e[1] for e in self.next]) + for etype in self.EVENTS: + if not etype in types: + self._add_event(etype) + + def do_event(self): + time, etype = self.next.pop(0) + assert time > self.time + current_time = self.time + self.time = time + self._add_event(etype) + if etype == "ADDFILE": + self.add_file() + elif etype == "DELFILE": + self.delete_file() + elif etype == "ADDNODE": + pass + #self.add_node() + elif etype == "DELNODE": + #self.del_node() + pass + # self.print_stats(current_time, etype) + + def print_stats_header(self): + print "time: 
added failed lost avg_tried" + + def print_stats(self, time, etype): + if not self.published_files: + avg_tried = "NONE" + else: + avg_tried = sum(self.published_files) / len(self.published_files) + print time, etype, self.added_data, self.failed_files, self.lost_data_bytes, avg_tried, len(self.introducer.living_files), self.introducer.utilization + +global s +s = None + +def main(): +# rrdtool.create("foo.rrd", +# "--step 10", +# "DS:files-added:DERIVE::0:1000", +# "RRA:AVERAGE:1:1:1200", +# ) + global s + s = Simulator() + # s.print_stats_header() + for i in range(1000): + s.do_event() + print "%d files added, %d files deleted" % (s.added_files, s.deleted_files) + return s + +if __name__ == '__main__': + main() + + diff --git a/misc/simulators/sizes.py b/misc/simulators/sizes.py new file mode 100644 index 00000000..d9c230a3 --- /dev/null +++ b/misc/simulators/sizes.py @@ -0,0 +1,213 @@ +#! /usr/bin/env python + +import random, math, re +from twisted.python import usage + +class Args(usage.Options): + optParameters = [ + ["mode", "m", "alpha", "validation scheme"], + ["arity", "k", 2, "k (airty) for hash tree"], + ] + def opt_arity(self, option): + self['arity'] = int(option) + def parseArgs(self, *args): + if len(args) > 0: + self['mode'] = args[0] + + +def charttest(): + import gdchart + sizes = [random.randrange(10, 20) for i in range(10)] + x = gdchart.Line() + x.width = 250 + x.height = 250 + x.xtitle = "sample" + x.ytitle = "size" + x.title = "Example Graph" + #x.ext_color = [ "white", "yellow", "red", "blue", "green"] + x.setData(sizes) + #x.setLabels(["Mon", "Tue", "Wed", "Thu", "Fri"]) + x.draw("simple.png") + +KiB=1024 +MiB=1024*KiB +GiB=1024*MiB +TiB=1024*GiB +PiB=1024*TiB + +class Sizes: + def __init__(self, mode, file_size, arity=2): + MAX_SEGSIZE = 128*KiB + self.mode = mode + self.file_size = file_size + self.seg_size = seg_size = 1.0 * min(MAX_SEGSIZE, file_size) + self.num_segs = num_segs = math.ceil(file_size / seg_size) + self.num_blocks = 
num_blocks = num_segs + + self.num_shares = num_shares = 10 + self.shares_needed = shares_needed = 3 + + self.block_size = block_size = seg_size / shares_needed + self.share_size = share_size = block_size * num_blocks + + # none of this includes the share-level hash chain yet, since that is + # only a function of the number of shares. All overhead numbers + # assume that the share-level hash chain has already been sent, + # including the root of the block-level hash tree. + + if mode == "alpha": + # no hash tree at all + self.block_arity = 0 + self.block_tree_depth = 0 + self.block_overhead = 0 + self.bytes_until_some_data = 20 + share_size + self.share_storage_overhead = 0 + self.share_transmission_overhead = 0 + + elif mode == "beta": + # k=num_blocks, d=1 + # each block has a 20-byte hash + self.block_arity = num_blocks + self.block_tree_depth = 1 + self.block_overhead = 20 + # the share has a list of hashes, one for each block + self.share_storage_overhead = (self.block_overhead * + num_blocks) + # we can get away with not sending the hash of the share that + # we're sending in full, once + self.share_transmission_overhead = self.share_storage_overhead - 20 + # we must get the whole list (so it can be validated) before + # any data can be validated + self.bytes_until_some_data = (self.share_transmission_overhead + + block_size) + + elif mode == "gamma": + self.block_arity = k = arity + d = math.ceil(math.log(num_blocks, k)) + self.block_tree_depth = d + num_leaves = k ** d + # to make things easier, we make the pessimistic assumption that + # we have to store hashes for all the empty places in the tree + # (when the number of shares is not an exact exponent of k) + self.block_overhead = 20 + # the block hashes are organized into a k-ary tree, which + # means storing (and eventually transmitting) more hashes. This + # count includes all the low-level share hashes and the root. 
+ hash_nodes = (num_leaves*k - 1) / (k - 1) + #print "hash_depth", d + #print "num_leaves", num_leaves + #print "hash_nodes", hash_nodes + # the storage overhead is this + self.share_storage_overhead = 20 * (hash_nodes - 1) + # the transmission overhead is smaller: if we actually transmit + # every block, we don't have to transmit 1/k of the + # lowest-level block hashes, and we don't have to transmit the + # root because it was already sent with the share-level hash tree + self.share_transmission_overhead = 20 * (hash_nodes + - 1 # the root + - num_leaves / k) + # we must get a full sibling hash chain before we can validate + # any data + sibling_length = d * (k-1) + self.bytes_until_some_data = 20 * sibling_length + block_size + + + + else: + raise ValueError("unknown mode '%s" % mode) + + self.storage_overhead = self.share_storage_overhead * num_shares + self.storage_overhead_percentage = 100.0 * self.storage_overhead / file_size + + def dump(self): + for k in ("mode", "file_size", "seg_size", + "num_segs", "num_blocks", "num_shares", "shares_needed", + "block_size", "share_size", + "block_arity", "block_tree_depth", + "block_overhead", + "share_storage_overhead", "share_transmission_overhead", + "storage_overhead", "storage_overhead_percentage", + "bytes_until_some_data"): + print k, getattr(self, k) + +def fmt(num, trim=False): + if num < KiB: + #s = str(num) + "#" + s = "%.2f#" % num + elif num < MiB: + s = "%.2fk" % (num / KiB) + elif num < GiB: + s = "%.2fM" % (num / MiB) + elif num < TiB: + s = "%.2fG" % (num / GiB) + elif num < PiB: + s = "%.2fT" % (num / TiB) + else: + s = "big" + if trim: + s = re.sub(r'(\.0+)([kMGT#])', + lambda m: m.group(2), + s) + else: + s = re.sub(r'(\.0+)([kMGT#])', + lambda m: (" "*len(m.group(1))+m.group(2)), + s) + if s.endswith("#"): + s = s[:-1] + " " + return s + +def text(): + opts = Args() + opts.parseOptions() + mode = opts["mode"] + arity = opts["arity"] + # 
0123456789012345678901234567890123456789012345678901234567890123456 + print "mode=%s" % mode, " arity=%d" % arity + print " storage storage" + print "Size sharesize overhead overhead k d alacrity" + print " (bytes) (%)" + print "------- ------- -------- -------- ---- -- --------" + #sizes = [2 ** i for i in range(7, 41)] + radix = math.sqrt(10); expstep = 2 + radix = 2; expstep = 2 + #radix = 10; expstep = 1 + maxexp = int(math.ceil(math.log(1e12, radix)))+2 + sizes = [radix ** i for i in range(2,maxexp,expstep)] + for file_size in sizes: + s = Sizes(mode, file_size, arity) + out = "" + out += "%7s " % fmt(file_size, trim=True) + out += "%7s " % fmt(s.share_size) + out += "%8s" % fmt(s.storage_overhead) + out += "%10.2f " % s.storage_overhead_percentage + out += " %4d" % int(s.block_arity) + out += " %2d" % int(s.block_tree_depth) + out += " %8s" % fmt(s.bytes_until_some_data) + print out + + +def graph(): + # doesn't work yet + import Gnuplot + opts = Args() + opts.parseOptions() + mode = opts["mode"] + arity = opts["arity"] + g = Gnuplot.Gnuplot(debug=1) + g.title("overhead / alacrity tradeoffs") + g.xlabel("file size") + g.ylabel("stuff") + sizes = [2 ** i for i in range(7, 32)] + series = {"overhead": {}, "alacrity": {}} + for file_size in sizes: + s = Sizes(mode, file_size, arity) + series["overhead"][file_size] = s.storage_overhead_percentage + series["alacrity"][file_size] = s.bytes_until_some_data + g.plot([ (fs, series["overhead"][fs]) + for fs in sizes ]) + raw_input("press return") + + +if __name__ == '__main__': + text() + #graph() diff --git a/misc/simulators/storage-overhead.py b/misc/simulators/storage-overhead.py new file mode 100644 index 00000000..75a0bf61 --- /dev/null +++ b/misc/simulators/storage-overhead.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python + +import sys, math +from allmydata import upload, uri, encode, storage +from allmydata.util import mathutil + +def roundup(size, blocksize=4096): + return blocksize * mathutil.div_ceil(size, 
blocksize) + + +class BigFakeString: + def __init__(self, length): + self.length = length + self.fp = 0 + def seek(self, offset, whence=0): + if whence == 0: + self.fp = offset + elif whence == 1: + self.fp += offset + elif whence == 2: + self.fp = self.length - offset + def tell(self): + return self.fp + +def calc(filesize, params=(3,7,10), segsize=encode.Encoder.MAX_SEGMENT_SIZE): + num_shares = params[2] + if filesize <= upload.Uploader.URI_LIT_SIZE_THRESHOLD: + urisize = len(uri.pack_lit("A"*filesize)) + sharesize = 0 + sharespace = 0 + else: + u = upload.FileUploader(None) + u.set_params(params) + # unfortunately, Encoder doesn't currently lend itself to answering + # this question without measuring a filesize, so we have to give it a + # fake one + data = BigFakeString(filesize) + u.set_filehandle(data) + u.set_encryption_key("a"*16) + sharesize, blocksize = u.setup_encoder() + # how much overhead? + # 0x20 bytes of offsets + # 0x04 bytes of extension length + # 0x1ad bytes of extension (=429) + # total is 465 bytes + num_segments = mathutil.div_ceil(filesize, segsize) + num_share_hashes = int(math.log(mathutil.next_power_of_k(num_shares, 2), + 2)) + 1 + sharesize = storage.allocated_size(sharesize, num_segments, + num_share_hashes, + 429) + sharespace = num_shares * roundup(sharesize) + urisize = len(uri.pack_uri(storage_index="a"*32, + key="a"*16, + uri_extension_hash="a"*32, + needed_shares=params[0], + total_shares=params[2], + size=filesize)) + + return urisize, sharesize, sharespace + +def main(): + filesize = int(sys.argv[1]) + urisize, sharesize, sharespace = calc(filesize) + print "urisize:", urisize + print "sharesize: %10d" % sharesize + print "sharespace: %10d" % sharespace + print "desired expansion: %1.1f" % (1.0 * 10 / 3) + print "effective expansion: %1.1f" % (1.0 * sharespace / filesize) + +def chart(): + filesize = 2 + while filesize < 2**20: + urisize, sharesize, sharespace = calc(int(filesize)) + expansion = 1.0 * sharespace / 
int(filesize) + print "%d,%d,%d,%1.2f" % (int(filesize), urisize, sharespace, expansion) + filesize = filesize * 2**0.5 + +if __name__ == '__main__': + if sys.argv[1] == "chart": + chart() + else: + main() + diff --git a/misc/sizes.py b/misc/sizes.py deleted file mode 100644 index d9c230a3..00000000 --- a/misc/sizes.py +++ /dev/null @@ -1,213 +0,0 @@ -#! /usr/bin/env python - -import random, math, re -from twisted.python import usage - -class Args(usage.Options): - optParameters = [ - ["mode", "m", "alpha", "validation scheme"], - ["arity", "k", 2, "k (airty) for hash tree"], - ] - def opt_arity(self, option): - self['arity'] = int(option) - def parseArgs(self, *args): - if len(args) > 0: - self['mode'] = args[0] - - -def charttest(): - import gdchart - sizes = [random.randrange(10, 20) for i in range(10)] - x = gdchart.Line() - x.width = 250 - x.height = 250 - x.xtitle = "sample" - x.ytitle = "size" - x.title = "Example Graph" - #x.ext_color = [ "white", "yellow", "red", "blue", "green"] - x.setData(sizes) - #x.setLabels(["Mon", "Tue", "Wed", "Thu", "Fri"]) - x.draw("simple.png") - -KiB=1024 -MiB=1024*KiB -GiB=1024*MiB -TiB=1024*GiB -PiB=1024*TiB - -class Sizes: - def __init__(self, mode, file_size, arity=2): - MAX_SEGSIZE = 128*KiB - self.mode = mode - self.file_size = file_size - self.seg_size = seg_size = 1.0 * min(MAX_SEGSIZE, file_size) - self.num_segs = num_segs = math.ceil(file_size / seg_size) - self.num_blocks = num_blocks = num_segs - - self.num_shares = num_shares = 10 - self.shares_needed = shares_needed = 3 - - self.block_size = block_size = seg_size / shares_needed - self.share_size = share_size = block_size * num_blocks - - # none of this includes the share-level hash chain yet, since that is - # only a function of the number of shares. All overhead numbers - # assume that the share-level hash chain has already been sent, - # including the root of the block-level hash tree. 
- - if mode == "alpha": - # no hash tree at all - self.block_arity = 0 - self.block_tree_depth = 0 - self.block_overhead = 0 - self.bytes_until_some_data = 20 + share_size - self.share_storage_overhead = 0 - self.share_transmission_overhead = 0 - - elif mode == "beta": - # k=num_blocks, d=1 - # each block has a 20-byte hash - self.block_arity = num_blocks - self.block_tree_depth = 1 - self.block_overhead = 20 - # the share has a list of hashes, one for each block - self.share_storage_overhead = (self.block_overhead * - num_blocks) - # we can get away with not sending the hash of the share that - # we're sending in full, once - self.share_transmission_overhead = self.share_storage_overhead - 20 - # we must get the whole list (so it can be validated) before - # any data can be validated - self.bytes_until_some_data = (self.share_transmission_overhead + - block_size) - - elif mode == "gamma": - self.block_arity = k = arity - d = math.ceil(math.log(num_blocks, k)) - self.block_tree_depth = d - num_leaves = k ** d - # to make things easier, we make the pessimistic assumption that - # we have to store hashes for all the empty places in the tree - # (when the number of shares is not an exact exponent of k) - self.block_overhead = 20 - # the block hashes are organized into a k-ary tree, which - # means storing (and eventually transmitting) more hashes. This - # count includes all the low-level share hashes and the root. 
- hash_nodes = (num_leaves*k - 1) / (k - 1) - #print "hash_depth", d - #print "num_leaves", num_leaves - #print "hash_nodes", hash_nodes - # the storage overhead is this - self.share_storage_overhead = 20 * (hash_nodes - 1) - # the transmission overhead is smaller: if we actually transmit - # every block, we don't have to transmit 1/k of the - # lowest-level block hashes, and we don't have to transmit the - # root because it was already sent with the share-level hash tree - self.share_transmission_overhead = 20 * (hash_nodes - - 1 # the root - - num_leaves / k) - # we must get a full sibling hash chain before we can validate - # any data - sibling_length = d * (k-1) - self.bytes_until_some_data = 20 * sibling_length + block_size - - - - else: - raise ValueError("unknown mode '%s" % mode) - - self.storage_overhead = self.share_storage_overhead * num_shares - self.storage_overhead_percentage = 100.0 * self.storage_overhead / file_size - - def dump(self): - for k in ("mode", "file_size", "seg_size", - "num_segs", "num_blocks", "num_shares", "shares_needed", - "block_size", "share_size", - "block_arity", "block_tree_depth", - "block_overhead", - "share_storage_overhead", "share_transmission_overhead", - "storage_overhead", "storage_overhead_percentage", - "bytes_until_some_data"): - print k, getattr(self, k) - -def fmt(num, trim=False): - if num < KiB: - #s = str(num) + "#" - s = "%.2f#" % num - elif num < MiB: - s = "%.2fk" % (num / KiB) - elif num < GiB: - s = "%.2fM" % (num / MiB) - elif num < TiB: - s = "%.2fG" % (num / GiB) - elif num < PiB: - s = "%.2fT" % (num / TiB) - else: - s = "big" - if trim: - s = re.sub(r'(\.0+)([kMGT#])', - lambda m: m.group(2), - s) - else: - s = re.sub(r'(\.0+)([kMGT#])', - lambda m: (" "*len(m.group(1))+m.group(2)), - s) - if s.endswith("#"): - s = s[:-1] + " " - return s - -def text(): - opts = Args() - opts.parseOptions() - mode = opts["mode"] - arity = opts["arity"] - # 
0123456789012345678901234567890123456789012345678901234567890123456 - print "mode=%s" % mode, " arity=%d" % arity - print " storage storage" - print "Size sharesize overhead overhead k d alacrity" - print " (bytes) (%)" - print "------- ------- -------- -------- ---- -- --------" - #sizes = [2 ** i for i in range(7, 41)] - radix = math.sqrt(10); expstep = 2 - radix = 2; expstep = 2 - #radix = 10; expstep = 1 - maxexp = int(math.ceil(math.log(1e12, radix)))+2 - sizes = [radix ** i for i in range(2,maxexp,expstep)] - for file_size in sizes: - s = Sizes(mode, file_size, arity) - out = "" - out += "%7s " % fmt(file_size, trim=True) - out += "%7s " % fmt(s.share_size) - out += "%8s" % fmt(s.storage_overhead) - out += "%10.2f " % s.storage_overhead_percentage - out += " %4d" % int(s.block_arity) - out += " %2d" % int(s.block_tree_depth) - out += " %8s" % fmt(s.bytes_until_some_data) - print out - - -def graph(): - # doesn't work yet - import Gnuplot - opts = Args() - opts.parseOptions() - mode = opts["mode"] - arity = opts["arity"] - g = Gnuplot.Gnuplot(debug=1) - g.title("overhead / alacrity tradeoffs") - g.xlabel("file size") - g.ylabel("stuff") - sizes = [2 ** i for i in range(7, 32)] - series = {"overhead": {}, "alacrity": {}} - for file_size in sizes: - s = Sizes(mode, file_size, arity) - series["overhead"][file_size] = s.storage_overhead_percentage - series["alacrity"][file_size] = s.bytes_until_some_data - g.plot([ (fs, series["overhead"][fs]) - for fs in sizes ]) - raw_input("press return") - - -if __name__ == '__main__': - text() - #graph() diff --git a/misc/spacetime/diskwatcher.py b/misc/spacetime/diskwatcher.py deleted file mode 100644 index 05a68ac9..00000000 --- a/misc/spacetime/diskwatcher.py +++ /dev/null @@ -1,32 +0,0 @@ - -from axiom.item import Item -from axiom.attributes import text, integer, timestamp - - -class Sample(Item): - # we didn't originally set typeName, so it was generated from the - # fully-qualified classname ("diskwatcher.Sample"), then 
Axiom - # automatically lowercases and un-dot-ifies it to get - # "diskwatcher_sample". Now we explicitly provide a name. - typeName = "diskwatcher_sample" - - # version 2 added the 'total' field - schemaVersion = 2 - - url = text(indexed=True) - when = timestamp(indexed=True) - total = integer() - used = integer() - avail = integer() - -def upgradeSample1to2(old): - total = 0 - return old.upgradeVersion("diskwatcher_sample", 1, 2, - url=old.url, - when=old.when, - total=0, - used=old.used, - avail=old.avail) - -from axiom.upgrade import registerUpgrader -registerUpgrader(upgradeSample1to2, "diskwatcher_sample", 1, 2) diff --git a/misc/spacetime/diskwatcher.tac b/misc/spacetime/diskwatcher.tac deleted file mode 100644 index 112b8781..00000000 --- a/misc/spacetime/diskwatcher.tac +++ /dev/null @@ -1,385 +0,0 @@ -# -*- python -*- - -""" -Run this tool with twistd in its own directory, with a file named 'urls.txt' -describing which nodes to query. Make sure to copy diskwatcher.py into the -same directory. It will request disk-usage numbers from the nodes once per -hour (or slower), and store them in a local database. It will compute -usage-per-unit time values over several time ranges and make them available -through an HTTP query (using ./webport). It will also provide an estimate of -how much time is left before the grid's storage is exhausted. - -There are munin plugins (named tahoe_doomsday and tahoe_diskusage) to graph -the values this tool computes. - -Each line of urls.txt points to a single node. Each node should have its own -dedicated disk: if multiple nodes share a disk, only list one of them in -urls.txt (otherwise that space will be double-counted, confusing the -results). 
Each line should be in the form: - - http://host:webport/statistics?t=json - -""" - -# TODO: -# built-in graphs on web interface - - -import os.path, urllib, time -from datetime import timedelta -from twisted.application import internet, service, strports -from twisted.web import server, resource, http, client -from twisted.internet import defer -from twisted.python import log -import simplejson -from axiom.attributes import AND -from axiom.store import Store -from epsilon import extime -from diskwatcher import Sample - -#from axiom.item import Item -#from axiom.attributes import text, integer, timestamp - -#class Sample(Item): -# url = text() -# when = timestamp() -# used = integer() -# avail = integer() - -#s = Store("history.axiom") -#ns = Store("new-history.axiom") -#for sa in s.query(Sample): -# diskwatcher.Sample(store=ns, -# url=sa.url, when=sa.when, used=sa.used, avail=sa.avail) -#print "done" - -HOUR = 3600 -DAY = 24*3600 -WEEK = 7*DAY -MONTH = 30*DAY -YEAR = 365*DAY - -class DiskWatcher(service.MultiService, resource.Resource): - POLL_INTERVAL = 1*HOUR - AVERAGES = {#"60s": 60, - #"5m": 5*60, - #"30m": 30*60, - "1hr": 1*HOUR, - "1day": 1*DAY, - "2wk": 2*WEEK, - "4wk": 4*WEEK, - } - - def __init__(self): - assert os.path.exists("diskwatcher.tac") # run from the right directory - self.growth_cache = {} - service.MultiService.__init__(self) - resource.Resource.__init__(self) - self.store = Store("history.axiom") - self.store.whenFullyUpgraded().addCallback(self._upgrade_complete) - service.IService(self.store).setServiceParent(self) # let upgrader run - ts = internet.TimerService(self.POLL_INTERVAL, self.poll) - ts.setServiceParent(self) - - def _upgrade_complete(self, ignored): - print "Axiom store upgrade complete" - - def startService(self): - service.MultiService.startService(self) - - try: - desired_webport = open("webport", "r").read().strip() - except EnvironmentError: - desired_webport = None - webport = desired_webport or "tcp:0" - root = self - 
serv = strports.service(webport, server.Site(root)) - serv.setServiceParent(self) - if not desired_webport: - got_port = serv._port.getHost().port - open("webport", "w").write("tcp:%d\n" % got_port) - - - def get_urls(self): - for url in open("urls.txt","r").readlines(): - if "#" in url: - url = url[:url.find("#")] - url = url.strip() - if not url: - continue - yield url - - def poll(self): - log.msg("polling..") - #return self.poll_synchronous() - return self.poll_asynchronous() - - def poll_asynchronous(self): - # this didn't actually seem to work any better than poll_synchronous: - # logs are more noisy, and I got frequent DNS failures. But with a - # lot of servers to query, this is probably the better way to go. A - # significant advantage of this approach is that we can use a - # timeout= argument to tolerate hanging servers. - dl = [] - for url in self.get_urls(): - when = extime.Time() - d = client.getPage(url, timeout=60) - d.addCallback(self.got_response, when, url) - dl.append(d) - d = defer.DeferredList(dl) - def _done(res): - fetched = len([1 for (success, value) in res if success]) - log.msg("fetched %d of %d" % (fetched, len(dl))) - d.addCallback(_done) - return d - - def poll_synchronous(self): - attempts = 0 - fetched = 0 - for url in self.get_urls(): - attempts += 1 - try: - when = extime.Time() - # if a server accepts the connection and then hangs, this - # will block forever - data_json = urllib.urlopen(url).read() - self.got_response(data_json, when, url) - fetched += 1 - except: - log.msg("error while fetching: %s" % url) - log.err() - log.msg("fetched %d of %d" % (fetched, attempts)) - - def got_response(self, data_json, when, url): - data = simplejson.loads(data_json) - total = data[u"stats"][u"storage_server.disk_total"] - used = data[u"stats"][u"storage_server.disk_used"] - avail = data[u"stats"][u"storage_server.disk_avail"] - print "%s : total=%s, used=%s, avail=%s" % (url, - total, used, avail) - Sample(store=self.store, - 
url=unicode(url), when=when, total=total, used=used, avail=avail) - - def calculate_growth_timeleft(self): - timespans = [] - total_avail_space = self.find_total_available_space() - pairs = [ (timespan,name) - for name,timespan in self.AVERAGES.items() ] - pairs.sort() - for (timespan,name) in pairs: - growth = self.growth(timespan) - print name, total_avail_space, growth - if growth is not None: - timeleft = None - if growth > 0: - timeleft = total_avail_space / growth - timespans.append( (name, timespan, growth, timeleft) ) - return timespans - - def find_total_space(self): - # this returns the sum of disk-avail stats for all servers that 1) - # are listed in urls.txt and 2) have responded recently. - now = extime.Time() - recent = now - timedelta(seconds=2*self.POLL_INTERVAL) - total_space = 0 - for url in self.get_urls(): - url = unicode(url) - latest = list(self.store.query(Sample, - AND(Sample.url == url, - Sample.when > recent), - sort=Sample.when.descending, - limit=1)) - if latest: - total_space += latest[0].total - return total_space - - def find_total_available_space(self): - # this returns the sum of disk-avail stats for all servers that 1) - # are listed in urls.txt and 2) have responded recently. - now = extime.Time() - recent = now - timedelta(seconds=2*self.POLL_INTERVAL) - total_avail_space = 0 - for url in self.get_urls(): - url = unicode(url) - latest = list(self.store.query(Sample, - AND(Sample.url == url, - Sample.when > recent), - sort=Sample.when.descending, - limit=1)) - if latest: - total_avail_space += latest[0].avail - return total_avail_space - - def find_total_used_space(self): - # this returns the sum of disk-used stats for all servers that 1) are - # listed in urls.txt and 2) have responded recently. 
- now = extime.Time() - recent = now - timedelta(seconds=2*self.POLL_INTERVAL) - total_used_space = 0 - for url in self.get_urls(): - url = unicode(url) - latest = list(self.store.query(Sample, - AND(Sample.url == url, - Sample.when > recent), - sort=Sample.when.descending, - limit=1)) - if latest: - total_used_space += latest[0].used - return total_used_space - - - def growth(self, timespan): - """Calculate the bytes-per-second growth of the total disk-used stat, - over a period of TIMESPAN seconds (i.e. between the most recent - sample and the latest one that's at least TIMESPAN seconds ago), - summed over all nodes which 1) are listed in urls.txt, 2) have - responded recently, and 3) have a response at least as old as - TIMESPAN. If there are no nodes which meet these criteria, we'll - return None; this is likely to happen for the longer timespans (4wk) - until the gatherer has been running and collecting data for that - long.""" - - # a note about workload: for our oldest storage servers, as of - # 25-Jan-2009, the first DB query here takes about 40ms per server - # URL (some take as little as 10ms). There are about 110 servers, and - # two queries each, so the growth() function takes about 7s to run - # for each timespan. We track 4 timespans, and find_total_*_space() - # takes about 2.3s to run, so calculate_growth_timeleft() takes about - # 27s. Each HTTP query thus takes 27s, and we have six munin plugins - # which perform HTTP queries every 5 minutes. By adding growth_cache(), - # I hope to reduce this: the first HTTP query will still take 27s, - # but the subsequent five should be about 2.3s each. 
- - # we're allowed to cache this value for 3 minutes - if timespan in self.growth_cache: - (when, value) = self.growth_cache[timespan] - if time.time() - when < 3*60: - return value - - td = timedelta(seconds=timespan) - now = extime.Time() - then = now - td - recent = now - timedelta(seconds=2*self.POLL_INTERVAL) - - total_growth = 0.0 - num_nodes = 0 - - for url in self.get_urls(): - url = unicode(url) - latest = list(self.store.query(Sample, - AND(Sample.url == url, - Sample.when > recent), - sort=Sample.when.descending, - limit=1)) - if not latest: - #print "no latest sample from", url - continue # skip this node - latest = latest[0] - old = list(self.store.query(Sample, - AND(Sample.url == url, - Sample.when < then), - sort=Sample.when.descending, - limit=1)) - if not old: - #print "no old sample from", url - continue # skip this node - old = old[0] - duration = latest.when.asPOSIXTimestamp() - old.when.asPOSIXTimestamp() - if not duration: - print "only one sample from", url - continue - - rate = float(latest.used - old.used) / duration - #print url, rate - total_growth += rate - num_nodes += 1 - - if not num_nodes: - return None - self.growth_cache[timespan] = (time.time(), total_growth) - return total_growth - - def getChild(self, path, req): - if path == "": - return self - return resource.Resource.getChild(self, path, req) - - def abbreviate_time(self, s): - def _plural(count, unit): - count = int(count) - if count == 1: - return "%d %s" % (count, unit) - return "%d %ss" % (count, unit) - if s is None: - return "unknown" - if s < 120: - return _plural(s, "second") - if s < 3*HOUR: - return _plural(s/60, "minute") - if s < 2*DAY: - return _plural(s/HOUR, "hour") - if s < 2*MONTH: - return _plural(s/DAY, "day") - if s < 4*YEAR: - return _plural(s/MONTH, "month") - return _plural(s/YEAR, "year") - - def abbreviate_space2(self, s, SI=True): - if s is None: - return "unknown" - if SI: - U = 1000.0 - isuffix = "B" - else: - U = 1024.0 - isuffix = "iB" - def 
r(count, suffix): - return "%.2f %s%s" % (count, suffix, isuffix) - - if s < 1024: # 1000-1023 get emitted as bytes, even in SI mode - return r(s, "") - if s < U*U: - return r(s/U, "k") - if s < U*U*U: - return r(s/(U*U), "M") - if s < U*U*U*U: - return r(s/(U*U*U), "G") - if s < U*U*U*U*U: - return r(s/(U*U*U*U), "T") - return r(s/(U*U*U*U*U), "P") - - def abbreviate_space(self, s): - return "(%s, %s)" % (self.abbreviate_space2(s, True), - self.abbreviate_space2(s, False)) - - def render(self, req): - t = req.args.get("t", ["html"])[0] - ctype = "text/plain" - data = "" - if t == "html": - data = "" - for (name, timespan, growth, timeleft) in self.calculate_growth_timeleft(): - data += "%f bytes per second (%sps), %s remaining (over %s)\n" % \ - (growth, self.abbreviate_space2(growth, True), - self.abbreviate_time(timeleft), name) - used = self.find_total_used_space() - data += "total used: %d bytes %s\n" % (used, - self.abbreviate_space(used)) - total = self.find_total_space() - data += "total space: %d bytes %s\n" % (total, - self.abbreviate_space(total)) - elif t == "json": - current = {"rates": self.calculate_growth_timeleft(), - "total": self.find_total_space(), - "used": self.find_total_used_space(), - "available": self.find_total_available_space(), - } - data = simplejson.dumps(current, indent=True) - else: - req.setResponseCode(http.BAD_REQUEST) - data = "Unknown t= %s\n" % t - req.setHeader("content-type", ctype) - return data - -application = service.Application("disk-watcher") -DiskWatcher().setServiceParent(application) diff --git a/misc/storage-overhead.py b/misc/storage-overhead.py deleted file mode 100644 index 75a0bf61..00000000 --- a/misc/storage-overhead.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python - -import sys, math -from allmydata import upload, uri, encode, storage -from allmydata.util import mathutil - -def roundup(size, blocksize=4096): - return blocksize * mathutil.div_ceil(size, blocksize) - - -class BigFakeString: - def 
__init__(self, length): - self.length = length - self.fp = 0 - def seek(self, offset, whence=0): - if whence == 0: - self.fp = offset - elif whence == 1: - self.fp += offset - elif whence == 2: - self.fp = self.length - offset - def tell(self): - return self.fp - -def calc(filesize, params=(3,7,10), segsize=encode.Encoder.MAX_SEGMENT_SIZE): - num_shares = params[2] - if filesize <= upload.Uploader.URI_LIT_SIZE_THRESHOLD: - urisize = len(uri.pack_lit("A"*filesize)) - sharesize = 0 - sharespace = 0 - else: - u = upload.FileUploader(None) - u.set_params(params) - # unfortunately, Encoder doesn't currently lend itself to answering - # this question without measuring a filesize, so we have to give it a - # fake one - data = BigFakeString(filesize) - u.set_filehandle(data) - u.set_encryption_key("a"*16) - sharesize, blocksize = u.setup_encoder() - # how much overhead? - # 0x20 bytes of offsets - # 0x04 bytes of extension length - # 0x1ad bytes of extension (=429) - # total is 465 bytes - num_segments = mathutil.div_ceil(filesize, segsize) - num_share_hashes = int(math.log(mathutil.next_power_of_k(num_shares, 2), - 2)) + 1 - sharesize = storage.allocated_size(sharesize, num_segments, - num_share_hashes, - 429) - sharespace = num_shares * roundup(sharesize) - urisize = len(uri.pack_uri(storage_index="a"*32, - key="a"*16, - uri_extension_hash="a"*32, - needed_shares=params[0], - total_shares=params[2], - size=filesize)) - - return urisize, sharesize, sharespace - -def main(): - filesize = int(sys.argv[1]) - urisize, sharesize, sharespace = calc(filesize) - print "urisize:", urisize - print "sharesize: %10d" % sharesize - print "sharespace: %10d" % sharespace - print "desired expansion: %1.1f" % (1.0 * 10 / 3) - print "effective expansion: %1.1f" % (1.0 * sharespace / filesize) - -def chart(): - filesize = 2 - while filesize < 2**20: - urisize, sharesize, sharespace = calc(int(filesize)) - expansion = 1.0 * sharespace / int(filesize) - print "%d,%d,%d,%1.2f" % 
(int(filesize), urisize, sharespace, expansion) - filesize = filesize * 2**0.5 - -if __name__ == '__main__': - if sys.argv[1] == "chart": - chart() - else: - main() - diff --git a/misc/sub-ver.py b/misc/sub-ver.py deleted file mode 100644 index 6a1392cb..00000000 --- a/misc/sub-ver.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python - -from allmydata import __version__ as v - -import sys - -if len(sys.argv) == 1: - input = sys.stdin -elif len(sys.argv) == 2: - fname = sys.argv[1] - input = file(fname, 'rb') -else: - raise ValueError('must provide 0 or 1 argument (stdin, or filename)') - -vern = { - 'major': v.major or 0, - 'minor': v.minor or 0, - 'point': v.micro or 0, - 'micro': v.micro or 0, - 'revision' : v.revision or 0, - 'build': str(v), - } - -for line in input.readlines(): - print line % vern, - diff --git a/misc/test-darcs-boringfile.py b/misc/test-darcs-boringfile.py deleted file mode 100644 index 619a4e44..00000000 --- a/misc/test-darcs-boringfile.py +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env python - -import sys -from subprocess import Popen, PIPE - -cmd = ["darcs", "whatsnew", "-l"] -p = Popen(cmd, stdout=PIPE) -output = p.communicate()[0] -print output -if output == "No changes!\n": - sys.exit(0) -sys.exit(1) - - diff --git a/misc/test_mac_diskimage.py b/misc/test_mac_diskimage.py deleted file mode 100644 index 7795dfd1..00000000 --- a/misc/test_mac_diskimage.py +++ /dev/null @@ -1,78 +0,0 @@ -# This script uses hdiutil to attach a dmg (whose name is derived from the -# appname and the version number passed in), asserts that it attached as -# expected, cd's into the mounted filesystem, executes "$appname -# --version-and-path", and checks whether the output of --version-and-path is -# right. - -# If all of the paths listed therein are loaded from within the current PWD -# then it exits with code 0. - -# If anything goes wrong then it exits with non-zero (failure). 
This is to -# check that the Mac OS "DMG" (disk image) package that gets built is correctly -# loading all of its packages from inside the image. - -# Here is an example output from --version-and-path: - -# allmydata-tahoe: 1.4.1-r3916 (/home/zooko/playground/allmydata/tahoe/trunk/trunk/src), foolscap: 0.4.1 (/usr/local/lib/python2.6/dist-packages/foolscap-0.4.1-py2.6.egg), pycryptopp: 0.5.10 (/home/zooko/playground/allmydata/tahoe/trunk/trunk/support/lib/python2.6/site-packages/pycryptopp-0.5.10-py2.6-linux-x86_64.egg), zfec: 1.4.2 (/usr/local/lib/python2.6/dist-packages/zfec-1.4.2-py2.6-linux-x86_64.egg), Twisted: 8.2.0-r26987 (/usr/local/lib/python2.6/dist-packages/Twisted-8.2.0_r26987-py2.6-linux-x86_64.egg), Nevow: 0.9.32 (/home/zooko/playground/allmydata/tahoe/trunk/trunk/support/lib/python2.6/site-packages/Nevow-0.9.32-py2.6.egg), zope.interface: 3.4.0 (/usr/lib/python2.6/dist-packages), python: 2.6.2 (/usr/bin/python), platform: Linux-Ubuntu_9.04-x86_64-64bit_ELF (None), sqlite: 3.6.10 (unknown), simplejson: 2.0.1 (/usr/local/lib/python2.6/dist-packages/simplejson-2.0.1-py2.6-linux-x86_64.egg), argparse: 0.8.0 (/usr/local/lib/python2.6/dist-packages/argparse-0.8.0-py2.6.egg), pyOpenSSL: 0.7 (/home/zooko/playground/allmydata/tahoe/trunk/trunk/support/lib/python2.6/site-packages/pyOpenSSL-0.7-py2.6-linux-x86_64.egg), pyutil: 1.3.30 (/usr/local/lib/python2.6/dist-packages/pyutil-1.3.30-py2.6.egg), zbase32: 1.1.1 (/usr/local/lib/python2.6/dist-packages/zbase32-1.1.1-py2.6.egg), setuptools: 0.6c12dev (/home/zooko/playground/allmydata/tahoe/trunk/trunk/support/lib/python2.6/site-packages/setuptools-0.6c12dev.egg), pysqlite: 2.4.1 (/usr/lib/python2.6/sqlite3) - -import fcntl, os, re, subprocess, time - -def test_mac_diskimage(appname, version): - """ Return True on success, raise exception on failure. 
""" - assert isinstance(appname, basestring), appname - assert isinstance(version, basestring), version - DMGNAME='mac/'+appname+'-'+version+'.dmg' - - cmd = ['hdiutil', 'attach', DMGNAME] - attachit = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - rc = attachit.wait() - if rc != 0: - raise Exception("FAIL: hdiutil returned non-zero exit code: %r from command: %r" % (rc, cmd,)) - - stderrtxt = attachit.stderr.read() - if stderrtxt: - raise Exception("FAIL: hdiutil said something on stderr: %r" % (stderrtxt,)) - stdouttxt = attachit.stdout.read() - mo = re.search("^(/[^ ]+)\s+Apple_HFS\s+(/Volumes/.*)$", stdouttxt, re.UNICODE|re.MULTILINE) - if not mo: - raise Exception("FAIL: hdiutil said something on stdout that didn't match our expectations: %r" % (stdouttxt,)) - DEV=mo.group(1) - MOUNTPOINT=mo.group(2) - - callitpid = None - try: - basedir = MOUNTPOINT + '/' + appname + '.app/Contents/Resources' - - os.chdir(basedir) - - cmd = ['../MacOS/' + appname, '--version-and-path'] - callit = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - callitpid = callit.pid - assert callitpid - deadline = time.time() + 2 # If it takes longer than 2 seconds to do this then it fails. - while True: - rc = callit.poll() - if rc is not None: - break - if time.time() > deadline: - os.kill(callitpid, 15) - raise Exception("FAIL: it took longer than 2 seconds to invoke $appname --version-and-path. stdout: %s, stderr: %s" % (callit.stdout.read(), callit.stderr.read())) - time.sleep(0.05) - - if rc != 0: - raise Exception("FAIL: $appname --version-and-path returned non-zero exit code: %r" % (rc,)) - - stdouttxt = callit.stdout.read() - - PKG_VER_PATH_RE=re.compile("(\S+): (\S+) \((.+?)\), ", re.UNICODE) - - for mo in PKG_VER_PATH_RE.finditer(stdouttxt): - if not mo.group(3).startswith(basedir): - raise Exception("FAIL: found package not loaded from basedir (%s); package was: %s" % (basedir, mo.groups(),)) - - return True # success! 
- finally: - if callitpid: - os.kill(callitpid, 9) - os.waitpid(callitpid, 0) - subprocess.call(['hdiutil', 'detach', '-Force', DEV]) diff --git a/misc/xfer-client.py b/misc/xfer-client.py deleted file mode 100644 index e7af4803..00000000 --- a/misc/xfer-client.py +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env python - -"""This program is a client that sends files to xfer-server.py. You give it -the server's FURL, and it can put files into the server's targetdir (and -nowhere else). When you want an unattended process on one machine to be able -to place files in a remote directory, you could give its parent process an -ssh account on the target, with an empty passphrase, but that provides too -much power. This program is a least-privilege replacement for the ssh/scp -approach. - -Give the client a FURL, or a file where the FURL is stored. You also give it -the name of the local file to be transferred. The last component of the local -pathname will be used as the remote filename. -""" - -import os.path -from twisted.internet import reactor -from foolscap import UnauthenticatedTub -from twisted.python import usage - -class Options(usage.Options): - synopsis = "xfer-client.py (--furl FURL | --furlfile furlfile) LOCALFILE" - optParameters = [ - ["furl", "f", None, - "The server FURL. 
You must either provide --furl or --furlfile."], - ["furlfile", "l", None, - "A file containing the server FURL."], - ] - optFlags = [ - ["quiet", "q", "Do not announce success."], - ] - - def parseArgs(self, localfile): - self['localfile'] = localfile - - def postOptions(self): - if not self["furl"] and not self["furlfile"]: - raise usage.UsageError("you must either provide --furl or --furlfile") - if not os.path.exists(self["localfile"]): - raise usage.UsageError("local file '%s' doesn't exist" % self["localfile"]) - -opts = Options() -opts.parseOptions() -tub = UnauthenticatedTub() -tub.startService() -if opts["furl"]: - furl = opts["furl"] -else: - furl = open(os.path.expanduser(opts["furlfile"]), "r").read().strip() -remotename = os.path.basename(opts["localfile"]) -d = tub.getReference(furl) -def _push(rref): - data = open(os.path.expanduser(opts["localfile"]), "r").read() - return rref.callRemote("putfile", remotename, data) -d.addCallback(_push) -def _success(res): - reactor.stop() - if not opts["quiet"]: - print "file transferred to %s" % remotename -def _failure(f): - reactor.stop() - print "error while transferring file:" - print f -d.addCallbacks(_success, _failure) - -reactor.run() diff --git a/setup.py b/setup.py index d1481564..bdbbbbd0 100644 --- a/setup.py +++ b/setup.py @@ -205,7 +205,7 @@ class TestMacDiskImage(Command): pass def run(self): import sys - sys.path.append('misc') + sys.path.append(os.path.join('misc', 'build_helpers')) import test_mac_diskimage return test_mac_diskimage.test_mac_diskimage('Allmydata', version=self.distribution.metadata.version)