misc cleanup: remove old .tac files, move old stuff into misc/
author Brian Warner <warner@lothar.com>
Sat, 7 Apr 2007 03:37:38 +0000 (20:37 -0700)
committer Brian Warner <warner@lothar.com>
Sat, 7 Apr 2007 03:37:38 +0000 (20:37 -0700)
client.tac [deleted file]
misc/simulator.py [new file with mode: 0644]
misc/sizes.py [new file with mode: 0644]
queen.tac [deleted file]
simulator.py [deleted file]
sizes.py [deleted file]

diff --git a/client.tac b/client.tac
deleted file mode 100644 (file)
index 28dc074..0000000
--- a/client.tac
+++ /dev/null
@@ -1,9 +0,0 @@
-# -*- python -*-
-
-from allmydata import client
-from twisted.application import service
-
-c = client.Client()
-
-application = service.Application("allmydata_client")
-c.setServiceParent(application)
diff --git a/misc/simulator.py b/misc/simulator.py
new file mode 100644 (file)
index 0000000..5a53fad
--- /dev/null
+++ b/misc/simulator.py
@@ -0,0 +1,292 @@
+#! /usr/bin/env python
+
+import sha as shamodule
+import os, random
+
+from pkg_resources import require
+require('PyRRD')
+from pyrrd import graph
+from pyrrd.rrd import DataSource, RRD, RRA
+
+
+def sha(s):
+    return shamodule.new(s).digest()
+
+def randomid():
+    return os.urandom(20)
+
+class Node:
+    def __init__(self, nid, queen, simulator):
+        self.nid = nid
+        self.queen = queen
+        self.simulator = simulator
+        self.shares = {}
+        self.capacity = random.randrange(1000)
+        self.utilization = 0
+        self.files = []
+
+    def permute_peers(self, fileid):
+        permuted = [(sha(fileid+n.nid),n)
+                    for n in self.queen.get_all_nodes()]
+        permuted.sort()
+        return permuted
+
+    def publish_file(self, fileid, size, numshares=100):
+        sharesize = 4 * size / numshares
+        permuted = self.permute_peers(fileid)
+        last_givento = None
+        tried = 0
+        givento = []
+        while numshares and permuted:
+            pid,node = permuted.pop(0)
+            tried += 1
+            last_givento = pid
+            if node.accept_share(fileid, sharesize):
+                givento.append((pid,node))
+                numshares -= 1
+        if numshares:
+            # couldn't push, should delete
+            for pid,node in givento:
+                node.delete_share(fileid)
+            return False
+        self.files.append((fileid, numshares))
+        self.queen.please_preserve(fileid, size, tried, last_givento)
+        return (True, tried)
+
+    def accept_share(self, fileid, sharesize):
+        accept = False
+        if self.utilization < self.capacity:
+            # we have room! yay!
+            self.shares[fileid] = sharesize
+            self.utilization += sharesize
+            return True
+        if self.decide(sharesize):
+            # we don't, but we'll make room
+            self.make_space(sharesize)
+            self.shares[fileid] = sharesize
+            self.utilization += sharesize
+            return True
+        else:
+            # we're full, try elsewhere
+            return False
+
+    def decide(self, sharesize):
+        if sharesize > self.capacity:
+            return False
+        return False
+        return random.random() > 0.5
+
+    def make_space(self, sharesize):
+        assert sharesize <= self.capacity
+        while self.capacity - self.utilization < sharesize:
+            victim = random.choice(self.shares.keys())
+            self.simulator.lost_data(self.shares[victim])
+            self.delete_share(victim)
+
+    def delete_share(self, fileid):
+        if fileid in self.shares:
+            self.utilization -= self.shares[fileid]
+            del self.shares[fileid]
+            return True
+        return False
+
+    def retrieve_file(self):
+        if not self.files:
+            return
+        fileid,numshares = random.choice(self.files)
+        needed = numshares / 4
+        peers = []
+        for pid,node in self.permute_peers(fileid):
+            if random.random() > self.simulator.P_NODEAVAIL:
+                continue # node isn't available right now
+            if node.has_share(fileid):
+                peers.append(node)
+            if len(peers) >= needed:
+                return True
+        return False
+
+    def delete_file(self):
+        if not self.files:
+            return False
+        which = random.choice(self.files)
+        self.files.remove(which)
+        fileid,numshares = which
+        self.queen.delete(fileid)
+        return True
+
+class Queen:
+    def __init__(self, simulator):
+        self.living_files = {}
+        self.utilization = 0 # total size of all active files
+        self.simulator = simulator
+        self.simulator.stamp_utilization(self.utilization)
+
+    def get_all_nodes(self):
+        return self.all_nodes
+
+    def please_preserve(self, fileid, size, tried, last_givento):
+        self.living_files[fileid] = (size, tried, last_givento)
+        self.utilization += size
+        self.simulator.stamp_utilization(self.utilization)
+
+    def please_delete(self, fileid):
+        self.delete(fileid)
+
+    def permute_peers(self, fileid):
+        permuted = [(sha(fileid+n.nid),n)
+                    for n in self.get_all_nodes()]
+        permuted.sort()
+        return permuted
+
+    def delete(self, fileid):
+        permuted = self.permute_peers(fileid)
+        size, tried, last_givento = self.living_files[fileid]
+        pid = ""
+        while tried and pid < last_givento:
+            pid,node = permuted.pop(0)
+            had_it = node.delete_share(fileid)
+            if had_it:
+                tried -= 1
+        self.utilization -= size
+        self.simulator.stamp_utilization(self.utilization)
+        del self.living_files[fileid]
+
+class Simulator:
+    NUM_NODES = 1000
+    EVENTS = ["ADDFILE", "DELFILE", "ADDNODE", "DELNODE"]
+    RATE_ADDFILE = 1.0 / 10
+    RATE_DELFILE = 1.0 / 20
+    RATE_ADDNODE = 1.0 / 3000
+    RATE_DELNODE = 1.0 / 4000
+    P_NODEAVAIL = 1.0
+
+    def __init__(self):
+        self.time = 1164783600 # small numbers of seconds since the epoch confuse rrdtool
+        self.prevstamptime = int(self.time)
+
+        ds = DataSource(ds_name='utilizationds', ds_type='GAUGE', heartbeat=1)
+        rra = RRA(cf='AVERAGE', xff=0.1, steps=1, rows=1200)
+        self.rrd = RRD("/tmp/utilization.rrd", ds=[ds], rra=[rra], start=self.time)
+        self.rrd.create()
+
+        self.queen = q = Queen(self)
+        self.all_nodes = [Node(randomid(), q, self)
+                          for i in range(self.NUM_NODES)]
+        q.all_nodes = self.all_nodes
+        self.next = []
+        self.schedule_events()
+        self.verbose = False
+
+        self.added_files = 0
+        self.added_data = 0
+        self.deleted_files = 0
+        self.published_files = []
+        self.failed_files = 0
+        self.lost_data_bytes = 0 # bytes deleted to make room for new shares
+
+    def stamp_utilization(self, utilization):
+        if int(self.time) > (self.prevstamptime+1):
+            self.rrd.bufferValue(self.time, utilization)
+            self.prevstamptime = int(self.time)
+
+    def write_graph(self):
+        self.rrd.update()
+        self.rrd = None
+        import gc
+        gc.collect()
+
+        def1 = graph.DataDefinition(vname="a", rrdfile='/tmp/utilization.rrd', ds_name='utilizationds')
+        area1 = graph.Area(value="a", color="#990033", legend='utilizationlegend')
+        g = graph.Graph('/tmp/utilization.png', imgformat='PNG', width=540, height=100, vertical_label='utilizationverticallabel', title='utilizationtitle', lower_limit=0)
+        g.data.append(def1)
+        g.data.append(area1)
+        g.write()
+
+    def add_file(self):
+        size = random.randrange(1000)
+        n = random.choice(self.all_nodes)
+        if self.verbose:
+            print "add_file(size=%d, from node %s)" % (size, n)
+        fileid = randomid()
+        able = n.publish_file(fileid, size)
+        if able:
+            able, tried = able
+            self.added_files += 1
+            self.added_data += size
+            self.published_files.append(tried)
+        else:
+            self.failed_files += 1
+
+    def lost_data(self, size):
+        self.lost_data_bytes += size
+
+    def delete_file(self):
+        all_nodes = self.all_nodes[:]
+        random.shuffle(all_nodes)
+        for n in all_nodes:
+            if n.delete_file():
+                self.deleted_files += 1
+                return
+        print "no files to delete"
+
+    def _add_event(self, etype):
+        rate = getattr(self, "RATE_" + etype)
+        next = self.time + random.expovariate(rate)
+        self.next.append((next, etype))
+        self.next.sort()
+
+    def schedule_events(self):
+        types = set([e[1] for e in self.next])
+        for etype in self.EVENTS:
+            if not etype in types:
+                self._add_event(etype)
+
+    def do_event(self):
+        time, etype = self.next.pop(0)
+        assert time > self.time
+        current_time = self.time
+        self.time = time
+        self._add_event(etype)
+        if etype == "ADDFILE":
+            self.add_file()
+        elif etype == "DELFILE":
+            self.delete_file()
+        elif etype == "ADDNODE":
+            pass
+            #self.add_node()
+        elif etype == "DELNODE":
+            #self.del_node()
+            pass
+        # self.print_stats(current_time, etype)
+
+    def print_stats_header(self):
+        print "time:  added   failed   lost  avg_tried"
+
+    def print_stats(self, time, etype):
+        if not self.published_files:
+            avg_tried = "NONE"
+        else:
+            avg_tried = sum(self.published_files) / len(self.published_files)
+        print time, etype, self.added_data, self.failed_files, self.lost_data_bytes, avg_tried, len(self.queen.living_files), self.queen.utilization
+
+global s
+s = None
+
+def main():
+#    rrdtool.create("foo.rrd",
+#                   "--step 10",
+#                   "DS:files-added:DERIVE::0:1000",
+#                   "RRA:AVERAGE:1:1:1200",
+#                   )
+    global s
+    s = Simulator()
+    # s.print_stats_header()
+    for i in range(1000):
+        s.do_event()
+    print "%d files added, %d files deleted" % (s.added_files, s.deleted_files)
+    return s
+
+if __name__ == '__main__':
+    main()
+    
+
diff --git a/misc/sizes.py b/misc/sizes.py
new file mode 100644 (file)
index 0000000..5d7eec0
--- /dev/null
+++ b/misc/sizes.py
@@ -0,0 +1,213 @@
+#! /usr/bin/env python
+
+import random, math, os, re
+from twisted.python import usage
+
+class Args(usage.Options):
+    optParameters = [
+        ["mode", "m", "alpha", "validation scheme"],
+        ["arity", "k", 2, "k (airty) for hash tree"],
+        ]
+    def opt_arity(self, option):
+        self['arity'] = int(option)
+    def parseArgs(self, *args):
+        if len(args) > 0:
+            self['mode'] = args[0]
+
+
+def charttest():
+    import gdchart
+    sizes = [random.randrange(10, 20) for i in range(10)]
+    x = gdchart.Line()
+    x.width = 250
+    x.height = 250
+    x.xtitle = "sample"
+    x.ytitle = "size"
+    x.title = "Example Graph"
+    #x.ext_color = [ "white", "yellow", "red", "blue", "green"]
+    x.setData(sizes)
+    #x.setLabels(["Mon", "Tue", "Wed", "Thu", "Fri"])
+    x.draw("simple.png")
+
+KiB=1024
+MiB=1024*KiB
+GiB=1024*MiB
+TiB=1024*GiB
+PiB=1024*TiB
+
+class Sizes:
+    def __init__(self, mode, file_size, arity=2):
+        MAX_SEGSIZE = 2*MiB
+        self.mode = mode
+        self.file_size = file_size
+        self.seg_size = seg_size = 1.0 * min(MAX_SEGSIZE, file_size)
+        self.num_segs = num_segs = math.ceil(file_size / seg_size)
+        self.num_subblocks = num_subblocks = num_segs
+
+        self.num_blocks = num_blocks = 100
+        self.blocks_needed = blocks_needed = 25
+
+        self.subblock_size = subblock_size = seg_size / blocks_needed
+        self.block_size = block_size = subblock_size * num_subblocks
+
+        # none of this includes the block-level hash chain yet, since that is
+        # only a function of the number of blocks. All overhead numbers
+        # assume that the block-level hash chain has already been sent,
+        # including the root of the subblock-level hash tree.
+
+        if mode == "alpha":
+            # no hash tree at all
+            self.subblock_arity = 0
+            self.subblock_tree_depth = 0
+            self.subblock_overhead = 0
+            self.bytes_until_some_data = 20 + block_size
+            self.block_storage_overhead = 0
+            self.block_transmission_overhead = 0
+
+        elif mode == "beta":
+            # k=num_subblocks, d=1
+            # each subblock has a 20-byte hash
+            self.subblock_arity = num_subblocks
+            self.subblock_tree_depth = 1
+            self.subblock_overhead = 20
+            # the block has a list of hashes, one for each subblock
+            self.block_storage_overhead = (self.subblock_overhead *
+                                           num_subblocks)
+            # we can get away with not sending the hash of the block that
+            # we're sending in full, once
+            self.block_transmission_overhead = self.block_storage_overhead - 20
+            # we must get the whole list (so it can be validated) before
+            # any data can be validated
+            self.bytes_until_some_data = (self.block_transmission_overhead +
+                                          subblock_size)
+
+        elif mode == "gamma":
+            self.subblock_arity = k = arity
+            d = math.ceil(math.log(num_subblocks, k))
+            self.subblock_tree_depth = d
+            num_leaves = k ** d
+            # to make things easier, we make the pessimistic assumption that
+            # we have to store hashes for all the empty places in the tree
+            # (when the number of blocks is not an exact exponent of k)
+            self.subblock_overhead = 20
+            # the subblock hashes are organized into a k-ary tree, which
+            # means storing (and eventually transmitting) more hashes. This
+            # count includes all the low-level block hashes and the root.
+            hash_nodes = (num_leaves*k - 1) / (k - 1)
+            #print "hash_depth", d
+            #print "num_leaves", num_leaves
+            #print "hash_nodes", hash_nodes
+            # the storage overhead is this
+            self.block_storage_overhead = 20 * (hash_nodes - 1)
+            # the transmission overhead is smaller: if we actually transmit
+            # every subblock, we don't have to transmit 1/k of the
+            # lowest-level subblock hashes, and we don't have to transmit the
+            # root because it was already sent with the block-level hash tree
+            self.block_transmission_overhead = 20 * (hash_nodes
+                                                     - 1 # the root
+                                                     - num_leaves / k)
+            # we must get a full sibling hash chain before we can validate
+            # any data
+            sibling_length = d * (k-1)
+            self.bytes_until_some_data = 20 * sibling_length + subblock_size
+            
+            
+
+        else:
+            raise RuntimeError("unknown mode '%s" % mode)
+
+        self.storage_overhead = self.block_storage_overhead * num_blocks
+        self.storage_overhead_percentage = 100.0 * self.storage_overhead / file_size
+
+    def dump(self):
+        for k in ("mode", "file_size", "seg_size",
+                  "num_segs", "num_subblocks", "num_blocks", "blocks_needed",
+                  "subblock_size", "block_size",
+                  "subblock_arity", "subblock_tree_depth",
+                  "subblock_overhead",
+                  "block_storage_overhead", "block_transmission_overhead",
+                  "storage_overhead", "storage_overhead_percentage",
+                  "bytes_until_some_data"):
+            print k, getattr(self, k)
+
+def fmt(num, trim=False):
+    if num < KiB:
+        #s = str(num) + "#"
+        s = "%.2f#" % num
+    elif num < MiB:
+        s = "%.2fk" % (num / KiB)
+    elif num < GiB:
+        s = "%.2fM" % (num / MiB)
+    elif num < TiB:
+        s = "%.2fG" % (num / GiB)
+    elif num < PiB:
+        s = "%.2fT" % (num / TiB)
+    else:
+        s = "big"
+    if trim:
+        s = re.sub(r'(\.0+)([kMGT#])',
+                   lambda m: m.group(2),
+                   s)
+    else:
+        s = re.sub(r'(\.0+)([kMGT#])',
+                   lambda m: (" "*len(m.group(1))+m.group(2)),
+                   s)
+    if s.endswith("#"):
+        s = s[:-1] + " "
+    return s
+
+def text():
+    opts = Args()
+    opts.parseOptions()
+    mode = opts["mode"]
+    arity = opts["arity"]
+    #      0123456789012345678901234567890123456789012345678901234567890123456
+    print "mode=%s" % mode, " arity=%d" % arity
+    print "                    storage    storage"
+    print "Size     blocksize  overhead   overhead     k  d  alacrity"
+    print "                    (bytes)      (%)"
+    print "-------  -------    --------   --------  ---- --  --------"
+    #sizes = [2 ** i for i in range(7, 41)]
+    radix = math.sqrt(10); expstep = 2
+    radix = 2; expstep = 2
+    #radix = 10; expstep = 1
+    maxexp = int(math.ceil(math.log(1e12, radix)))+2
+    sizes = [radix ** i for i in range(2,maxexp,expstep)]
+    for file_size in sizes:
+        s = Sizes(mode, file_size, arity)
+        out = ""
+        out += "%7s  " % fmt(file_size, trim=True)
+        out += "%7s    " % fmt(s.block_size)
+        out += "%8s" % fmt(s.storage_overhead)
+        out += "%10.2f  " % s.storage_overhead_percentage
+        out += " %4d" % int(s.subblock_arity)
+        out += " %2d" % int(s.subblock_tree_depth)
+        out += " %8s" % fmt(s.bytes_until_some_data)
+        print out
+
+
+def graph():
+    # doesn't work yet
+    import Gnuplot
+    opts = Args()
+    opts.parseOptions()
+    mode = opts["mode"]
+    arity = opts["arity"]
+    g = Gnuplot.Gnuplot(debug=1)
+    g.title("overhead / alacrity tradeoffs")
+    g.xlabel("file size")
+    g.ylabel("stuff")
+    sizes = [2 ** i for i in range(7, 32)]
+    series = {"overhead": {}, "alacrity": {}}
+    for file_size in sizes:
+        s = Sizes(mode, file_size, arity)
+        series["overhead"][file_size] = s.storage_overhead_percentage
+        series["alacrity"][file_size] = s.bytes_until_some_data
+    g.plot([ (fs, series["overhead"][fs])
+             for fs in sizes ])
+    raw_input("press return")
+
+
+if __name__ == '__main__':
+    text()
+    #graph()
diff --git a/queen.tac b/queen.tac
deleted file mode 100644 (file)
index ac9b9b1..0000000
--- a/queen.tac
+++ /dev/null
@@ -1,9 +0,0 @@
-# -*- python -*-
-
-from allmydata import queen
-from twisted.application import service
-
-c = queen.Queen()
-
-application = service.Application("allmydata_queen")
-c.setServiceParent(application)
diff --git a/simulator.py b/simulator.py
deleted file mode 100644 (file)
index 5a53fad..0000000
--- a/simulator.py
+++ /dev/null
@@ -1,292 +0,0 @@
-#! /usr/bin/env python
-
-import sha as shamodule
-import os, random
-
-from pkg_resources import require
-require('PyRRD')
-from pyrrd import graph
-from pyrrd.rrd import DataSource, RRD, RRA
-
-
-def sha(s):
-    return shamodule.new(s).digest()
-
-def randomid():
-    return os.urandom(20)
-
-class Node:
-    def __init__(self, nid, queen, simulator):
-        self.nid = nid
-        self.queen = queen
-        self.simulator = simulator
-        self.shares = {}
-        self.capacity = random.randrange(1000)
-        self.utilization = 0
-        self.files = []
-
-    def permute_peers(self, fileid):
-        permuted = [(sha(fileid+n.nid),n)
-                    for n in self.queen.get_all_nodes()]
-        permuted.sort()
-        return permuted
-
-    def publish_file(self, fileid, size, numshares=100):
-        sharesize = 4 * size / numshares
-        permuted = self.permute_peers(fileid)
-        last_givento = None
-        tried = 0
-        givento = []
-        while numshares and permuted:
-            pid,node = permuted.pop(0)
-            tried += 1
-            last_givento = pid
-            if node.accept_share(fileid, sharesize):
-                givento.append((pid,node))
-                numshares -= 1
-        if numshares:
-            # couldn't push, should delete
-            for pid,node in givento:
-                node.delete_share(fileid)
-            return False
-        self.files.append((fileid, numshares))
-        self.queen.please_preserve(fileid, size, tried, last_givento)
-        return (True, tried)
-
-    def accept_share(self, fileid, sharesize):
-        accept = False
-        if self.utilization < self.capacity:
-            # we have room! yay!
-            self.shares[fileid] = sharesize
-            self.utilization += sharesize
-            return True
-        if self.decide(sharesize):
-            # we don't, but we'll make room
-            self.make_space(sharesize)
-            self.shares[fileid] = sharesize
-            self.utilization += sharesize
-            return True
-        else:
-            # we're full, try elsewhere
-            return False
-
-    def decide(self, sharesize):
-        if sharesize > self.capacity:
-            return False
-        return False
-        return random.random() > 0.5
-
-    def make_space(self, sharesize):
-        assert sharesize <= self.capacity
-        while self.capacity - self.utilization < sharesize:
-            victim = random.choice(self.shares.keys())
-            self.simulator.lost_data(self.shares[victim])
-            self.delete_share(victim)
-
-    def delete_share(self, fileid):
-        if fileid in self.shares:
-            self.utilization -= self.shares[fileid]
-            del self.shares[fileid]
-            return True
-        return False
-
-    def retrieve_file(self):
-        if not self.files:
-            return
-        fileid,numshares = random.choice(self.files)
-        needed = numshares / 4
-        peers = []
-        for pid,node in self.permute_peers(fileid):
-            if random.random() > self.simulator.P_NODEAVAIL:
-                continue # node isn't available right now
-            if node.has_share(fileid):
-                peers.append(node)
-            if len(peers) >= needed:
-                return True
-        return False
-
-    def delete_file(self):
-        if not self.files:
-            return False
-        which = random.choice(self.files)
-        self.files.remove(which)
-        fileid,numshares = which
-        self.queen.delete(fileid)
-        return True
-
-class Queen:
-    def __init__(self, simulator):
-        self.living_files = {}
-        self.utilization = 0 # total size of all active files
-        self.simulator = simulator
-        self.simulator.stamp_utilization(self.utilization)
-
-    def get_all_nodes(self):
-        return self.all_nodes
-
-    def please_preserve(self, fileid, size, tried, last_givento):
-        self.living_files[fileid] = (size, tried, last_givento)
-        self.utilization += size
-        self.simulator.stamp_utilization(self.utilization)
-
-    def please_delete(self, fileid):
-        self.delete(fileid)
-
-    def permute_peers(self, fileid):
-        permuted = [(sha(fileid+n.nid),n)
-                    for n in self.get_all_nodes()]
-        permuted.sort()
-        return permuted
-
-    def delete(self, fileid):
-        permuted = self.permute_peers(fileid)
-        size, tried, last_givento = self.living_files[fileid]
-        pid = ""
-        while tried and pid < last_givento:
-            pid,node = permuted.pop(0)
-            had_it = node.delete_share(fileid)
-            if had_it:
-                tried -= 1
-        self.utilization -= size
-        self.simulator.stamp_utilization(self.utilization)
-        del self.living_files[fileid]
-
-class Simulator:
-    NUM_NODES = 1000
-    EVENTS = ["ADDFILE", "DELFILE", "ADDNODE", "DELNODE"]
-    RATE_ADDFILE = 1.0 / 10
-    RATE_DELFILE = 1.0 / 20
-    RATE_ADDNODE = 1.0 / 3000
-    RATE_DELNODE = 1.0 / 4000
-    P_NODEAVAIL = 1.0
-
-    def __init__(self):
-        self.time = 1164783600 # small numbers of seconds since the epoch confuse rrdtool
-        self.prevstamptime = int(self.time)
-
-        ds = DataSource(ds_name='utilizationds', ds_type='GAUGE', heartbeat=1)
-        rra = RRA(cf='AVERAGE', xff=0.1, steps=1, rows=1200)
-        self.rrd = RRD("/tmp/utilization.rrd", ds=[ds], rra=[rra], start=self.time)
-        self.rrd.create()
-
-        self.queen = q = Queen(self)
-        self.all_nodes = [Node(randomid(), q, self)
-                          for i in range(self.NUM_NODES)]
-        q.all_nodes = self.all_nodes
-        self.next = []
-        self.schedule_events()
-        self.verbose = False
-
-        self.added_files = 0
-        self.added_data = 0
-        self.deleted_files = 0
-        self.published_files = []
-        self.failed_files = 0
-        self.lost_data_bytes = 0 # bytes deleted to make room for new shares
-
-    def stamp_utilization(self, utilization):
-        if int(self.time) > (self.prevstamptime+1):
-            self.rrd.bufferValue(self.time, utilization)
-            self.prevstamptime = int(self.time)
-
-    def write_graph(self):
-        self.rrd.update()
-        self.rrd = None
-        import gc
-        gc.collect()
-
-        def1 = graph.DataDefinition(vname="a", rrdfile='/tmp/utilization.rrd', ds_name='utilizationds')
-        area1 = graph.Area(value="a", color="#990033", legend='utilizationlegend')
-        g = graph.Graph('/tmp/utilization.png', imgformat='PNG', width=540, height=100, vertical_label='utilizationverticallabel', title='utilizationtitle', lower_limit=0)
-        g.data.append(def1)
-        g.data.append(area1)
-        g.write()
-
-    def add_file(self):
-        size = random.randrange(1000)
-        n = random.choice(self.all_nodes)
-        if self.verbose:
-            print "add_file(size=%d, from node %s)" % (size, n)
-        fileid = randomid()
-        able = n.publish_file(fileid, size)
-        if able:
-            able, tried = able
-            self.added_files += 1
-            self.added_data += size
-            self.published_files.append(tried)
-        else:
-            self.failed_files += 1
-
-    def lost_data(self, size):
-        self.lost_data_bytes += size
-
-    def delete_file(self):
-        all_nodes = self.all_nodes[:]
-        random.shuffle(all_nodes)
-        for n in all_nodes:
-            if n.delete_file():
-                self.deleted_files += 1
-                return
-        print "no files to delete"
-
-    def _add_event(self, etype):
-        rate = getattr(self, "RATE_" + etype)
-        next = self.time + random.expovariate(rate)
-        self.next.append((next, etype))
-        self.next.sort()
-
-    def schedule_events(self):
-        types = set([e[1] for e in self.next])
-        for etype in self.EVENTS:
-            if not etype in types:
-                self._add_event(etype)
-
-    def do_event(self):
-        time, etype = self.next.pop(0)
-        assert time > self.time
-        current_time = self.time
-        self.time = time
-        self._add_event(etype)
-        if etype == "ADDFILE":
-            self.add_file()
-        elif etype == "DELFILE":
-            self.delete_file()
-        elif etype == "ADDNODE":
-            pass
-            #self.add_node()
-        elif etype == "DELNODE":
-            #self.del_node()
-            pass
-        # self.print_stats(current_time, etype)
-
-    def print_stats_header(self):
-        print "time:  added   failed   lost  avg_tried"
-
-    def print_stats(self, time, etype):
-        if not self.published_files:
-            avg_tried = "NONE"
-        else:
-            avg_tried = sum(self.published_files) / len(self.published_files)
-        print time, etype, self.added_data, self.failed_files, self.lost_data_bytes, avg_tried, len(self.queen.living_files), self.queen.utilization
-
-global s
-s = None
-
-def main():
-#    rrdtool.create("foo.rrd",
-#                   "--step 10",
-#                   "DS:files-added:DERIVE::0:1000",
-#                   "RRA:AVERAGE:1:1:1200",
-#                   )
-    global s
-    s = Simulator()
-    # s.print_stats_header()
-    for i in range(1000):
-        s.do_event()
-    print "%d files added, %d files deleted" % (s.added_files, s.deleted_files)
-    return s
-
-if __name__ == '__main__':
-    main()
-    
-
diff --git a/sizes.py b/sizes.py
deleted file mode 100644 (file)
index 5d7eec0..0000000
--- a/sizes.py
+++ /dev/null
@@ -1,213 +0,0 @@
-#! /usr/bin/env python
-
-import random, math, os, re
-from twisted.python import usage
-
-class Args(usage.Options):
-    optParameters = [
-        ["mode", "m", "alpha", "validation scheme"],
-        ["arity", "k", 2, "k (airty) for hash tree"],
-        ]
-    def opt_arity(self, option):
-        self['arity'] = int(option)
-    def parseArgs(self, *args):
-        if len(args) > 0:
-            self['mode'] = args[0]
-
-
-def charttest():
-    import gdchart
-    sizes = [random.randrange(10, 20) for i in range(10)]
-    x = gdchart.Line()
-    x.width = 250
-    x.height = 250
-    x.xtitle = "sample"
-    x.ytitle = "size"
-    x.title = "Example Graph"
-    #x.ext_color = [ "white", "yellow", "red", "blue", "green"]
-    x.setData(sizes)
-    #x.setLabels(["Mon", "Tue", "Wed", "Thu", "Fri"])
-    x.draw("simple.png")
-
-KiB=1024
-MiB=1024*KiB
-GiB=1024*MiB
-TiB=1024*GiB
-PiB=1024*TiB
-
-class Sizes:
-    def __init__(self, mode, file_size, arity=2):
-        MAX_SEGSIZE = 2*MiB
-        self.mode = mode
-        self.file_size = file_size
-        self.seg_size = seg_size = 1.0 * min(MAX_SEGSIZE, file_size)
-        self.num_segs = num_segs = math.ceil(file_size / seg_size)
-        self.num_subblocks = num_subblocks = num_segs
-
-        self.num_blocks = num_blocks = 100
-        self.blocks_needed = blocks_needed = 25
-
-        self.subblock_size = subblock_size = seg_size / blocks_needed
-        self.block_size = block_size = subblock_size * num_subblocks
-
-        # none of this includes the block-level hash chain yet, since that is
-        # only a function of the number of blocks. All overhead numbers
-        # assume that the block-level hash chain has already been sent,
-        # including the root of the subblock-level hash tree.
-
-        if mode == "alpha":
-            # no hash tree at all
-            self.subblock_arity = 0
-            self.subblock_tree_depth = 0
-            self.subblock_overhead = 0
-            self.bytes_until_some_data = 20 + block_size
-            self.block_storage_overhead = 0
-            self.block_transmission_overhead = 0
-
-        elif mode == "beta":
-            # k=num_subblocks, d=1
-            # each subblock has a 20-byte hash
-            self.subblock_arity = num_subblocks
-            self.subblock_tree_depth = 1
-            self.subblock_overhead = 20
-            # the block has a list of hashes, one for each subblock
-            self.block_storage_overhead = (self.subblock_overhead *
-                                           num_subblocks)
-            # we can get away with not sending the hash of the block that
-            # we're sending in full, once
-            self.block_transmission_overhead = self.block_storage_overhead - 20
-            # we must get the whole list (so it can be validated) before
-            # any data can be validated
-            self.bytes_until_some_data = (self.block_transmission_overhead +
-                                          subblock_size)
-
-        elif mode == "gamma":
-            self.subblock_arity = k = arity
-            d = math.ceil(math.log(num_subblocks, k))
-            self.subblock_tree_depth = d
-            num_leaves = k ** d
-            # to make things easier, we make the pessimistic assumption that
-            # we have to store hashes for all the empty places in the tree
-            # (when the number of blocks is not an exact exponent of k)
-            self.subblock_overhead = 20
-            # the subblock hashes are organized into a k-ary tree, which
-            # means storing (and eventually transmitting) more hashes. This
-            # count includes all the low-level block hashes and the root.
-            hash_nodes = (num_leaves*k - 1) / (k - 1)
-            #print "hash_depth", d
-            #print "num_leaves", num_leaves
-            #print "hash_nodes", hash_nodes
-            # the storage overhead is this
-            self.block_storage_overhead = 20 * (hash_nodes - 1)
-            # the transmission overhead is smaller: if we actually transmit
-            # every subblock, we don't have to transmit 1/k of the
-            # lowest-level subblock hashes, and we don't have to transmit the
-            # root because it was already sent with the block-level hash tree
-            self.block_transmission_overhead = 20 * (hash_nodes
-                                                     - 1 # the root
-                                                     - num_leaves / k)
-            # we must get a full sibling hash chain before we can validate
-            # any data
-            sibling_length = d * (k-1)
-            self.bytes_until_some_data = 20 * sibling_length + subblock_size
-            
-            
-
-        else:
-            raise RuntimeError("unknown mode '%s" % mode)
-
-        self.storage_overhead = self.block_storage_overhead * num_blocks
-        self.storage_overhead_percentage = 100.0 * self.storage_overhead / file_size
-
-    def dump(self):
-        for k in ("mode", "file_size", "seg_size",
-                  "num_segs", "num_subblocks", "num_blocks", "blocks_needed",
-                  "subblock_size", "block_size",
-                  "subblock_arity", "subblock_tree_depth",
-                  "subblock_overhead",
-                  "block_storage_overhead", "block_transmission_overhead",
-                  "storage_overhead", "storage_overhead_percentage",
-                  "bytes_until_some_data"):
-            print k, getattr(self, k)
-
-def fmt(num, trim=False):
-    if num < KiB:
-        #s = str(num) + "#"
-        s = "%.2f#" % num
-    elif num < MiB:
-        s = "%.2fk" % (num / KiB)
-    elif num < GiB:
-        s = "%.2fM" % (num / MiB)
-    elif num < TiB:
-        s = "%.2fG" % (num / GiB)
-    elif num < PiB:
-        s = "%.2fT" % (num / TiB)
-    else:
-        s = "big"
-    if trim:
-        s = re.sub(r'(\.0+)([kMGT#])',
-                   lambda m: m.group(2),
-                   s)
-    else:
-        s = re.sub(r'(\.0+)([kMGT#])',
-                   lambda m: (" "*len(m.group(1))+m.group(2)),
-                   s)
-    if s.endswith("#"):
-        s = s[:-1] + " "
-    return s
-
-def text():
-    opts = Args()
-    opts.parseOptions()
-    mode = opts["mode"]
-    arity = opts["arity"]
-    #      0123456789012345678901234567890123456789012345678901234567890123456
-    print "mode=%s" % mode, " arity=%d" % arity
-    print "                    storage    storage"
-    print "Size     blocksize  overhead   overhead     k  d  alacrity"
-    print "                    (bytes)      (%)"
-    print "-------  -------    --------   --------  ---- --  --------"
-    #sizes = [2 ** i for i in range(7, 41)]
-    radix = math.sqrt(10); expstep = 2
-    radix = 2; expstep = 2
-    #radix = 10; expstep = 1
-    maxexp = int(math.ceil(math.log(1e12, radix)))+2
-    sizes = [radix ** i for i in range(2,maxexp,expstep)]
-    for file_size in sizes:
-        s = Sizes(mode, file_size, arity)
-        out = ""
-        out += "%7s  " % fmt(file_size, trim=True)
-        out += "%7s    " % fmt(s.block_size)
-        out += "%8s" % fmt(s.storage_overhead)
-        out += "%10.2f  " % s.storage_overhead_percentage
-        out += " %4d" % int(s.subblock_arity)
-        out += " %2d" % int(s.subblock_tree_depth)
-        out += " %8s" % fmt(s.bytes_until_some_data)
-        print out
-
-
-def graph():
-    # doesn't work yet
-    import Gnuplot
-    opts = Args()
-    opts.parseOptions()
-    mode = opts["mode"]
-    arity = opts["arity"]
-    g = Gnuplot.Gnuplot(debug=1)
-    g.title("overhead / alacrity tradeoffs")
-    g.xlabel("file size")
-    g.ylabel("stuff")
-    sizes = [2 ** i for i in range(7, 32)]
-    series = {"overhead": {}, "alacrity": {}}
-    for file_size in sizes:
-        s = Sizes(mode, file_size, arity)
-        series["overhead"][file_size] = s.storage_overhead_percentage
-        series["alacrity"][file_size] = s.bytes_until_some_data
-    g.plot([ (fs, series["overhead"][fs])
-             for fs in sizes ])
-    raw_input("press return")
-
-
-if __name__ == '__main__':
-    text()
-    #graph()