+++ /dev/null
-# -*- python -*-
-
-from allmydata import client
-from twisted.application import service
-
-c = client.Client()
-
-application = service.Application("allmydata_client")
-c.setServiceParent(application)
--- /dev/null
+#! /usr/bin/env python
+
+import sha as shamodule
+import os, random
+
+from pkg_resources import require
+require('PyRRD')
+from pyrrd import graph
+from pyrrd.rrd import DataSource, RRD, RRA
+
+
+def sha(s):
+ return shamodule.new(s).digest()
+
+def randomid():
+ return os.urandom(20)
+
+class Node:
+ def __init__(self, nid, queen, simulator):
+ self.nid = nid
+ self.queen = queen
+ self.simulator = simulator
+ self.shares = {}
+ self.capacity = random.randrange(1000)
+ self.utilization = 0
+ self.files = []
+
+ def permute_peers(self, fileid):
+ permuted = [(sha(fileid+n.nid),n)
+ for n in self.queen.get_all_nodes()]
+ permuted.sort()
+ return permuted
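+    # note: sha(fileid+nid) depends only on the file and the peer, so every
+    # node computes the same pseudo-random peer ordering for a given fileid;
+    # publisher and retriever walk the list in the same order without any
+    # central coordination (the same idea behind consistent hashing)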
+
+    def publish_file(self, fileid, size, numshares=100):
+        sharesize = 4 * size / numshares
+        permuted = self.permute_peers(fileid)
+        last_givento = None
+        tried = 0
+        givento = []
+        shares_left = numshares
+        while shares_left and permuted:
+            pid,node = permuted.pop(0)
+            tried += 1
+            last_givento = pid
+            if node.accept_share(fileid, sharesize):
+                givento.append((pid,node))
+                shares_left -= 1
+        if shares_left:
+            # couldn't place every share: unwind the ones we did hand out
+            for pid,node in givento:
+                node.delete_share(fileid)
+            return False
+        # record the original share count, not the exhausted counter
+        self.files.append((fileid, numshares))
+        self.queen.please_preserve(fileid, size, tried, last_givento)
+        return (True, tried)
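+    # e.g. publish_file(fid, size=1000) hands out 100 shares of
+    # 4*1000/100 = 40 bytes each, a 4x expansion; retrieve_file() then
+    # needs any numshares/4 = 25 of them to recover the file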
+
+    def accept_share(self, fileid, sharesize):
+        if self.utilization < self.capacity:
+ # we have room! yay!
+ self.shares[fileid] = sharesize
+ self.utilization += sharesize
+ return True
+ if self.decide(sharesize):
+ # we don't, but we'll make room
+ self.make_space(sharesize)
+ self.shares[fileid] = sharesize
+ self.utilization += sharesize
+ return True
+ else:
+ # we're full, try elsewhere
+ return False
+
+    def decide(self, sharesize):
+        # flip a coin: sometimes evict old shares to make room, but never
+        # for a share that could not fit even in an empty node
+        if sharesize > self.capacity:
+            return False
+        return random.random() > 0.5
+
+ def make_space(self, sharesize):
+ assert sharesize <= self.capacity
+ while self.capacity - self.utilization < sharesize:
+ victim = random.choice(self.shares.keys())
+ self.simulator.lost_data(self.shares[victim])
+ self.delete_share(victim)
+
+    def delete_share(self, fileid):
+        if fileid in self.shares:
+            self.utilization -= self.shares[fileid]
+            del self.shares[fileid]
+            return True
+        return False
+
+    def has_share(self, fileid):
+        # used by retrieve_file() when probing peers for shares
+        return fileid in self.shares
+
+    def retrieve_file(self):
+        if not self.files:
+            return False
+ fileid,numshares = random.choice(self.files)
+ needed = numshares / 4
+ peers = []
+ for pid,node in self.permute_peers(fileid):
+ if random.random() > self.simulator.P_NODEAVAIL:
+ continue # node isn't available right now
+ if node.has_share(fileid):
+ peers.append(node)
+ if len(peers) >= needed:
+ return True
+ return False
+
+ def delete_file(self):
+ if not self.files:
+ return False
+ which = random.choice(self.files)
+ self.files.remove(which)
+ fileid,numshares = which
+ self.queen.delete(fileid)
+ return True
+
+class Queen:
+ def __init__(self, simulator):
+ self.living_files = {}
+ self.utilization = 0 # total size of all active files
+ self.simulator = simulator
+ self.simulator.stamp_utilization(self.utilization)
+
+ def get_all_nodes(self):
+ return self.all_nodes
+
+ def please_preserve(self, fileid, size, tried, last_givento):
+ self.living_files[fileid] = (size, tried, last_givento)
+ self.utilization += size
+ self.simulator.stamp_utilization(self.utilization)
+
+ def please_delete(self, fileid):
+ self.delete(fileid)
+
+ def permute_peers(self, fileid):
+ permuted = [(sha(fileid+n.nid),n)
+ for n in self.get_all_nodes()]
+ permuted.sort()
+ return permuted
+
+ def delete(self, fileid):
+ permuted = self.permute_peers(fileid)
+ size, tried, last_givento = self.living_files[fileid]
+ pid = ""
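+        # walk the same permuted list the publisher walked: peers sorted
+        # after last_givento were never offered a share, so the scan can
+        # stop as soon as it passes that point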
+ while tried and pid < last_givento:
+ pid,node = permuted.pop(0)
+ had_it = node.delete_share(fileid)
+ if had_it:
+ tried -= 1
+ self.utilization -= size
+ self.simulator.stamp_utilization(self.utilization)
+ del self.living_files[fileid]
+
+class Simulator:
+ NUM_NODES = 1000
+ EVENTS = ["ADDFILE", "DELFILE", "ADDNODE", "DELNODE"]
+ RATE_ADDFILE = 1.0 / 10
+ RATE_DELFILE = 1.0 / 20
+ RATE_ADDNODE = 1.0 / 3000
+ RATE_DELNODE = 1.0 / 4000
+ P_NODEAVAIL = 1.0
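+    # inter-event delays are drawn from expovariate(rate) in _add_event(),
+    # so each RATE_* is an average frequency: e.g. RATE_ADDFILE = 1.0/10
+    # means one ADDFILE event every 10 simulated seconds on average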
+
+ def __init__(self):
+ self.time = 1164783600 # small numbers of seconds since the epoch confuse rrdtool
+ self.prevstamptime = int(self.time)
+
+ ds = DataSource(ds_name='utilizationds', ds_type='GAUGE', heartbeat=1)
+ rra = RRA(cf='AVERAGE', xff=0.1, steps=1, rows=1200)
+ self.rrd = RRD("/tmp/utilization.rrd", ds=[ds], rra=[rra], start=self.time)
+ self.rrd.create()
+
+ self.queen = q = Queen(self)
+ self.all_nodes = [Node(randomid(), q, self)
+ for i in range(self.NUM_NODES)]
+ q.all_nodes = self.all_nodes
+ self.next = []
+ self.schedule_events()
+ self.verbose = False
+
+ self.added_files = 0
+ self.added_data = 0
+ self.deleted_files = 0
+ self.published_files = []
+ self.failed_files = 0
+ self.lost_data_bytes = 0 # bytes deleted to make room for new shares
+
+ def stamp_utilization(self, utilization):
+ if int(self.time) > (self.prevstamptime+1):
+ self.rrd.bufferValue(self.time, utilization)
+ self.prevstamptime = int(self.time)
+
+ def write_graph(self):
+ self.rrd.update()
+ self.rrd = None
+ import gc
+ gc.collect()
+
+ def1 = graph.DataDefinition(vname="a", rrdfile='/tmp/utilization.rrd', ds_name='utilizationds')
+ area1 = graph.Area(value="a", color="#990033", legend='utilizationlegend')
+ g = graph.Graph('/tmp/utilization.png', imgformat='PNG', width=540, height=100, vertical_label='utilizationverticallabel', title='utilizationtitle', lower_limit=0)
+ g.data.append(def1)
+ g.data.append(area1)
+ g.write()
+
+ def add_file(self):
+ size = random.randrange(1000)
+ n = random.choice(self.all_nodes)
+ if self.verbose:
+ print "add_file(size=%d, from node %s)" % (size, n)
+ fileid = randomid()
+ able = n.publish_file(fileid, size)
+ if able:
+ able, tried = able
+ self.added_files += 1
+ self.added_data += size
+ self.published_files.append(tried)
+ else:
+ self.failed_files += 1
+
+ def lost_data(self, size):
+ self.lost_data_bytes += size
+
+ def delete_file(self):
+ all_nodes = self.all_nodes[:]
+ random.shuffle(all_nodes)
+ for n in all_nodes:
+ if n.delete_file():
+ self.deleted_files += 1
+ return
+ print "no files to delete"
+
+ def _add_event(self, etype):
+ rate = getattr(self, "RATE_" + etype)
+ next = self.time + random.expovariate(rate)
+ self.next.append((next, etype))
+ self.next.sort()
+
+ def schedule_events(self):
+ types = set([e[1] for e in self.next])
+ for etype in self.EVENTS:
+ if not etype in types:
+ self._add_event(etype)
+
+ def do_event(self):
+ time, etype = self.next.pop(0)
+ assert time > self.time
+ current_time = self.time
+ self.time = time
+ self._add_event(etype)
+ if etype == "ADDFILE":
+ self.add_file()
+ elif etype == "DELFILE":
+ self.delete_file()
+ elif etype == "ADDNODE":
+ pass
+ #self.add_node()
+ elif etype == "DELNODE":
+ #self.del_node()
+ pass
+ # self.print_stats(current_time, etype)
+
+ def print_stats_header(self):
+ print "time: added failed lost avg_tried"
+
+ def print_stats(self, time, etype):
+ if not self.published_files:
+ avg_tried = "NONE"
+ else:
+ avg_tried = sum(self.published_files) / len(self.published_files)
+ print time, etype, self.added_data, self.failed_files, self.lost_data_bytes, avg_tried, len(self.queen.living_files), self.queen.utilization
+
+s = None
+
+def main():
+# rrdtool.create("foo.rrd",
+# "--step 10",
+# "DS:files-added:DERIVE::0:1000",
+# "RRA:AVERAGE:1:1:1200",
+# )
+ global s
+ s = Simulator()
+ # s.print_stats_header()
+ for i in range(1000):
+ s.do_event()
+ print "%d files added, %d files deleted" % (s.added_files, s.deleted_files)
+ return s
+
+if __name__ == '__main__':
+ main()
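+
+# a minimal interactive sketch (assuming this file is saved as sim.py and
+# the PyRRD dependency is installed):
+#   >>> import sim
+#   >>> s = sim.main()     # runs 1000 events, prints add/delete counts
+#   >>> s.write_graph()    # flushes the RRD, renders /tmp/utilization.png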
+
+
--- /dev/null
+#! /usr/bin/env python
+
+import random, math, os, re
+from twisted.python import usage
+
+class Args(usage.Options):
+ optParameters = [
+ ["mode", "m", "alpha", "validation scheme"],
+ ["arity", "k", 2, "k (airty) for hash tree"],
+ ]
+ def opt_arity(self, option):
+ self['arity'] = int(option)
+ def parseArgs(self, *args):
+ if len(args) > 0:
+ self['mode'] = args[0]
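+
+# example invocation (a sketch; "sizes.py" is an assumed name for this
+# file):
+#   python sizes.py gamma -k 4
+# the bare positional argument selects the mode, and -k/--arity overrides
+# the default hash-tree arity of 2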
+
+
+def charttest():
+ import gdchart
+ sizes = [random.randrange(10, 20) for i in range(10)]
+ x = gdchart.Line()
+ x.width = 250
+ x.height = 250
+ x.xtitle = "sample"
+ x.ytitle = "size"
+ x.title = "Example Graph"
+ #x.ext_color = [ "white", "yellow", "red", "blue", "green"]
+ x.setData(sizes)
+ #x.setLabels(["Mon", "Tue", "Wed", "Thu", "Fri"])
+ x.draw("simple.png")
+
+KiB=1024
+MiB=1024*KiB
+GiB=1024*MiB
+TiB=1024*GiB
+PiB=1024*TiB
+
+class Sizes:
+ def __init__(self, mode, file_size, arity=2):
+ MAX_SEGSIZE = 2*MiB
+ self.mode = mode
+ self.file_size = file_size
+ self.seg_size = seg_size = 1.0 * min(MAX_SEGSIZE, file_size)
+ self.num_segs = num_segs = math.ceil(file_size / seg_size)
+ self.num_subblocks = num_subblocks = num_segs
+
+ self.num_blocks = num_blocks = 100
+ self.blocks_needed = blocks_needed = 25
+
+ self.subblock_size = subblock_size = seg_size / blocks_needed
+ self.block_size = block_size = subblock_size * num_subblocks
+
+ # none of this includes the block-level hash chain yet, since that is
+ # only a function of the number of blocks. All overhead numbers
+ # assume that the block-level hash chain has already been sent,
+ # including the root of the subblock-level hash tree.
+
+ if mode == "alpha":
+ # no hash tree at all
+ self.subblock_arity = 0
+ self.subblock_tree_depth = 0
+ self.subblock_overhead = 0
+ self.bytes_until_some_data = 20 + block_size
+ self.block_storage_overhead = 0
+ self.block_transmission_overhead = 0
+
+ elif mode == "beta":
+ # k=num_subblocks, d=1
+ # each subblock has a 20-byte hash
+ self.subblock_arity = num_subblocks
+ self.subblock_tree_depth = 1
+ self.subblock_overhead = 20
+ # the block has a list of hashes, one for each subblock
+ self.block_storage_overhead = (self.subblock_overhead *
+ num_subblocks)
+ # we can get away with not sending the hash of the block that
+ # we're sending in full, once
+ self.block_transmission_overhead = self.block_storage_overhead - 20
+ # we must get the whole list (so it can be validated) before
+ # any data can be validated
+ self.bytes_until_some_data = (self.block_transmission_overhead +
+ subblock_size)
+
+ elif mode == "gamma":
+ self.subblock_arity = k = arity
+ d = math.ceil(math.log(num_subblocks, k))
+ self.subblock_tree_depth = d
+ num_leaves = k ** d
+            # to make things easier, we make the pessimistic assumption that
+            # we have to store hashes for all the empty places in the tree
+            # (when the number of subblocks is not an exact power of k)
+            self.subblock_overhead = 20
+            # the subblock hashes are organized into a k-ary tree, which
+            # means storing (and eventually transmitting) more hashes. This
+            # count includes all the low-level subblock hashes and the root.
+ hash_nodes = (num_leaves*k - 1) / (k - 1)
+ #print "hash_depth", d
+ #print "num_leaves", num_leaves
+ #print "hash_nodes", hash_nodes
+            # the storage overhead counts every hash node except the root,
+            # which is already stored with the block-level hash chain
+ self.block_storage_overhead = 20 * (hash_nodes - 1)
+ # the transmission overhead is smaller: if we actually transmit
+ # every subblock, we don't have to transmit 1/k of the
+ # lowest-level subblock hashes, and we don't have to transmit the
+ # root because it was already sent with the block-level hash tree
+ self.block_transmission_overhead = 20 * (hash_nodes
+ - 1 # the root
+ - num_leaves / k)
+ # we must get a full sibling hash chain before we can validate
+ # any data
+ sibling_length = d * (k-1)
+ self.bytes_until_some_data = 20 * sibling_length + subblock_size
+ else:
+            raise RuntimeError("unknown mode '%s'" % mode)
+
+ self.storage_overhead = self.block_storage_overhead * num_blocks
+ self.storage_overhead_percentage = 100.0 * self.storage_overhead / file_size
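+        # worked example (mode "gamma", k=2, a 200MiB file): 100 segments
+        # of 2MiB, so num_subblocks=100, d=ceil(log2(100))=7, and the tree
+        # is padded out to num_leaves=2**7=128 leaves, giving
+        # hash_nodes=(128*2-1)/(2-1)=255; storage overhead is 20*254 bytes
+        # per block, and the first validated byte arrives after 20*7 = 140
+        # bytes of sibling hashes plus one subblock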
+
+ def dump(self):
+ for k in ("mode", "file_size", "seg_size",
+ "num_segs", "num_subblocks", "num_blocks", "blocks_needed",
+ "subblock_size", "block_size",
+ "subblock_arity", "subblock_tree_depth",
+ "subblock_overhead",
+ "block_storage_overhead", "block_transmission_overhead",
+ "storage_overhead", "storage_overhead_percentage",
+ "bytes_until_some_data"):
+ print k, getattr(self, k)
+
+def fmt(num, trim=False):
+ if num < KiB:
+ #s = str(num) + "#"
+ s = "%.2f#" % num
+ elif num < MiB:
+ s = "%.2fk" % (num / KiB)
+ elif num < GiB:
+ s = "%.2fM" % (num / MiB)
+ elif num < TiB:
+ s = "%.2fG" % (num / GiB)
+ elif num < PiB:
+ s = "%.2fT" % (num / TiB)
+ else:
+ s = "big"
+ if trim:
+ s = re.sub(r'(\.0+)([kMGT#])',
+ lambda m: m.group(2),
+ s)
+ else:
+ s = re.sub(r'(\.0+)([kMGT#])',
+ lambda m: (" "*len(m.group(1))+m.group(2)),
+ s)
+ if s.endswith("#"):
+ s = s[:-1] + " "
+ return s
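+
+# examples: fmt(1536.0) -> "1.50k"; fmt(3.0*MiB) -> "3   M" (an all-zero
+# fraction is blanked so columns stay aligned, or dropped entirely with
+# trim=True, giving "3M")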
+
+def text():
+ opts = Args()
+ opts.parseOptions()
+ mode = opts["mode"]
+ arity = opts["arity"]
+ # 0123456789012345678901234567890123456789012345678901234567890123456
+ print "mode=%s" % mode, " arity=%d" % arity
+ print " storage storage"
+ print "Size blocksize overhead overhead k d alacrity"
+ print " (bytes) (%)"
+ print "------- ------- -------- -------- ---- -- --------"
+ #sizes = [2 ** i for i in range(7, 41)]
+ radix = math.sqrt(10); expstep = 2
+ radix = 2; expstep = 2
+ #radix = 10; expstep = 1
+ maxexp = int(math.ceil(math.log(1e12, radix)))+2
+ sizes = [radix ** i for i in range(2,maxexp,expstep)]
+ for file_size in sizes:
+ s = Sizes(mode, file_size, arity)
+ out = ""
+ out += "%7s " % fmt(file_size, trim=True)
+ out += "%7s " % fmt(s.block_size)
+ out += "%8s" % fmt(s.storage_overhead)
+ out += "%10.2f " % s.storage_overhead_percentage
+ out += " %4d" % int(s.subblock_arity)
+ out += " %2d" % int(s.subblock_tree_depth)
+ out += " %8s" % fmt(s.bytes_until_some_data)
+ print out
+
+
+def graph():
+ # doesn't work yet
+ import Gnuplot
+ opts = Args()
+ opts.parseOptions()
+ mode = opts["mode"]
+ arity = opts["arity"]
+ g = Gnuplot.Gnuplot(debug=1)
+ g.title("overhead / alacrity tradeoffs")
+ g.xlabel("file size")
+ g.ylabel("stuff")
+ sizes = [2 ** i for i in range(7, 32)]
+ series = {"overhead": {}, "alacrity": {}}
+ for file_size in sizes:
+ s = Sizes(mode, file_size, arity)
+ series["overhead"][file_size] = s.storage_overhead_percentage
+ series["alacrity"][file_size] = s.bytes_until_some_data
+ g.plot([ (fs, series["overhead"][fs])
+ for fs in sizes ])
+ raw_input("press return")
+
+
+if __name__ == '__main__':
+ text()
+ #graph()
+++ /dev/null
-# -*- python -*-
-
-from allmydata import queen
-from twisted.application import service
-
-c = queen.Queen()
-
-application = service.Application("allmydata_queen")
-c.setServiceParent(application)
+++ /dev/null
-#! /usr/bin/env python
-
-import sha as shamodule
-import os, random
-
-from pkg_resources import require
-require('PyRRD')
-from pyrrd import graph
-from pyrrd.rrd import DataSource, RRD, RRA
-
-
-def sha(s):
- return shamodule.new(s).digest()
-
-def randomid():
- return os.urandom(20)
-
-class Node:
- def __init__(self, nid, queen, simulator):
- self.nid = nid
- self.queen = queen
- self.simulator = simulator
- self.shares = {}
- self.capacity = random.randrange(1000)
- self.utilization = 0
- self.files = []
-
- def permute_peers(self, fileid):
- permuted = [(sha(fileid+n.nid),n)
- for n in self.queen.get_all_nodes()]
- permuted.sort()
- return permuted
-
- def publish_file(self, fileid, size, numshares=100):
- sharesize = 4 * size / numshares
- permuted = self.permute_peers(fileid)
- last_givento = None
- tried = 0
- givento = []
- while numshares and permuted:
- pid,node = permuted.pop(0)
- tried += 1
- last_givento = pid
- if node.accept_share(fileid, sharesize):
- givento.append((pid,node))
- numshares -= 1
- if numshares:
- # couldn't push, should delete
- for pid,node in givento:
- node.delete_share(fileid)
- return False
- self.files.append((fileid, numshares))
- self.queen.please_preserve(fileid, size, tried, last_givento)
- return (True, tried)
-
- def accept_share(self, fileid, sharesize):
- accept = False
- if self.utilization < self.capacity:
- # we have room! yay!
- self.shares[fileid] = sharesize
- self.utilization += sharesize
- return True
- if self.decide(sharesize):
- # we don't, but we'll make room
- self.make_space(sharesize)
- self.shares[fileid] = sharesize
- self.utilization += sharesize
- return True
- else:
- # we're full, try elsewhere
- return False
-
- def decide(self, sharesize):
- if sharesize > self.capacity:
- return False
- return False
- return random.random() > 0.5
-
- def make_space(self, sharesize):
- assert sharesize <= self.capacity
- while self.capacity - self.utilization < sharesize:
- victim = random.choice(self.shares.keys())
- self.simulator.lost_data(self.shares[victim])
- self.delete_share(victim)
-
- def delete_share(self, fileid):
- if fileid in self.shares:
- self.utilization -= self.shares[fileid]
- del self.shares[fileid]
- return True
- return False
-
- def retrieve_file(self):
- if not self.files:
- return
- fileid,numshares = random.choice(self.files)
- needed = numshares / 4
- peers = []
- for pid,node in self.permute_peers(fileid):
- if random.random() > self.simulator.P_NODEAVAIL:
- continue # node isn't available right now
- if node.has_share(fileid):
- peers.append(node)
- if len(peers) >= needed:
- return True
- return False
-
- def delete_file(self):
- if not self.files:
- return False
- which = random.choice(self.files)
- self.files.remove(which)
- fileid,numshares = which
- self.queen.delete(fileid)
- return True
-
-class Queen:
- def __init__(self, simulator):
- self.living_files = {}
- self.utilization = 0 # total size of all active files
- self.simulator = simulator
- self.simulator.stamp_utilization(self.utilization)
-
- def get_all_nodes(self):
- return self.all_nodes
-
- def please_preserve(self, fileid, size, tried, last_givento):
- self.living_files[fileid] = (size, tried, last_givento)
- self.utilization += size
- self.simulator.stamp_utilization(self.utilization)
-
- def please_delete(self, fileid):
- self.delete(fileid)
-
- def permute_peers(self, fileid):
- permuted = [(sha(fileid+n.nid),n)
- for n in self.get_all_nodes()]
- permuted.sort()
- return permuted
-
- def delete(self, fileid):
- permuted = self.permute_peers(fileid)
- size, tried, last_givento = self.living_files[fileid]
- pid = ""
- while tried and pid < last_givento:
- pid,node = permuted.pop(0)
- had_it = node.delete_share(fileid)
- if had_it:
- tried -= 1
- self.utilization -= size
- self.simulator.stamp_utilization(self.utilization)
- del self.living_files[fileid]
-
-class Simulator:
- NUM_NODES = 1000
- EVENTS = ["ADDFILE", "DELFILE", "ADDNODE", "DELNODE"]
- RATE_ADDFILE = 1.0 / 10
- RATE_DELFILE = 1.0 / 20
- RATE_ADDNODE = 1.0 / 3000
- RATE_DELNODE = 1.0 / 4000
- P_NODEAVAIL = 1.0
-
- def __init__(self):
- self.time = 1164783600 # small numbers of seconds since the epoch confuse rrdtool
- self.prevstamptime = int(self.time)
-
- ds = DataSource(ds_name='utilizationds', ds_type='GAUGE', heartbeat=1)
- rra = RRA(cf='AVERAGE', xff=0.1, steps=1, rows=1200)
- self.rrd = RRD("/tmp/utilization.rrd", ds=[ds], rra=[rra], start=self.time)
- self.rrd.create()
-
- self.queen = q = Queen(self)
- self.all_nodes = [Node(randomid(), q, self)
- for i in range(self.NUM_NODES)]
- q.all_nodes = self.all_nodes
- self.next = []
- self.schedule_events()
- self.verbose = False
-
- self.added_files = 0
- self.added_data = 0
- self.deleted_files = 0
- self.published_files = []
- self.failed_files = 0
- self.lost_data_bytes = 0 # bytes deleted to make room for new shares
-
- def stamp_utilization(self, utilization):
- if int(self.time) > (self.prevstamptime+1):
- self.rrd.bufferValue(self.time, utilization)
- self.prevstamptime = int(self.time)
-
- def write_graph(self):
- self.rrd.update()
- self.rrd = None
- import gc
- gc.collect()
-
- def1 = graph.DataDefinition(vname="a", rrdfile='/tmp/utilization.rrd', ds_name='utilizationds')
- area1 = graph.Area(value="a", color="#990033", legend='utilizationlegend')
- g = graph.Graph('/tmp/utilization.png', imgformat='PNG', width=540, height=100, vertical_label='utilizationverticallabel', title='utilizationtitle', lower_limit=0)
- g.data.append(def1)
- g.data.append(area1)
- g.write()
-
- def add_file(self):
- size = random.randrange(1000)
- n = random.choice(self.all_nodes)
- if self.verbose:
- print "add_file(size=%d, from node %s)" % (size, n)
- fileid = randomid()
- able = n.publish_file(fileid, size)
- if able:
- able, tried = able
- self.added_files += 1
- self.added_data += size
- self.published_files.append(tried)
- else:
- self.failed_files += 1
-
- def lost_data(self, size):
- self.lost_data_bytes += size
-
- def delete_file(self):
- all_nodes = self.all_nodes[:]
- random.shuffle(all_nodes)
- for n in all_nodes:
- if n.delete_file():
- self.deleted_files += 1
- return
- print "no files to delete"
-
- def _add_event(self, etype):
- rate = getattr(self, "RATE_" + etype)
- next = self.time + random.expovariate(rate)
- self.next.append((next, etype))
- self.next.sort()
-
- def schedule_events(self):
- types = set([e[1] for e in self.next])
- for etype in self.EVENTS:
- if not etype in types:
- self._add_event(etype)
-
- def do_event(self):
- time, etype = self.next.pop(0)
- assert time > self.time
- current_time = self.time
- self.time = time
- self._add_event(etype)
- if etype == "ADDFILE":
- self.add_file()
- elif etype == "DELFILE":
- self.delete_file()
- elif etype == "ADDNODE":
- pass
- #self.add_node()
- elif etype == "DELNODE":
- #self.del_node()
- pass
- # self.print_stats(current_time, etype)
-
- def print_stats_header(self):
- print "time: added failed lost avg_tried"
-
- def print_stats(self, time, etype):
- if not self.published_files:
- avg_tried = "NONE"
- else:
- avg_tried = sum(self.published_files) / len(self.published_files)
- print time, etype, self.added_data, self.failed_files, self.lost_data_bytes, avg_tried, len(self.queen.living_files), self.queen.utilization
-
-global s
-s = None
-
-def main():
-# rrdtool.create("foo.rrd",
-# "--step 10",
-# "DS:files-added:DERIVE::0:1000",
-# "RRA:AVERAGE:1:1:1200",
-# )
- global s
- s = Simulator()
- # s.print_stats_header()
- for i in range(1000):
- s.do_event()
- print "%d files added, %d files deleted" % (s.added_files, s.deleted_files)
- return s
-
-if __name__ == '__main__':
- main()
-
-
+++ /dev/null
-#! /usr/bin/env python
-
-import random, math, os, re
-from twisted.python import usage
-
-class Args(usage.Options):
- optParameters = [
- ["mode", "m", "alpha", "validation scheme"],
- ["arity", "k", 2, "k (airty) for hash tree"],
- ]
- def opt_arity(self, option):
- self['arity'] = int(option)
- def parseArgs(self, *args):
- if len(args) > 0:
- self['mode'] = args[0]
-
-
-def charttest():
- import gdchart
- sizes = [random.randrange(10, 20) for i in range(10)]
- x = gdchart.Line()
- x.width = 250
- x.height = 250
- x.xtitle = "sample"
- x.ytitle = "size"
- x.title = "Example Graph"
- #x.ext_color = [ "white", "yellow", "red", "blue", "green"]
- x.setData(sizes)
- #x.setLabels(["Mon", "Tue", "Wed", "Thu", "Fri"])
- x.draw("simple.png")
-
-KiB=1024
-MiB=1024*KiB
-GiB=1024*MiB
-TiB=1024*GiB
-PiB=1024*TiB
-
-class Sizes:
- def __init__(self, mode, file_size, arity=2):
- MAX_SEGSIZE = 2*MiB
- self.mode = mode
- self.file_size = file_size
- self.seg_size = seg_size = 1.0 * min(MAX_SEGSIZE, file_size)
- self.num_segs = num_segs = math.ceil(file_size / seg_size)
- self.num_subblocks = num_subblocks = num_segs
-
- self.num_blocks = num_blocks = 100
- self.blocks_needed = blocks_needed = 25
-
- self.subblock_size = subblock_size = seg_size / blocks_needed
- self.block_size = block_size = subblock_size * num_subblocks
-
- # none of this includes the block-level hash chain yet, since that is
- # only a function of the number of blocks. All overhead numbers
- # assume that the block-level hash chain has already been sent,
- # including the root of the subblock-level hash tree.
-
- if mode == "alpha":
- # no hash tree at all
- self.subblock_arity = 0
- self.subblock_tree_depth = 0
- self.subblock_overhead = 0
- self.bytes_until_some_data = 20 + block_size
- self.block_storage_overhead = 0
- self.block_transmission_overhead = 0
-
- elif mode == "beta":
- # k=num_subblocks, d=1
- # each subblock has a 20-byte hash
- self.subblock_arity = num_subblocks
- self.subblock_tree_depth = 1
- self.subblock_overhead = 20
- # the block has a list of hashes, one for each subblock
- self.block_storage_overhead = (self.subblock_overhead *
- num_subblocks)
- # we can get away with not sending the hash of the block that
- # we're sending in full, once
- self.block_transmission_overhead = self.block_storage_overhead - 20
- # we must get the whole list (so it can be validated) before
- # any data can be validated
- self.bytes_until_some_data = (self.block_transmission_overhead +
- subblock_size)
-
- elif mode == "gamma":
- self.subblock_arity = k = arity
- d = math.ceil(math.log(num_subblocks, k))
- self.subblock_tree_depth = d
- num_leaves = k ** d
- # to make things easier, we make the pessimistic assumption that
- # we have to store hashes for all the empty places in the tree
- # (when the number of blocks is not an exact exponent of k)
- self.subblock_overhead = 20
- # the subblock hashes are organized into a k-ary tree, which
- # means storing (and eventually transmitting) more hashes. This
- # count includes all the low-level block hashes and the root.
- hash_nodes = (num_leaves*k - 1) / (k - 1)
- #print "hash_depth", d
- #print "num_leaves", num_leaves
- #print "hash_nodes", hash_nodes
- # the storage overhead is this
- self.block_storage_overhead = 20 * (hash_nodes - 1)
- # the transmission overhead is smaller: if we actually transmit
- # every subblock, we don't have to transmit 1/k of the
- # lowest-level subblock hashes, and we don't have to transmit the
- # root because it was already sent with the block-level hash tree
- self.block_transmission_overhead = 20 * (hash_nodes
- - 1 # the root
- - num_leaves / k)
- # we must get a full sibling hash chain before we can validate
- # any data
- sibling_length = d * (k-1)
- self.bytes_until_some_data = 20 * sibling_length + subblock_size
-
-
-
- else:
- raise RuntimeError("unknown mode '%s" % mode)
-
- self.storage_overhead = self.block_storage_overhead * num_blocks
- self.storage_overhead_percentage = 100.0 * self.storage_overhead / file_size
-
- def dump(self):
- for k in ("mode", "file_size", "seg_size",
- "num_segs", "num_subblocks", "num_blocks", "blocks_needed",
- "subblock_size", "block_size",
- "subblock_arity", "subblock_tree_depth",
- "subblock_overhead",
- "block_storage_overhead", "block_transmission_overhead",
- "storage_overhead", "storage_overhead_percentage",
- "bytes_until_some_data"):
- print k, getattr(self, k)
-
-def fmt(num, trim=False):
- if num < KiB:
- #s = str(num) + "#"
- s = "%.2f#" % num
- elif num < MiB:
- s = "%.2fk" % (num / KiB)
- elif num < GiB:
- s = "%.2fM" % (num / MiB)
- elif num < TiB:
- s = "%.2fG" % (num / GiB)
- elif num < PiB:
- s = "%.2fT" % (num / TiB)
- else:
- s = "big"
- if trim:
- s = re.sub(r'(\.0+)([kMGT#])',
- lambda m: m.group(2),
- s)
- else:
- s = re.sub(r'(\.0+)([kMGT#])',
- lambda m: (" "*len(m.group(1))+m.group(2)),
- s)
- if s.endswith("#"):
- s = s[:-1] + " "
- return s
-
-def text():
- opts = Args()
- opts.parseOptions()
- mode = opts["mode"]
- arity = opts["arity"]
- # 0123456789012345678901234567890123456789012345678901234567890123456
- print "mode=%s" % mode, " arity=%d" % arity
- print " storage storage"
- print "Size blocksize overhead overhead k d alacrity"
- print " (bytes) (%)"
- print "------- ------- -------- -------- ---- -- --------"
- #sizes = [2 ** i for i in range(7, 41)]
- radix = math.sqrt(10); expstep = 2
- radix = 2; expstep = 2
- #radix = 10; expstep = 1
- maxexp = int(math.ceil(math.log(1e12, radix)))+2
- sizes = [radix ** i for i in range(2,maxexp,expstep)]
- for file_size in sizes:
- s = Sizes(mode, file_size, arity)
- out = ""
- out += "%7s " % fmt(file_size, trim=True)
- out += "%7s " % fmt(s.block_size)
- out += "%8s" % fmt(s.storage_overhead)
- out += "%10.2f " % s.storage_overhead_percentage
- out += " %4d" % int(s.subblock_arity)
- out += " %2d" % int(s.subblock_tree_depth)
- out += " %8s" % fmt(s.bytes_until_some_data)
- print out
-
-
-def graph():
- # doesn't work yet
- import Gnuplot
- opts = Args()
- opts.parseOptions()
- mode = opts["mode"]
- arity = opts["arity"]
- g = Gnuplot.Gnuplot(debug=1)
- g.title("overhead / alacrity tradeoffs")
- g.xlabel("file size")
- g.ylabel("stuff")
- sizes = [2 ** i for i in range(7, 32)]
- series = {"overhead": {}, "alacrity": {}}
- for file_size in sizes:
- s = Sizes(mode, file_size, arity)
- series["overhead"][file_size] = s.storage_overhead_percentage
- series["alacrity"][file_size] = s.bytes_until_some_data
- g.plot([ (fs, series["overhead"][fs])
- for fs in sizes ])
- raw_input("press return")
-
-
-if __name__ == '__main__':
- text()
- #graph()