3 import random, math, os, re
4 from twisted.python import usage
6 class Args(usage.Options):
8 ["mode", "m", "alpha", "validation scheme"],
9 ["arity", "k", 2, "k (airty) for hash tree"],
11 def opt_arity(self, option):
12 self['arity'] = int(option)
13 def parseArgs(self, *args):
15 self['mode'] = args[0]
20 sizes = [random.randrange(10, 20) for i in range(10)]
26 x.title = "Example Graph"
27 #x.ext_color = [ "white", "yellow", "red", "blue", "green"]
29 #x.setLabels(["Mon", "Tue", "Wed", "Thu", "Fri"])
39 def __init__(self, mode, file_size, arity=2):
42 self.file_size = file_size
43 self.seg_size = seg_size = 1.0 * min(MAX_SEGSIZE, file_size)
44 self.num_segs = num_segs = math.ceil(file_size / seg_size)
45 self.num_subblocks = num_subblocks = num_segs
47 self.num_blocks = num_blocks = 100
48 self.blocks_needed = blocks_needed = 25
50 self.subblock_size = subblock_size = seg_size / blocks_needed
51 self.block_size = block_size = subblock_size * num_subblocks
53 # none of this includes the block-level hash chain yet, since that is
54 # only a function of the number of blocks. All overhead numbers
55 # assume that the block-level hash chain has already been sent,
56 # including the root of the subblock-level hash tree.
60 self.subblock_arity = 0
61 self.subblock_tree_depth = 0
62 self.subblock_overhead = 0
63 self.bytes_until_some_data = 20 + block_size
64 self.block_storage_overhead = 0
65 self.block_transmission_overhead = 0
68 # k=num_subblocks, d=1
69 # each subblock has a 20-byte hash
70 self.subblock_arity = num_subblocks
71 self.subblock_tree_depth = 1
72 self.subblock_overhead = 20
73 # the block has a list of hashes, one for each subblock
74 self.block_storage_overhead = (self.subblock_overhead *
76 # we can get away with not sending the hash of the block that
77 # we're sending in full, once
78 self.block_transmission_overhead = self.block_storage_overhead - 20
79 # we must get the whole list (so it can be validated) before
80 # any data can be validated
81 self.bytes_until_some_data = (self.block_transmission_overhead +
85 self.subblock_arity = k = arity
86 d = math.ceil(math.log(num_subblocks, k))
87 self.subblock_tree_depth = d
89 # to make things easier, we make the pessimistic assumption that
90 # we have to store hashes for all the empty places in the tree
91 # (when the number of blocks is not an exact exponent of k)
92 self.subblock_overhead = 20
93 # the subblock hashes are organized into a k-ary tree, which
94 # means storing (and eventually transmitting) more hashes. This
95 # count includes all the low-level block hashes and the root.
96 hash_nodes = (num_leaves*k - 1) / (k - 1)
97 #print "hash_depth", d
98 #print "num_leaves", num_leaves
99 #print "hash_nodes", hash_nodes
100 # the storage overhead is this
101 self.block_storage_overhead = 20 * (hash_nodes - 1)
102 # the transmission overhead is smaller: if we actually transmit
103 # every subblock, we don't have to transmit 1/k of the
104 # lowest-level subblock hashes, and we don't have to transmit the
105 # root because it was already sent with the block-level hash tree
106 self.block_transmission_overhead = 20 * (hash_nodes
109 # we must get a full sibling hash chain before we can validate
111 sibling_length = d * (k-1)
112 self.bytes_until_some_data = 20 * sibling_length + subblock_size
117 raise RuntimeError("unknown mode '%s" % mode)
119 self.storage_overhead = self.block_storage_overhead * num_blocks
120 self.storage_overhead_percentage = 100.0 * self.storage_overhead / file_size
123 for k in ("mode", "file_size", "seg_size",
124 "num_segs", "num_subblocks", "num_blocks", "blocks_needed",
125 "subblock_size", "block_size",
126 "subblock_arity", "subblock_tree_depth",
128 "block_storage_overhead", "block_transmission_overhead",
129 "storage_overhead", "storage_overhead_percentage",
130 "bytes_until_some_data"):
131 print k, getattr(self, k)
133 def fmt(num, trim=False):
138 s = "%.2fk" % (num / KiB)
140 s = "%.2fM" % (num / MiB)
142 s = "%.2fG" % (num / GiB)
144 s = "%.2fT" % (num / TiB)
148 s = re.sub(r'(\.0+)([kMGT#])',
149 lambda m: m.group(2),
152 s = re.sub(r'(\.0+)([kMGT#])',
153 lambda m: (" "*len(m.group(1))+m.group(2)),
163 arity = opts["arity"]
164 # 0123456789012345678901234567890123456789012345678901234567890123456
165 print "mode=%s" % mode, " arity=%d" % arity
166 print " storage storage"
167 print "Size blocksize overhead overhead k d alacrity"
169 print "------- ------- -------- -------- ---- -- --------"
170 #sizes = [2 ** i for i in range(7, 41)]
171 radix = math.sqrt(10); expstep = 2
172 radix = 2; expstep = 2
173 #radix = 10; expstep = 1
174 maxexp = int(math.ceil(math.log(1e12, radix)))+2
175 sizes = [radix ** i for i in range(2,maxexp,expstep)]
176 for file_size in sizes:
177 s = Sizes(mode, file_size, arity)
179 out += "%7s " % fmt(file_size, trim=True)
180 out += "%7s " % fmt(s.block_size)
181 out += "%8s" % fmt(s.storage_overhead)
182 out += "%10.2f " % s.storage_overhead_percentage
183 out += " %4d" % int(s.subblock_arity)
184 out += " %2d" % int(s.subblock_tree_depth)
185 out += " %8s" % fmt(s.bytes_until_some_data)
195 arity = opts["arity"]
196 g = Gnuplot.Gnuplot(debug=1)
197 g.title("overhead / alacrity tradeoffs")
198 g.xlabel("file size")
200 sizes = [2 ** i for i in range(7, 32)]
201 series = {"overhead": {}, "alacrity": {}}
202 for file_size in sizes:
203 s = Sizes(mode, file_size, arity)
204 series["overhead"][file_size] = s.storage_overhead_percentage
205 series["alacrity"][file_size] = s.bytes_until_some_data
206 g.plot([ (fs, series["overhead"][fs])
208 raw_input("press return")
211 if __name__ == '__main__':