]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - misc/simulators/ringsim.py
Fix pyflakes warnings in misc/ directories other than misc/build_helpers. refs #1557
[tahoe-lafs/tahoe-lafs.git] / misc / simulators / ringsim.py
1 #! /usr/bin/python
2
3 # used to discuss ticket #302: "stop permuting peerlist?"
4
5 # import time
6 import math
7 from hashlib import md5  # sha1, sha256
8 myhash = md5
9 # md5: 1520 "uploads" per second
10 # sha1: 1350 ups
11 # sha256: 930 ups
12 from itertools import count
13 from twisted.python import usage
14
15 def abbreviate_space(s, SI=True):
16     if s is None:
17         return "unknown"
18     if SI:
19         U = 1000.0
20         isuffix = "B"
21     else:
22         U = 1024.0
23         isuffix = "iB"
24     def r(count, suffix):
25         return "%.2f %s%s" % (count, suffix, isuffix)
26
27     if s < 1024: # 1000-1023 get emitted as bytes, even in SI mode
28         return "%d B" % s
29     if s < U*U:
30         return r(s/U, "k")
31     if s < U*U*U:
32         return r(s/(U*U), "M")
33     if s < U*U*U*U:
34         return r(s/(U*U*U), "G")
35     if s < U*U*U*U*U:
36         return r(s/(U*U*U*U), "T")
37     return r(s/(U*U*U*U*U), "P")
38
39 def make_up_a_file_size(seed):
40     h = int(myhash(seed).hexdigest(),16)
41     max=2**31
42     if 1: # exponential distribution
43         e = 8 + (h % (31-8))
44         return 2 ** e
45     # uniform distribution
46     return h % max # avg 1GB
47
48 sizes = [make_up_a_file_size(str(i)) for i in range(10000)]
49 avg_filesize = sum(sizes)/len(sizes)
50 print "average file size:", abbreviate_space(avg_filesize)
51
52 SERVER_CAPACITY = 10**12
53
54 class Server:
55     def __init__(self, nodeid, capacity):
56         self.nodeid = nodeid
57         self.used = 0
58         self.capacity = capacity
59         self.numshares = 0
60         self.full_at_tick = None
61
62     def upload(self, sharesize):
63         if self.used + sharesize < self.capacity:
64             self.used += sharesize
65             self.numshares += 1
66             return True
67         return False
68
69     def __repr__(self):
70         if self.full_at_tick is not None:
71             return "<%s %s full at %d>" % (self.__class__.__name__, self.nodeid, self.full_at_tick)
72         else:
73             return "<%s %s>" % (self.__class__.__name__, self.nodeid)
74
75 class Ring:
76     SHOW_MINMAX = False
77     def __init__(self, numservers, seed, permute):
78         self.servers = []
79         for i in range(numservers):
80             nodeid = myhash(str(seed)+str(i)).hexdigest()
81             capacity = SERVER_CAPACITY
82             s = Server(nodeid, capacity)
83             self.servers.append(s)
84         self.servers.sort(key=lambda s: s.nodeid)
85         self.permute = permute
86         #self.list_servers()
87
88     def list_servers(self):
89         for i in range(len(self.servers)):
90             s = self.servers[i]
91             next_s = self.servers[(i+1)%len(self.servers)]
92             diff = "%032x" % (int(next_s.nodeid,16) - int(s.nodeid,16))
93             s.next_diff = diff
94             prev_s = self.servers[(i-1)%len(self.servers)]
95             diff = "%032x" % (int(s.nodeid,16) - int(prev_s.nodeid,16))
96             s.prev_diff = diff
97             print s, s.prev_diff
98
99         print "sorted by delta"
100         for s in sorted(self.servers, key=lambda s:s.prev_diff):
101             print s, s.prev_diff
102
103     def servers_for_si(self, si):
104         if self.permute:
105             def sortkey(s):
106                 return myhash(s.nodeid+si).digest()
107             return sorted(self.servers, key=sortkey)
108         for i in range(len(self.servers)):
109             if self.servers[i].nodeid >= si:
110                 return self.servers[i:] + self.servers[:i]
111         return list(self.servers)
112
113     def show_servers(self, picked):
114         bits = []
115         for s in self.servers:
116             if s in picked:
117                 bits.append("1")
118             else:
119                 bits.append("0")
120         #d = [s in picked and "1" or "0" for s in self.servers]
121         return "".join(bits)
122
123     def dump_usage(self, numfiles, avg_space_per_file):
124         print "uploaded", numfiles
125         # avg_space_per_file measures expected grid-wide ciphertext per file
126         used = list(reversed(sorted([s.used for s in self.servers])))
127         # used is actual per-server ciphertext
128         usedpf = [1.0*u/numfiles for u in used]
129         # usedpf is actual per-server-per-file ciphertext
130         #print "min/max usage: %s/%s" % (abbreviate_space(used[-1]),
131         #                                abbreviate_space(used[0]))
132         avg_usage_per_file = avg_space_per_file/len(self.servers)
133         # avg_usage_per_file is expected per-server-per-file ciphertext
134         spreadpf = usedpf[0] - usedpf[-1]
135         average_usagepf = sum(usedpf) / len(usedpf)
136         variance = sum([(u-average_usagepf)**2 for u in usedpf])/(len(usedpf)-1)
137         std_deviation = math.sqrt(variance)
138         sd_of_total = std_deviation / avg_usage_per_file
139
140         print "min/max/(exp) usage-pf-ps %s/%s/(%s):" % (
141             abbreviate_space(usedpf[-1]),
142             abbreviate_space(usedpf[0]),
143             abbreviate_space(avg_usage_per_file) ),
144         print "spread-pf: %s (%.2f%%)" % (
145             abbreviate_space(spreadpf), 100.0*spreadpf/avg_usage_per_file),
146         #print "average_usage:", abbreviate_space(average_usagepf)
147         print "stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation),
148                                        100.0*sd_of_total)
149         if self.SHOW_MINMAX:
150             s2 = sorted(self.servers, key=lambda s: s.used)
151             print "least:", s2[0].nodeid
152             print "most:", s2[-1].nodeid
153
154
155 class Options(usage.Options):
156     optParameters = [
157         ("k", "k", 3, "required shares", int),
158         ("N", "N", 10, "total shares", int),
159         ("servers", None, 100, "number of servers", int),
160         ("seed", None, None, "seed to use for creating ring"),
161         ("fileseed", None, "blah", "seed to use for creating files"),
162         ("permute", "p", 1, "1 to permute, 0 to use flat ring", int),
163         ]
164     def postOptions(self):
165         assert self["seed"]
166
167
168 def do_run(ring, opts):
169     avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
170     fileseed = opts["fileseed"]
171     all_servers_have_room = True
172     no_files_have_wrapped = True
173     for filenum in count(0):
174         #used = list(reversed(sorted([s.used for s in ring.servers])))
175         #used = [s.used for s in ring.servers]
176         #print used
177         si = myhash(fileseed+str(filenum)).hexdigest()
178         filesize = make_up_a_file_size(si)
179         sharesize = filesize / opts["k"]
180         if filenum%4000==0 and filenum > 1:
181             ring.dump_usage(filenum, avg_space_per_file)
182         servers = ring.servers_for_si(si)
183         #print ring.show_servers(servers[:opts["N"]])
184         remaining_shares = opts["N"]
185         index = 0
186         server_was_full = False
187         file_was_wrapped = False
188         remaining_servers = set(servers)
189         while remaining_shares:
190             if index >= len(servers):
191                 index = 0
192                 file_was_wrapped = True
193             s = servers[index]
194             accepted = s.upload(sharesize)
195             if not accepted:
196                 server_was_full = True
197                 remaining_servers.discard(s)
198                 if not remaining_servers:
199                     print "-- GRID IS FULL"
200                     ring.dump_usage(filenum, avg_space_per_file)
201                     return filenum
202                 index += 1
203                 continue
204             remaining_shares -= 1
205             index += 1
206         # file is done being uploaded
207
208         if server_was_full and all_servers_have_room:
209             all_servers_have_room = False
210             print "-- FIRST SERVER FULL"
211             ring.dump_usage(filenum, avg_space_per_file)
212         if file_was_wrapped and no_files_have_wrapped:
213             no_files_have_wrapped = False
214             print "-- FIRST FILE WRAPPED"
215             ring.dump_usage(filenum, avg_space_per_file)
216
217
218 def do_ring(opts):
219     total_capacity = opts["servers"]*SERVER_CAPACITY
220     avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
221     avg_files = total_capacity / avg_space_per_file
222     print "expected number of uploads:", avg_files
223     if opts["permute"]:
224         print " PERMUTED"
225     else:
226         print " LINEAR"
227     seed = opts["seed"]
228
229     ring = Ring(opts["servers"], seed, opts["permute"])
230     do_run(ring, opts)
231
232 def run(opts):
233     do_ring(opts)
234
235 if __name__ == "__main__":
236     opts = Options()
237     opts.parseOptions()
238     run(opts)