3 # used to discuss ticket #302: "stop permuting peerlist?"
7 from hashlib import md5 # sha1, sha256
9 # md5: 1520 "uploads" per second
12 from itertools import count
13 from twisted.python import usage
15 def abbreviate_space(s, SI=True):
25 return "%.2f %s%s" % (count, suffix, isuffix)
27 if s < 1024: # 1000-1023 get emitted as bytes, even in SI mode
32 return r(s/(U*U), "M")
34 return r(s/(U*U*U), "G")
36 return r(s/(U*U*U*U), "T")
37 return r(s/(U*U*U*U*U), "P")
39 def make_up_a_file_size(seed):
40 h = int(myhash(seed).hexdigest(),16)
42 if 1: # exponential distribution
45 # uniform distribution
46 return h % max # avg 1GB
48 sizes = [make_up_a_file_size(str(i)) for i in range(10000)]
49 avg_filesize = sum(sizes)/len(sizes)
50 print "average file size:", abbreviate_space(avg_filesize)
52 SERVER_CAPACITY = 10**12
55 def __init__(self, nodeid, capacity):
58 self.capacity = capacity
60 self.full_at_tick = None
62 def upload(self, sharesize):
63 if self.used + sharesize < self.capacity:
64 self.used += sharesize
70 if self.full_at_tick is not None:
71 return "<%s %s full at %d>" % (self.__class__.__name__, self.nodeid, self.full_at_tick)
73 return "<%s %s>" % (self.__class__.__name__, self.nodeid)
77 def __init__(self, numservers, seed, permute):
79 for i in range(numservers):
80 nodeid = myhash(str(seed)+str(i)).hexdigest()
81 capacity = SERVER_CAPACITY
82 s = Server(nodeid, capacity)
83 self.servers.append(s)
84 self.servers.sort(key=lambda s: s.nodeid)
85 self.permute = permute
88 def list_servers(self):
89 for i in range(len(self.servers)):
91 next_s = self.servers[(i+1)%len(self.servers)]
92 diff = "%032x" % (int(next_s.nodeid,16) - int(s.nodeid,16))
94 prev_s = self.servers[(i-1)%len(self.servers)]
95 diff = "%032x" % (int(s.nodeid,16) - int(prev_s.nodeid,16))
99 print "sorted by delta"
100 for s in sorted(self.servers, key=lambda s:s.prev_diff):
103 def servers_for_si(self, si):
106 return myhash(s.nodeid+si).digest()
107 return sorted(self.servers, key=sortkey)
108 for i in range(len(self.servers)):
109 if self.servers[i].nodeid >= si:
110 return self.servers[i:] + self.servers[:i]
111 return list(self.servers)
113 def show_servers(self, picked):
115 for s in self.servers:
120 #d = [s in picked and "1" or "0" for s in self.servers]
123 def dump_usage(self, numfiles, avg_space_per_file):
124 print "uploaded", numfiles
125 # avg_space_per_file measures expected grid-wide ciphertext per file
126 used = list(reversed(sorted([s.used for s in self.servers])))
127 # used is actual per-server ciphertext
128 usedpf = [1.0*u/numfiles for u in used]
129 # usedpf is actual per-server-per-file ciphertext
130 #print "min/max usage: %s/%s" % (abbreviate_space(used[-1]),
131 # abbreviate_space(used[0]))
132 avg_usage_per_file = avg_space_per_file/len(self.servers)
133 # avg_usage_per_file is expected per-server-per-file ciphertext
134 spreadpf = usedpf[0] - usedpf[-1]
135 average_usagepf = sum(usedpf) / len(usedpf)
136 variance = sum([(u-average_usagepf)**2 for u in usedpf])/(len(usedpf)-1)
137 std_deviation = math.sqrt(variance)
138 sd_of_total = std_deviation / avg_usage_per_file
140 print "min/max/(exp) usage-pf-ps %s/%s/(%s):" % (
141 abbreviate_space(usedpf[-1]),
142 abbreviate_space(usedpf[0]),
143 abbreviate_space(avg_usage_per_file) ),
144 print "spread-pf: %s (%.2f%%)" % (
145 abbreviate_space(spreadpf), 100.0*spreadpf/avg_usage_per_file),
146 #print "average_usage:", abbreviate_space(average_usagepf)
147 print "stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation),
150 s2 = sorted(self.servers, key=lambda s: s.used)
151 print "least:", s2[0].nodeid
152 print "most:", s2[-1].nodeid
155 class Options(usage.Options):
157 ("k", "k", 3, "required shares", int),
158 ("N", "N", 10, "total shares", int),
159 ("servers", None, 100, "number of servers", int),
160 ("seed", None, None, "seed to use for creating ring"),
161 ("fileseed", None, "blah", "seed to use for creating files"),
162 ("permute", "p", 1, "1 to permute, 0 to use flat ring", int),
164 def postOptions(self):
168 def do_run(ring, opts):
169 avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
170 fileseed = opts["fileseed"]
171 all_servers_have_room = True
172 no_files_have_wrapped = True
173 for filenum in count(0):
174 #used = list(reversed(sorted([s.used for s in ring.servers])))
175 #used = [s.used for s in ring.servers]
177 si = myhash(fileseed+str(filenum)).hexdigest()
178 filesize = make_up_a_file_size(si)
179 sharesize = filesize / opts["k"]
180 if filenum%4000==0 and filenum > 1:
181 ring.dump_usage(filenum, avg_space_per_file)
182 servers = ring.servers_for_si(si)
183 #print ring.show_servers(servers[:opts["N"]])
184 remaining_shares = opts["N"]
186 server_was_full = False
187 file_was_wrapped = False
188 remaining_servers = set(servers)
189 while remaining_shares:
190 if index >= len(servers):
192 file_was_wrapped = True
194 accepted = s.upload(sharesize)
196 server_was_full = True
197 remaining_servers.discard(s)
198 if not remaining_servers:
199 print "-- GRID IS FULL"
200 ring.dump_usage(filenum, avg_space_per_file)
204 remaining_shares -= 1
206 # file is done being uploaded
208 if server_was_full and all_servers_have_room:
209 all_servers_have_room = False
210 print "-- FIRST SERVER FULL"
211 ring.dump_usage(filenum, avg_space_per_file)
212 if file_was_wrapped and no_files_have_wrapped:
213 no_files_have_wrapped = False
214 print "-- FIRST FILE WRAPPED"
215 ring.dump_usage(filenum, avg_space_per_file)
219 total_capacity = opts["servers"]*SERVER_CAPACITY
220 avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
221 avg_files = total_capacity / avg_space_per_file
222 print "expected number of uploads:", avg_files
229 ring = Ring(opts["servers"], seed, opts["permute"])
235 if __name__ == "__main__":