From: Brian Warner
Date: Sun, 27 Dec 2009 02:48:32 +0000 (-0500)
Subject: misc/ringsim.py: make it deterministic, more detail about grid-is-full behavior
X-Git-Tag: trac-4200~64
X-Git-Url: https://git.rkrishnan.org/pf/content/en/seg/module-simplejson.scanner.html?a=commitdiff_plain;h=800fc176ec013430585393ce9c14dbb43c55e892;p=tahoe-lafs%2Ftahoe-lafs.git

misc/ringsim.py: make it deterministic, more detail about grid-is-full behavior
---

diff --git a/misc/ringsim.py b/misc/ringsim.py
index b25c5629..46480c44 100755
--- a/misc/ringsim.py
+++ b/misc/ringsim.py
@@ -3,9 +3,9 @@
 # used to discuss ticket #302: "stop permuting peerlist?"

 import time
-import random, math
+import math
 from hashlib import sha1, md5, sha256
-sha1 = md5
+myhash = md5
 # md5: 1520 "uploads" per second
 # sha1: 1350 ups
 # sha256: 930 ups
@@ -36,14 +36,20 @@ def abbreviate_space(s, SI=True):
         return r(s/(U*U*U*U), "T")
     return r(s/(U*U*U*U*U), "P")

-def make_up_a_file_size(max=2**31):
-    #return (2 ** random.randrange(8, 31)) # avg=??
-    return random.randrange(max) # avg 1GB
-sizes = [make_up_a_file_size() for i in range(10000)]
+def make_up_a_file_size(seed):
+    h = int(myhash(seed).hexdigest(),16)
+    max=2**31
+    if 1: # exponential distribution
+        e = 8 + (h % (31-8))
+        return 2 ** e
+    # uniform distribution
+    return h % max # avg 1GB
+
+sizes = [make_up_a_file_size(str(i)) for i in range(10000)]
 avg_filesize = sum(sizes)/len(sizes)
 print "average file size:", abbreviate_space(avg_filesize)

-SERVER_CAPACITY = 10**12 * 1000
+SERVER_CAPACITY = 10**12

 class Server:
     def __init__(self, nodeid, capacity):
@@ -67,14 +73,19 @@ class Server:
         return "<%s %s>" % (self.__class__.__name__, self.nodeid)

 class Ring:
+    SHOW_MINMAX = False
     def __init__(self, numservers, seed, permute):
         self.servers = []
         for i in range(numservers):
-            nodeid = sha1(str(seed)+str(i)).hexdigest()
+            nodeid = myhash(str(seed)+str(i)).hexdigest()
             capacity = SERVER_CAPACITY
             s = Server(nodeid, capacity)
             self.servers.append(s)
         self.servers.sort(key=lambda s: s.nodeid)
+        self.permute = permute
+        #self.list_servers()
+
+    def list_servers(self):
         for i in range(len(self.servers)):
             s = self.servers[i]
             next_s = self.servers[(i+1)%len(self.servers)]
@@ -88,12 +99,11 @@ class Ring:
         print "sorted by delta"
         for s in sorted(self.servers, key=lambda s:s.prev_diff):
             print s, s.prev_diff
-        self.permute = permute

     def servers_for_si(self, si):
         if self.permute:
             def sortkey(s):
-                return sha1(s.nodeid+si).digest()
+                return myhash(s.nodeid+si).digest()
             return sorted(self.servers, key=sortkey)
         for i in range(len(self.servers)):
             if self.servers[i].nodeid >= si:
@@ -136,9 +146,10 @@ class Ring:
         #print "average_usage:", abbreviate_space(average_usagepf)
         print "stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation),
                                        100.0*sd_of_total)
-        s2 = sorted(self.servers, key=lambda s: s.used)
-        print "least:", s2[0].nodeid
-        print "most:", s2[-1].nodeid
+        if self.SHOW_MINMAX:
+            s2 = sorted(self.servers, key=lambda s: s.used)
+            print "least:", s2[0].nodeid
+            print "most:", s2[-1].nodeid


 class Options(usage.Options):
@@ -147,6 +158,7 @@ class Options(usage.Options):
         ("N", "N", 10, "total shares", int),
         ("servers", None, 100, "number of servers", int),
         ("seed", None, None, "seed to use for creating ring"),
+        ("fileseed", None, "blah", "seed to use for creating files"),
         ("permute", "p", 1, "1 to permute, 0 to use flat ring", int),
         ]
     def postOptions(self):
@@ -155,31 +167,56 @@ class Options(usage.Options):

 def do_run(ring, opts):
     avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
+    fileseed = opts["fileseed"]
     start = time.time()
+    all_servers_have_room = True
+    no_files_have_wrapped = True
     for filenum in count(0):
         #used = list(reversed(sorted([s.used for s in ring.servers])))
         #used = [s.used for s in ring.servers]
         #print used
-        filesize = make_up_a_file_size()
+        si = myhash(fileseed+str(filenum)).hexdigest()
+        filesize = make_up_a_file_size(si)
         sharesize = filesize / opts["k"]
-        si = sha1(str(random.randrange(2**40))).hexdigest()
         if filenum%4000==0 and filenum > 1:
             ring.dump_usage(filenum, avg_space_per_file)
         servers = ring.servers_for_si(si)
         #print ring.show_servers(servers[:opts["N"]])
         remaining_shares = opts["N"]
         index = 0
+        server_was_full = False
+        file_was_wrapped = False
+        remaining_servers = set(servers)
         while remaining_shares:
+            if index >= len(servers):
+                index = 0
+                file_was_wrapped = True
             s = servers[index]
             accepted = s.upload(sharesize)
             if not accepted:
-                return filenum # number of files successfully uploaded
+                server_was_full = True
+                remaining_servers.discard(s)
+                if not remaining_servers:
+                    print "-- GRID IS FULL"
+                    ring.dump_usage(filenum, avg_space_per_file)
+                    return filenum
+                index += 1
+                continue
             remaining_shares -= 1
             index += 1
+        # file is done being uploaded
+
+        if server_was_full and all_servers_have_room:
+            all_servers_have_room = False
+            print "-- FIRST SERVER FULL"
+            ring.dump_usage(filenum, avg_space_per_file)
+        if file_was_wrapped and no_files_have_wrapped:
+            no_files_have_wrapped = False
+            print "-- FIRST FILE WRAPPED"
+            ring.dump_usage(filenum, avg_space_per_file)


 def do_ring(opts):
-    #seed = str(random.randrange(2**31))
     total_capacity = opts["servers"]*SERVER_CAPACITY
     avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
     avg_files = total_capacity / avg_space_per_file