# used to discuss ticket #302: "stop permuting peerlist?"
import time
-import random, math
+import math
from hashlib import sha1, md5, sha256
-sha1 = md5
+myhash = md5
# md5: 1520 "uploads" per second
# sha1: 1350 ups
# sha256: 930 ups
return r(s/(U*U*U*U), "T")
return r(s/(U*U*U*U*U), "P")
-def make_up_a_file_size(max=2**31):
- #return (2 ** random.randrange(8, 31)) # avg=??
- return random.randrange(max) # avg 1GB
-sizes = [make_up_a_file_size() for i in range(10000)]
+def make_up_a_file_size(seed):
+ h = int(myhash(seed).hexdigest(),16)
+ max=2**31
+ if 1: # exponential distribution
+ e = 8 + (h % (31-8))
+ return 2 ** e
+ # uniform distribution (unreachable while the "if 1:" branch above is enabled; change it to "if 0:" to use this instead)
+ return h % max # avg 1GB
+
+sizes = [make_up_a_file_size(str(i)) for i in range(10000)]
avg_filesize = sum(sizes)/len(sizes)
print "average file size:", abbreviate_space(avg_filesize)
-SERVER_CAPACITY = 10**12 * 1000
+SERVER_CAPACITY = 10**12
class Server:
def __init__(self, nodeid, capacity):
return "<%s %s>" % (self.__class__.__name__, self.nodeid)
class Ring:
+ SHOW_MINMAX = False
def __init__(self, numservers, seed, permute):
self.servers = []
for i in range(numservers):
- nodeid = sha1(str(seed)+str(i)).hexdigest()
+ nodeid = myhash(str(seed)+str(i)).hexdigest()
capacity = SERVER_CAPACITY
s = Server(nodeid, capacity)
self.servers.append(s)
self.servers.sort(key=lambda s: s.nodeid)
+ self.permute = permute
+ #self.list_servers()
+
+ def list_servers(self):
for i in range(len(self.servers)):
s = self.servers[i]
next_s = self.servers[(i+1)%len(self.servers)]
print "sorted by delta"
for s in sorted(self.servers, key=lambda s:s.prev_diff):
print s, s.prev_diff
- self.permute = permute
def servers_for_si(self, si):
if self.permute:
def sortkey(s):
- return sha1(s.nodeid+si).digest()
+ return myhash(s.nodeid+si).digest()
return sorted(self.servers, key=sortkey)
for i in range(len(self.servers)):
if self.servers[i].nodeid >= si:
#print "average_usage:", abbreviate_space(average_usagepf)
print "stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation),
100.0*sd_of_total)
- s2 = sorted(self.servers, key=lambda s: s.used)
- print "least:", s2[0].nodeid
- print "most:", s2[-1].nodeid
+ if self.SHOW_MINMAX:
+ s2 = sorted(self.servers, key=lambda s: s.used)
+ print "least:", s2[0].nodeid
+ print "most:", s2[-1].nodeid
class Options(usage.Options):
("N", "N", 10, "total shares", int),
("servers", None, 100, "number of servers", int),
("seed", None, None, "seed to use for creating ring"),
+ ("fileseed", None, "blah", "seed to use for creating files"),
("permute", "p", 1, "1 to permute, 0 to use flat ring", int),
]
def postOptions(self):
def do_run(ring, opts):
avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
+ fileseed = opts["fileseed"]
start = time.time()
+ all_servers_have_room = True
+ no_files_have_wrapped = True
for filenum in count(0):
#used = list(reversed(sorted([s.used for s in ring.servers])))
#used = [s.used for s in ring.servers]
#print used
- filesize = make_up_a_file_size()
+ si = myhash(fileseed+str(filenum)).hexdigest()
+ filesize = make_up_a_file_size(si)
sharesize = filesize / opts["k"]
- si = sha1(str(random.randrange(2**40))).hexdigest()
if filenum%4000==0 and filenum > 1:
ring.dump_usage(filenum, avg_space_per_file)
servers = ring.servers_for_si(si)
#print ring.show_servers(servers[:opts["N"]])
remaining_shares = opts["N"]
index = 0
+ server_was_full = False
+ file_was_wrapped = False
+ remaining_servers = set(servers)
while remaining_shares:
+ if index >= len(servers):
+ index = 0
+ file_was_wrapped = True
s = servers[index]
accepted = s.upload(sharesize)
if not accepted:
- return filenum # number of files successfully uploaded
+ server_was_full = True
+ remaining_servers.discard(s)
+ if not remaining_servers:
+ print "-- GRID IS FULL"
+ ring.dump_usage(filenum, avg_space_per_file)
+ return filenum
+ index += 1
+ continue
remaining_shares -= 1
index += 1
+ # file is done being uploaded
+
+ if server_was_full and all_servers_have_room:
+ all_servers_have_room = False
+ print "-- FIRST SERVER FULL"
+ ring.dump_usage(filenum, avg_space_per_file)
+ if file_was_wrapped and no_files_have_wrapped:
+ no_files_have_wrapped = False
+ print "-- FIRST FILE WRAPPED"
+ ring.dump_usage(filenum, avg_space_per_file)
def do_ring(opts):
- #seed = str(random.randrange(2**31))
total_capacity = opts["servers"]*SERVER_CAPACITY
avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
avg_files = total_capacity / avg_space_per_file