misc/ringsim.py: make it deterministic, more detail about grid-is-full behavior
author Brian Warner <warner@lothar.com>
Sun, 27 Dec 2009 02:48:32 +0000 (21:48 -0500)
committer Brian Warner <warner@lothar.com>
Sun, 27 Dec 2009 02:48:32 +0000 (21:48 -0500)
misc/ringsim.py

index b25c5629ab178ab260e4980f1744c447c490556e..46480c44313aa6a1debb29a82b828ae186257715 100755 (executable)
@@ -3,9 +3,9 @@
 # used to discuss ticket #302: "stop permuting peerlist?"
 
 import time
-import random, math
+import math
 from hashlib import sha1, md5, sha256
-sha1 = md5
+myhash = md5
 # md5: 1520 "uploads" per second
 # sha1: 1350 ups
 # sha256: 930 ups
@@ -36,14 +36,20 @@ def abbreviate_space(s, SI=True):
         return r(s/(U*U*U*U), "T")
     return r(s/(U*U*U*U*U), "P")
 
-def make_up_a_file_size(max=2**31):
-    #return (2 ** random.randrange(8, 31)) # avg=??
-    return random.randrange(max) # avg 1GB
-sizes = [make_up_a_file_size() for i in range(10000)]
+def make_up_a_file_size(seed):
+    h = int(myhash(seed).hexdigest(),16)
+    max=2**31
+    if 1: # exponential distribution
+        e = 8 + (h % (31-8))
+        return 2 ** e
+    # uniform distribution
+    return h % max # avg 1GB
+
+sizes = [make_up_a_file_size(str(i)) for i in range(10000)]
 avg_filesize = sum(sizes)/len(sizes)
 print "average file size:", abbreviate_space(avg_filesize)
 
-SERVER_CAPACITY = 10**12 * 1000
+SERVER_CAPACITY = 10**12
 
 class Server:
     def __init__(self, nodeid, capacity):
@@ -67,14 +73,19 @@ class Server:
             return "<%s %s>" % (self.__class__.__name__, self.nodeid)
 
 class Ring:
+    SHOW_MINMAX = False
     def __init__(self, numservers, seed, permute):
         self.servers = []
         for i in range(numservers):
-            nodeid = sha1(str(seed)+str(i)).hexdigest()
+            nodeid = myhash(str(seed)+str(i)).hexdigest()
             capacity = SERVER_CAPACITY
             s = Server(nodeid, capacity)
             self.servers.append(s)
         self.servers.sort(key=lambda s: s.nodeid)
+        self.permute = permute
+        #self.list_servers()
+
+    def list_servers(self):
         for i in range(len(self.servers)):
             s = self.servers[i]
             next_s = self.servers[(i+1)%len(self.servers)]
@@ -88,12 +99,11 @@ class Ring:
         print "sorted by delta"
         for s in sorted(self.servers, key=lambda s:s.prev_diff):
             print s, s.prev_diff
-        self.permute = permute
 
     def servers_for_si(self, si):
         if self.permute:
             def sortkey(s):
-                return sha1(s.nodeid+si).digest()
+                return myhash(s.nodeid+si).digest()
             return sorted(self.servers, key=sortkey)
         for i in range(len(self.servers)):
             if self.servers[i].nodeid >= si:
@@ -136,9 +146,10 @@ class Ring:
         #print "average_usage:", abbreviate_space(average_usagepf)
         print "stddev: %s (%.2f%%)" % (abbreviate_space(std_deviation),
                                        100.0*sd_of_total)
-        s2 = sorted(self.servers, key=lambda s: s.used)
-        print "least:", s2[0].nodeid
-        print "most:", s2[-1].nodeid
+        if self.SHOW_MINMAX:
+            s2 = sorted(self.servers, key=lambda s: s.used)
+            print "least:", s2[0].nodeid
+            print "most:", s2[-1].nodeid
 
 
 class Options(usage.Options):
@@ -147,6 +158,7 @@ class Options(usage.Options):
         ("N", "N", 10, "total shares", int),
         ("servers", None, 100, "number of servers", int),
         ("seed", None, None, "seed to use for creating ring"),
+        ("fileseed", None, "blah", "seed to use for creating files"),
         ("permute", "p", 1, "1 to permute, 0 to use flat ring", int),
         ]
     def postOptions(self):
@@ -155,31 +167,56 @@ class Options(usage.Options):
 
 def do_run(ring, opts):
     avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
+    fileseed = opts["fileseed"]
     start = time.time()
+    all_servers_have_room = True
+    no_files_have_wrapped = True
     for filenum in count(0):
         #used = list(reversed(sorted([s.used for s in ring.servers])))
         #used = [s.used for s in ring.servers]
         #print used
-        filesize = make_up_a_file_size()
+        si = myhash(fileseed+str(filenum)).hexdigest()
+        filesize = make_up_a_file_size(si)
         sharesize = filesize / opts["k"]
-        si = sha1(str(random.randrange(2**40))).hexdigest()
         if filenum%4000==0 and filenum > 1:
             ring.dump_usage(filenum, avg_space_per_file)
         servers = ring.servers_for_si(si)
         #print ring.show_servers(servers[:opts["N"]])
         remaining_shares = opts["N"]
         index = 0
+        server_was_full = False
+        file_was_wrapped = False
+        remaining_servers = set(servers)
         while remaining_shares:
+            if index >= len(servers):
+                index = 0
+                file_was_wrapped = True
             s = servers[index]
             accepted = s.upload(sharesize)
             if not accepted:
-                return filenum # number of files successfully uploaded
+                server_was_full = True
+                remaining_servers.discard(s)
+                if not remaining_servers:
+                    print "-- GRID IS FULL"
+                    ring.dump_usage(filenum, avg_space_per_file)
+                    return filenum
+                index += 1
+                continue
             remaining_shares -= 1
             index += 1
+        # file is done being uploaded
+
+        if server_was_full and all_servers_have_room:
+            all_servers_have_room = False
+            print "-- FIRST SERVER FULL"
+            ring.dump_usage(filenum, avg_space_per_file)
+        if file_was_wrapped and no_files_have_wrapped:
+            no_files_have_wrapped = False
+            print "-- FIRST FILE WRAPPED"
+            ring.dump_usage(filenum, avg_space_per_file)
 
 
 def do_ring(opts):
-    #seed = str(random.randrange(2**31))
     total_capacity = opts["servers"]*SERVER_CAPACITY
     avg_space_per_file = avg_filesize * opts["N"] / opts["k"]
     avg_files = total_capacity / avg_space_per_file