]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - misc/simulators/simulator.py
Merge pull request #236 from daira/2725.timezone-test.0
[tahoe-lafs/tahoe-lafs.git] / misc / simulators / simulator.py
1 #! /usr/bin/env python
2
import hashlib
import os
import random
import sha as shamodule  # legacy pre-hashlib SHA-1 module (Python 2 only)

from pkg_resources import require
require('PyRRD')
from pyrrd import graph
from pyrrd.rrd import DataSource, RRD, RRA
10
11
def sha(s):
    """Return the 20-byte SHA-1 digest of s.

    Uses hashlib.sha1 (available since Python 2.5) instead of the
    deprecated 'sha' module, which was removed in Python 3.  The digest
    is byte-identical to shamodule.new(s).digest().
    """
    return hashlib.sha1(s).digest()
14
def randomid():
    """Return a fresh, uniformly random 20-byte node/file identifier."""
    ident = os.urandom(20)
    return ident
17
class Node:
    """One storage server in the simulated grid.

    A node owns a randomly chosen storage capacity and holds shares for
    files.  It also plays the client role: publishing, retrieving and
    deleting files on behalf of the simulated user.
    """
    def __init__(self, nid, introducer, simulator):
        self.nid = nid                # 20-byte id used for ring permutation
        self.introducer = introducer  # Introducer tracking live files
        self.simulator = simulator    # Simulator, for loss statistics
        self.shares = {}              # fileid -> sharesize stored here
        self.capacity = random.randrange(1000)  # storage budget (bytes)
        self.utilization = 0          # bytes of shares currently held
        self.files = []               # (fileid, numshares) we published

    def permute_peers(self, fileid):
        """Return all nodes as (sha(fileid+nid), node), lowest digest first
        (Tahoe-style permuted ring)."""
        permuted = [(sha(fileid+n.nid),n)
                    for n in self.introducer.get_all_nodes()]
        permuted.sort()
        return permuted

    def publish_file(self, fileid, size, numshares=100):
        """Place numshares shares (4x total expansion) along the permuted ring.

        Returns (True, peers_tried) on success.  On failure every share
        that was already handed out is rolled back and False is returned.
        """
        sharesize = 4 * size // numshares  # 4x expansion, split evenly
        permuted = self.permute_peers(fileid)
        last_givento = None
        tried = 0
        givento = []
        remaining = numshares
        while remaining and permuted:
            pid,node = permuted.pop(0)
            tried += 1
            last_givento = pid
            if node.accept_share(fileid, sharesize):
                givento.append((pid,node))
                remaining -= 1
        if remaining:
            # couldn't place every share: undo the ones we did hand out
            for pid,node in givento:
                node.delete_share(fileid)
            return False
        # BUG FIX: record the original share count.  The old code appended
        # the loop counter, which had already been decremented to zero, so
        # retrieve_file() always computed needed == 0.
        self.files.append((fileid, numshares))
        self.introducer.please_preserve(fileid, size, tried, last_givento)
        return (True, tried)

    def accept_share(self, fileid, sharesize):
        """Store a share if any room remains (or if decide() evicts for it).

        Note: a node that is merely below capacity accepts the share even
        if it pushes utilization past capacity — intentional simulator
        behavior, not a bookkeeping error.
        """
        if self.utilization < self.capacity:
            # we have room! yay!
            self.shares[fileid] = sharesize
            self.utilization += sharesize
            return True
        if self.decide(sharesize):
            # we don't, but we'll make room
            self.make_space(sharesize)
            self.shares[fileid] = sharesize
            self.utilization += sharesize
            return True
        else:
            # we're full, try elsewhere
            return False

    def decide(self, sharesize):
        """Policy knob: should a full node evict old shares for a new one?

        Currently always False; the random 50% branch below is a
        deliberately disabled experiment.
        """
        if sharesize > self.capacity:
            return False
        return False
        return random.random() > 0.5  # unreachable: disabled experiment

    def make_space(self, sharesize):
        """Evict random victim shares until sharesize bytes fit."""
        assert sharesize <= self.capacity
        while self.capacity - self.utilization < sharesize:
            victim = random.choice(list(self.shares))
            self.simulator.lost_data(self.shares[victim])
            self.delete_share(victim)

    def has_share(self, fileid):
        """Return True if this node currently holds a share of fileid.

        BUG FIX: retrieve_file() called this method but it did not exist,
        so any retrieval attempt raised AttributeError.
        """
        return fileid in self.shares

    def delete_share(self, fileid):
        """Drop our share of fileid; return True if we actually had one."""
        if fileid in self.shares:
            self.utilization -= self.shares[fileid]
            del self.shares[fileid]
            return True
        return False

    def retrieve_file(self):
        """Attempt to fetch a random file we published.

        Returns True/False for success/failure, or None if we have
        published nothing.
        """
        if not self.files:
            return
        fileid,numshares = random.choice(self.files)
        needed = numshares // 4  # 4x expansion: a quarter of the shares suffice
        peers = []
        for pid,node in self.permute_peers(fileid):
            if random.random() > self.simulator.P_NODEAVAIL:
                continue # node isn't available right now
            if node.has_share(fileid):
                peers.append(node)
            if len(peers) >= needed:
                return True
        return False

    def delete_file(self):
        """Delete a random one of our published files; False if we have none."""
        if not self.files:
            return False
        which = random.choice(self.files)
        self.files.remove(which)
        fileid,numshares = which
        self.introducer.delete(fileid)
        return True
115
class Introducer:
    """Central bookkeeper for the simulated grid.

    Tracks every live file — its size, how many peers were tried when it
    was placed, and the ring position of the last share handed out — and
    reports total utilization changes back to the Simulator.
    """
    def __init__(self, simulator):
        self.living_files = {}  # fileid -> (size, tried, last_givento)
        self.utilization = 0    # total size of all active files
        self.simulator = simulator
        self.simulator.stamp_utilization(self.utilization)

    def get_all_nodes(self):
        """Return every node in the grid (list assigned by the Simulator)."""
        return self.all_nodes

    def please_preserve(self, fileid, size, tried, last_givento):
        """Register a freshly published file and bump utilization."""
        self.living_files[fileid] = (size, tried, last_givento)
        self.utilization += size
        self.simulator.stamp_utilization(self.utilization)

    def please_delete(self, fileid):
        """Public alias for delete()."""
        self.delete(fileid)

    def permute_peers(self, fileid):
        """Rank all nodes by sha(fileid + nid), lowest digest first."""
        return sorted([(sha(fileid + node.nid), node)
                       for node in self.get_all_nodes()])

    def delete(self, fileid):
        """Remove the shares of fileid from the ring and forget the file."""
        size, remaining, last_givento = self.living_files[fileid]
        ring = self.permute_peers(fileid)
        pid = ""
        # stop once every placed share is reclaimed, or once we pass the
        # ring position of the last share that was handed out
        while remaining and pid < last_givento:
            pid, node = ring.pop(0)
            if node.delete_share(fileid):
                remaining -= 1
        self.utilization -= size
        self.simulator.stamp_utilization(self.utilization)
        del self.living_files[fileid]
152
class Simulator:
    """Discrete-event simulation of a Tahoe-like storage grid.

    Events (file add/delete, node add/delete) are drawn from independent
    Poisson processes; total grid utilization is logged to an RRD file
    for graphing with rrdtool/PyRRD.
    """
    # population and event rates (events per simulated second)
    NUM_NODES = 1000
    EVENTS = ["ADDFILE", "DELFILE", "ADDNODE", "DELNODE"]
    RATE_ADDFILE = 1.0 / 10
    RATE_DELFILE = 1.0 / 20
    RATE_ADDNODE = 1.0 / 3000
    RATE_DELNODE = 1.0 / 4000
    P_NODEAVAIL = 1.0  # probability a node is reachable during retrieval

    def __init__(self):
        self.time = 1164783600 # small numbers of seconds since the epoch confuse rrdtool
        self.prevstamptime = int(self.time)

        # one GAUGE data source, averaged into 1200 rows
        ds = DataSource(ds_name='utilizationds', ds_type='GAUGE', heartbeat=1)
        rra = RRA(cf='AVERAGE', xff=0.1, steps=1, rows=1200)
        self.rrd = RRD("/tmp/utilization.rrd", ds=[ds], rra=[rra], start=self.time)
        self.rrd.create()

        # build the grid: one introducer that knows all nodes
        self.introducer = q = Introducer(self)
        self.all_nodes = [Node(randomid(), q, self)
                          for i in range(self.NUM_NODES)]
        q.all_nodes = self.all_nodes
        # event queue: sorted list of (fire_time, event_type)
        self.next = []
        self.schedule_events()
        self.verbose = False

        # cumulative statistics
        self.added_files = 0
        self.added_data = 0
        self.deleted_files = 0
        self.published_files = []
        self.failed_files = 0
        self.lost_data_bytes = 0 # bytes deleted to make room for new shares

    def stamp_utilization(self, utilization):
        """Record current utilization in the RRD, at most once per 2 seconds
        of simulated time."""
        if int(self.time) > (self.prevstamptime+1):
            self.rrd.bufferValue(self.time, utilization)
            self.prevstamptime = int(self.time)

    def write_graph(self):
        """Flush the RRD and render /tmp/utilization.png.  The RRD handle
        is dropped first so its buffered values are released."""
        self.rrd.update()
        self.rrd = None
        import gc
        gc.collect()

        def1 = graph.DataDefinition(vname="a", rrdfile='/tmp/utilization.rrd', ds_name='utilizationds')
        area1 = graph.Area(value="a", color="#990033", legend='utilizationlegend')
        g = graph.Graph('/tmp/utilization.png', imgformat='PNG', width=540, height=100, vertical_label='utilizationverticallabel', title='utilizationtitle', lower_limit=0)
        g.data.append(def1)
        g.data.append(area1)
        g.write()

    def add_file(self):
        """ADDFILE event: a random node publishes a random-sized file."""
        size = random.randrange(1000)
        n = random.choice(self.all_nodes)
        if self.verbose:
            print "add_file(size=%d, from node %s)" % (size, n)
        fileid = randomid()
        able = n.publish_file(fileid, size)
        if able:
            able, tried = able
            self.added_files += 1
            self.added_data += size
            self.published_files.append(tried)
        else:
            self.failed_files += 1

    def lost_data(self, size):
        """Callback from Node.make_space(): count bytes evicted for new shares."""
        self.lost_data_bytes += size

    def delete_file(self):
        """DELFILE event: ask nodes in random order until one deletes a file."""
        all_nodes = self.all_nodes[:]
        random.shuffle(all_nodes)
        for n in all_nodes:
            if n.delete_file():
                self.deleted_files += 1
                return
        print "no files to delete"

    def _add_event(self, etype):
        """Schedule the next occurrence of etype at an exponentially
        distributed interval (Poisson process) and keep the queue sorted."""
        rate = getattr(self, "RATE_" + etype)
        next = self.time + random.expovariate(rate)
        self.next.append((next, etype))
        self.next.sort()

    def schedule_events(self):
        """Ensure every event type has exactly one pending occurrence."""
        types = set([e[1] for e in self.next])
        for etype in self.EVENTS:
            if not etype in types:
                self._add_event(etype)

    def do_event(self):
        """Pop the earliest event, advance the clock, reschedule that event
        type, and dispatch it.  ADDNODE/DELNODE are not implemented."""
        time, etype = self.next.pop(0)
        assert time > self.time
        # current_time = self.time
        self.time = time
        self._add_event(etype)
        if etype == "ADDFILE":
            self.add_file()
        elif etype == "DELFILE":
            self.delete_file()
        elif etype == "ADDNODE":
            pass
            #self.add_node()
        elif etype == "DELNODE":
            #self.del_node()
            pass
        # self.print_stats(current_time, etype)

    def print_stats_header(self):
        """Print the column header matching print_stats() output."""
        print "time:  added   failed   lost  avg_tried"

    def print_stats(self, time, etype):
        """Print one line of cumulative statistics for the given event."""
        if not self.published_files:
            avg_tried = "NONE"
        else:
            avg_tried = sum(self.published_files) / len(self.published_files)
        print time, etype, self.added_data, self.failed_files, self.lost_data_bytes, avg_tried, len(self.introducer.living_files), self.introducer.utilization
270
# Module-level handle on the most recent Simulator; main() fills it in so
# the run can be inspected interactively afterwards.
# NOTE(review): 'global' at module scope is a no-op; harmless, kept as-is.
global s
s = None
273
274 def main():
275 #    rrdtool.create("foo.rrd",
276 #                   "--step 10",
277 #                   "DS:files-added:DERIVE::0:1000",
278 #                   "RRA:AVERAGE:1:1:1200",
279 #                   )
280     global s
281     s = Simulator()
282     # s.print_stats_header()
283     for i in range(1000):
284         s.do_event()
285     print "%d files added, %d files deleted" % (s.added_files, s.deleted_files)
286     return s
287
288 if __name__ == '__main__':
289     main()