]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - simulator.py
change #!/usr/bin/python to #!/usr/bin/env python
[tahoe-lafs/tahoe-lafs.git] / simulator.py
1 #! /usr/bin/env python
2
3 import sha as shamodule
4 import os, random
5
6 from pkg_resources import require
7 require('PyRRD')
8 from pyrrd import graph
9 from pyrrd.rrd import DataSource, RRD, RRA
10
11
12 def sha(s):
13     return shamodule.new(s).digest()
14
15 def randomid():
16     return os.urandom(20)
17
18 class Node:
19     def __init__(self, nid, queen, simulator):
20         self.nid = nid
21         self.queen = queen
22         self.simulator = simulator
23         self.shares = {}
24         self.capacity = random.randrange(1000)
25         self.utilization = 0
26         self.files = []
27
28     def permute_peers(self, fileid):
29         permuted = [(sha(fileid+n.nid),n)
30                     for n in self.queen.get_all_nodes()]
31         permuted.sort()
32         return permuted
33
34     def publish_file(self, fileid, size, numshares=100):
35         sharesize = 4 * size / numshares
36         permuted = self.permute_peers(fileid)
37         last_givento = None
38         tried = 0
39         givento = []
40         while numshares and permuted:
41             pid,node = permuted.pop(0)
42             tried += 1
43             last_givento = pid
44             if node.accept_share(fileid, sharesize):
45                 givento.append((pid,node))
46                 numshares -= 1
47         if numshares:
48             # couldn't push, should delete
49             for pid,node in givento:
50                 node.delete_share(fileid)
51             return False
52         self.files.append((fileid, numshares))
53         self.queen.please_preserve(fileid, size, tried, last_givento)
54         return (True, tried)
55
56     def accept_share(self, fileid, sharesize):
57         accept = False
58         if self.utilization < self.capacity:
59             # we have room! yay!
60             self.shares[fileid] = sharesize
61             self.utilization += sharesize
62             return True
63         if self.decide(sharesize):
64             # we don't, but we'll make room
65             self.make_space(sharesize)
66             self.shares[fileid] = sharesize
67             self.utilization += sharesize
68             return True
69         else:
70             # we're full, try elsewhere
71             return False
72
73     def decide(self, sharesize):
74         if sharesize > self.capacity:
75             return False
76         return False
77         return random.random() > 0.5
78
79     def make_space(self, sharesize):
80         assert sharesize <= self.capacity
81         while self.capacity - self.utilization < sharesize:
82             victim = random.choice(self.shares.keys())
83             self.simulator.lost_data(self.shares[victim])
84             self.delete_share(victim)
85
86     def delete_share(self, fileid):
87         if fileid in self.shares:
88             self.utilization -= self.shares[fileid]
89             del self.shares[fileid]
90             return True
91         return False
92
93     def retrieve_file(self):
94         if not self.files:
95             return
96         fileid,numshares = random.choice(self.files)
97         needed = numshares / 4
98         peers = []
99         for pid,node in self.permute_peers(fileid):
100             if random.random() > self.simulator.P_NODEAVAIL:
101                 continue # node isn't available right now
102             if node.has_share(fileid):
103                 peers.append(node)
104             if len(peers) >= needed:
105                 return True
106         return False
107
108     def delete_file(self):
109         if not self.files:
110             return False
111         which = random.choice(self.files)
112         self.files.remove(which)
113         fileid,numshares = which
114         self.queen.delete(fileid)
115         return True
116
117 class Queen:
118     def __init__(self, simulator):
119         self.living_files = {}
120         self.utilization = 0 # total size of all active files
121         self.simulator = simulator
122         self.simulator.stamp_utilization(self.utilization)
123
124     def get_all_nodes(self):
125         return self.all_nodes
126
127     def please_preserve(self, fileid, size, tried, last_givento):
128         self.living_files[fileid] = (size, tried, last_givento)
129         self.utilization += size
130         self.simulator.stamp_utilization(self.utilization)
131
132     def please_delete(self, fileid):
133         self.delete(fileid)
134
135     def permute_peers(self, fileid):
136         permuted = [(sha(fileid+n.nid),n)
137                     for n in self.get_all_nodes()]
138         permuted.sort()
139         return permuted
140
141     def delete(self, fileid):
142         permuted = self.permute_peers(fileid)
143         size, tried, last_givento = self.living_files[fileid]
144         pid = ""
145         while tried and pid < last_givento:
146             pid,node = permuted.pop(0)
147             had_it = node.delete_share(fileid)
148             if had_it:
149                 tried -= 1
150         self.utilization -= size
151         self.simulator.stamp_utilization(self.utilization)
152         del self.living_files[fileid]
153
154 class Simulator:
155     NUM_NODES = 1000
156     EVENTS = ["ADDFILE", "DELFILE", "ADDNODE", "DELNODE"]
157     RATE_ADDFILE = 1.0 / 10
158     RATE_DELFILE = 1.0 / 20
159     RATE_ADDNODE = 1.0 / 3000
160     RATE_DELNODE = 1.0 / 4000
161     P_NODEAVAIL = 1.0
162
163     def __init__(self):
164         self.time = 1164783600 # small numbers of seconds since the epoch confuse rrdtool
165         self.prevstamptime = int(self.time)
166
167         ds = DataSource(ds_name='utilizationds', ds_type='GAUGE', heartbeat=1)
168         rra = RRA(cf='AVERAGE', xff=0.1, steps=1, rows=1200)
169         self.rrd = RRD("/tmp/utilization.rrd", ds=[ds], rra=[rra], start=self.time)
170         self.rrd.create()
171
172         self.queen = q = Queen(self)
173         self.all_nodes = [Node(randomid(), q, self)
174                           for i in range(self.NUM_NODES)]
175         q.all_nodes = self.all_nodes
176         self.next = []
177         self.schedule_events()
178         self.verbose = False
179
180         self.added_files = 0
181         self.added_data = 0
182         self.deleted_files = 0
183         self.published_files = []
184         self.failed_files = 0
185         self.lost_data_bytes = 0 # bytes deleted to make room for new shares
186
187     def stamp_utilization(self, utilization):
188         if int(self.time) > (self.prevstamptime+1):
189             self.rrd.bufferValue(self.time, utilization)
190             self.prevstamptime = int(self.time)
191
192     def write_graph(self):
193         self.rrd.update()
194         self.rrd = None
195         import gc
196         gc.collect()
197
198         def1 = graph.DataDefinition(vname="a", rrdfile='/tmp/utilization.rrd', ds_name='utilizationds')
199         area1 = graph.Area(value="a", color="#990033", legend='utilizationlegend')
200         g = graph.Graph('/tmp/utilization.png', imgformat='PNG', width=540, height=100, vertical_label='utilizationverticallabel', title='utilizationtitle', lower_limit=0)
201         g.data.append(def1)
202         g.data.append(area1)
203         g.write()
204
205     def add_file(self):
206         size = random.randrange(1000)
207         n = random.choice(self.all_nodes)
208         if self.verbose:
209             print "add_file(size=%d, from node %s)" % (size, n)
210         fileid = randomid()
211         able = n.publish_file(fileid, size)
212         if able:
213             able, tried = able
214             self.added_files += 1
215             self.added_data += size
216             self.published_files.append(tried)
217         else:
218             self.failed_files += 1
219
220     def lost_data(self, size):
221         self.lost_data_bytes += size
222
223     def delete_file(self):
224         all_nodes = self.all_nodes[:]
225         random.shuffle(all_nodes)
226         for n in all_nodes:
227             if n.delete_file():
228                 self.deleted_files += 1
229                 return
230         print "no files to delete"
231
232     def _add_event(self, etype):
233         rate = getattr(self, "RATE_" + etype)
234         next = self.time + random.expovariate(rate)
235         self.next.append((next, etype))
236         self.next.sort()
237
238     def schedule_events(self):
239         types = set([e[1] for e in self.next])
240         for etype in self.EVENTS:
241             if not etype in types:
242                 self._add_event(etype)
243
244     def do_event(self):
245         time, etype = self.next.pop(0)
246         assert time > self.time
247         current_time = self.time
248         self.time = time
249         self._add_event(etype)
250         if etype == "ADDFILE":
251             self.add_file()
252         elif etype == "DELFILE":
253             self.delete_file()
254         elif etype == "ADDNODE":
255             pass
256             #self.add_node()
257         elif etype == "DELNODE":
258             #self.del_node()
259             pass
260         # self.print_stats(current_time, etype)
261
262     def print_stats_header(self):
263         print "time:  added   failed   lost  avg_tried"
264
265     def print_stats(self, time, etype):
266         if not self.published_files:
267             avg_tried = "NONE"
268         else:
269             avg_tried = sum(self.published_files) / len(self.published_files)
270         print time, etype, self.added_data, self.failed_files, self.lost_data_bytes, avg_tried, len(self.queen.living_files), self.queen.utilization
271
272 global s
273 s = None
274
275 def main():
276 #    rrdtool.create("foo.rrd",
277 #                   "--step 10",
278 #                   "DS:files-added:DERIVE::0:1000",
279 #                   "RRA:AVERAGE:1:1:1200",
280 #                   )
281     global s
282     s = Simulator()
283     # s.print_stats_header()
284     for i in range(1000):
285         s.do_event()
286     print "%d files added, %d files deleted" % (s.added_files, s.deleted_files)
287     return s
288
289 if __name__ == '__main__':
290     main()
291     
292