]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/test/check_load.py
tolerate simplejson-2.0.0 and newer, which frequently return bytestrings instead...
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / test / check_load.py
1 #! /usr/bin/python
2
3 """
4 this is a load-generating client program. It does all of its work through a
5 given tahoe node (specified by URL), and performs random reads and writes
6 to the target.
7
8 Run this in a directory with the following files:
9  server-URLs : a list of tahoe node URLs (one per line). Each operation
10                will use a randomly-selected server.
11  root.cap: (string) the top-level directory rwcap to use
12  delay: (float) seconds to delay between operations
13  operation-mix: "R/W": two ints, relative frequency of read and write ops
14  #size:?
15
16 Set argv[1] to a per-client stats-NN.out file. This will be updated with
17 running totals of bytes-per-second and operations-per-second. The stats from
18 multiple clients can be totalled together and averaged over time to compute
19 the traffic being accepted by the grid.
20
21 Each time a 'read' operation is performed, the client will begin at the root
22 and randomly choose a child. If the child is a directory, the client will
23 recurse. If the child is a file, the client will read the contents of the
24 file.
25
26 Each time a 'write' operation is performed, the client will generate a target
27 filename (a random string). 90% of the time, the file will be written into
28 the same directory that was used last time (starting at the root). 10% of the
29 time, a new directory is created by assembling 1 to 5 pathnames chosen at
30 random. The client then writes a certain number of zero bytes to this file.
31 The filesize is determined with something like a power-law distribution, with
32 a mean of 10kB and a max of 100MB, so filesize=min(int(1.0/random(.0002)),1e8)
33
34
35 """
36
37 import os, sys, httplib, binascii
38 import urllib, simplejson, random, time, urlparse
39
40 if sys.argv[1] == "--stats":
41     statsfiles = sys.argv[2:]
42     # gather stats every 10 seconds, do a moving-window average of the last
43     # 60 seconds
44     DELAY = 10
45     MAXSAMPLES = 6
46     totals = []
47     last_stats = {}
48     while True:
49         stats = {}
50         for sf in statsfiles:
51             for line in open(sf, "r").readlines():
52                 name, value = line.split(":")
53                 value = int(value.strip())
54                 if name not in stats:
55                     stats[name] = 0
56                 stats[name] += float(value)
57         if last_stats:
58             delta = dict( [ (name,stats[name]-last_stats[name])
59                             for name in stats ] )
60             print "THIS SAMPLE:"
61             for name in sorted(delta.keys()):
62                 avg = float(delta[name]) / float(DELAY)
63                 print "%20s: %0.2f per second" % (name, avg)
64             totals.append(delta)
65             while len(totals) > MAXSAMPLES:
66                 totals.pop(0)
67
68             # now compute average
69             print
70             print "MOVING WINDOW AVERAGE:"
71             for name in sorted(delta.keys()):
72                 avg = sum([ s[name] for s in totals]) / (DELAY*len(totals))
73                 print "%20s %0.2f per second" % (name, avg)
74
75         last_stats = stats
76         print
77         print
78         time.sleep(DELAY)
79
stats_out = sys.argv[1]  # per-client stats file, rewritten after every op

# server-URLs: one tahoe node URL per line; each operation uses a
# randomly-selected server from this list
server_urls = []
for url in open("server-URLs", "r").readlines():
    url = url.strip()
    if url:
        server_urls.append(url)
root = open("root.cap", "r").read().strip()  # top-level directory rwcap
delay = float(open("delay", "r").read().strip())  # seconds between operations
# operation-mix is "R/W": relative frequencies of read and write operations
readfreq, writefreq = (
    [int(x) for x in open("operation-mix", "r").read().strip().split("/")])


# running totals, dumped to the stats file after each operation
files_uploaded = 0
files_downloaded = 0
bytes_uploaded = 0
bytes_downloaded = 0
directories_read = 0
directories_written = 0
99
100 def listdir(nodeurl, root, vdrive_pathname):
101     if nodeurl[-1] != "/":
102         nodeurl += "/"
103     url = nodeurl + "uri/%s/" % urllib.quote(root)
104     if vdrive_pathname:
105         url += urllib.quote(vdrive_pathname)
106     url += "?t=json"
107     data = urllib.urlopen(url).read()
108     try:
109         parsed = simplejson.loads(data)
110     except ValueError:
111         print "URL was", url
112         print "DATA was", data
113         raise
114     nodetype, d = parsed
115     assert nodetype == "dirnode"
116     global directories_read
117     directories_read += 1
118     children = dict( [(unicode(name),value)
119                       for (name,value)
120                       in d["children"].iteritems()] )
121     return children
122
123
def choose_random_descendant(server_url, root, pathname=""):
    """Walk downward from `pathname`, picking a random child at each level,
    until a filenode is reached; return that file's pathname."""
    children = listdir(server_url, root, pathname)
    picked = random.choice(children.keys())
    node = children[picked]
    if not pathname:
        child_path = picked
    else:
        child_path = pathname + "/" + picked
    if node[0] != "filenode":
        # a directory: recurse one level deeper
        return choose_random_descendant(server_url, root, child_path)
    return child_path
135
def read_and_discard(nodeurl, root, pathname):
    """GET the file at `pathname` and throw the bytes away, adding the
    byte count to the global bytes_downloaded total."""
    global bytes_downloaded
    base = nodeurl
    if base[-1] != "/":
        base += "/"
    url = base + "uri/%s/" % urllib.quote(root)
    if pathname:
        url += urllib.quote(pathname)
    stream = urllib.urlopen(url)
    while True:
        chunk = stream.read(4096)
        if not chunk:
            break
        bytes_downloaded += len(chunk)
149
150
# Fixed pool of three-level pathnames; write operations use a random
# prefix of one of these (see create_random_directory) as the target
# directory, keeping the generated namespace small and bounded.
directories = [
    "dreamland/disengaging/hucksters",
    "dreamland/disengaging/klondikes",
    "dreamland/disengaging/neatly",
    "dreamland/cottages/richmond",
    "dreamland/cottages/perhaps",
    "dreamland/cottages/spies",
    "dreamland/finder/diversion",
    "dreamland/finder/cigarette",
    "dreamland/finder/album",
    "hazing/licences/comedian",
    "hazing/licences/goat",
    "hazing/licences/shopkeeper",
    "hazing/regiment/frigate",
    "hazing/regiment/quackery",
    "hazing/regiment/centerpiece",
    "hazing/disassociate/mob",
    "hazing/disassociate/nihilistic",
    "hazing/disassociate/bilbo",
    ]
171
def create_random_directory():
    """Return a random non-empty prefix (1..N segments) of one of the
    canned paths in `directories`."""
    segments = random.choice(directories).split("/")
    depth = random.randint(1, len(segments))
    return "/".join(segments[:depth])
177
def generate_filename():
    """Return a random filename: 8 hex digits (4 random bytes)."""
    return binascii.hexlify(os.urandom(4))
181
def choose_size():
    """Draw a file size from an exponential distribution with mean 10kB,
    capped at 100MB, and return it as an int."""
    cap = 100e6
    draw = random.expovariate(1.0 / 10e3)
    return int(min(draw, cap))
186
# copied from twisted/web/client.py
def parse_url(url, defaultPort=None):
    """Split `url` into (scheme, host, port, path).

    The port defaults to 443 for https and 80 otherwise, unless the
    netloc carries an explicit :port.  An empty path becomes "/"."""
    url = url.strip()
    parsed = urlparse.urlparse(url)
    scheme = parsed[0]
    path = urlparse.urlunparse(('', '') + parsed[2:])
    if defaultPort is None:
        if scheme == 'https':
            defaultPort = 443
        else:
            defaultPort = 80
    host = parsed[1]
    port = defaultPort
    if ':' in host:
        host, portstr = host.split(':')
        port = int(portstr)
    return scheme, host, port, (path or "/")
205
def generate_and_put(nodeurl, root, vdrive_fname, size):
    """PUT `size` zero bytes to <nodeurl>/uri/<root>/<vdrive_fname>.

    Streams the body in 4kB chunks (counting them into the global
    bytes_uploaded total) and returns the httplib response object."""
    if nodeurl[-1] != "/":
        nodeurl += "/"
    url = nodeurl + "uri/%s/" % urllib.quote(root)
    url += urllib.quote(vdrive_fname)

    scheme, host, port, path = parse_url(url)
    if scheme == "http":
        c = httplib.HTTPConnection(host, port)
    elif scheme == "https":
        c = httplib.HTTPSConnection(host, port)
    else:
        raise ValueError("unknown scheme '%s', need http or https" % scheme)
    # bug fix: the original also sent a bogus "Hostname" header here.
    # There is no such HTTP header; the real "Host" header is emitted
    # automatically by httplib's putrequest(), so the bogus one is dropped.
    c.putrequest("PUT", path)
    c.putheader("User-Agent", "tahoe-check-load")
    c.putheader("Connection", "close")
    c.putheader("Content-Length", "%d" % size)
    c.endheaders()
    global bytes_uploaded
    while size:
        chunksize = min(size, 4096)
        size -= chunksize
        c.send("\x00" * chunksize)
        bytes_uploaded += chunksize
    return c.getresponse()
232
233
# Main load-generation loop: forever, sleep `delay` seconds, perform one
# randomly-chosen read or write against a random server, then atomically
# rewrite the stats file with the updated counters.
current_writedir = ""  # directory used by the last write; "" means the root

while True:
    time.sleep(delay)
    # weighted coin flip according to the operation-mix file
    if random.uniform(0, readfreq+writefreq) < readfreq:
        op = "read"
    else:
        op = "write"
    print "OP:", op
    server = random.choice(server_urls)
    if op == "read":
        # walk to a random file under the root and download/discard it
        pathname = choose_random_descendant(server, root)
        print "  reading", pathname
        read_and_discard(server, root, pathname)
        files_downloaded += 1
    elif op == "write":
        # 10% of writes pick a fresh target directory; the rest reuse
        # the previous one (per the module docstring)
        if random.uniform(0, 100) < 10:
            current_writedir = create_random_directory()
        filename = generate_filename()
        if current_writedir:
            pathname = current_writedir + "/" + filename
        else:
            pathname = filename
        print "  writing", pathname
        size = choose_size()
        print "   size", size
        generate_and_put(server, root, pathname, size)
        files_uploaded += 1

    # write to a temp file then rename over the real one, so the --stats
    # aggregator never reads a half-written file
    f = open(stats_out+".tmp", "w")
    f.write("files-uploaded: %d\n" % files_uploaded)
    f.write("files-downloaded: %d\n" % files_downloaded)
    f.write("bytes-uploaded: %d\n" % bytes_uploaded)
    f.write("bytes-downloaded: %d\n" % bytes_downloaded)
    f.write("directories-read: %d\n" % directories_read)
    # NOTE(review): directories_written is never incremented anywhere in
    # this file, so this line always reports 0 — confirm whether directory
    # creation during writes was meant to count here.
    f.write("directories-written: %d\n" % directories_written)
    f.close()
    os.rename(stats_out+".tmp", stats_out)
272