]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/test/check_load.py
test/check_load: hush (newer) pyflakes
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / test / check_load.py
1 """
2 this is a load-generating client program. It does all of its work through a
3 given tahoe node (specified by URL), and performs random reads and writes
4 to the target.
5
6 Run this in a directory with the following files:
7  server-URLs : a list of tahoe node URLs (one per line). Each operation
8                will use a randomly-selected server.
9  root.cap: (string) the top-level directory rwcap to use
10  delay: (float) seconds to delay between operations
11  operation-mix: "R/W": two ints, relative frequency of read and write ops
12  #size:?
13
Set argv[1] to a per-client stats-NN.out file. This will be updated with
15 running totals of bytes-per-second and operations-per-second. The stats from
16 multiple clients can be totalled together and averaged over time to compute
17 the traffic being accepted by the grid.
18
19 Each time a 'read' operation is performed, the client will begin at the root
20 and randomly choose a child. If the child is a directory, the client will
21 recurse. If the child is a file, the client will read the contents of the
22 file.
23
24 Each time a 'write' operation is performed, the client will generate a target
25 filename (a random string). 90% of the time, the file will be written into
26 the same directory that was used last time (starting at the root). 10% of the
27 time, a new directory is created by assembling 1 to 5 pathnames chosen at
28 random. The client then writes a certain number of zero bytes to this file.
29 The filesize is determined with something like a power-law distribution, with
30 a mean of 10kB and a max of 100MB, so filesize=min(int(1.0/random(.0002)),1e8)
31
32
33 """
34
35 import os, sys, httplib, binascii
36 import urllib, simplejson, random, time, urlparse
37
38 if sys.argv[1] == "--stats":
39     statsfiles = sys.argv[2:]
40     # gather stats every 10 seconds, do a moving-window average of the last
41     # 60 seconds
42     DELAY = 10
43     MAXSAMPLES = 6
44     totals = []
45     last_stats = {}
46     while True:
47         stats = {}
48         for sf in statsfiles:
49             for line in open(sf, "r").readlines():
50                 name, value = line.split(":")
51                 value = int(value.strip())
52                 if name not in stats:
53                     stats[name] = 0
54                 stats[name] += float(value)
55         del name
56         if last_stats:
57             delta = dict( [ (n,stats[n]-last_stats[n])
58                             for n in stats ] )
59             print "THIS SAMPLE:"
60             for name in sorted(delta.keys()):
61                 avg = float(delta[name]) / float(DELAY)
62                 print "%20s: %0.2f per second" % (name, avg)
63             totals.append(delta)
64             while len(totals) > MAXSAMPLES:
65                 totals.pop(0)
66
67             # now compute average
68             print
69             print "MOVING WINDOW AVERAGE:"
70             for name in sorted(delta.keys()):
71                 avg = sum([ s[name] for s in totals]) / (DELAY*len(totals))
72                 print "%20s %0.2f per second" % (name, avg)
73
74         last_stats = stats
75         print
76         print
77         time.sleep(DELAY)
78
# Per-client mode: argv[1] names the stats file we keep updated below.
stats_out = sys.argv[1]

# server-URLs: one tahoe node URL per line; each operation picks one at
# random.  Blank lines are ignored.
server_urls = []
with open("server-URLs", "r") as f:
    for url in f:
        url = url.strip()
        if url:
            server_urls.append(url)
# root.cap: the top-level directory rwcap all operations start from
with open("root.cap", "r") as f:
    root = f.read().strip()
# delay: seconds to sleep between operations
with open("delay", "r") as f:
    delay = float(f.read().strip())
# operation-mix: "R/W" relative frequencies of read and write operations
with open("operation-mix", "r") as f:
    readfreq, writefreq = (
        [int(x) for x in f.read().strip().split("/")])


# Running totals, rewritten to the stats file after every operation.
files_uploaded = 0
files_downloaded = 0
bytes_uploaded = 0
bytes_downloaded = 0
directories_read = 0
directories_written = 0
98
99 def listdir(nodeurl, root, remote_pathname):
100     if nodeurl[-1] != "/":
101         nodeurl += "/"
102     url = nodeurl + "uri/%s/" % urllib.quote(root)
103     if remote_pathname:
104         url += urllib.quote(remote_pathname)
105     url += "?t=json"
106     data = urllib.urlopen(url).read()
107     try:
108         parsed = simplejson.loads(data)
109     except ValueError:
110         print "URL was", url
111         print "DATA was", data
112         raise
113     nodetype, d = parsed
114     assert nodetype == "dirnode"
115     global directories_read
116     directories_read += 1
117     children = dict( [(unicode(name),value)
118                       for (name,value)
119                       in d["children"].iteritems()] )
120     return children
121
122
def choose_random_descendant(server_url, root, pathname=""):
    """Walk down from the root, picking a random child at each level,
    and return the path of the first filenode reached."""
    while True:
        children = listdir(server_url, root, pathname)
        name = random.choice(children.keys())
        child = children[name]
        pathname = pathname + "/" + name if pathname else name
        if child[0] == "filenode":
            return pathname
134
def read_and_discard(nodeurl, root, pathname):
    """Download the file at pathname under the root dircap, throwing the
    data away and adding its length to the global bytes_downloaded."""
    global bytes_downloaded
    base = nodeurl
    if not base.endswith("/"):
        base = base + "/"
    url = base + "uri/%s/" % urllib.quote(root)
    if pathname:
        url = url + urllib.quote(pathname)
    stream = urllib.urlopen(url)
    while True:
        chunk = stream.read(4096)
        if not chunk:
            break
        bytes_downloaded += len(chunk)
148
149
# Fixed pool of three-level remote directory paths used by write
# operations; create_random_directory() picks one at random and truncates
# it to a random depth (1-3 segments).
directories = [
    "dreamland/disengaging/hucksters",
    "dreamland/disengaging/klondikes",
    "dreamland/disengaging/neatly",
    "dreamland/cottages/richmond",
    "dreamland/cottages/perhaps",
    "dreamland/cottages/spies",
    "dreamland/finder/diversion",
    "dreamland/finder/cigarette",
    "dreamland/finder/album",
    "hazing/licences/comedian",
    "hazing/licences/goat",
    "hazing/licences/shopkeeper",
    "hazing/regiment/frigate",
    "hazing/regiment/quackery",
    "hazing/regiment/centerpiece",
    "hazing/disassociate/mob",
    "hazing/disassociate/nihilistic",
    "hazing/disassociate/bilbo",
    ]
170
def create_random_directory():
    """Return a random-depth prefix (1..N segments) of a random entry
    from the module-level `directories` pool."""
    segments = random.choice(directories).split("/")
    depth = random.randint(1, len(segments))
    return "/".join(segments[:depth])
176
def generate_filename():
    """Return a random 8-character hex string to use as a filename."""
    return binascii.hexlify(os.urandom(4))
180
def choose_size():
    """Draw a file size from an exponential distribution with mean 10kB,
    capped at 100MB, and return it as an int."""
    sampled = random.expovariate(1.0 / 10e3)
    return int(min(sampled, 100e6))
185
# copied from twisted/web/client.py
def parse_url(url, defaultPort=None):
    """Split url into a (scheme, host, port, path) tuple.

    The port defaults to 443 for https and 80 otherwise, unless an
    explicit "host:port" netloc or defaultPort overrides it; an empty
    path is normalized to "/".
    """
    parsed = urlparse.urlparse(url.strip())
    scheme = parsed[0]
    path = urlparse.urlunparse(('','') + parsed[2:])
    if defaultPort is None:
        defaultPort = 443 if scheme == 'https' else 80
    host = parsed[1]
    port = defaultPort
    if ':' in host:
        host, portstr = host.split(':')
        port = int(portstr)
    if path == "":
        path = "/"
    return scheme, host, port, path
204
def generate_and_put(nodeurl, root, remote_filename, size):
    """PUT `size` zero bytes to remote_filename under the root dircap.

    Streams the body in 4kB chunks, adding to the global bytes_uploaded
    counter as it goes, and returns the httplib response object.
    """
    base = nodeurl
    if not base.endswith("/"):
        base = base + "/"
    url = base + "uri/%s/" % urllib.quote(root) + urllib.quote(remote_filename)

    scheme, host, port, path = parse_url(url)
    if scheme == "http":
        conn = httplib.HTTPConnection(host, port)
    elif scheme == "https":
        conn = httplib.HTTPSConnection(host, port)
    else:
        raise ValueError("unknown scheme '%s', need http or https" % scheme)
    conn.putrequest("PUT", path)
    # NOTE(review): header name is "Hostname", not the standard "Host";
    # preserved as-is.
    conn.putheader("Hostname", host)
    conn.putheader("User-Agent", "tahoe-check-load")
    conn.putheader("Connection", "close")
    conn.putheader("Content-Length", "%d" % size)
    conn.endheaders()
    global bytes_uploaded
    remaining = size
    while remaining:
        chunk = min(remaining, 4096)
        remaining -= chunk
        conn.send("\x00" * chunk)
        bytes_uploaded += chunk
    return conn.getresponse()
231
232
233 current_writedir = ""
234
235 while True:
236     time.sleep(delay)
237     if random.uniform(0, readfreq+writefreq) < readfreq:
238         op = "read"
239     else:
240         op = "write"
241     print "OP:", op
242     server = random.choice(server_urls)
243     if op == "read":
244         pathname = choose_random_descendant(server, root)
245         print "  reading", pathname
246         read_and_discard(server, root, pathname)
247         files_downloaded += 1
248     elif op == "write":
249         if random.uniform(0, 100) < 10:
250             current_writedir = create_random_directory()
251         filename = generate_filename()
252         if current_writedir:
253             pathname = current_writedir + "/" + filename
254         else:
255             pathname = filename
256         print "  writing", pathname
257         size = choose_size()
258         print "   size", size
259         generate_and_put(server, root, pathname, size)
260         files_uploaded += 1
261
262     f = open(stats_out+".tmp", "w")
263     f.write("files-uploaded: %d\n" % files_uploaded)
264     f.write("files-downloaded: %d\n" % files_downloaded)
265     f.write("bytes-uploaded: %d\n" % bytes_uploaded)
266     f.write("bytes-downloaded: %d\n" % bytes_downloaded)
267     f.write("directories-read: %d\n" % directories_read)
268     f.write("directories-written: %d\n" % directories_written)
269     f.close()
270     os.rename(stats_out+".tmp", stats_out)
271