]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/test/check_load.py
add a simple load-generating tool to do random reads and writes
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / test / check_load.py
1 #! /usr/bin/python
2
3 """
4 this is a load-generating client program. It does all of its work through a
5 given tahoe node (specified by URL), and performs random reads and writes
6 to the target.
7
8 Run this in a directory with the following files:
9  server-URLs : a list of tahoe node URLs (one per line). Each operation
10                will use a randomly-selected server.
11  root.cap: (string) the top-level directory rwcap to use
12  delay: (float) seconds to delay between operations
13  operation-mix: "R/W": two ints, relative frequency of read and write ops
14  #size:?
15
16 Set argv[1] to a per-client stats-NN.out file. This will be updated with
17 running totals of bytes-per-second and operations-per-second. The stats from
18 multiple clients can be totalled together and averaged over time to compute
19 the traffic being accepted by the grid.
20
21 Each time a 'read' operation is performed, the client will begin at the root
22 and randomly choose a child. If the child is a directory, the client will
23 recurse. If the child is a file, the client will read the contents of the
24 file.
25
26 Each time a 'write' operation is performed, the client will generate a target
27 filename (a random string). 90% of the time, the file will be written into
28 the same directory that was used last time (starting at the root). 10% of the
29 time, a new directory is created by assembling 1 to 5 pathnames chosen at
30 random. The client then writes a certain number of zero bytes to this file.
31 The filesize is determined with something like a power-law distribution, with
32 a mean of 10kB and a max of 100MB, so filesize=min(int(1.0/random(.0002)),1e8)
33
34
35 """
36
37 import os, sys, httplib, binascii
38 import urllib, simplejson, random, time, urlparse
39
# Path of the per-client stats file, taken from the command line.
stats_out = sys.argv[1]

# One tahoe node URL per non-blank line of "server-URLs".
server_urls = [_line.strip()
               for _line in open("server-URLs", "r").readlines()
               if _line.strip()]

# Top-level directory cap that all operations are performed under.
root = open("root.cap", "r").read().strip()

# Seconds to sleep between operations.
delay = float(open("delay", "r").read().strip())

# "R/W": relative frequencies of read and write operations.
_mix = open("operation-mix", "r").read().strip()
readfreq, writefreq = [int(_field) for _field in _mix.split("/")]
51
52
53
54 def listdir(nodeurl, root, vdrive_pathname):
55     if nodeurl[-1] != "/":
56         nodeurl += "/"
57     url = nodeurl + "uri/%s/" % urllib.quote(root)
58     if vdrive_pathname:
59         url += urllib.quote(vdrive_pathname)
60     url += "?t=json"
61     data = urllib.urlopen(url).read()
62     try:
63         parsed = simplejson.loads(data)
64     except ValueError:
65         print "URL was", url
66         print "DATA was", data
67         raise
68     nodetype, d = parsed
69     assert nodetype == "dirnode"
70     return d['children']
71
72
def choose_random_descendant(server_url, root, pathname=""):
    """Pick a random file somewhere below `pathname`.

    Starting at `pathname` (the root directory when empty), list the
    directory, pick one child at random, and keep descending until the
    picked child is a "filenode". Returns the slash-joined path of that
    file, relative to `root`.
    """
    current = pathname
    while True:
        entries = listdir(server_url, root, current)
        picked = random.choice(list(entries))
        if current:
            current = current + "/" + picked
        else:
            current = picked
        if entries[picked][0] == "filenode":
            return current
        # anything else is treated as a directory: descend into it
84
def read_and_discard(nodeurl, root, pathname):
    """Download a file through the node and throw the contents away.

    nodeurl: base URL of the node; a trailing "/" is appended if missing.
    root: cap string; it is URL-quoted and placed after "uri/".
    pathname: slash-separated path below root (may be empty).

    The body is read in 4096-byte pieces so that the whole file is never
    held in memory. Returns None.
    """
    if nodeurl[-1] != "/":
        nodeurl += "/"
    url = nodeurl + "uri/%s/" % urllib.quote(root)
    if pathname:
        url += urllib.quote(pathname)
    f = urllib.urlopen(url)
    try:
        while True:
            data = f.read(4096)
            if not data:
                break
    finally:
        # Always release the connection, even if a read fails part-way.
        f.close()
96
# Fixed pool of three-level directory paths used as write targets.
directories = [
    "dreamland/disengaging/hucksters",
    "dreamland/disengaging/klondikes",
    "dreamland/disengaging/neatly",
    "dreamland/cottages/richmond",
    "dreamland/cottages/perhaps",
    "dreamland/cottages/spies",
    "dreamland/finder/diversion",
    "dreamland/finder/cigarette",
    "dreamland/finder/album",
    "hazing/licences/comedian",
    "hazing/licences/goat",
    "hazing/licences/shopkeeper",
    "hazing/regiment/frigate",
    "hazing/regiment/quackery",
    "hazing/regiment/centerpiece",
    "hazing/disassociate/mob",
    "hazing/disassociate/nihilistic",
    "hazing/disassociate/bilbo",
    ]

def create_random_directory():
    """Return a random leading portion of a random entry in `directories`.

    One entry is picked at random, then between one and all of its
    slash-separated segments (counted from the left) are kept.
    """
    segments = random.choice(directories).split("/")
    keep = random.randint(1, len(segments))
    return "/".join(segments[:keep])
123
def generate_filename():
    """Return a random 8-hex-digit name built from 4 bytes of os.urandom."""
    raw = os.urandom(4)
    return binascii.b2a_hex(raw)
127
def choose_size():
    """Return a random file size in bytes, as an int.

    The size is drawn from an exponential distribution whose mean is 10e3
    and is capped at 100e6.
    """
    mean_bytes = 10e3
    max_bytes = 100e6
    drawn = random.expovariate(1.0 / mean_bytes)
    if drawn > max_bytes:
        drawn = max_bytes
    return int(drawn)
132
# adapted from twisted/web/client.py
def parse_url(url, defaultPort=None):
    """Split a URL into (scheme, host, port, path).

    url: the URL to split; surrounding whitespace is ignored.
    defaultPort: port to report when the URL does not name one. When None,
                 443 is used for 'https' and 80 for every other scheme.

    The returned path is everything after the network location (path,
    parameters, query and fragment), or "/" if that is empty. A bracketed
    IPv6 literal such as "[::1]:8123" is accepted and the host is returned
    without the brackets.

    Raises ValueError if the port is not an integer or the network location
    is malformed.
    """
    url = url.strip()
    parsed = urlparse.urlparse(url)
    scheme = parsed[0]
    # Re-assemble everything that follows the network location.
    path = urlparse.urlunparse(('', '') + tuple(parsed[2:]))
    if defaultPort is None:
        if scheme == 'https':
            defaultPort = 443
        else:
            defaultPort = 80
    host, port = parsed[1], defaultPort
    if host.startswith('['):
        # Bracketed IPv6 literal: the colons inside the brackets belong to
        # the address, only a colon after "]" introduces a port.
        closing = host.find(']')
        if closing == -1:
            raise ValueError("unterminated IPv6 literal in URL '%s'" % url)
        remainder = host[closing+1:]
        host = host[1:closing]
        if remainder:
            if not remainder.startswith(':'):
                raise ValueError("malformed host in URL '%s'" % url)
            port = int(remainder[1:])
    elif ':' in host:
        host, port = host.split(':')
        port = int(port)
    if path == "":
        path = "/"
    return scheme, host, port, path
151
def generate_and_put(nodeurl, root, vdrive_fname, size):
    """PUT `size` zero bytes to a file below `root` through the given node.

    nodeurl: base URL of the node; a trailing "/" is appended if missing.
    root: cap string; it is URL-quoted and placed after "uri/".
    vdrive_fname: slash-separated target path below root.
    size: number of bytes to upload.

    The body is streamed in pieces of at most 4096 bytes. Returns the
    response object from the connection. Raises ValueError when the URL
    scheme is neither http nor https.
    """
    if nodeurl[-1] != "/":
        nodeurl += "/"
    target = nodeurl + "uri/%s/" % urllib.quote(root) + urllib.quote(vdrive_fname)

    scheme, host, port, path = parse_url(target)
    if scheme not in ("http", "https"):
        raise ValueError("unknown scheme '%s', need http or https" % scheme)
    if scheme == "https":
        conn = httplib.HTTPSConnection(host, port)
    else:
        conn = httplib.HTTPConnection(host, port)

    conn.putrequest("PUT", path)
    request_headers = [
        ("Hostname", host),
        ("User-Agent", "tahoe-check-load"),
        ("Connection", "close"),
        ("Content-Length", "%d" % size),
        ]
    for header_name, header_value in request_headers:
        conn.putheader(header_name, header_value)
    conn.endheaders()

    # Stream the payload so a large upload never sits in memory at once.
    remaining = size
    while remaining:
        piece = min(remaining, 4096)
        conn.send("\x00" * piece)
        remaining -= piece
    return conn.getresponse()
176
177
178 current_writedir = ""
179
180 while True:
181     time.sleep(delay)
182     if random.uniform(0, readfreq+writefreq) > readfreq:
183         op = "read"
184     else:
185         op = "write"
186     print "OP:", op
187     server = random.choice(server_urls)
188     if op == "read":
189         pathname = choose_random_descendant(server, root)
190         print "  reading", pathname
191         read_and_discard(server, root, pathname)
192     elif op == "write":
193         if random.uniform(0, 100) < 10:
194             current_writedir = create_random_directory()
195         filename = generate_filename()
196         if current_writedir:
197             pathname = current_writedir + "/" + filename
198         else:
199             pathname = filename
200         print "  writing", pathname
201         size = choose_size()
202         print "   size", size
203         generate_and_put(server, root, pathname, size)
204