4 this is a load-generating client program. It does all of its work through a
5 given tahoe node (specified by URL), and performs random reads and writes
8 Run this in a directory with the following files:
9 server-URLs : a list of tahoe node URLs (one per line). Each operation
10 will use a randomly-selected server.
11 root.cap: (string) the top-level directory rwcap to use
12 delay: (float) seconds to delay between operations
13 operation-mix: "R/W": two ints, relative frequency of read and write ops
16 Set argv[1] to a per-client stats-NN.out file. This will be updated with
17 running totals of bytes-per-second and operations-per-second. The stats from
18 multiple clients can be totalled together and averaged over time to compute
19 the traffic being accepted by the grid.
21 Each time a 'read' operation is performed, the client will begin at the root
22 and randomly choose a child. If the child is a directory, the client will
23 recurse. If the child is a file, the client will read the contents of the
26 Each time a 'write' operation is performed, the client will generate a target
27 filename (a random string). 90% of the time, the file will be written into
28 the same directory that was used last time (starting at the root). 10% of the
29 time, a new directory is created by assembling 1 to 5 pathnames chosen at
30 random. The client then writes a certain number of zero bytes to this file.
31 The filesize is determined with something like a power-law distribution, with
32 a mean of 10kB and a max of 100MB, so filesize=min(int(1.0/random(.0002)),1e8)
37 import os, sys, httplib, binascii
38 import urllib, simplejson, random, time, urlparse
40 if sys.argv[1] == "--stats":
41 statsfiles = sys.argv[2:]
42 # gather stats every 10 seconds, do a moving-window average of the last
51 for line in open(sf, "r").readlines():
52 name, value = line.split(":")
53 value = int(value.strip())
56 stats[name] += float(value)
58 delta = dict( [ (name,stats[name]-last_stats[name])
61 for name in sorted(delta.keys()):
62 avg = float(delta[name]) / float(DELAY)
63 print "%20s: %0.2f per second" % (name, avg)
65 while len(totals) > MAXSAMPLES:
70 print "MOVING WINDOW AVERAGE:"
71 for name in sorted(delta.keys()):
72 avg = sum([ s[name] for s in totals]) / (DELAY*len(totals))
73 print "%20s %0.2f per second" % (name, avg)
80 stats_out = sys.argv[1]
83 for url in open("server-URLs", "r").readlines():
86 server_urls.append(url)
87 root = open("root.cap", "r").read().strip()
88 delay = float(open("delay", "r").read().strip())
89 readfreq, writefreq = (
90 [int(x) for x in open("operation-mix", "r").read().strip().split("/")])
98 directories_written = 0
100 def listdir(nodeurl, root, vdrive_pathname):
101 if nodeurl[-1] != "/":
103 url = nodeurl + "uri/%s/" % urllib.quote(root)
105 url += urllib.quote(vdrive_pathname)
107 data = urllib.urlopen(url).read()
109 parsed = simplejson.loads(data)
112 print "DATA was", data
115 assert nodetype == "dirnode"
116 global directories_read
117 directories_read += 1
118 children = dict( [(unicode(name),value)
120 in d["children"].iteritems()] )
124 def choose_random_descendant(server_url, root, pathname=""):
125 children = listdir(server_url, root, pathname)
126 name = random.choice(children.keys())
127 child = children[name]
129 new_pathname = pathname + "/" + name
132 if child[0] == "filenode":
134 return choose_random_descendant(server_url, root, new_pathname)
136 def read_and_discard(nodeurl, root, pathname):
137 if nodeurl[-1] != "/":
139 url = nodeurl + "uri/%s/" % urllib.quote(root)
141 url += urllib.quote(pathname)
142 f = urllib.urlopen(url)
143 global bytes_downloaded
148 bytes_downloaded += len(data)
152 "dreamland/disengaging/hucksters",
153 "dreamland/disengaging/klondikes",
154 "dreamland/disengaging/neatly",
155 "dreamland/cottages/richmond",
156 "dreamland/cottages/perhaps",
157 "dreamland/cottages/spies",
158 "dreamland/finder/diversion",
159 "dreamland/finder/cigarette",
160 "dreamland/finder/album",
161 "hazing/licences/comedian",
162 "hazing/licences/goat",
163 "hazing/licences/shopkeeper",
164 "hazing/regiment/frigate",
165 "hazing/regiment/quackery",
166 "hazing/regiment/centerpiece",
167 "hazing/disassociate/mob",
168 "hazing/disassociate/nihilistic",
169 "hazing/disassociate/bilbo",
def create_random_directory():
    """Return a random leading portion of a randomly chosen template path.

    One entry is drawn from the module-level ``directories`` sequence and
    split on "/". A random count of its leading segments is kept: at least
    one, at most all of them.

    Returns the kept segments joined back together with "/".
    """
    segments = random.choice(directories).split("/")
    # randint() includes both endpoints, so the complete path can be chosen.
    keep = random.randint(1, len(segments))
    return "/".join(segments[:keep])
178 def generate_filename():
179 fn = binascii.hexlify(os.urandom(4))
184 size = random.expovariate(1.0 / mean)
185 return int(min(size, 100e6))
187 # copied from twisted/web/client.py
188 def parse_url(url, defaultPort=None):
190 parsed = urlparse.urlparse(url)
192 path = urlparse.urlunparse(('','')+parsed[2:])
193 if defaultPort is None:
194 if scheme == 'https':
198 host, port = parsed[1], defaultPort
200 host, port = host.split(':')
204 return scheme, host, port, path
206 def generate_and_put(nodeurl, root, vdrive_fname, size):
207 if nodeurl[-1] != "/":
209 url = nodeurl + "uri/%s/" % urllib.quote(root)
210 url += urllib.quote(vdrive_fname)
212 scheme, host, port, path = parse_url(url)
214 c = httplib.HTTPConnection(host, port)
215 elif scheme == "https":
216 c = httplib.HTTPSConnection(host, port)
218 raise ValueError("unknown scheme '%s', need http or https" % scheme)
219 c.putrequest("PUT", path)
220 c.putheader("Hostname", host)
221 c.putheader("User-Agent", "tahoe-check-load")
222 c.putheader("Connection", "close")
223 c.putheader("Content-Length", "%d" % size)
225 global bytes_uploaded
227 chunksize = min(size, 4096)
229 c.send("\x00" * chunksize)
230 bytes_uploaded += chunksize
231 return c.getresponse()
234 current_writedir = ""
238 if random.uniform(0, readfreq+writefreq) < readfreq:
243 server = random.choice(server_urls)
245 pathname = choose_random_descendant(server, root)
246 print " reading", pathname
247 read_and_discard(server, root, pathname)
248 files_downloaded += 1
250 if random.uniform(0, 100) < 10:
251 current_writedir = create_random_directory()
252 filename = generate_filename()
254 pathname = current_writedir + "/" + filename
257 print " writing", pathname
260 generate_and_put(server, root, pathname, size)
263 f = open(stats_out+".tmp", "w")
264 f.write("files-uploaded: %d\n" % files_uploaded)
265 f.write("files-downloaded: %d\n" % files_downloaded)
266 f.write("bytes-uploaded: %d\n" % bytes_uploaded)
267 f.write("bytes-downloaded: %d\n" % bytes_downloaded)
268 f.write("directories-read: %d\n" % directories_read)
269 f.write("directories-written: %d\n" % directories_written)
271 os.rename(stats_out+".tmp", stats_out)