4 from twisted.trial import unittest
5 from twisted.internet import defer
6 from allmydata import check_results, uri
7 from allmydata import uri as tahoe_uri
8 from allmydata.util import base32
9 from allmydata.web import check_results as web_check_results
10 from allmydata.storage_client import StorageFarmBroker, NativeStorageServer
11 from allmydata.storage.server import storage_index_to_dir
12 from allmydata.monitor import Monitor
13 from allmydata.test.no_network import GridTestMixin
14 from allmydata.immutable.upload import Data
15 from allmydata.test.common_web import WebRenderingMixin
16 from allmydata.mutable.publish import MutableData
def get_storage_broker(self):
    """Return the storage broker stashed on this fake client."""
    broker = self.storage_broker
    return broker
class WebResultsRendering(unittest.TestCase, WebRenderingMixin):
    """Rendering tests for the check-results web views, driven by a fake client."""

    def create_fake_client(self):
        """Build a fake client whose StorageFarmBroker knows three static servers."""
        sb = StorageFarmBroker(None, True)
        # s.get_name() (the "short description") will be "v0-00000000".
        # s.get_longname() will include the -long suffix.
        # s.get_peerid() (i.e. tubid) will be "aaa.." or "777.." or "ceir.."
        servers = [("v0-00000000-long", "\x00"*20, "peer-0"),
                   ("v0-ffffffff-long", "\xff"*20, "peer-f"),
                   ("v0-11111111-long", "\x11"*20, "peer-11")]
        for (key_s, peerid, nickname) in servers:
            tubid_b32 = base32.b2a(peerid)
            furl = "pb://%s@nowhere/fake" % tubid_b32
            # NOTE(review): the announcement-dict opener (presumably `ann = {`)
            # is elided from this excerpt; the entries below belong to it.
            "service-name": "storage",
            "anonymous-storage-FURL": furl,
            "permutation-seed-base32": "",
            "nickname": unicode(nickname),
            "app-versions": {}, # need #466 and v2 introducer
            "oldest-supported": "oldest",
            # NOTE(review): the dict's closing brace is also elided here.
            s = NativeStorageServer(key_s, ann)
            sb.test_add_server(peerid, s) # XXX: maybe use key_s?
        # NOTE(review): the tail of this method (wrapping `sb` in the fake
        # client object and returning it) is elided from this excerpt.
    def render_json(self, page):
        """Render `page` with an output=json query arg; the trailing
        `return d` (presumably a Deferred) is elided from this excerpt."""
        d = self.render1(page, args={"output": ["json"]})
    def test_literal(self):
        """Literal (LIT) check results: plain HTML, return_to link, and JSON."""
        c = self.create_fake_client()
        lcr = web_check_results.LiteralCheckResultsRenderer(c)
        # NOTE(review): the first render call and the enclosing callback's
        # `def` line are elided from this excerpt; the next two lines are the
        # interior of that first HTML check.
        s = self.remove_tags(html)
        self.failUnlessIn("Literal files are always healthy", s)
        # Re-render with return_to=FOOURL and confirm the link appears.
        d.addCallback(lambda ignored:
                      self.render1(lcr, args={"return_to": ["FOOURL"]}))
        def _check_return_to(html):
            s = self.remove_tags(html)
            self.failUnlessIn("Literal files are always healthy", s)
            self.failUnlessIn('<a href="FOOURL">Return to file.</a>',
            # NOTE(review): the closing argument/paren of the call above is elided.
        d.addCallback(_check_return_to)
        # JSON form: a LIT file has an empty storage-index and is healthy.
        d.addCallback(lambda ignored: self.render_json(lcr))
        def _check_json(json):
            j = simplejson.loads(json)
            self.failUnlessEqual(j["storage-index"], "")
            self.failUnlessEqual(j["results"]["healthy"], True)
        d.addCallback(_check_json)
    # NOTE(review): the `def test_check(self):` line and the binding of `sb`
    # (presumably the fake client's storage broker) are elided from this
    # excerpt; everything below is the interior of that test method.
        c = self.create_fake_client()
        serverid_1 = "\x00"*20
        serverid_f = "\xff"*20
        server_1 = sb.get_stub_server(serverid_1)
        server_f = sb.get_stub_server(serverid_f)
        u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234)
        # Raw checker-results fields; note count_shares_good (10) exceeds
        # count_shares_expected (9) on purpose.
        data = { "count_shares_needed": 3,
                 "count_shares_expected": 9,
                 "count_shares_good": 10,
                 "count_good_share_hosts": 11,
                 "count_recoverable_versions": 1,
                 "count_unrecoverable_versions": 0,
                 "servers_responding": [],
                 "sharemap": {"shareid1": [server_1, server_f]},
                 "count_wrong_shares": 0,
                 "list_corrupt_shares": [],
                 "count_corrupt_shares": 0,
                 "list_incompatible_shares": [],
                 "count_incompatible_shares": 0,
                 "report": [], "share_problems": [], "servermap": None,
        # NOTE(review): the dict's closing brace is elided here.
        cr = check_results.CheckResults(u, u.get_storage_index(),
                                        healthy=True, recoverable=True,
                                        needs_rebalancing=False,
        # NOTE(review): trailing kwargs (summary="groovy" per the assertion
        # below, plus **data) are elided from this call.
        w = web_check_results.CheckResultsRenderer(c, cr)
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated
        self.failUnlessIn("Healthy : groovy", s)
        self.failUnlessIn("Share Counts: need 3-of-9, have 10", s)
        self.failUnlessIn("Hosts with good shares: 11", s)
        self.failUnlessIn("Corrupt shares: none", s)
        self.failUnlessIn("Wrong Shares: 0", s)
        self.failUnlessIn("Recoverable Versions: 1", s)
        self.failUnlessIn("Unrecoverable Versions: 0", s)
        self.failUnlessIn("Good Shares (sorted in share order): Share ID Nickname Node ID shareid1 peer-0 00000000 peer-f ffffffff", s)
        # Unhealthy-but-recoverable variant.
        cr = check_results.CheckResults(u, u.get_storage_index(),
                                        healthy=False, recoverable=True,
                                        needs_rebalancing=False,
        # NOTE(review): trailing kwargs elided (summary is "ungroovy" per below).
        w = web_check_results.CheckResultsRenderer(c, cr)
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated
        self.failUnlessIn("Not Healthy! : ungroovy", s)
        # Unrecoverable variant with one corrupt share (sh#2 on server_1).
        data["count_corrupt_shares"] = 1
        data["list_corrupt_shares"] = [(server_1, u.get_storage_index(), 2)]
        cr = check_results.CheckResults(u, u.get_storage_index(),
                                        healthy=False, recoverable=False,
                                        needs_rebalancing=False,
                                        summary="rather dead",
        w = web_check_results.CheckResultsRenderer(c, cr)
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated
        self.failUnlessIn("Not Recoverable! : rather dead", s)
        self.failUnlessIn("Corrupt shares: Share ID Nickname Node ID sh#2 peer-0 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", s)
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated
        self.failUnlessIn("Not Recoverable! : rather dead", s)
        # The return_to link should also appear on the check-results page.
        html = self.render2(w, args={"return_to": ["FOOURL"]})
        self.failUnlessIn('<a href="FOOURL">Return to file/directory.</a>',
        # NOTE(review): the closing argument/paren of the call above is elided.
        # JSON form of the unrecoverable results.
        d = self.render_json(w)
        def _check_json(jdata):
            j = simplejson.loads(jdata)
            self.failUnlessEqual(j["summary"], "rather dead")
            self.failUnlessEqual(j["storage-index"],
                                 "2k6avpjga3dho3zsjo6nnkt7n4")
            expected = {'needs-rebalancing': False,
                        'count-shares-expected': 9,
                        'count-unrecoverable-versions': 0,
                        'count-shares-needed': 3,
                        'sharemap': {"shareid1":
                                     ["v0-00000000-long", "v0-ffffffff-long"]},
                        'count-recoverable-versions': 1,
                        'list-corrupt-shares':
                        [["aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
                          "2k6avpjga3dho3zsjo6nnkt7n4", 2]],
                        'count-good-share-hosts': 11,
                        'count-wrong-shares': 0,
                        'count-shares-good': 10,
                        'count-corrupt-shares': 1,
                        'servers-responding': [],
                        'recoverable': False,
            # NOTE(review): the dict's closing brace is elided here.
            self.failUnlessEqual(j["results"], expected)
        d.addCallback(_check_json)
        d.addCallback(lambda ignored: self.render1(w))
        # NOTE(review): the `def _check(html):` line is elided here; the next
        # three lines are that callback's interior.
        s = self.remove_tags(html)
        self.failUnlessIn("File Check Results for SI=2k6avp", s)
        self.failUnlessIn("Not Recoverable! : rather dead", s)
        d.addCallback(_check)
        # NOTE(review): the trailing `return d` is elided from this excerpt.
    def test_check_and_repair(self):
        """Render CheckAndRepairResults through four repair outcomes, then JSON."""
        c = self.create_fake_client()
        sb = c.storage_broker
        serverid_1 = "\x00"*20
        serverid_f = "\xff"*20
        u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234)
        # Pre-repair state: only 6 of 10 expected shares good.
        data = { "count_shares_needed": 3,
                 "count_shares_expected": 10,
                 "count_shares_good": 6,
                 "count_good_share_hosts": 7,
                 "count_recoverable_versions": 1,
                 "count_unrecoverable_versions": 0,
                 "servers_responding": [],
                 "sharemap": {"shareid1": [sb.get_stub_server(serverid_1),
                                           sb.get_stub_server(serverid_f)]},
                 "count_wrong_shares": 0,
                 "list_corrupt_shares": [],
                 "count_corrupt_shares": 0,
                 "list_incompatible_shares": [],
                 "count_incompatible_shares": 0,
                 "report": [], "share_problems": [], "servermap": None,
        # NOTE(review): the dict's closing brace is elided here.
        pre_cr = check_results.CheckResults(u, u.get_storage_index(),
                                            healthy=False, recoverable=True,
                                            needs_rebalancing=False,
        # NOTE(review): trailing kwargs (summary, **data) are elided.
        # Post-repair state: all 10 shares good.
        data = { "count_shares_needed": 3,
                 "count_shares_expected": 10,
                 "count_shares_good": 10,
                 "count_good_share_hosts": 11,
                 "count_recoverable_versions": 1,
                 "count_unrecoverable_versions": 0,
                 "servers_responding": [],
                 "sharemap": {"shareid1": [sb.get_stub_server(serverid_1),
                                           sb.get_stub_server(serverid_f)]},
                 "count_wrong_shares": 0,
                 "count_corrupt_shares": 0,
                 "list_corrupt_shares": [],
                 "list_incompatible_shares": [],
                 "count_incompatible_shares": 0,
                 "report": [], "share_problems": [], "servermap": None,
        # NOTE(review): closing brace elided here too.
        post_cr = check_results.CheckResults(u, u.get_storage_index(),
                                             healthy=True, recoverable=True,
                                             needs_rebalancing=False,
        # NOTE(review): trailing kwargs (summary="groovy" per below, **data) elided.
        crr = check_results.CheckAndRepairResults(u.get_storage_index())
        crr.pre_repair_results = pre_cr
        crr.post_repair_results = post_cr
        crr.repair_attempted = False
        # Case 1: no repair attempted.
        w = web_check_results.CheckAndRepairResultsRenderer(c, crr)
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s)
        self.failUnlessIn("Healthy : groovy", s)
        self.failUnlessIn("No repair necessary", s)
        self.failUnlessIn("Post-Repair Checker Results:", s)
        self.failUnlessIn("Share Counts: need 3-of-10, have 10", s)
        # Case 2: repair attempted and successful.
        crr.repair_attempted = True
        crr.repair_successful = True
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s)
        self.failUnlessIn("Healthy : groovy", s)
        self.failUnlessIn("Repair successful", s)
        self.failUnlessIn("Post-Repair Checker Results:", s)
        # Case 3: repair failed, file still recoverable.
        crr.repair_attempted = True
        crr.repair_successful = False
        post_cr = check_results.CheckResults(u, u.get_storage_index(),
                                             healthy=False, recoverable=True,
                                             needs_rebalancing=False,
        # NOTE(review): trailing kwargs (summary="better" per below, **data) elided.
        crr.post_repair_results = post_cr
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s)
        self.failUnlessIn("Not Healthy! : better", s)
        self.failUnlessIn("Repair unsuccessful", s)
        self.failUnlessIn("Post-Repair Checker Results:", s)
        # Case 4: repair failed and the file is no longer recoverable.
        crr.repair_attempted = True
        crr.repair_successful = False
        post_cr = check_results.CheckResults(u, u.get_storage_index(),
                                             healthy=False, recoverable=False,
                                             needs_rebalancing=False,
        # NOTE(review): trailing kwargs (summary="worse" per below, **data) elided.
        crr.post_repair_results = post_cr
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s)
        self.failUnlessIn("Not Recoverable! : worse", s)
        self.failUnlessIn("Repair unsuccessful", s)
        self.failUnlessIn("Post-Repair Checker Results:", s)
        # JSON form of the final (worst) state.
        d = self.render_json(w)
        # NOTE(review): the `def _got_json(data):` line is elided here; the
        # following lines are that callback's interior.
            j = simplejson.loads(data)
            self.failUnlessEqual(j["repair-attempted"], True)
            self.failUnlessEqual(j["storage-index"],
                                 "2k6avpjga3dho3zsjo6nnkt7n4")
            self.failUnlessEqual(j["pre-repair-results"]["summary"], "illing")
            self.failUnlessEqual(j["post-repair-results"]["summary"], "worse")
        d.addCallback(_got_json)
        # Renderer given no results: repair-attempted False, empty storage-index.
        w2 = web_check_results.CheckAndRepairResultsRenderer(c, None)
        d.addCallback(lambda ignored: self.render_json(w2))
        def _got_lit_results(data):
            j = simplejson.loads(data)
            self.failUnlessEqual(j["repair-attempted"], False)
            self.failUnlessEqual(j["storage-index"], "")
        d.addCallback(_got_lit_results)
        # NOTE(review): the trailing `return d` is elided from this excerpt.
class BalancingAct(GridTestMixin, unittest.TestCase):
    """Regression tests for ticket #1115: 'count-good-share-hosts' accounting."""
    # test for #1115 regarding the 'count-good-share-hosts' metric
320 def add_server(self, server_number, readonly=False):
321 assert self.g, "I tried to find a grid at self.g, but failed"
322 ss = self.g.make_server(server_number, readonly)
323 #log.msg("just created a server, number: %s => %s" % (server_number, ss,))
324 self.g.add_server(server_number, ss)
    # NOTE(review): the continuation line of this signature (presumably
    # carrying `readonly=False):`) is elided from this excerpt, so the def
    # does not parse as shown.
    def add_server_with_share(self, server_number, uri, share_number=None,
        # Add a fresh server; optionally seed it with one share of `uri`.
        self.add_server(server_number, readonly)
        if share_number is not None:
            self.copy_share_to_server(uri, share_number, server_number)
    def copy_share_to_server(self, uri, share_number, server_number):
        """Copy share `share_number` of `uri` into server `server_number`'s
        share directory, then assert the grid now sees it there."""
        ss = self.g.servers_by_number[server_number]
        # Copy share i from the directory associated with the first
        # storage server to the directory associated with this one.
        assert self.g, "I tried to find a grid at self.g, but failed"
        assert self.shares, "I tried to find shares at self.shares, but failed"
        old_share_location = self.shares[share_number][2]
        new_share_location = os.path.join(ss.storedir, "shares")
        si = tahoe_uri.from_string(self.uri).get_storage_index()
        new_share_location = os.path.join(new_share_location,
                                          storage_index_to_dir(si))
        if not os.path.exists(new_share_location):
            os.makedirs(new_share_location)
        new_share_location = os.path.join(new_share_location,
        # NOTE(review): the final path component argument (presumably the
        # share number as a string) is elided from this excerpt.
        if old_share_location != new_share_location:
            shutil.copy(old_share_location, new_share_location)
        shares = self.find_uri_shares(uri)
        # Make sure that the storage server has the share.
        self.failUnless((share_number, ss.my_nodeid, new_share_location)
        # NOTE(review): the membership check tail (`in shares)`) is elided here.
    def _pretty_shares_chart(self, uri):
        """Build a share-number -> server-letter chart for debugging output
        (the return statement is elided from this excerpt)."""
        # Servers are labeled A-Z, shares are labeled 0-9
        letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        assert len(self.g.servers_by_number) < len(letters), \
            "This little printing function is only meant for < 26 servers"
        names = dict(zip([ss.my_nodeid
                          for _,ss in self.g.servers_by_number.iteritems()],
        # NOTE(review): the second zip() argument (the letters) and the
        # `shares_chart` initialization are elided from this excerpt.
        for shnum, serverid, _ in self.find_uri_shares(uri):
            shares_chart.setdefault(shnum, []).append(names[serverid])
        # NOTE(review): the trailing return is elided here.
    def test_good_share_hosts(self):
        """Upload with k=3/n=4, spread share 3 across added servers, and check
        that check-and-repair reports consistent good-share/host counts (#1115)."""
        self.basedir = "checker/BalancingAct/1115"
        self.set_up_grid(num_servers=1)
        c0 = self.g.clients[0]
        c0.DEFAULT_ENCODING_PARAMETERS['happy'] = 1
        c0.DEFAULT_ENCODING_PARAMETERS['n'] = 4
        c0.DEFAULT_ENCODING_PARAMETERS['k'] = 3
        # NOTE(review): the DATA definition is elided from this excerpt.
        d = c0.upload(Data(DATA, convergence=""))
        def _stash_immutable(ur):
            self.imm = c0.create_node_from_uri(ur.get_uri())
            self.uri = self.imm.get_uri()
        d.addCallback(_stash_immutable)
        d.addCallback(lambda ign:
                      self.find_uri_shares(self.uri))
        def _store_shares(shares):
        # NOTE(review): the body of _store_shares (presumably stashing
        # `self.shares`) is elided from this excerpt.
        d.addCallback(_store_shares)
        # NOTE(review): the surrounding loop / `add_three` definition is
        # elided; the next two lines are its interior.
            # Add a new server with just share 3
            self.add_server_with_share(i, self.uri, 3)
            #print self._pretty_shares_chart(self.uri)
        d.addCallback(add_three, i)
        def _check_and_repair(_):
            return self.imm.check_and_repair(Monitor())
        def _check_counts(crr, shares_good, good_share_hosts):
            prr = crr.get_post_repair_results()
            #print self._pretty_shares_chart(self.uri)
            self.failUnlessEqual(prr.get_share_counter_good(), shares_good)
            self.failUnlessEqual(prr.get_host_counter_good_shares(),
        # NOTE(review): the closing argument of the call above is elided; the
        # explanatory block below was a triple-quoted comment whose delimiters
        # are elided in this excerpt.
        """
        0:[A] 1:[A] 2:[A] 3:[A,B,C,D,E]
        4 good shares, but 5 good hosts
        After deleting all instances of share #3 and repairing:
        0:[A,B], 1:[A,C], 2:[A,D], 3:[E]
        Still 4 good shares and 5 good hosts
        """
        d.addCallback(_check_and_repair)
        d.addCallback(_check_counts, 4, 5)
        d.addCallback(lambda _: self.delete_shares_numbered(self.uri, [3]))
        d.addCallback(_check_and_repair)
        d.addCallback(_check_counts, 4, 5)
        # Break every server: nothing should be countable any more.
        d.addCallback(lambda _: [self.g.break_server(sid)
                                 for sid in self.g.get_all_serverids()])
        d.addCallback(_check_and_repair)
        d.addCallback(_check_counts, 0, 0)
        # NOTE(review): the trailing `return d` is elided from this excerpt.
class AddLease(GridTestMixin, unittest.TestCase):
    # test for #875, in which failures in the add-lease call cause
    # false-negatives in the checker

    # NOTE(review): the test method's `def` line (ticket #875 per the comment
    # above) is elided from this excerpt; everything below is its interior.
        self.basedir = "checker/AddLease/875"
        self.set_up_grid(num_servers=1)
        c0 = self.g.clients[0]
        c0.DEFAULT_ENCODING_PARAMETERS['happy'] = 1
        # NOTE(review): the DATA definition is elided from this excerpt.
        d = c0.upload(Data(DATA, convergence=""))
        def _stash_immutable(ur):
            self.imm = c0.create_node_from_uri(ur.get_uri())
        d.addCallback(_stash_immutable)
        d.addCallback(lambda ign:
                      c0.create_mutable_file(MutableData("contents")))
        def _stash_mutable(node):
        # NOTE(review): the body of _stash_mutable (presumably stashing
        # `self.mut`, which is used below) is elided from this excerpt.
        d.addCallback(_stash_mutable)

        def _check_cr(cr, which):
            self.failUnless(cr.is_healthy(), which)

        # these two should work normally
        d.addCallback(lambda ign: self.imm.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "immutable-normal")
        d.addCallback(lambda ign: self.mut.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "mutable-normal")

        really_did_break = []
        # now break the server's remote_add_lease call
        def _break_add_lease(ign):
            def broken_add_lease(*args, **kwargs):
                really_did_break.append(1)
                raise KeyError("intentional failure, should be ignored")
            assert self.g.servers_by_number[0].remote_add_lease
            self.g.servers_by_number[0].remote_add_lease = broken_add_lease
        d.addCallback(_break_add_lease)

        # and confirm that the files still look healthy
        d.addCallback(lambda ign: self.mut.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "mutable-broken")
        d.addCallback(lambda ign: self.imm.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "immutable-broken")

        # confirm the broken add_lease was actually exercised
        d.addCallback(lambda ign: self.failUnless(really_did_break))
        # NOTE(review): the trailing `return d` is elided from this excerpt.
class CounterHolder(object):
    """Mutable holder for the current and high-water-mark counts of
    concurrent block fetches, shared between MockVRBP and the test."""
    # NOTE(review): the `def __init__(self):` line is elided from this excerpt.
        # current number of in-flight get_block calls
        self._num_active_block_fetches = 0
        # highest concurrency observed so far
        self._max_active_block_fetches = 0
from allmydata.immutable.checker import ValidatedReadBucketProxy
class MockVRBP(ValidatedReadBucketProxy):
    """A ValidatedReadBucketProxy that records, via a shared CounterHolder,
    how many get_block calls are in flight at once."""
    def __init__(self, sharenum, bucket, share_hash_tree, num_blocks, block_size, share_size, counterholder):
        ValidatedReadBucketProxy.__init__(self, sharenum, bucket,
                                          share_hash_tree, num_blocks,
                                          block_size, share_size)
        # shared counter object, also read by the test
        self.counterholder = counterholder

    def get_block(self, blocknum):
        # Bump the in-flight count and track the high-water mark before
        # delegating to the real implementation.
        self.counterholder._num_active_block_fetches += 1
        if self.counterholder._num_active_block_fetches > self.counterholder._max_active_block_fetches:
            self.counterholder._max_active_block_fetches = self.counterholder._num_active_block_fetches
        d = ValidatedReadBucketProxy.get_block(self, blocknum)
        def _mark_no_longer_active(res):
            self.counterholder._num_active_block_fetches -= 1
            # NOTE(review): the `return res` pass-through is elided here.
        d.addBoth(_mark_no_longer_active)
        # NOTE(review): the trailing `return d` is elided from this excerpt.
class TooParallel(GridTestMixin, unittest.TestCase):
    # bug #1395: immutable verifier was aggressively parallized, checking all
    # blocks of all shares at the same time, blowing our memory budget and
    # crashing with MemoryErrors on >1GB files.

    def test_immutable(self):
        """Verify an immutable file while counting concurrent block fetches;
        expect at most one in-flight block per share (4 shares -> 4)."""
        import allmydata.immutable.checker
        # Remember the real class so it can be restored afterwards.
        origVRBP = allmydata.immutable.checker.ValidatedReadBucketProxy
        self.basedir = "checker/TooParallel/immutable"
        # If any code asks to instantiate a ValidatedReadBucketProxy,
        # we give them a MockVRBP which is configured to use our
        # NOTE(review): the tail of this comment is elided from the excerpt;
        # it presumably refers to the shared counterholder below.
        counterholder = CounterHolder()
        def make_mock_VRBP(*args, **kwargs):
            return MockVRBP(counterholder=counterholder, *args, **kwargs)
        allmydata.immutable.checker.ValidatedReadBucketProxy = make_mock_VRBP
        d = defer.succeed(None)
        # NOTE(review): the `def _start(ign):` line is elided here; the next
        # lines are that callback's interior.
            self.set_up_grid(num_servers=4)
            self.c0 = self.g.clients[0]
            self.c0.DEFAULT_ENCODING_PARAMETERS = { "k": 1,
            # NOTE(review): the remaining encoding parameters (and the dict's
            # close) are elided from this excerpt.
                                                    "max_segment_size": 5,
            DATA = "data" * 100 # 400/5 = 80 blocks
            return self.c0.upload(Data(DATA, convergence=""))
        d.addCallback(_start)
        # NOTE(review): the `def _do_check(ur):` line is elided here.
            n = self.c0.create_node_from_uri(ur.get_uri())
            return n.check(Monitor(), verify=True)
        d.addCallback(_do_check)
        # NOTE(review): the `def _check(cr):` line is elided here.
            # the verifier works on all 4 shares in parallel, but only
            # fetches one block from each share at a time, so we expect to
            # see 4 parallel fetches
            self.failUnlessEqual(counterholder._max_active_block_fetches, 4)
        d.addCallback(_check)
        # NOTE(review): the cleanup callback wrapper (restoring the original
        # class and returning d) is partially elided; the restoration line is:
        allmydata.immutable.checker.ValidatedReadBucketProxy = origVRBP
    test_immutable.timeout = 80