import simplejson
import os.path, shutil
from twisted.trial import unittest
from twisted.internet import defer
from allmydata import check_results, uri
from allmydata import uri as tahoe_uri
from allmydata.util import base32
from allmydata.web import check_results as web_check_results
from allmydata.storage_client import StorageFarmBroker, NativeStorageServer
from allmydata.storage.server import storage_index_to_dir
from allmydata.monitor import Monitor
from allmydata.test.no_network import GridTestMixin
from allmydata.immutable.upload import Data
from allmydata.test.common_web import WebRenderingMixin
from allmydata.mutable.publish import MutableData

class FakeClient:
    def get_storage_broker(self):
        return self.storage_broker

class WebResultsRendering(unittest.TestCase, WebRenderingMixin):

    def create_fake_client(self):
        sb = StorageFarmBroker(None, True)
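        # (The two arguments here are, at this vintage, a Tub, which these
        # tests don't need, and permute_peers=True, matching how the real
        # client constructs its broker.)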
        # s.get_name() (the "short description") will be "v0-00000000".
        # s.get_longname() will include the -long suffix.
        # s.get_peerid() (i.e. tubid) will be "aaa.." or "777.." or "ceir.."
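        # (Those names are the base32 encodings of the 20-byte peerids
        # defined just below: all-zeros encodes to "aaaa...", all-0xff to
        # "7777...", and repeating 0x11 to "ceirceir...".)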
        servers = [("v0-00000000-long", "\x00"*20, "peer-0"),
                   ("v0-ffffffff-long", "\xff"*20, "peer-f"),
                   ("v0-11111111-long", "\x11"*20, "peer-11")]
        for (key_s, peerid, nickname) in servers:
            tubid_b32 = base32.b2a(peerid)
            furl = "pb://%s@nowhere/fake" % tubid_b32
            ann = { "version": 0,
                    "service-name": "storage",
                    "anonymous-storage-FURL": furl,
                    "permutation-seed-base32": "",
                    "nickname": unicode(nickname),
                    "app-versions": {}, # need #466 and v2 introducer
                    "my-version": "ver",
                    "oldest-supported": "oldest",
                    }
            s = NativeStorageServer(key_s, ann)
            sb.test_add_server(peerid, s) # XXX: maybe use key_s?
        c = FakeClient()
        c.storage_broker = sb
        return c

    def render_json(self, page):
        d = self.render1(page, args={"output": ["json"]})
        return d

    def test_literal(self):
        c = self.create_fake_client()
        lcr = web_check_results.LiteralCheckResultsRenderer(c)

        d = self.render1(lcr)
        def _check(html):
            s = self.remove_tags(html)
            self.failUnlessIn("Literal files are always healthy", s)
        d.addCallback(_check)
        d.addCallback(lambda ignored:
                      self.render1(lcr, args={"return_to": ["FOOURL"]}))
        def _check_return_to(html):
            s = self.remove_tags(html)
            self.failUnlessIn("Literal files are always healthy", s)
            self.failUnlessIn('<a href="FOOURL">Return to file.</a>',
                              html)
        d.addCallback(_check_return_to)
        d.addCallback(lambda ignored: self.render_json(lcr))
        def _check_json(json):
            j = simplejson.loads(json)
            self.failUnlessEqual(j["storage-index"], "")
            self.failUnlessEqual(j["results"]["healthy"], True)
        d.addCallback(_check_json)
        return d

    def test_check(self):
        c = self.create_fake_client()
        sb = c.storage_broker
        serverid_1 = "\x00"*20
        serverid_f = "\xff"*20
        server_1 = sb.get_stub_server(serverid_1)
        server_f = sb.get_stub_server(serverid_f)
        u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234)
        data = { "count_shares_needed": 3,
                 "count_shares_expected": 9,
                 "count_shares_good": 10,
                 "count_good_share_hosts": 11,
                 "count_recoverable_versions": 1,
                 "count_unrecoverable_versions": 0,
                 "servers_responding": [],
                 "sharemap": {"shareid1": [server_1, server_f]},
                 "count_wrong_shares": 0,
                 "list_corrupt_shares": [],
                 "count_corrupt_shares": 0,
                 "list_incompatible_shares": [],
                 "count_incompatible_shares": 0,
                 "report": [], "share_problems": [], "servermap": None,
                 }
        cr = check_results.CheckResults(u, u.get_storage_index(),
                                        healthy=True, recoverable=True,
                                        needs_rebalancing=False,
                                        summary="groovy",
                                        **data)
        w = web_check_results.CheckResultsRenderer(c, cr)
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated
        self.failUnlessIn("Healthy : groovy", s)
        self.failUnlessIn("Share Counts: need 3-of-9, have 10", s)
        self.failUnlessIn("Hosts with good shares: 11", s)
        self.failUnlessIn("Corrupt shares: none", s)
        self.failUnlessIn("Wrong Shares: 0", s)
        self.failUnlessIn("Recoverable Versions: 1", s)
        self.failUnlessIn("Unrecoverable Versions: 0", s)
        self.failUnlessIn("Good Shares (sorted in share order): Share ID Nickname Node ID shareid1 peer-0 00000000 peer-f ffffffff", s)

        cr = check_results.CheckResults(u, u.get_storage_index(),
                                        healthy=False, recoverable=True,
                                        needs_rebalancing=False,
                                        summary="ungroovy",
                                        **data)
        w = web_check_results.CheckResultsRenderer(c, cr)
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated
        self.failUnlessIn("Not Healthy! : ungroovy", s)

        data["count_corrupt_shares"] = 1
        data["list_corrupt_shares"] = [(server_1, u.get_storage_index(), 2)]
        cr = check_results.CheckResults(u, u.get_storage_index(),
                                        healthy=False, recoverable=False,
                                        needs_rebalancing=False,
                                        summary="rather dead",
                                        **data)
        w = web_check_results.CheckResultsRenderer(c, cr)
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated
        self.failUnlessIn("Not Recoverable! : rather dead", s)
        self.failUnlessIn("Corrupt shares: Share ID Nickname Node ID sh#2 peer-0 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", s)

        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated
        self.failUnlessIn("Not Recoverable! : rather dead", s)

        html = self.render2(w, args={"return_to": ["FOOURL"]})
        self.failUnlessIn('<a href="FOOURL">Return to file/directory.</a>',
                          html)

        d = self.render_json(w)
        def _check_json(jdata):
            j = simplejson.loads(jdata)
            self.failUnlessEqual(j["summary"], "rather dead")
            self.failUnlessEqual(j["storage-index"],
                                 "2k6avpjga3dho3zsjo6nnkt7n4")
            expected = {'needs-rebalancing': False,
                        'count-shares-expected': 9,
                        'healthy': False,
                        'count-unrecoverable-versions': 0,
                        'count-shares-needed': 3,
                        'sharemap': {"shareid1":
                                     ["v0-00000000-long", "v0-ffffffff-long"]},
                        'count-recoverable-versions': 1,
                        'list-corrupt-shares':
                        [["aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
                          "2k6avpjga3dho3zsjo6nnkt7n4", 2]],
                        'count-good-share-hosts': 11,
                        'count-wrong-shares': 0,
                        'count-shares-good': 10,
                        'count-corrupt-shares': 1,
                        'servers-responding': [],
                        'recoverable': False,
                        }
            self.failUnlessEqual(j["results"], expected)
        d.addCallback(_check_json)
        d.addCallback(lambda ignored: self.render1(w))
        def _check(html):
            s = self.remove_tags(html)
            self.failUnlessIn("File Check Results for SI=2k6avp", s)
            self.failUnlessIn("Not Recoverable! : rather dead", s)
        d.addCallback(_check)
        return d


    def test_check_and_repair(self):
        c = self.create_fake_client()
        sb = c.storage_broker
        serverid_1 = "\x00"*20
        serverid_f = "\xff"*20
        u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234)

        data = { "count_shares_needed": 3,
                 "count_shares_expected": 10,
                 "count_shares_good": 6,
                 "count_good_share_hosts": 7,
                 "count_recoverable_versions": 1,
                 "count_unrecoverable_versions": 0,
                 "servers_responding": [],
                 "sharemap": {"shareid1": [sb.get_stub_server(serverid_1),
                                           sb.get_stub_server(serverid_f)]},
                 "count_wrong_shares": 0,
                 "list_corrupt_shares": [],
                 "count_corrupt_shares": 0,
                 "list_incompatible_shares": [],
                 "count_incompatible_shares": 0,
                 "report": [], "share_problems": [], "servermap": None,
                 }
        pre_cr = check_results.CheckResults(u, u.get_storage_index(),
                                            healthy=False, recoverable=True,
                                            needs_rebalancing=False,
                                            summary="illing",
                                            **data)

        data = { "count_shares_needed": 3,
                 "count_shares_expected": 10,
                 "count_shares_good": 10,
                 "count_good_share_hosts": 11,
                 "count_recoverable_versions": 1,
                 "count_unrecoverable_versions": 0,
                 "servers_responding": [],
                 "sharemap": {"shareid1": [sb.get_stub_server(serverid_1),
                                           sb.get_stub_server(serverid_f)]},
                 "count_wrong_shares": 0,
                 "count_corrupt_shares": 0,
                 "list_corrupt_shares": [],
                 "list_incompatible_shares": [],
                 "count_incompatible_shares": 0,
                 "report": [], "share_problems": [], "servermap": None,
                 }
        post_cr = check_results.CheckResults(u, u.get_storage_index(),
                                             healthy=True, recoverable=True,
                                             needs_rebalancing=False,
                                             summary="groovy",
                                             **data)

        crr = check_results.CheckAndRepairResults(u.get_storage_index())
        crr.pre_repair_results = pre_cr
        crr.post_repair_results = post_cr
        crr.repair_attempted = False

        w = web_check_results.CheckAndRepairResultsRenderer(c, crr)
        html = self.render2(w)
        s = self.remove_tags(html)

        self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s)
        self.failUnlessIn("Healthy : groovy", s)
        self.failUnlessIn("No repair necessary", s)
        self.failUnlessIn("Post-Repair Checker Results:", s)
        self.failUnlessIn("Share Counts: need 3-of-10, have 10", s)

        crr.repair_attempted = True
        crr.repair_successful = True
        html = self.render2(w)
        s = self.remove_tags(html)

        self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s)
        self.failUnlessIn("Healthy : groovy", s)
        self.failUnlessIn("Repair successful", s)
        self.failUnlessIn("Post-Repair Checker Results:", s)

        crr.repair_attempted = True
        crr.repair_successful = False
        post_cr = check_results.CheckResults(u, u.get_storage_index(),
                                             healthy=False, recoverable=True,
                                             needs_rebalancing=False,
                                             summary="better",
                                             **data)
        crr.post_repair_results = post_cr
        html = self.render2(w)
        s = self.remove_tags(html)

        self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s)
        self.failUnlessIn("Not Healthy! : better", s)
        self.failUnlessIn("Repair unsuccessful", s)
        self.failUnlessIn("Post-Repair Checker Results:", s)

        crr.repair_attempted = True
        crr.repair_successful = False
        post_cr = check_results.CheckResults(u, u.get_storage_index(),
                                             healthy=False, recoverable=False,
                                             needs_rebalancing=False,
                                             summary="worse",
                                             **data)
        crr.post_repair_results = post_cr
        html = self.render2(w)
        s = self.remove_tags(html)

        self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s)
        self.failUnlessIn("Not Recoverable! : worse", s)
        self.failUnlessIn("Repair unsuccessful", s)
        self.failUnlessIn("Post-Repair Checker Results:", s)

        d = self.render_json(w)
        def _got_json(data):
            j = simplejson.loads(data)
            self.failUnlessEqual(j["repair-attempted"], True)
            self.failUnlessEqual(j["storage-index"],
                                 "2k6avpjga3dho3zsjo6nnkt7n4")
            self.failUnlessEqual(j["pre-repair-results"]["summary"], "illing")
            self.failUnlessEqual(j["post-repair-results"]["summary"], "worse")
        d.addCallback(_got_json)

        w2 = web_check_results.CheckAndRepairResultsRenderer(c, None)
        d.addCallback(lambda ignored: self.render_json(w2))
        def _got_lit_results(data):
            j = simplejson.loads(data)
            self.failUnlessEqual(j["repair-attempted"], False)
            self.failUnlessEqual(j["storage-index"], "")
        d.addCallback(_got_lit_results)
        return d

class BalancingAct(GridTestMixin, unittest.TestCase):
    # test for #1115 regarding the 'count-good-share-hosts' metric

    def add_server(self, server_number, readonly=False):
        assert self.g, "I tried to find a grid at self.g, but failed"
        ss = self.g.make_server(server_number, readonly)
        #log.msg("just created a server, number: %s => %s" % (server_number, ss,))
        self.g.add_server(server_number, ss)

    def add_server_with_share(self, server_number, uri, share_number=None,
                              readonly=False):
        self.add_server(server_number, readonly)
        if share_number is not None:
            self.copy_share_to_server(uri, share_number, server_number)

    def copy_share_to_server(self, uri, share_number, server_number):
        ss = self.g.servers_by_number[server_number]
        # Copy share i from the directory associated with the first
        # storage server to the directory associated with this one.
        assert self.g, "I tried to find a grid at self.g, but failed"
        assert self.shares, "I tried to find shares at self.shares, but failed"
        old_share_location = self.shares[share_number][2]
        new_share_location = os.path.join(ss.storedir, "shares")
        si = tahoe_uri.from_string(self.uri).get_storage_index()
        new_share_location = os.path.join(new_share_location,
                                          storage_index_to_dir(si))
        if not os.path.exists(new_share_location):
            os.makedirs(new_share_location)
        new_share_location = os.path.join(new_share_location,
                                          str(share_number))
        if old_share_location != new_share_location:
            shutil.copy(old_share_location, new_share_location)
        shares = self.find_uri_shares(uri)
        # Make sure that the storage server has the share.
        self.failUnless((share_number, ss.my_nodeid, new_share_location)
                        in shares)

    def _pretty_shares_chart(self, uri):
        # Servers are labeled A-Z, shares are labeled 0-9
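        # (Hypothetical example: a return value of {0: ['A'], 1: ['A'],
        # 3: ['A', 'B']} would mean that server A holds shares 0, 1, and 3,
        # and server B holds another copy of share 3.)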
        letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        assert len(self.g.servers_by_number) < len(letters), \
            "This little printing function is only meant for < 26 servers"
        shares_chart = {}
        names = dict(zip([ss.my_nodeid
                          for _,ss in self.g.servers_by_number.iteritems()],
                         letters))
        for shnum, serverid, _ in self.find_uri_shares(uri):
            shares_chart.setdefault(shnum, []).append(names[serverid])
        return shares_chart

    def test_good_share_hosts(self):
        self.basedir = "checker/BalancingAct/1115"
        self.set_up_grid(num_servers=1)
        c0 = self.g.clients[0]
        c0.DEFAULT_ENCODING_PARAMETERS['happy'] = 1
        c0.DEFAULT_ENCODING_PARAMETERS['n'] = 4
        c0.DEFAULT_ENCODING_PARAMETERS['k'] = 3

        DATA = "data" * 100
        d = c0.upload(Data(DATA, convergence=""))
        def _stash_immutable(ur):
            self.imm = c0.create_node_from_uri(ur.get_uri())
            self.uri = self.imm.get_uri()
        d.addCallback(_stash_immutable)
        d.addCallback(lambda ign:
            self.find_uri_shares(self.uri))
        def _store_shares(shares):
            self.shares = shares
        d.addCallback(_store_shares)

        def add_three(_, i):
            # Add a new server with just share 3
            self.add_server_with_share(i, self.uri, 3)
            #print self._pretty_shares_chart(self.uri)
        for i in range(1,5):
            d.addCallback(add_three, i)

        def _check_and_repair(_):
            return self.imm.check_and_repair(Monitor())
        def _check_counts(crr, shares_good, good_share_hosts):
            prr = crr.get_post_repair_results()
            #print self._pretty_shares_chart(self.uri)
            self.failUnlessEqual(prr.get_share_counter_good(), shares_good)
            self.failUnlessEqual(prr.get_host_counter_good_shares(),
                                 good_share_hosts)

        # Initial sharemap:
        #     0:[A] 1:[A] 2:[A] 3:[A,B,C,D,E]
        #   4 good shares, but 5 good hosts
        # After deleting all instances of share #3 and repairing:
        #     0:[A,B], 1:[A,C], 2:[A,D], 3:[E]
        #   Still 4 good shares and 5 good hosts
        d.addCallback(_check_and_repair)
        d.addCallback(_check_counts, 4, 5)
        d.addCallback(lambda _: self.delete_shares_numbered(self.uri, [3]))
        d.addCallback(_check_and_repair)
        d.addCallback(_check_counts, 4, 5)
        d.addCallback(lambda _: [self.g.break_server(sid)
                                 for sid in self.g.get_all_serverids()])
        d.addCallback(_check_and_repair)
        d.addCallback(_check_counts, 0, 0)
        return d

class AddLease(GridTestMixin, unittest.TestCase):
    # test for #875, in which failures in the add-lease call cause
    # false-negatives in the checker

    def test_875(self):
        self.basedir = "checker/AddLease/875"
        self.set_up_grid(num_servers=1)
        c0 = self.g.clients[0]
        c0.DEFAULT_ENCODING_PARAMETERS['happy'] = 1
        self.uris = {}
        DATA = "data" * 100
        d = c0.upload(Data(DATA, convergence=""))
        def _stash_immutable(ur):
            self.imm = c0.create_node_from_uri(ur.get_uri())
        d.addCallback(_stash_immutable)
        d.addCallback(lambda ign:
            c0.create_mutable_file(MutableData("contents")))
        def _stash_mutable(node):
            self.mut = node
        d.addCallback(_stash_mutable)

        def _check_cr(cr, which):
            self.failUnless(cr.is_healthy(), which)

        # these two should work normally
        d.addCallback(lambda ign: self.imm.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "immutable-normal")
        d.addCallback(lambda ign: self.mut.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "mutable-normal")

        really_did_break = []
        # now break the server's remote_add_lease call
        def _break_add_lease(ign):
            def broken_add_lease(*args, **kwargs):
                really_did_break.append(1)
                raise KeyError("intentional failure, should be ignored")
            assert self.g.servers_by_number[0].remote_add_lease
            self.g.servers_by_number[0].remote_add_lease = broken_add_lease
        d.addCallback(_break_add_lease)

        # and confirm that the files still look healthy
        d.addCallback(lambda ign: self.mut.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "mutable-broken")
        d.addCallback(lambda ign: self.imm.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "immutable-broken")

        d.addCallback(lambda ign: self.failUnless(really_did_break))
        return d

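# CounterHolder tracks how many block fetches are in flight at any moment,
# plus the high-water mark; a single instance is shared by all MockVRBP
# objects so the TooParallel test below can observe the verifier's peak
# parallelism.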
class CounterHolder(object):
    def __init__(self):
        self._num_active_block_fetches = 0
        self._max_active_block_fetches = 0

from allmydata.immutable.checker import ValidatedReadBucketProxy
class MockVRBP(ValidatedReadBucketProxy):
    def __init__(self, sharenum, bucket, share_hash_tree, num_blocks, block_size, share_size, counterholder):
        ValidatedReadBucketProxy.__init__(self, sharenum, bucket,
                                          share_hash_tree, num_blocks,
                                          block_size, share_size)
        self.counterholder = counterholder

    def get_block(self, blocknum):
        self.counterholder._num_active_block_fetches += 1
        if self.counterholder._num_active_block_fetches > self.counterholder._max_active_block_fetches:
            self.counterholder._max_active_block_fetches = self.counterholder._num_active_block_fetches
        d = ValidatedReadBucketProxy.get_block(self, blocknum)
        def _mark_no_longer_active(res):
            self.counterholder._num_active_block_fetches -= 1
            return res
        d.addBoth(_mark_no_longer_active)
        return d

class TooParallel(GridTestMixin, unittest.TestCase):
    # bug #1395: immutable verifier was aggressively parallelized, checking
    # all blocks of all shares at the same time, blowing our memory budget
    # and crashing with MemoryErrors on >1GB files.
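    # (With this test's parameters, 4 shares of 80 blocks each, an
    # unthrottled verifier could have up to 4 * 80 = 320 block fetches in
    # flight at once; the fix caps it at one block per share, i.e. 4.)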

    def test_immutable(self):
        import allmydata.immutable.checker
        origVRBP = allmydata.immutable.checker.ValidatedReadBucketProxy

        self.basedir = "checker/TooParallel/immutable"

        # If any code asks to instantiate a ValidatedReadBucketProxy,
        # we give them a MockVRBP which is configured to use our
        # CounterHolder.
        counterholder = CounterHolder()
        def make_mock_VRBP(*args, **kwargs):
            return MockVRBP(counterholder=counterholder, *args, **kwargs)
        allmydata.immutable.checker.ValidatedReadBucketProxy = make_mock_VRBP
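        # (The monkey-patch is undone in _clean_up below, which runs via
        # addBoth even when the test fails, so other tests see the real
        # class again.)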

        d = defer.succeed(None)
        def _start(ign):
            self.set_up_grid(num_servers=4)
            self.c0 = self.g.clients[0]
            self.c0.DEFAULT_ENCODING_PARAMETERS = { "k": 1,
                                                    "happy": 4,
                                                    "n": 4,
                                                    "max_segment_size": 5,
                                                    }
            self.uris = {}
            DATA = "data" * 100 # 400/5 = 80 blocks
            return self.c0.upload(Data(DATA, convergence=""))
        d.addCallback(_start)
        def _do_check(ur):
            n = self.c0.create_node_from_uri(ur.get_uri())
            return n.check(Monitor(), verify=True)
        d.addCallback(_do_check)
        def _check(cr):
            # the verifier works on all 4 shares in parallel, but only
            # fetches one block from each share at a time, so we expect to
            # see 4 parallel fetches
            self.failUnlessEqual(counterholder._max_active_block_fetches, 4)
        d.addCallback(_check)
        def _clean_up(res):
            allmydata.immutable.checker.ValidatedReadBucketProxy = origVRBP
            return res
        d.addBoth(_clean_up)
        return d

    test_immutable.timeout = 80