]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/test/test_checker.py
f6e604778b9c15756600c1844a3c2b4f503e1a50
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / test / test_checker.py
1
2 import simplejson
3 import os.path, shutil
4 from twisted.trial import unittest
5 from twisted.internet import defer
6 from allmydata import check_results, uri
7 from allmydata import uri as tahoe_uri
8 from allmydata.util import base32
9 from allmydata.web import check_results as web_check_results
10 from allmydata.storage_client import StorageFarmBroker, NativeStorageServer
11 from allmydata.storage.server import storage_index_to_dir
12 from allmydata.monitor import Monitor
13 from allmydata.test.no_network import GridTestMixin
14 from allmydata.immutable.upload import Data
15 from allmydata.test.common_web import WebRenderingMixin
16 from allmydata.mutable.publish import MutableData
17
class FakeClient:
    """Bare-bones client stand-in for the renderer tests.

    The tests attach a ``storage_broker`` attribute to an instance after
    constructing it; nothing is set up here.
    """

    def get_storage_broker(self):
        """Return the object previously stored on ``self.storage_broker``."""
        broker = self.storage_broker
        return broker
21
class WebResultsRendering(unittest.TestCase, WebRenderingMixin):
    # Renders the web check-results pages against a fake client and checks
    # both the tag-stripped HTML text and the JSON output.

    def create_fake_client(self):
        """Return a FakeClient whose storage broker holds three fake servers."""
        broker = StorageFarmBroker(None, True)
        # s.get_name() (the "short description") will be "v0-00000000".
        # s.get_longname() will include the -long suffix.
        # s.get_peerid() (i.e. tubid) will be "aaa.." or "777.." or "ceir.."
        fake_servers = [
            ("v0-00000000-long", "\x00"*20, "peer-0"),
            ("v0-ffffffff-long", "\xff"*20, "peer-f"),
            ("v0-11111111-long", "\x11"*20, "peer-11"),
        ]
        for (key_s, peerid, nickname) in fake_servers:
            furl = "pb://%s@nowhere/fake" % base32.b2a(peerid)
            announcement = {
                "version": 0,
                "service-name": "storage",
                "anonymous-storage-FURL": furl,
                "permutation-seed-base32": "",
                "nickname": unicode(nickname),
                "app-versions": {}, # need #466 and v2 introducer
                "my-version": "ver",
                "oldest-supported": "oldest",
            }
            server = NativeStorageServer(key_s, announcement)
            broker.test_add_server(peerid, server) # XXX: maybe use key_s?
        client = FakeClient()
        client.storage_broker = broker
        return client

    def render_json(self, page):
        """Render `page` with output=json and return the resulting Deferred."""
        return self.render1(page, args={"output": ["json"]})

    def test_literal(self):
        # Literal files: plain HTML, HTML with a return_to link, then JSON.
        client = self.create_fake_client()
        lcr = web_check_results.LiteralCheckResultsRenderer(client)
        healthy_msg = "Literal files are always healthy"

        def _check_plain(html):
            self.failUnlessIn(healthy_msg, self.remove_tags(html))

        def _render_with_return_to(ignored):
            return self.render1(lcr, args={"return_to": ["FOOURL"]})

        def _check_return_to(html):
            self.failUnlessIn(healthy_msg, self.remove_tags(html))
            self.failUnlessIn('<a href="FOOURL">Return to file.</a>',
                              html)

        def _render_as_json(ignored):
            return self.render_json(lcr)

        def _check_json(json):
            parsed = simplejson.loads(json)
            self.failUnlessEqual(parsed["storage-index"], "")
            self.failUnlessEqual(parsed["results"]["healthy"], True)

        d = self.render1(lcr)
        for step in (_check_plain, _render_with_return_to, _check_return_to,
                     _render_as_json, _check_json):
            d.addCallback(step)
        return d

    def test_check(self):
        # Walk one CheckResults object through healthy, unhealthy and
        # unrecoverable states, re-rendering the same renderer each time.
        client = self.create_fake_client()
        serverid_0 = "\x00"*20
        serverid_f = "\xff"*20
        u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234)
        cr = check_results.CheckResults(u, u.get_storage_index())
        cr.set_healthy(True)
        cr.set_needs_rebalancing(False)
        cr.set_summary("groovy")
        data = {
            "count-shares-needed": 3,
            "count-shares-expected": 9,
            "count-shares-good": 10,
            "count-good-share-hosts": 11,
            "list-corrupt-shares": [],
            "count-wrong-shares": 0,
            "sharemap": {"shareid1": [serverid_0, serverid_f]},
            "count-recoverable-versions": 1,
            "count-unrecoverable-versions": 0,
            "servers-responding": [],
        }
        cr.set_data(data)

        w = web_check_results.CheckResultsRenderer(client, cr)
        title = "File Check Results for SI=2k6avp" # abbreviated

        def rendered_text():
            # synchronous render, with the HTML tags stripped
            return self.remove_tags(self.render2(w))

        def expect_all(text, fragments):
            for fragment in fragments:
                self.failUnlessIn(fragment, text)

        # healthy
        expect_all(rendered_text(), [
            title,
            "Healthy : groovy",
            "Share Counts: need 3-of-9, have 10",
            "Hosts with good shares: 11",
            "Corrupt shares: none",
            "Wrong Shares: 0",
            "Recoverable Versions: 1",
            "Unrecoverable Versions: 0",
        ])

        # unhealthy but recoverable
        cr.set_healthy(False)
        cr.set_recoverable(True)
        cr.set_summary("ungroovy")
        expect_all(rendered_text(), [title, "Not Healthy! : ungroovy"])

        # unrecoverable, with one corrupt share listed
        cr.set_healthy(False)
        cr.set_recoverable(False)
        cr.set_summary("rather dead")
        data["list-corrupt-shares"] = [(serverid_0, u.get_storage_index(), 2)]
        cr.set_data(data)
        expect_all(rendered_text(), [
            title,
            "Not Recoverable! : rather dead",
            "Corrupt shares: Share ID Nickname Node ID sh#2 peer-0 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        ])

        # a second render of the unchanged results gives the same answer
        expect_all(rendered_text(), [title, "Not Recoverable! : rather dead"])

        linked_html = self.render2(w, args={"return_to": ["FOOURL"]})
        self.failUnlessIn('<a href="FOOURL">Return to file/directory.</a>',
                          linked_html)

        def _check_json(jdata):
            parsed = simplejson.loads(jdata)
            self.failUnlessEqual(parsed["summary"], "rather dead")
            self.failUnlessEqual(parsed["storage-index"],
                                 "2k6avpjga3dho3zsjo6nnkt7n4")
            expected = {
                'needs-rebalancing': False,
                'count-shares-expected': 9,
                'healthy': False,
                'count-unrecoverable-versions': 0,
                'count-shares-needed': 3,
                'sharemap': {
                    "shareid1": ["aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
                                 "77777777777777777777777777777777"],
                },
                'count-recoverable-versions': 1,
                'list-corrupt-shares': [
                    ["aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
                     "2k6avpjga3dho3zsjo6nnkt7n4", 2],
                ],
                'count-good-share-hosts': 11,
                'count-wrong-shares': 0,
                'count-shares-good': 10,
                'count-corrupt-shares': 0,
                'servers-responding': [],
                'recoverable': False,
            }
            self.failUnlessEqual(parsed["results"], expected)

        def _render_html(ignored):
            return self.render1(w)

        def _check_html(html):
            expect_all(self.remove_tags(html),
                       [title, "Not Recoverable! : rather dead"])

        d = self.render_json(w)
        d.addCallback(_check_json)
        d.addCallback(_render_html)
        d.addCallback(_check_html)
        return d


    def test_check_and_repair(self):
        # Render CheckAndRepairResults for: no repair attempted, repair
        # succeeded, repair failed, repair failed and unrecoverable; then
        # check the JSON form, including a renderer built around None.
        client = self.create_fake_client()
        serverid_0 = "\x00"*20
        serverid_f = "\xff"*20
        u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234)

        def share_data(shares_good, good_share_hosts):
            # a fresh results dict per call; only the two counts vary
            return {
                "count-shares-needed": 3,
                "count-shares-expected": 10,
                "count-shares-good": shares_good,
                "count-good-share-hosts": good_share_hosts,
                "list-corrupt-shares": [],
                "count-wrong-shares": 0,
                "sharemap": {"shareid1": [serverid_0, serverid_f]},
                "count-recoverable-versions": 1,
                "count-unrecoverable-versions": 0,
                "servers-responding": [],
            }

        pre_cr = check_results.CheckResults(u, u.get_storage_index())
        pre_cr.set_healthy(False)
        pre_cr.set_recoverable(True)
        pre_cr.set_needs_rebalancing(False)
        pre_cr.set_summary("illing")
        pre_cr.set_data(share_data(6, 7))

        post_cr = check_results.CheckResults(u, u.get_storage_index())
        post_cr.set_healthy(True)
        post_cr.set_recoverable(True)
        post_cr.set_needs_rebalancing(False)
        post_cr.set_summary("groovy")
        post_cr.set_data(share_data(10, 11))

        crr = check_results.CheckAndRepairResults(u.get_storage_index())
        crr.pre_repair_results = pre_cr
        crr.post_repair_results = post_cr
        crr.repair_attempted = False

        w = web_check_results.CheckAndRepairResultsRenderer(client, crr)
        title = "File Check-And-Repair Results for SI=2k6avp"
        post_header = "Post-Repair Checker Results:"

        def expect_rendered(fragments):
            text = self.remove_tags(self.render2(w))
            for fragment in fragments:
                self.failUnlessIn(fragment, text)

        # no repair attempted
        expect_rendered([
            title,
            "Healthy : groovy",
            "No repair necessary",
            post_header,
            "Share Counts: need 3-of-10, have 10",
        ])

        # repair attempted and successful
        crr.repair_attempted = True
        crr.repair_successful = True
        expect_rendered([
            title,
            "Healthy : groovy",
            "Repair successful",
            post_header,
        ])

        # repair attempted, unsuccessful, file still unhealthy
        crr.repair_attempted = True
        crr.repair_successful = False
        post_cr.set_healthy(False)
        post_cr.set_summary("better")
        expect_rendered([
            title,
            "Not Healthy! : better",
            "Repair unsuccessful",
            post_header,
        ])

        # repair attempted, unsuccessful, file no longer recoverable
        crr.repair_attempted = True
        crr.repair_successful = False
        post_cr.set_healthy(False)
        post_cr.set_recoverable(False)
        post_cr.set_summary("worse")
        expect_rendered([
            title,
            "Not Recoverable! : worse",
            "Repair unsuccessful",
            post_header,
        ])

        def _got_json(data):
            parsed = simplejson.loads(data)
            self.failUnlessEqual(parsed["repair-attempted"], True)
            self.failUnlessEqual(parsed["storage-index"],
                                 "2k6avpjga3dho3zsjo6nnkt7n4")
            self.failUnlessEqual(parsed["pre-repair-results"]["summary"],
                                 "illing")
            self.failUnlessEqual(parsed["post-repair-results"]["summary"],
                                 "worse")

        d = self.render_json(w)
        d.addCallback(_got_json)

        # a renderer built with None in place of results
        w2 = web_check_results.CheckAndRepairResultsRenderer(client, None)

        def _render_w2_json(ignored):
            return self.render_json(w2)

        def _got_lit_results(data):
            parsed = simplejson.loads(data)
            self.failUnlessEqual(parsed["repair-attempted"], False)
            self.failUnlessEqual(parsed["storage-index"], "")

        d.addCallback(_render_w2_json)
        d.addCallback(_got_lit_results)
        return d
287
class BalancingAct(GridTestMixin, unittest.TestCase):
    # test for #1115 regarding the 'count-good-share-hosts' metric

    def add_server(self, server_number, readonly=False):
        """Create storage server `server_number` and add it to the grid."""
        assert self.g, "I tried to find a grid at self.g, but failed"
        ss = self.g.make_server(server_number, readonly)
        self.g.add_server(server_number, ss)

    def add_server_with_share(self, server_number, uri, share_number=None,
                              readonly=False):
        """Add a server and, when `share_number` is given, copy that share
        of `uri` onto it."""
        self.add_server(server_number, readonly)
        if share_number is not None:
            self.copy_share_to_server(uri, share_number, server_number)

    def copy_share_to_server(self, uri, share_number, server_number):
        """Copy share `share_number` of `uri` into server `server_number`'s
        share directory, then verify the grid reports it there.

        The source file is taken from self.shares[share_number][2], so
        self.shares must have been populated beforehand.
        """
        # Validate the fixtures before dereferencing them, so that a
        # missing grid or share list fails with the intended message.
        assert self.g, "I tried to find a grid at self.g, but failed"
        assert self.shares, "I tried to find shares at self.shares, but failed"
        ss = self.g.servers_by_number[server_number]
        # Copy share i from the directory associated with the first
        # storage server to the directory associated with this one.
        old_share_location = self.shares[share_number][2]
        # Derive the storage index from the `uri` argument (not self.uri),
        # so the copy and the find_uri_shares() check below always refer to
        # the same file. The parameter shadows the `uri` module, hence the
        # tahoe_uri alias.
        si = tahoe_uri.from_string(uri).get_storage_index()
        share_dir = os.path.join(ss.storedir, "shares",
                                 storage_index_to_dir(si))
        if not os.path.exists(share_dir):
            os.makedirs(share_dir)
        new_share_location = os.path.join(share_dir, str(share_number))
        if old_share_location != new_share_location:
            shutil.copy(old_share_location, new_share_location)
        shares = self.find_uri_shares(uri)
        # Make sure that the storage server has the share.
        self.failUnlessIn((share_number, ss.my_nodeid, new_share_location),
                          shares)

    def _pretty_shares_chart(self, uri):
        """Return a dict mapping share number to a list of server letters,
        for debugging printouts."""
        # Servers are labeled A-Z, shares are labeled 0-9
        letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        servers = self.g.servers_by_number
        assert len(servers) < len(letters), \
            "This little printing function is only meant for < 26 servers"
        # Hand out letters in server-number order so the labels are stable
        # between runs (A is always the lowest-numbered server).
        ordered_servers = [servers[num] for num in sorted(servers)]
        names = dict(zip([ss.my_nodeid for ss in ordered_servers], letters))
        shares_chart = {}
        for shnum, serverid, _ in self.find_uri_shares(uri):
            shares_chart.setdefault(shnum, []).append(names[serverid])
        return shares_chart

    def test_good_share_hosts(self):
        # Upload a 3-of-4 file to a one-server grid, add four more servers
        # that each hold a copy of share 3, and check the share/host counts
        # reported after check-and-repair.
        self.basedir = "checker/BalancingAct/1115"
        self.set_up_grid(num_servers=1)
        c0 = self.g.clients[0]
        c0.DEFAULT_ENCODING_PARAMETERS['happy'] = 1
        c0.DEFAULT_ENCODING_PARAMETERS['n'] = 4
        c0.DEFAULT_ENCODING_PARAMETERS['k'] = 3

        DATA = "data" * 100
        d = c0.upload(Data(DATA, convergence=""))
        def _stash_immutable(ur):
            self.imm = c0.create_node_from_uri(ur.uri)
            self.uri = self.imm.get_uri()
        d.addCallback(_stash_immutable)
        d.addCallback(lambda ign:
            self.find_uri_shares(self.uri))
        def _store_shares(shares):
            self.shares = shares
        d.addCallback(_store_shares)

        def add_three(_, i):
            # Add a new server with just share 3
            self.add_server_with_share(i, self.uri, 3)
            #print self._pretty_shares_chart(self.uri)
        for i in range(1,5):
            d.addCallback(add_three, i)

        def _check_and_repair(_):
            return self.imm.check_and_repair(Monitor())
        def _check_counts(crr, shares_good, good_share_hosts):
            p_crr = crr.get_post_repair_results().data
            #print self._pretty_shares_chart(self.uri)
            self.failUnlessEqual(p_crr['count-shares-good'], shares_good)
            self.failUnlessEqual(p_crr['count-good-share-hosts'],
                                 good_share_hosts)
        def _break_all_servers(_):
            for sid in self.g.get_all_serverids():
                self.g.break_server(sid)

        # Initial sharemap:
        #     0:[A] 1:[A] 2:[A] 3:[A,B,C,D,E]
        #   4 good shares, but 5 good hosts
        # After deleting all instances of share #3 and repairing:
        #     0:[A,B], 1:[A,C], 2:[A,D], 3:[E]
        #   Still 4 good shares and 5 good hosts
        d.addCallback(_check_and_repair)
        d.addCallback(_check_counts, 4, 5)
        d.addCallback(lambda _: self.delete_shares_numbered(self.uri, [3]))
        d.addCallback(_check_and_repair)
        d.addCallback(_check_counts, 4, 5)
        # With every server broken, nothing should be reported as good.
        d.addCallback(_break_all_servers)
        d.addCallback(_check_and_repair)
        d.addCallback(_check_counts, 0, 0)
        return d
393
class AddLease(GridTestMixin, unittest.TestCase):
    # test for #875, in which failures in the add-lease call cause
    # false-negatives in the checker

    def test_875(self):
        # Check an immutable and a mutable file with add_lease=True, first
        # against a working server and then with remote_add_lease replaced
        # by a function that raises; both must still look healthy.
        self.basedir = "checker/AddLease/875"
        self.set_up_grid(num_servers=1)
        client = self.g.clients[0]
        client.DEFAULT_ENCODING_PARAMETERS['happy'] = 1
        self.uris = {}
        DATA = "data" * 100

        # appended to each time the sabotaged add-lease is actually invoked
        really_did_break = []

        def _stash_immutable(ur):
            self.imm = client.create_node_from_uri(ur.uri)

        def _create_mutable(ign):
            return client.create_mutable_file(MutableData("contents"))

        def _stash_mutable(node):
            self.mut = node

        def _check_imm(ign):
            return self.imm.check(Monitor(), add_lease=True)

        def _check_mut(ign):
            return self.mut.check(Monitor(), add_lease=True)

        def _check_cr(cr, which):
            self.failUnless(cr.is_healthy(), which)

        def _break_add_lease(ign):
            def broken_add_lease(*args, **kwargs):
                really_did_break.append(1)
                raise KeyError("intentional failure, should be ignored")
            server = self.g.servers_by_number[0]
            assert server.remote_add_lease
            server.remote_add_lease = broken_add_lease

        def _confirm_breakage_was_hit(ign):
            return self.failUnless(really_did_break)

        d = client.upload(Data(DATA, convergence=""))
        d.addCallback(_stash_immutable)
        d.addCallback(_create_mutable)
        d.addCallback(_stash_mutable)

        # these two should work normally
        d.addCallback(_check_imm)
        d.addCallback(_check_cr, "immutable-normal")
        d.addCallback(_check_mut)
        d.addCallback(_check_cr, "mutable-normal")

        # now break the server's remote_add_lease call
        d.addCallback(_break_add_lease)

        # and confirm that the files still look healthy
        d.addCallback(_check_mut)
        d.addCallback(_check_cr, "mutable-broken")
        d.addCallback(_check_imm)
        d.addCallback(_check_cr, "immutable-broken")

        d.addCallback(_confirm_breakage_was_hit)
        return d
442
class CounterHolder(object):
    """Pair of counters: block fetches currently in flight, and the
    largest number seen in flight at once."""

    def __init__(self):
        # Both counters start at zero.
        self._num_active_block_fetches = self._max_active_block_fetches = 0
447
448 from allmydata.immutable.checker import ValidatedReadBucketProxy
class MockVRBP(ValidatedReadBucketProxy):
    """ValidatedReadBucketProxy that records, in a shared counter holder,
    how many get_block() calls are outstanding at once."""

    def __init__(self, sharenum, bucket, share_hash_tree, num_blocks, block_size, share_size, counterholder):
        ValidatedReadBucketProxy.__init__(
            self, sharenum, bucket, share_hash_tree,
            num_blocks, block_size, share_size)
        self.counterholder = counterholder

    def get_block(self, blocknum):
        """Fetch a block via the parent class while tracking concurrency."""
        holder = self.counterholder
        holder._num_active_block_fetches += 1
        # remember the high-water mark of simultaneous fetches
        if holder._max_active_block_fetches < holder._num_active_block_fetches:
            holder._max_active_block_fetches = holder._num_active_block_fetches

        def _fetch_finished(result):
            # runs on success and on failure; passes the result through
            self.counterholder._num_active_block_fetches -= 1
            return result

        d = ValidatedReadBucketProxy.get_block(self, blocknum)
        d.addBoth(_fetch_finished)
        return d
466
class TooParallel(GridTestMixin, unittest.TestCase):
    # bug #1395: immutable verifier was aggressively parallelized, checking
    # all blocks of all shares at the same time, blowing our memory budget
    # and crashing with MemoryErrors on >1GB files.

    def test_immutable(self):
        # Verify a small file split into many blocks and check how many
        # block fetches were ever in flight at once.
        import allmydata.immutable.checker
        checker_module = allmydata.immutable.checker
        original_vrbp = checker_module.ValidatedReadBucketProxy

        self.basedir = "checker/TooParallel/immutable"

        # If any code asks to instantiate a ValidatedReadBucketProxy,
        # we give them a MockVRBP which is configured to use our
        # CounterHolder.
        counterholder = CounterHolder()

        def make_mock_VRBP(*args, **kwargs):
            return MockVRBP(counterholder=counterholder, *args, **kwargs)

        checker_module.ValidatedReadBucketProxy = make_mock_VRBP

        def _start(ign):
            self.set_up_grid(num_servers=4)
            self.c0 = self.g.clients[0]
            encoding_parameters = {
                "k": 1,
                "happy": 4,
                "n": 4,
                "max_segment_size": 5,
            }
            self.c0.DEFAULT_ENCODING_PARAMETERS = encoding_parameters
            self.uris = {}
            DATA = "data" * 100 # 400/5 = 80 blocks
            return self.c0.upload(Data(DATA, convergence=""))

        def _do_check(ur):
            node = self.c0.create_node_from_uri(ur.uri)
            return node.check(Monitor(), verify=True)

        def _check(cr):
            # the verifier works on all 4 shares in parallel, but only
            # fetches one block from each share at a time, so we expect to
            # see 4 parallel fetches
            self.failUnlessEqual(counterholder._max_active_block_fetches, 4)

        def _clean_up(res):
            # put the real class back whether the test passed or failed
            checker_module.ValidatedReadBucketProxy = original_vrbp
            return res

        d = defer.succeed(None)
        d.addCallback(_start)
        d.addCallback(_do_check)
        d.addCallback(_check)
        d.addBoth(_clean_up)
        return d

    test_immutable.timeout = 80