import simplejson
import os.path, shutil
from twisted.trial import unittest
from twisted.internet import defer
from allmydata import check_results, uri
from allmydata import uri as tahoe_uri
from allmydata.web import check_results as web_check_results
from allmydata.storage_client import StorageFarmBroker, NativeStorageServer
from allmydata.storage.server import storage_index_to_dir
from allmydata.monitor import Monitor
from allmydata.test.no_network import GridTestMixin
from allmydata.immutable.upload import Data
from allmydata.test.common_web import WebRenderingMixin
from allmydata.mutable.publish import MutableData

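# Just enough of a client for the renderers under test: they only call
# get_storage_broker() on it, to look up server nicknames.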
class FakeClient:
    def get_storage_broker(self):
        return self.storage_broker

class WebResultsRendering(unittest.TestCase, WebRenderingMixin):

    def create_fake_client(self):
        sb = StorageFarmBroker(None, True)
        for (peerid, nickname) in [("\x00"*20, "peer-0"),
                                   ("\xff"*20, "peer-f"),
                                   ("\x11"*20, "peer-11")]:
            ann = { "version": 0,
                    "service-name": "storage",
                    "anonymous-storage-FURL": "pb://abcde@nowhere/fake",
                    "permutation-seed-base32": "",
                    "nickname": unicode(nickname),
                    "app-versions": {}, # need #466 and v2 introducer
                    "my-version": "ver",
                    "oldest-supported": "oldest",
                    }
            s = NativeStorageServer(peerid, ann)
            sb.test_add_server(peerid, s)
        c = FakeClient()
        c.storage_broker = sb
        return c

    def render_json(self, page):
        d = self.render1(page, args={"output": ["json"]})
        return d

    def test_literal(self):
        c = self.create_fake_client()
        lcr = web_check_results.LiteralCheckResultsRenderer(c)

        d = self.render1(lcr)
        def _check(html):
            s = self.remove_tags(html)
            self.failUnlessIn("Literal files are always healthy", s)
        d.addCallback(_check)
        d.addCallback(lambda ignored:
                      self.render1(lcr, args={"return_to": ["FOOURL"]}))
        def _check_return_to(html):
            s = self.remove_tags(html)
            self.failUnlessIn("Literal files are always healthy", s)
            self.failUnlessIn('<a href="FOOURL">Return to file.</a>',
                              html)
        d.addCallback(_check_return_to)
        d.addCallback(lambda ignored: self.render_json(lcr))
        def _check_json(json):
            j = simplejson.loads(json)
            self.failUnlessEqual(j["storage-index"], "")
            self.failUnlessEqual(j["results"]["healthy"], True)
        d.addCallback(_check_json)
        return d

    def test_check(self):
        c = self.create_fake_client()
        serverid_1 = "\x00"*20
        serverid_f = "\xff"*20
        u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234)
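        # CHKFileURI(key, UEB hash, needed_shares, total_shares, size); the
        # storage index derived from this all-zeros key is rendered in
        # base32 as "2k6avpjga3dho3zsjo6nnkt7n4" by the assertions below.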
        cr = check_results.CheckResults(u, u.get_storage_index())
        cr.set_healthy(True)
        cr.set_needs_rebalancing(False)
        cr.set_summary("groovy")
        data = { "count-shares-needed": 3,
                 "count-shares-expected": 9,
                 "count-shares-good": 10,
                 "count-good-share-hosts": 11,
                 "list-corrupt-shares": [],
                 "count-wrong-shares": 0,
                 "sharemap": {"shareid1": [serverid_1, serverid_f]},
                 "count-recoverable-versions": 1,
                 "count-unrecoverable-versions": 0,
                 "servers-responding": [],
                 }
        cr.set_data(data)

        w = web_check_results.CheckResultsRenderer(c, cr)
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated
        self.failUnlessIn("Healthy : groovy", s)
        self.failUnlessIn("Share Counts: need 3-of-9, have 10", s)
        self.failUnlessIn("Hosts with good shares: 11", s)
        self.failUnlessIn("Corrupt shares: none", s)
        self.failUnlessIn("Wrong Shares: 0", s)
        self.failUnlessIn("Recoverable Versions: 1", s)
        self.failUnlessIn("Unrecoverable Versions: 0", s)

        cr.set_healthy(False)
        cr.set_recoverable(True)
        cr.set_summary("ungroovy")
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated
        self.failUnlessIn("Not Healthy! : ungroovy", s)

        cr.set_healthy(False)
        cr.set_recoverable(False)
        cr.set_summary("rather dead")
        data["list-corrupt-shares"] = [(serverid_1, u.get_storage_index(), 2)]
        cr.set_data(data)
        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated
        self.failUnlessIn("Not Recoverable! : rather dead", s)
        self.failUnlessIn("Corrupt shares: Share ID Nickname Node ID sh#2 peer-0 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", s)

        html = self.render2(w)
        s = self.remove_tags(html)
        self.failUnlessIn("File Check Results for SI=2k6avp", s) # abbreviated
        self.failUnlessIn("Not Recoverable! : rather dead", s)

        html = self.render2(w, args={"return_to": ["FOOURL"]})
        self.failUnlessIn('<a href="FOOURL">Return to file/directory.</a>',
                          html)

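        # In the JSON rendering, server IDs appear as tahoe base32 strings:
        # "\x00"*20 becomes "a"*32 and "\xff"*20 becomes "7"*32, and the
        # storage index is reported in full rather than abbreviated.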
        d = self.render_json(w)
        def _check_json(jdata):
            j = simplejson.loads(jdata)
            self.failUnlessEqual(j["summary"], "rather dead")
            self.failUnlessEqual(j["storage-index"],
                                 "2k6avpjga3dho3zsjo6nnkt7n4")
            expected = {'needs-rebalancing': False,
                        'count-shares-expected': 9,
                        'healthy': False,
                        'count-unrecoverable-versions': 0,
                        'count-shares-needed': 3,
                        'sharemap': {"shareid1":
                                     ["aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
                                      "77777777777777777777777777777777"]},
                        'count-recoverable-versions': 1,
                        'list-corrupt-shares':
                        [["aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
                          "2k6avpjga3dho3zsjo6nnkt7n4", 2]],
                        'count-good-share-hosts': 11,
                        'count-wrong-shares': 0,
                        'count-shares-good': 10,
                        'count-corrupt-shares': 0,
                        'servers-responding': [],
                        'recoverable': False,
                        }
            self.failUnlessEqual(j["results"], expected)
        d.addCallback(_check_json)
        d.addCallback(lambda ignored: self.render1(w))
        def _check(html):
            s = self.remove_tags(html)
            self.failUnlessIn("File Check Results for SI=2k6avp", s)
            self.failUnlessIn("Not Recoverable! : rather dead", s)
        d.addCallback(_check)
        return d


    def test_check_and_repair(self):
        c = self.create_fake_client()
        serverid_1 = "\x00"*20
        serverid_f = "\xff"*20
        u = uri.CHKFileURI("\x00"*16, "\x00"*32, 3, 10, 1234)

        pre_cr = check_results.CheckResults(u, u.get_storage_index())
        pre_cr.set_healthy(False)
        pre_cr.set_recoverable(True)
        pre_cr.set_needs_rebalancing(False)
        pre_cr.set_summary("illing")
        data = { "count-shares-needed": 3,
                 "count-shares-expected": 10,
                 "count-shares-good": 6,
                 "count-good-share-hosts": 7,
                 "list-corrupt-shares": [],
                 "count-wrong-shares": 0,
                 "sharemap": {"shareid1": [serverid_1, serverid_f]},
                 "count-recoverable-versions": 1,
                 "count-unrecoverable-versions": 0,
                 "servers-responding": [],
                 }
        pre_cr.set_data(data)

        post_cr = check_results.CheckResults(u, u.get_storage_index())
        post_cr.set_healthy(True)
        post_cr.set_recoverable(True)
        post_cr.set_needs_rebalancing(False)
        post_cr.set_summary("groovy")
        data = { "count-shares-needed": 3,
                 "count-shares-expected": 10,
                 "count-shares-good": 10,
                 "count-good-share-hosts": 11,
                 "list-corrupt-shares": [],
                 "count-wrong-shares": 0,
                 "sharemap": {"shareid1": [serverid_1, serverid_f]},
                 "count-recoverable-versions": 1,
                 "count-unrecoverable-versions": 0,
                 "servers-responding": [],
                 }
        post_cr.set_data(data)

        crr = check_results.CheckAndRepairResults(u.get_storage_index())
        crr.pre_repair_results = pre_cr
        crr.post_repair_results = post_cr
        crr.repair_attempted = False

        w = web_check_results.CheckAndRepairResultsRenderer(c, crr)
        html = self.render2(w)
        s = self.remove_tags(html)

        self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s)
        self.failUnlessIn("Healthy : groovy", s)
        self.failUnlessIn("No repair necessary", s)
        self.failUnlessIn("Post-Repair Checker Results:", s)
        self.failUnlessIn("Share Counts: need 3-of-10, have 10", s)

        crr.repair_attempted = True
        crr.repair_successful = True
        html = self.render2(w)
        s = self.remove_tags(html)

        self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s)
        self.failUnlessIn("Healthy : groovy", s)
        self.failUnlessIn("Repair successful", s)
        self.failUnlessIn("Post-Repair Checker Results:", s)

        crr.repair_attempted = True
        crr.repair_successful = False
        post_cr.set_healthy(False)
        post_cr.set_summary("better")
        html = self.render2(w)
        s = self.remove_tags(html)

        self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s)
        self.failUnlessIn("Not Healthy! : better", s)
        self.failUnlessIn("Repair unsuccessful", s)
        self.failUnlessIn("Post-Repair Checker Results:", s)

        crr.repair_attempted = True
        crr.repair_successful = False
        post_cr.set_healthy(False)
        post_cr.set_recoverable(False)
        post_cr.set_summary("worse")
        html = self.render2(w)
        s = self.remove_tags(html)

        self.failUnlessIn("File Check-And-Repair Results for SI=2k6avp", s)
        self.failUnlessIn("Not Recoverable! : worse", s)
        self.failUnlessIn("Repair unsuccessful", s)
        self.failUnlessIn("Post-Repair Checker Results:", s)

        d = self.render_json(w)
        def _got_json(data):
            j = simplejson.loads(data)
            self.failUnlessEqual(j["repair-attempted"], True)
            self.failUnlessEqual(j["storage-index"],
                                 "2k6avpjga3dho3zsjo6nnkt7n4")
            self.failUnlessEqual(j["pre-repair-results"]["summary"], "illing")
            self.failUnlessEqual(j["post-repair-results"]["summary"], "worse")
        d.addCallback(_got_json)

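        # A renderer constructed with no results object corresponds to a
        # literal file, where there is nothing to check or repair: it should
        # report an empty storage index and repair-attempted=False rather
        # than blowing up on the missing results.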
        w2 = web_check_results.CheckAndRepairResultsRenderer(c, None)
        d.addCallback(lambda ignored: self.render_json(w2))
        def _got_lit_results(data):
            j = simplejson.loads(data)
            self.failUnlessEqual(j["repair-attempted"], False)
            self.failUnlessEqual(j["storage-index"], "")
        d.addCallback(_got_lit_results)
        return d

class BalancingAct(GridTestMixin, unittest.TestCase):
    # test for #1115 regarding the 'count-good-share-hosts' metric

    def add_server(self, server_number, readonly=False):
        assert self.g, "I tried to find a grid at self.g, but failed"
        ss = self.g.make_server(server_number, readonly)
        #log.msg("just created a server, number: %s => %s" % (server_number, ss,))
        self.g.add_server(server_number, ss)

    def add_server_with_share(self, server_number, uri, share_number=None,
                              readonly=False):
        self.add_server(server_number, readonly)
        if share_number is not None:
            self.copy_share_to_server(uri, share_number, server_number)

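    # Shares live at <storedir>/shares/<prefix>/<base32 SI>/<shnum>, where
    # storage_index_to_dir() supplies the <prefix>/<base32 SI> part, so the
    # helper below recreates that layout under the destination server's
    # storedir by hand.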
    def copy_share_to_server(self, uri, share_number, server_number):
        ss = self.g.servers_by_number[server_number]
        # Copy share share_number from the directory associated with the
        # first storage server to the directory associated with this one.
        assert self.g, "I tried to find a grid at self.g, but failed"
        assert self.shares, "I tried to find shares at self.shares, but failed"
        old_share_location = self.shares[share_number][2]
        new_share_location = os.path.join(ss.storedir, "shares")
        si = tahoe_uri.from_string(self.uri).get_storage_index()
        new_share_location = os.path.join(new_share_location,
                                          storage_index_to_dir(si))
        if not os.path.exists(new_share_location):
            os.makedirs(new_share_location)
        new_share_location = os.path.join(new_share_location,
                                          str(share_number))
        if old_share_location != new_share_location:
            shutil.copy(old_share_location, new_share_location)
        shares = self.find_uri_shares(uri)
        # Make sure that the storage server has the share.
        self.failUnless((share_number, ss.my_nodeid, new_share_location)
                        in shares)

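    # Render the current share distribution as {shnum: [server letters]},
    # e.g. {0: ['A'], 1: ['A'], 2: ['A'], 3: ['A', 'B']} when server A
    # holds shares 0-3 and server B also holds share 3.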
    def _pretty_shares_chart(self, uri):
        # Servers are labeled A-Z, shares are labeled 0-9
        letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        assert len(self.g.servers_by_number) < len(letters), \
            "This little printing function is only meant for < 26 servers"
        shares_chart = {}
        names = dict(zip([ss.my_nodeid for _,ss in
                          self.g.servers_by_number.iteritems()], letters))
        for shnum, serverid, _ in self.find_uri_shares(uri):
            shares_chart.setdefault(shnum, []).append(names[serverid])
        return shares_chart

    def test_1115(self):
        self.basedir = "checker/BalancingAct/1115"
        self.set_up_grid(num_servers=1)
        c0 = self.g.clients[0]
        c0.DEFAULT_ENCODING_PARAMETERS['happy'] = 1
        c0.DEFAULT_ENCODING_PARAMETERS['n'] = 4
        c0.DEFAULT_ENCODING_PARAMETERS['k'] = 3

        DATA = "data" * 100
        d = c0.upload(Data(DATA, convergence=""))
        def _stash_immutable(ur):
            self.imm = c0.create_node_from_uri(ur.uri)
            self.uri = self.imm.get_uri()
        d.addCallback(_stash_immutable)
        d.addCallback(lambda ign:
            self.find_uri_shares(self.uri))
        def _store_shares(shares):
            self.shares = shares
        d.addCallback(_store_shares)

        def add_three(_, i):
            # Add a new server with just share 3
            self.add_server_with_share(i, self.uri, 3)
            #print self._pretty_shares_chart(self.uri)
        for i in range(1, 5):
            d.addCallback(add_three, i)

        def _check_and_repair(_):
            return self.imm.check_and_repair(Monitor())
        def _check_counts(crr, shares_good, good_share_hosts):
            p_crr = crr.get_post_repair_results().data
            #print self._pretty_shares_chart(self.uri)
            self.failUnlessEqual(p_crr['count-shares-good'], shares_good)
            self.failUnlessEqual(p_crr['count-good-share-hosts'],
                                 good_share_hosts)

        """
        Initial sharemap:
            0:[A] 1:[A] 2:[A] 3:[A,B,C,D,E]
          4 good shares, but 5 good hosts
        After deleting all instances of share #3 and repairing:
            0:[A,B], 1:[A,C], 2:[A,D], 3:[E]
          Still 4 good shares and 5 good hosts
        """
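        # A reading of the metric under test, inferred from the chart above
        # rather than from the checker's internals: count-shares-good counts
        # distinct share numbers with at least one good copy, while
        # count-good-share-hosts counts distinct servers holding at least
        # one good share, roughly:
        #   len(set(s for servers in sharemap.values() for s in servers))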
        d.addCallback(_check_and_repair)
        d.addCallback(_check_counts, 4, 5)
        d.addCallback(lambda _: self.delete_shares_numbered(self.uri, [3]))
        d.addCallback(_check_and_repair)
        d.addCallback(_check_counts, 4, 5)
        d.addCallback(lambda _: [self.g.break_server(sid) for sid
                                 in self.g.get_all_serverids()])
        d.addCallback(_check_and_repair)
        d.addCallback(_check_counts, 0, 0)
        return d

class AddLease(GridTestMixin, unittest.TestCase):
    # test for #875, in which failures in the add-lease call cause
    # false negatives in the checker

    def test_875(self):
        self.basedir = "checker/AddLease/875"
        self.set_up_grid(num_servers=1)
        c0 = self.g.clients[0]
        c0.DEFAULT_ENCODING_PARAMETERS['happy'] = 1
        self.uris = {}
        DATA = "data" * 100
        d = c0.upload(Data(DATA, convergence=""))
        def _stash_immutable(ur):
            self.imm = c0.create_node_from_uri(ur.uri)
        d.addCallback(_stash_immutable)
        d.addCallback(lambda ign:
            c0.create_mutable_file(MutableData("contents")))
        def _stash_mutable(node):
            self.mut = node
        d.addCallback(_stash_mutable)

        def _check_cr(cr, which):
            self.failUnless(cr.is_healthy(), which)

        # these two should work normally
        d.addCallback(lambda ign: self.imm.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "immutable-normal")
        d.addCallback(lambda ign: self.mut.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "mutable-normal")

        really_did_break = []
        # now break the server's remote_add_lease call
        def _break_add_lease(ign):
            def broken_add_lease(*args, **kwargs):
                really_did_break.append(1)
                raise KeyError("intentional failure, should be ignored")
            assert self.g.servers_by_number[0].remote_add_lease
            self.g.servers_by_number[0].remote_add_lease = broken_add_lease
        d.addCallback(_break_add_lease)

        # and confirm that the files still look healthy
        d.addCallback(lambda ign: self.mut.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "mutable-broken")
        d.addCallback(lambda ign: self.imm.check(Monitor(), add_lease=True))
        d.addCallback(_check_cr, "immutable-broken")

        d.addCallback(lambda ign: self.failUnless(really_did_break))
        return d

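# CounterHolder keeps a high-water mark of concurrent block fetches, and
# MockVRBP wraps the real ValidatedReadBucketProxy so that each get_block()
# call bumps the active count on the way in and decrements it when the
# fetch resolves, letting TooParallel measure peak verifier parallelism.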
class CounterHolder(object):
    def __init__(self):
        self._num_active_block_fetches = 0
        self._max_active_block_fetches = 0

from allmydata.immutable.checker import ValidatedReadBucketProxy
class MockVRBP(ValidatedReadBucketProxy):
    def __init__(self, sharenum, bucket, share_hash_tree, num_blocks,
                 block_size, share_size, counterholder):
        ValidatedReadBucketProxy.__init__(self, sharenum, bucket,
                                          share_hash_tree, num_blocks,
                                          block_size, share_size)
        self.counterholder = counterholder

    def get_block(self, blocknum):
        self.counterholder._num_active_block_fetches += 1
        if (self.counterholder._num_active_block_fetches >
            self.counterholder._max_active_block_fetches):
            self.counterholder._max_active_block_fetches = \
                self.counterholder._num_active_block_fetches
        d = ValidatedReadBucketProxy.get_block(self, blocknum)
        def _mark_no_longer_active(res):
            self.counterholder._num_active_block_fetches -= 1
            return res
        d.addBoth(_mark_no_longer_active)
        return d

class TooParallel(GridTestMixin, unittest.TestCase):
    # bug #1395: the immutable verifier was aggressively parallelized,
    # checking all blocks of all shares at the same time, blowing our
    # memory budget and crashing with MemoryErrors on >1GB files.

    def test_immutable(self):
        import allmydata.immutable.checker
        origVRBP = allmydata.immutable.checker.ValidatedReadBucketProxy

        self.basedir = "checker/TooParallel/immutable"

        # If any code asks to instantiate a ValidatedReadBucketProxy,
        # we give them a MockVRBP which is configured to use our
        # CounterHolder.
        counterholder = CounterHolder()
        def make_mock_VRBP(*args, **kwargs):
            return MockVRBP(counterholder=counterholder, *args, **kwargs)
        allmydata.immutable.checker.ValidatedReadBucketProxy = make_mock_VRBP

        d = defer.succeed(None)
        def _start(ign):
            self.set_up_grid(num_servers=4)
            self.c0 = self.g.clients[0]
            self.c0.DEFAULT_ENCODING_PARAMETERS = { "k": 1,
                                                    "happy": 4,
                                                    "n": 4,
                                                    "max_segment_size": 5,
                                                    }
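            # k=1/n=4 gives each of the four servers a complete replica of
            # the file, and the tiny max_segment_size slices it into many
            # blocks, so an over-parallel verifier would push the peak
            # fetch count well above 4.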
            self.uris = {}
            DATA = "data" * 100 # 400/5 = 80 blocks
            return self.c0.upload(Data(DATA, convergence=""))
        d.addCallback(_start)
        def _do_check(ur):
            n = self.c0.create_node_from_uri(ur.uri)
            return n.check(Monitor(), verify=True)
        d.addCallback(_do_check)
        def _check(cr):
            # the verifier works on all 4 shares in parallel, but only
            # fetches one block from each share at a time, so we expect to
            # see 4 parallel fetches
            self.failUnlessEqual(counterholder._max_active_block_fetches, 4)
        d.addCallback(_check)
        def _clean_up(res):
            allmydata.immutable.checker.ValidatedReadBucketProxy = origVRBP
            return res
        d.addBoth(_clean_up)
        return d

    test_immutable.timeout = 80