]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/test/test_hung_server.py
Fix some more potential bugs in test code exposed by check-miscaptures.py. refs ...
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / test / test_hung_server.py
1 # -*- coding: utf-8 -*-
2
3 import os, shutil
4 from twisted.trial import unittest
5 from twisted.internet import defer
6 from allmydata import uri
7 from allmydata.util.consumer import download_to_data
8 from allmydata.immutable import upload
9 from allmydata.mutable.common import UnrecoverableFileError
10 from allmydata.mutable.publish import MutableData
11 from allmydata.storage.common import storage_index_to_dir
12 from allmydata.test.no_network import GridTestMixin
13 from allmydata.test.common import ShouldFailMixin
14 from allmydata.util.pollmixin import PollMixin
15 from allmydata.interfaces import NotEnoughSharesError
16
# Contents uploaded by _set_up() for the immutable test file; _check()
# compares immutable downloads against this value.
immutable_plaintext = "data" * 10000
# Contents uploaded by _set_up() for the mutable test file; _check()
# compares mutable downloads against this value.
mutable_plaintext = "muta" * 10000
19
20 class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, PollMixin,
21                              unittest.TestCase):
22     # Many of these tests take around 60 seconds on François's ARM buildslave:
23     # http://tahoe-lafs.org/buildbot/builders/FranXois%20lenny-armv5tel
24     # allmydata.test.test_hung_server.HungServerDownloadTest.test_2_good_8_broken_duplicate_share_fail
25     # once ERRORed after 197 seconds on Midnight Magic's NetBSD buildslave:
26     # http://tahoe-lafs.org/buildbot/builders/MM%20netbsd4%20i386%20warp
27     # MM's buildslave varies a lot in how long it takes to run tests.
28
29     timeout = 240
30
31     def _break(self, servers):
32         for (id, ss) in servers:
33             self.g.break_server(id)
34
35     def _hang(self, servers, **kwargs):
36         for (id, ss) in servers:
37             self.g.hang_server(id, **kwargs)
38
39     def _unhang(self, servers, **kwargs):
40         for (id, ss) in servers:
41             self.g.unhang_server(id, **kwargs)
42
43     def _hang_shares(self, shnums, **kwargs):
44         # hang all servers who are holding the given shares
45         hung_serverids = set()
46         for (i_shnum, i_serverid, i_sharefile) in self.shares:
47             if i_shnum in shnums:
48                 if i_serverid not in hung_serverids:
49                     self.g.hang_server(i_serverid, **kwargs)
50                     hung_serverids.add(i_serverid)
51
52     def _delete_all_shares_from(self, servers):
53         serverids = [id for (id, ss) in servers]
54         for (i_shnum, i_serverid, i_sharefile) in self.shares:
55             if i_serverid in serverids:
56                 os.unlink(i_sharefile)
57
58     def _corrupt_all_shares_in(self, servers, corruptor_func):
59         serverids = [id for (id, ss) in servers]
60         for (i_shnum, i_serverid, i_sharefile) in self.shares:
61             if i_serverid in serverids:
62                 self._corrupt_share((i_shnum, i_sharefile), corruptor_func)
63
64     def _copy_all_shares_from(self, from_servers, to_server):
65         serverids = [id for (id, ss) in from_servers]
66         for (i_shnum, i_serverid, i_sharefile) in self.shares:
67             if i_serverid in serverids:
68                 self._copy_share((i_shnum, i_sharefile), to_server)
69
70     def _copy_share(self, share, to_server):
71         (sharenum, sharefile) = share
72         (id, ss) = to_server
73         shares_dir = os.path.join(ss.original.storedir, "shares")
74         si = uri.from_string(self.uri).get_storage_index()
75         si_dir = os.path.join(shares_dir, storage_index_to_dir(si))
76         if not os.path.exists(si_dir):
77             os.makedirs(si_dir)
78         new_sharefile = os.path.join(si_dir, str(sharenum))
79         shutil.copy(sharefile, new_sharefile)
80         self.shares = self.find_uri_shares(self.uri)
81         # Make sure that the storage server has the share.
82         self.failUnless((sharenum, ss.original.my_nodeid, new_sharefile)
83                         in self.shares)
84
85     def _corrupt_share(self, share, corruptor_func):
86         (sharenum, sharefile) = share
87         data = open(sharefile, "rb").read()
88         newdata = corruptor_func(data)
89         os.unlink(sharefile)
90         wf = open(sharefile, "wb")
91         wf.write(newdata)
92         wf.close()
93
94     def _set_up(self, mutable, testdir, num_clients=1, num_servers=10):
95         self.mutable = mutable
96         if mutable:
97             self.basedir = "hung_server/mutable_" + testdir
98         else:
99             self.basedir = "hung_server/immutable_" + testdir
100
101         self.set_up_grid(num_clients=num_clients, num_servers=num_servers)
102
103         self.c0 = self.g.clients[0]
104         nm = self.c0.nodemaker
105         self.servers = sorted([(s.get_serverid(), s.get_rref())
106                                for s in nm.storage_broker.get_connected_servers()])
107         self.servers = self.servers[5:] + self.servers[:5]
108
109         if mutable:
110             uploadable = MutableData(mutable_plaintext)
111             d = nm.create_mutable_file(uploadable)
112             def _uploaded_mutable(node):
113                 self.uri = node.get_uri()
114                 self.shares = self.find_uri_shares(self.uri)
115             d.addCallback(_uploaded_mutable)
116         else:
117             data = upload.Data(immutable_plaintext, convergence="")
118             d = self.c0.upload(data)
119             def _uploaded_immutable(upload_res):
120                 self.uri = upload_res.uri
121                 self.shares = self.find_uri_shares(self.uri)
122             d.addCallback(_uploaded_immutable)
123         return d
124
125     def _start_download(self):
126         n = self.c0.create_node_from_uri(self.uri)
127         if self.mutable:
128             d = n.download_best_version()
129         else:
130             d = download_to_data(n)
131         return d
132
133     def _wait_for_data(self, n):
134         if self.mutable:
135             d = n.download_best_version()
136         else:
137             d = download_to_data(n)
138         return d
139
140     def _check(self, resultingdata):
141         if self.mutable:
142             self.failUnlessEqual(resultingdata, mutable_plaintext)
143         else:
144             self.failUnlessEqual(resultingdata, immutable_plaintext)
145
146     def _download_and_check(self):
147         d = self._start_download()
148         d.addCallback(self._check)
149         return d
150
151     def _should_fail_download(self):
152         if self.mutable:
153             return self.shouldFail(UnrecoverableFileError, self.basedir,
154                                    "no recoverable versions",
155                                    self._download_and_check)
156         else:
157             return self.shouldFail(NotEnoughSharesError, self.basedir,
158                                    "ran out of shares",
159                                    self._download_and_check)
160
161
162     def test_10_good_sanity_check(self):
163         d = defer.succeed(None)
164         for mutable in [False, True]:
165             d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_10_good_sanity_check"))
166             d.addCallback(lambda ign: self._download_and_check())
167         return d
168
169     def test_10_good_copied_share(self):
170         d = defer.succeed(None)
171         for mutable in [False, True]:
172             d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_10_good_copied_share"))
173             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[2:3], self.servers[0]))
174             d.addCallback(lambda ign: self._download_and_check())
175             return d
176
177     def test_3_good_7_noshares(self):
178         d = defer.succeed(None)
179         for mutable in [False, True]:
180             d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_3_good_7_noshares"))
181             d.addCallback(lambda ign: self._delete_all_shares_from(self.servers[3:]))
182             d.addCallback(lambda ign: self._download_and_check())
183         return d
184
185     def test_2_good_8_broken_fail(self):
186         d = defer.succeed(None)
187         for mutable in [False, True]:
188             d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_2_good_8_broken_fail"))
189             d.addCallback(lambda ign: self._break(self.servers[2:]))
190             d.addCallback(lambda ign: self._should_fail_download())
191         return d
192
193     def test_2_good_8_noshares_fail(self):
194         d = defer.succeed(None)
195         for mutable in [False, True]:
196             d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_2_good_8_noshares_fail"))
197             d.addCallback(lambda ign: self._delete_all_shares_from(self.servers[2:]))
198             d.addCallback(lambda ign: self._should_fail_download())
199         return d
200
201     def test_2_good_8_broken_copied_share(self):
202         d = defer.succeed(None)
203         for mutable in [False, True]:
204             d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_2_good_8_broken_copied_share"))
205             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[2:3], self.servers[0]))
206             d.addCallback(lambda ign: self._break(self.servers[2:]))
207             d.addCallback(lambda ign: self._download_and_check())
208         return d
209
210     def test_2_good_8_broken_duplicate_share_fail(self):
211         d = defer.succeed(None)
212         for mutable in [False, True]:
213             d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_2_good_8_broken_duplicate_share_fail"))
214             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[1:2], self.servers[0]))
215             d.addCallback(lambda ign: self._break(self.servers[2:]))
216             d.addCallback(lambda ign: self._should_fail_download())
217         return d
218
219     def test_3_good_7_hung_immutable(self):
220         d = defer.succeed(None)
221         d.addCallback(lambda ign: self._set_up(False, "test_3_good_7_hung"))
222         d.addCallback(lambda ign: self._hang(self.servers[3:]))
223         d.addCallback(lambda ign: self._download_and_check())
224         return d
225
226     def test_5_overdue_immutable(self):
227         # restrict the ShareFinder to only allow 5 outstanding requests, and
228         # arrange for the first 5 servers to hang. Then trigger the OVERDUE
229         # timers (simulating 10 seconds passed), at which point the
230         # ShareFinder should send additional queries and finish the download
231         # quickly. If we didn't have OVERDUE timers, this test would fail by
232         # timing out.
233         done = []
234         d = self._set_up(False, "test_5_overdue_immutable")
235         def _reduce_max_outstanding_requests_and_download(ign):
236             self._hang_shares(range(5))
237             n = self.c0.create_node_from_uri(self.uri)
238             n._cnode._maybe_create_download_node()
239             self._sf = n._cnode._node._sharefinder
240             self._sf.max_outstanding_requests = 5
241             self._sf.OVERDUE_TIMEOUT = 1000.0
242             d2 = download_to_data(n)
243             # start download, but don't wait for it to complete yet
244             def _done(res):
245                 done.append(res) # we will poll for this later
246             d2.addBoth(_done)
247         d.addCallback(_reduce_max_outstanding_requests_and_download)
248         from foolscap.eventual import fireEventually, flushEventualQueue
249         # wait here a while
250         d.addCallback(lambda res: fireEventually(res))
251         d.addCallback(lambda res: flushEventualQueue())
252         d.addCallback(lambda ign: self.failIf(done))
253         def _check_waiting(ign):
254             # all the share requests should now be stuck waiting
255             self.failUnlessEqual(len(self._sf.pending_requests), 5)
256             # but none should be marked as OVERDUE until the timers expire
257             self.failUnlessEqual(len(self._sf.overdue_requests), 0)
258         d.addCallback(_check_waiting)
259         def _mark_overdue(ign):
260             # declare four requests overdue, allowing new requests to take
261             # their place, and leaving one stuck. The finder will keep
262             # sending requests until there are 5 non-overdue ones
263             # outstanding, at which point we'll have 4 OVERDUE, 1
264             # stuck-but-not-overdue, and 4 live requests. All 4 live requests
265             # will retire before the download is complete and the ShareFinder
266             # is shut off. That will leave 4 OVERDUE and 1
267             # stuck-but-not-overdue, for a total of 5 requests in in
268             # _sf.pending_requests
269             for t in self._sf.overdue_timers.values()[:4]:
270                 t.reset(-1.0)
271             # the timers ought to fire before the eventual-send does
272             return fireEventually()
273         d.addCallback(_mark_overdue)
274         def _we_are_done():
275             return bool(done)
276         d.addCallback(lambda ign: self.poll(_we_are_done))
277         def _check_done(ign):
278             self.failUnlessEqual(done, [immutable_plaintext])
279             self.failUnlessEqual(len(self._sf.pending_requests), 5)
280             self.failUnlessEqual(len(self._sf.overdue_requests), 4)
281         d.addCallback(_check_done)
282         return d
283
284     def test_2_good_8_hung_then_1_recovers_immutable(self):
285         d = defer.succeed(None)
286         d.addCallback(lambda ign: self._set_up(False, "test_2_good_8_hung_then_1_recovers"))
287         d.addCallback(lambda ign: self._hang(self.servers[2:3]))
288         d.addCallback(lambda ign: self._hang(self.servers[3:]))
289         d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
290         d.addCallback(lambda ign: self._download_and_check())
291         return d
292
293     def test_2_good_8_hung_then_1_recovers_with_2_shares_immutable(self):
294         d = defer.succeed(None)
295         d.addCallback(lambda ign: self._set_up(False, "test_2_good_8_hung_then_1_recovers_with_2_shares"))
296         d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2]))
297         d.addCallback(lambda ign: self._hang(self.servers[2:3]))
298         d.addCallback(lambda ign: self._hang(self.servers[3:]))
299         d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
300         d.addCallback(lambda ign: self._download_and_check())
301         return d
302
303     # The tests below do not currently pass for mutable files. The
304     # mutable-file downloader does not yet handle hung servers, and the tests
305     # hang forever (hence the use of SkipTest rather than .todo)
306
307     def test_3_good_7_hung_mutable(self):
308         raise unittest.SkipTest("still broken")
309         d = defer.succeed(None)
310         d.addCallback(lambda ign: self._set_up(True, "test_3_good_7_hung"))
311         d.addCallback(lambda ign: self._hang(self.servers[3:]))
312         d.addCallback(lambda ign: self._download_and_check())
313         return d
314
315     def test_2_good_8_hung_then_1_recovers_mutable(self):
316         raise unittest.SkipTest("still broken")
317         d = defer.succeed(None)
318         d.addCallback(lambda ign: self._set_up(True, "test_2_good_8_hung_then_1_recovers"))
319         d.addCallback(lambda ign: self._hang(self.servers[2:3]))
320         d.addCallback(lambda ign: self._hang(self.servers[3:]))
321         d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
322         d.addCallback(lambda ign: self._download_and_check())
323         return d
324
325     def test_2_good_8_hung_then_1_recovers_with_2_shares_mutable(self):
326         raise unittest.SkipTest("still broken")
327         d = defer.succeed(None)
328         d.addCallback(lambda ign: self._set_up(True, "test_2_good_8_hung_then_1_recovers_with_2_shares"))
329         d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2]))
330         d.addCallback(lambda ign: self._hang(self.servers[2:3]))
331         d.addCallback(lambda ign: self._hang(self.servers[3:]))
332         d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
333         d.addCallback(lambda ign: self._download_and_check())
334         return d