1 # -*- coding: utf-8 -*-
import os
import shutil

from twisted.trial import unittest
from twisted.internet import defer

from allmydata import uri
from allmydata.util.consumer import download_to_data
from allmydata.immutable import upload
from allmydata.mutable.common import UnrecoverableFileError
from allmydata.mutable.publish import MutableData
from allmydata.storage.common import storage_index_to_dir
from allmydata.test.no_network import GridTestMixin
from allmydata.test.common import ShouldFailMixin
from allmydata.util.pollmixin import PollMixin
from allmydata.interfaces import NotEnoughSharesError
# Plaintext fixtures, 40000 characters each. The test case uploads
# immutable_plaintext as the immutable file and mutable_plaintext as the
# mutable file, then compares every download against the matching constant.
immutable_plaintext = "data" * 10000
mutable_plaintext = "muta" * 10000
class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, PollMixin,
                             unittest.TestCase):
    # Download tests against a grid whose storage servers are selectively
    # broken, hung, or stripped of their shares.
    #
    # Many of these tests take around 60 seconds on François's ARM buildslave:
    # http://tahoe-lafs.org/buildbot/builders/FranXois%20lenny-armv5tel
    # allmydata.test.test_hung_server.HungServerDownloadTest.test_2_good_8_broken_duplicate_share_fail
    # once ERRORed after 197 seconds on Midnight Magic's NetBSD buildslave:
    # http://tahoe-lafs.org/buildbot/builders/MM%20netbsd4%20i386%20warp
    # MM's buildslave varies a lot in how long it takes to run tests.

    # NOTE(review): the comment above motivates a raised per-test trial
    # timeout, but no assignment is visible in the listing. 240 seconds
    # leaves headroom over the 197 seconds observed -- confirm the value.
    timeout = 240

    def _break(self, servers):
        """Call self.g.break_server() for each (serverid, rref) pair in *servers*."""
        for (serverid, _rref) in servers:
            self.g.break_server(serverid)

    def _hang(self, servers, **kwargs):
        """Call self.g.hang_server() for each (serverid, rref) pair in *servers*.

        Extra keyword arguments are forwarded unchanged to hang_server().
        """
        for (serverid, _rref) in servers:
            self.g.hang_server(serverid, **kwargs)

    def _unhang(self, servers, **kwargs):
        """Call self.g.unhang_server() for each (serverid, rref) pair in *servers*.

        Extra keyword arguments are forwarded unchanged to unhang_server().
        """
        for (serverid, _rref) in servers:
            self.g.unhang_server(serverid, **kwargs)

    def _hang_shares(self, shnums, **kwargs):
        """Hang all servers who are holding the given shares.

        *shnums* is a container of share numbers. Every server listed in
        self.shares as holding one of them is hung exactly once, even when
        it holds several matching shares. Extra keyword arguments are
        forwarded to self.g.hang_server().
        """
        hung_serverids = set()
        for (i_shnum, i_serverid, _i_sharefile) in self.shares:
            if i_shnum in shnums and i_serverid not in hung_serverids:
                self.g.hang_server(i_serverid, **kwargs)
                hung_serverids.add(i_serverid)

    def _delete_all_shares_from(self, servers):
        """Unlink every share file in self.shares that lives on one of *servers*."""
        serverids = set(serverid for (serverid, _rref) in servers)
        for (_i_shnum, i_serverid, i_sharefile) in self.shares:
            if i_serverid in serverids:
                os.unlink(i_sharefile)

    def _corrupt_all_shares_in(self, servers, corruptor_func):
        """Apply *corruptor_func* to every share held by one of *servers*."""
        serverids = set(serverid for (serverid, _rref) in servers)
        for (i_shnum, i_serverid, i_sharefile) in self.shares:
            if i_serverid in serverids:
                self._corrupt_share((i_shnum, i_sharefile), corruptor_func)

    def _copy_all_shares_from(self, from_servers, to_server):
        """Copy every share held by *from_servers* onto *to_server*.

        *from_servers* is a sequence of (serverid, rref) pairs; *to_server*
        is a single such pair.
        """
        serverids = set(serverid for (serverid, _rref) in from_servers)
        # Snapshot the list first: _copy_share() rebinds self.shares.
        for (i_shnum, i_serverid, i_sharefile) in list(self.shares):
            if i_serverid in serverids:
                self._copy_share((i_shnum, i_sharefile), to_server)

    def _copy_share(self, share, to_server):
        """Copy one share file into *to_server*'s storage directory.

        *share* is a (sharenum, sharefile) pair and *to_server* a
        (serverid, rref) pair. The destination directory for the current
        storage index is created if needed, self.shares is refreshed, and
        the new copy is asserted to be present in it.
        """
        (sharenum, sharefile) = share
        (_serverid, ss) = to_server
        shares_dir = os.path.join(ss.original.storedir, "shares")
        si = uri.from_string(self.uri).get_storage_index()
        si_dir = os.path.join(shares_dir, storage_index_to_dir(si))
        if not os.path.exists(si_dir):
            os.makedirs(si_dir)
        new_sharefile = os.path.join(si_dir, str(sharenum))
        shutil.copy(sharefile, new_sharefile)
        self.shares = self.find_uri_shares(self.uri)
        # Make sure that the storage server has the share.
        self.failUnless((sharenum, ss.original.my_nodeid, new_sharefile)
                        in self.shares)

    def _corrupt_share(self, share, corruptor_func):
        """Rewrite one share file with ``corruptor_func(original_bytes)``.

        *share* is a (sharenum, sharefile) pair; only the path is used.
        """
        (_sharenum, sharefile) = share
        with open(sharefile, "rb") as rf:
            data = rf.read()
        newdata = corruptor_func(data)
        with open(sharefile, "wb") as wf:
            wf.write(newdata)

    def _set_up(self, mutable, testdir, num_clients=1, num_servers=10):
        """Build a grid and upload one test file.

        *mutable* selects whether a mutable file (mutable_plaintext) or an
        immutable file (immutable_plaintext) is uploaded; *testdir* is
        appended to the matching "hung_server/..." prefix to form
        self.basedir. Sets self.mutable, self.basedir, self.c0 and
        self.servers, and after the upload self.uri and self.shares.

        Returns a Deferred that fires after self.uri and self.shares are set.
        """
        self.mutable = mutable
        if mutable:
            self.basedir = "hung_server/mutable_" + testdir
        else:
            self.basedir = "hung_server/immutable_" + testdir

        self.set_up_grid(num_clients=num_clients, num_servers=num_servers)

        self.c0 = self.g.clients[0]
        nm = self.c0.nodemaker
        self.servers = sorted([(s.get_serverid(), s.get_rref())
                               for s in nm.storage_broker.get_connected_servers()])
        # Rotate the sorted list by five positions; the tests index
        # self.servers by position.
        self.servers = self.servers[5:] + self.servers[:5]

        if mutable:
            uploadable = MutableData(mutable_plaintext)
            d = nm.create_mutable_file(uploadable)
            def _uploaded_mutable(node):
                self.uri = node.get_uri()
                self.shares = self.find_uri_shares(self.uri)
            d.addCallback(_uploaded_mutable)
        else:
            data = upload.Data(immutable_plaintext, convergence="")
            d = self.c0.upload(data)
            def _uploaded_immutable(upload_res):
                self.uri = upload_res.uri
                self.shares = self.find_uri_shares(self.uri)
            d.addCallback(_uploaded_immutable)
        return d

    def _start_download(self):
        """Create a node for self.uri and start downloading it.

        Returns the download Deferred: download_best_version() for mutable
        files, download_to_data() for immutable ones.
        """
        n = self.c0.create_node_from_uri(self.uri)
        if self.mutable:
            d = n.download_best_version()
        else:
            d = download_to_data(n)
        return d

    def _wait_for_data(self, n):
        """Start a download on the existing node *n* and return its Deferred."""
        if self.mutable:
            d = n.download_best_version()
        else:
            d = download_to_data(n)
        return d

    def _check(self, resultingdata):
        """Assert that *resultingdata* equals the plaintext that was uploaded."""
        if self.mutable:
            self.failUnlessEqual(resultingdata, mutable_plaintext)
        else:
            self.failUnlessEqual(resultingdata, immutable_plaintext)

    def _download_and_check(self):
        """Download the file and verify its contents; returns a Deferred."""
        d = self._start_download()
        d.addCallback(self._check)
        return d

    def _should_fail_download(self):
        """Assert that the download fails with the expected exception.

        Mutable files must fail with UnrecoverableFileError, immutable
        files with NotEnoughSharesError. Returns shouldFail()'s result.
        """
        if self.mutable:
            return self.shouldFail(UnrecoverableFileError, self.basedir,
                                   "no recoverable versions",
                                   self._download_and_check)
        # NOTE(review): the expected-substring argument of this call is
        # missing from the listing; "ran out of shares" is reconstructed --
        # confirm it against the downloader's NotEnoughSharesError message.
        return self.shouldFail(NotEnoughSharesError, self.basedir,
                               "ran out of shares",
                               self._download_and_check)

    def test_10_good_sanity_check(self):
        # Untouched grid: the download must succeed for both file types.
        d = defer.succeed(None)
        for mutable in [False, True]:
            d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_10_good_sanity_check"))
            d.addCallback(lambda ign: self._download_and_check())
        return d

    def test_10_good_copied_share(self):
        # Copy the shares of servers[2] onto servers[0]; the extra copy
        # must not prevent a successful download.
        d = defer.succeed(None)
        for mutable in [False, True]:
            d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_10_good_copied_share"))
            d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[2:3], self.servers[0]))
            d.addCallback(lambda ign: self._download_and_check())
        return d

    def test_3_good_7_noshares(self):
        # Delete every share held by servers[3:]; the download must still
        # succeed from the first three servers.
        d = defer.succeed(None)
        for mutable in [False, True]:
            d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_3_good_7_noshares"))
            d.addCallback(lambda ign: self._delete_all_shares_from(self.servers[3:]))
            d.addCallback(lambda ign: self._download_and_check())
        return d

    def test_2_good_8_broken_fail(self):
        # Break servers[2:]; with only two working servers the download
        # must fail.
        d = defer.succeed(None)
        for mutable in [False, True]:
            d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_2_good_8_broken_fail"))
            d.addCallback(lambda ign: self._break(self.servers[2:]))
            d.addCallback(lambda ign: self._should_fail_download())
        return d

    def test_2_good_8_noshares_fail(self):
        # Delete every share held by servers[2:]; the download must fail.
        d = defer.succeed(None)
        for mutable in [False, True]:
            d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_2_good_8_noshares_fail"))
            d.addCallback(lambda ign: self._delete_all_shares_from(self.servers[2:]))
            d.addCallback(lambda ign: self._should_fail_download())
        return d

    def test_2_good_8_broken_copied_share(self):
        # Copy the shares of servers[2] onto servers[0], then break
        # servers[2:]; the download must succeed using the copy.
        d = defer.succeed(None)
        for mutable in [False, True]:
            d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_2_good_8_broken_copied_share"))
            d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[2:3], self.servers[0]))
            d.addCallback(lambda ign: self._break(self.servers[2:]))
            d.addCallback(lambda ign: self._download_and_check())
        return d

    def test_2_good_8_broken_duplicate_share_fail(self):
        # Copy the shares of servers[1] onto servers[0], then break
        # servers[2:]. The copy only duplicates a share that servers[1]
        # still serves, so the download must fail.
        d = defer.succeed(None)
        for mutable in [False, True]:
            d.addCallback(lambda ign, mutable=mutable: self._set_up(mutable, "test_2_good_8_broken_duplicate_share_fail"))
            d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[1:2], self.servers[0]))
            d.addCallback(lambda ign: self._break(self.servers[2:]))
            d.addCallback(lambda ign: self._should_fail_download())
        return d

    def test_3_good_7_hung_immutable(self):
        # Hang servers[3:]; the immutable download must still complete.
        d = defer.succeed(None)
        d.addCallback(lambda ign: self._set_up(False, "test_3_good_7_hung"))
        d.addCallback(lambda ign: self._hang(self.servers[3:]))
        d.addCallback(lambda ign: self._download_and_check())
        return d

    def test_5_overdue_immutable(self):
        # restrict the ShareFinder to only allow 5 outstanding requests, and
        # arrange for the first 5 servers to hang. Then trigger the OVERDUE
        # timers (simulating 10 seconds passed), at which point the
        # ShareFinder should send additional queries and finish the download
        # quickly. If we didn't have OVERDUE timers, this test would fail by
        # timing out.
        done = []  # receives the download result once it finishes
        d = self._set_up(False, "test_5_overdue_immutable")
        def _reduce_max_outstanding_requests_and_download(ign):
            self._hang_shares(range(5))
            n = self.c0.create_node_from_uri(self.uri)
            n._cnode._maybe_create_download_node()
            self._sf = n._cnode._node._sharefinder
            self._sf.max_outstanding_requests = 5
            self._sf.OVERDUE_TIMEOUT = 1000.0
            d2 = download_to_data(n)
            # start download, but don't wait for it to complete yet
            def _done(res):
                done.append(res) # we will poll for this later
            # addBoth: a failure must also land in 'done' so the poll ends
            d2.addBoth(_done)
        d.addCallback(_reduce_max_outstanding_requests_and_download)
        from foolscap.eventual import fireEventually, flushEventualQueue
        # wait here a while
        d.addCallback(lambda res: fireEventually(res))
        d.addCallback(lambda res: flushEventualQueue())
        d.addCallback(lambda ign: self.failIf(done))
        def _check_waiting(ign):
            # all the share requests should now be stuck waiting
            self.failUnlessEqual(len(self._sf.pending_requests), 5)
            # but none should be marked as OVERDUE until the timers expire
            self.failUnlessEqual(len(self._sf.overdue_requests), 0)
        d.addCallback(_check_waiting)
        def _mark_overdue(ign):
            # declare four requests overdue, allowing new requests to take
            # their place, and leaving one stuck. The finder will keep
            # sending requests until there are 5 non-overdue ones
            # outstanding, at which point we'll have 4 OVERDUE, 1
            # stuck-but-not-overdue, and 4 live requests. All 4 live requests
            # will retire before the download is complete and the ShareFinder
            # is shut off. That will leave 4 OVERDUE and 1
            # stuck-but-not-overdue, for a total of 5 requests in
            # _sf.pending_requests
            #
            # NOTE(review): the loop body is missing from the listing;
            # reset(-1.0) assumes the timers are Twisted delayed calls and
            # makes them fire at once -- confirm.
            for t in list(self._sf.overdue_timers.values())[:4]:
                t.reset(-1.0)
            # the timers ought to fire before the eventual-send does
            return fireEventually()
        d.addCallback(_mark_overdue)
        def _we_are_done():
            return bool(done)
        d.addCallback(lambda ign: self.poll(_we_are_done))
        def _check_done(ign):
            self.failUnlessEqual(done, [immutable_plaintext])
            self.failUnlessEqual(len(self._sf.pending_requests), 5)
            self.failUnlessEqual(len(self._sf.overdue_requests), 4)
        d.addCallback(_check_done)
        return d

    def test_2_good_8_hung_then_1_recovers_immutable(self):
        # Hang servers[2:], then unhang servers[2] before downloading.
        d = defer.succeed(None)
        d.addCallback(lambda ign: self._set_up(False, "test_2_good_8_hung_then_1_recovers"))
        d.addCallback(lambda ign: self._hang(self.servers[2:3]))
        d.addCallback(lambda ign: self._hang(self.servers[3:]))
        d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
        d.addCallback(lambda ign: self._download_and_check())
        return d

    def test_2_good_8_hung_then_1_recovers_with_2_shares_immutable(self):
        # As above, but servers[2] first receives a copy of the shares
        # held by servers[0].
        d = defer.succeed(None)
        d.addCallback(lambda ign: self._set_up(False, "test_2_good_8_hung_then_1_recovers_with_2_shares"))
        d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2]))
        d.addCallback(lambda ign: self._hang(self.servers[2:3]))
        d.addCallback(lambda ign: self._hang(self.servers[3:]))
        d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
        d.addCallback(lambda ign: self._download_and_check())
        return d

    # The tests below do not currently pass for mutable files. The
    # mutable-file downloader does not yet handle hung servers, and the tests
    # hang forever (hence the use of SkipTest rather than .todo)

    def test_3_good_7_hung_mutable(self):
        raise unittest.SkipTest("still broken")
        d = defer.succeed(None)
        d.addCallback(lambda ign: self._set_up(True, "test_3_good_7_hung"))
        d.addCallback(lambda ign: self._hang(self.servers[3:]))
        d.addCallback(lambda ign: self._download_and_check())
        return d

    def test_2_good_8_hung_then_1_recovers_mutable(self):
        raise unittest.SkipTest("still broken")
        d = defer.succeed(None)
        d.addCallback(lambda ign: self._set_up(True, "test_2_good_8_hung_then_1_recovers"))
        d.addCallback(lambda ign: self._hang(self.servers[2:3]))
        d.addCallback(lambda ign: self._hang(self.servers[3:]))
        d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
        d.addCallback(lambda ign: self._download_and_check())
        return d

    def test_2_good_8_hung_then_1_recovers_with_2_shares_mutable(self):
        raise unittest.SkipTest("still broken")
        d = defer.succeed(None)
        d.addCallback(lambda ign: self._set_up(True, "test_2_good_8_hung_then_1_recovers_with_2_shares"))
        d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2]))
        d.addCallback(lambda ign: self._hang(self.servers[2:3]))
        d.addCallback(lambda ign: self._hang(self.servers[3:]))
        d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
        d.addCallback(lambda ign: self._download_and_check())
        return d