]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/test/test_hung_server.py
a4f53f818908e3690eb44b68e2c053a914347156
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / test / test_hung_server.py
1 # -*- coding: utf-8 -*-
2
3 import os, shutil
4 from twisted.trial import unittest
5 from twisted.internet import defer
6 from allmydata import uri
7 from allmydata.util.consumer import download_to_data
8 from allmydata.immutable import upload
9 from allmydata.mutable.common import UnrecoverableFileError
10 from allmydata.storage.common import storage_index_to_dir
11 from allmydata.test.no_network import GridTestMixin
12 from allmydata.test.common import ShouldFailMixin, _corrupt_share_data
13 from allmydata.interfaces import NotEnoughSharesError
14
# Contents uploaded as the immutable test file: "data" repeated 10000 times
# (40000 characters).
immutable_plaintext = "data" * 10000
# Contents used to create the mutable test file: "muta" repeated 10000 times
# (40000 characters).
mutable_plaintext = "muta" * 10000
17
18 class HungServerDownloadTest(GridTestMixin, ShouldFailMixin, unittest.TestCase):
19     # Many of these tests take around 60 seconds on François's ARM buildslave:
20     # http://tahoe-lafs.org/buildbot/builders/FranXois%20lenny-armv5tel
21     # allmydata.test.test_hung_server.HungServerDownloadTest.test_2_good_8_broken_duplicate_share_fail once ERRORed after 197 seconds on Midnight Magic's NetBSD buildslave:
22     # http://tahoe-lafs.org/buildbot/builders/MM%20netbsd4%20i386%20warp
23     # MM's buildslave varies a lot in how long it takes to run tests.
24
25     timeout = 240
26
27     def _break(self, servers):
28         for (id, ss) in servers:
29             self.g.break_server(id)
30
31     def _hang(self, servers, **kwargs):
32         for (id, ss) in servers:
33             self.g.hang_server(id, **kwargs)
34
35     def _unhang(self, servers, **kwargs):
36         for (id, ss) in servers:
37             self.g.unhang_server(id, **kwargs)
38
39     def _delete_all_shares_from(self, servers):
40         serverids = [id for (id, ss) in servers]
41         for (i_shnum, i_serverid, i_sharefile) in self.shares:
42             if i_serverid in serverids:
43                 os.unlink(i_sharefile)
44
45     def _corrupt_all_shares_in(self, servers, corruptor_func):
46         serverids = [id for (id, ss) in servers]
47         for (i_shnum, i_serverid, i_sharefile) in self.shares:
48             if i_serverid in serverids:
49                 self._corrupt_share((i_shnum, i_sharefile), corruptor_func)
50
51     def _copy_all_shares_from(self, from_servers, to_server):
52         serverids = [id for (id, ss) in from_servers]
53         for (i_shnum, i_serverid, i_sharefile) in self.shares:
54             if i_serverid in serverids:
55                 self._copy_share((i_shnum, i_sharefile), to_server)
56
57     def _copy_share(self, share, to_server):
58         (sharenum, sharefile) = share
59         (id, ss) = to_server
60         shares_dir = os.path.join(ss.original.storedir, "shares")
61         si = uri.from_string(self.uri).get_storage_index()
62         si_dir = os.path.join(shares_dir, storage_index_to_dir(si))
63         if not os.path.exists(si_dir):
64             os.makedirs(si_dir)
65         new_sharefile = os.path.join(si_dir, str(sharenum))
66         shutil.copy(sharefile, new_sharefile)
67         self.shares = self.find_shares(self.uri)
68         # Make sure that the storage server has the share.
69         self.failUnless((sharenum, ss.original.my_nodeid, new_sharefile)
70                         in self.shares)
71
72     def _corrupt_share(self, share, corruptor_func):
73         (sharenum, sharefile) = share
74         data = open(sharefile, "rb").read()
75         newdata = corruptor_func(data)
76         os.unlink(sharefile)
77         wf = open(sharefile, "wb")
78         wf.write(newdata)
79         wf.close()
80
81     def _set_up(self, mutable, testdir, num_clients=1, num_servers=10):
82         self.mutable = mutable
83         if mutable:
84             self.basedir = "hung_server/mutable_" + testdir
85         else:
86             self.basedir = "hung_server/immutable_" + testdir
87
88         self.set_up_grid(num_clients=num_clients, num_servers=num_servers)
89
90         self.c0 = self.g.clients[0]
91         nm = self.c0.nodemaker
92         self.servers = [(id, ss) for (id, ss) in nm.storage_broker.get_all_servers()]
93
94         if mutable:
95             d = nm.create_mutable_file(mutable_plaintext)
96             def _uploaded_mutable(node):
97                 self.uri = node.get_uri()
98                 self.shares = self.find_shares(self.uri)
99             d.addCallback(_uploaded_mutable)
100         else:
101             data = upload.Data(immutable_plaintext, convergence="")
102             d = self.c0.upload(data)
103             def _uploaded_immutable(upload_res):
104                 self.uri = upload_res.uri
105                 self.shares = self.find_shares(self.uri)
106             d.addCallback(_uploaded_immutable)
107         return d
108
109     def _start_download(self):
110         n = self.c0.create_node_from_uri(self.uri)
111         if self.mutable:
112             d = n.download_best_version()
113             stage_4_d = None # currently we aren't doing any tests which require this for mutable files
114         else:
115             d = download_to_data(n)
116             stage_4_d = n._downloader._all_downloads.keys()[0]._stage_4_d # too ugly! FIXME
117         return (d, stage_4_d,)
118
119     def _wait_for_data(self, n):
120         if self.mutable:
121             d = n.download_best_version()
122         else:
123             d = download_to_data(n)
124         return d
125
126     def _check(self, resultingdata):
127         if self.mutable:
128             self.failUnlessEqual(resultingdata, mutable_plaintext)
129         else:
130             self.failUnlessEqual(resultingdata, immutable_plaintext)
131
132     def _download_and_check(self):
133         d, stage4d = self._start_download()
134         d.addCallback(self._check)
135         return d
136
137     def _should_fail_download(self):
138         if self.mutable:
139             return self.shouldFail(UnrecoverableFileError, self.basedir,
140                                    "no recoverable versions",
141                                    self._download_and_check)
142         else:
143             return self.shouldFail(NotEnoughSharesError, self.basedir,
144                                    "Failed to get enough shareholders",
145                                    self._download_and_check)
146
147
148     def test_10_good_sanity_check(self):
149         d = defer.succeed(None)
150         for mutable in [False, True]:
151             d.addCallback(lambda ign: self._set_up(mutable, "test_10_good_sanity_check"))
152             d.addCallback(lambda ign: self._download_and_check())
153         return d
154
155     def test_10_good_copied_share(self):
156         d = defer.succeed(None)
157         for mutable in [False, True]:
158             d.addCallback(lambda ign: self._set_up(mutable, "test_10_good_copied_share"))
159             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[2:3], self.servers[0]))
160             d.addCallback(lambda ign: self._download_and_check())
161             return d
162
163     def test_3_good_7_noshares(self):
164         d = defer.succeed(None)
165         for mutable in [False, True]:
166             d.addCallback(lambda ign: self._set_up(mutable, "test_3_good_7_noshares"))
167             d.addCallback(lambda ign: self._delete_all_shares_from(self.servers[3:]))
168             d.addCallback(lambda ign: self._download_and_check())
169         return d
170
171     def test_2_good_8_broken_fail(self):
172         d = defer.succeed(None)
173         for mutable in [False, True]:
174             d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_broken_fail"))
175             d.addCallback(lambda ign: self._break(self.servers[2:]))
176             d.addCallback(lambda ign: self._should_fail_download())
177         return d
178
179     def test_2_good_8_noshares_fail(self):
180         d = defer.succeed(None)
181         for mutable in [False, True]:
182             d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_noshares_fail"))
183             d.addCallback(lambda ign: self._delete_all_shares_from(self.servers[2:]))
184             d.addCallback(lambda ign: self._should_fail_download())
185         return d
186
187     def test_2_good_8_broken_copied_share(self):
188         d = defer.succeed(None)
189         for mutable in [False, True]:
190             d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_broken_copied_share"))
191             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[2:3], self.servers[0]))
192             d.addCallback(lambda ign: self._break(self.servers[2:]))
193             d.addCallback(lambda ign: self._download_and_check())
194         return d
195
196     def test_2_good_8_broken_duplicate_share_fail(self):
197         d = defer.succeed(None)
198         for mutable in [False, True]:
199             d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_broken_duplicate_share_fail"))
200             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[1:2], self.servers[0]))
201             d.addCallback(lambda ign: self._break(self.servers[2:]))
202             d.addCallback(lambda ign: self._should_fail_download())
203         return d
204
205     # The tests below do not currently pass for mutable files.
206
207     def test_3_good_7_hung(self):
208         d = defer.succeed(None)
209         for mutable in [False]:
210             d.addCallback(lambda ign: self._set_up(mutable, "test_3_good_7_hung"))
211             d.addCallback(lambda ign: self._hang(self.servers[3:]))
212             d.addCallback(lambda ign: self._download_and_check())
213         return d
214
215     def test_2_good_8_hung_then_1_recovers(self):
216         d = defer.succeed(None)
217         for mutable in [False]:
218             d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_hung_then_1_recovers"))
219             d.addCallback(lambda ign: self._hang(self.servers[2:3]))
220             d.addCallback(lambda ign: self._hang(self.servers[3:]))
221             d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
222             d.addCallback(lambda ign: self._download_and_check())
223         return d
224
225     def test_2_good_8_hung_then_1_recovers_with_2_shares(self):
226         d = defer.succeed(None)
227         for mutable in [False]:
228             d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_hung_then_1_recovers_with_2_shares"))
229             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2]))
230             d.addCallback(lambda ign: self._hang(self.servers[2:3]))
231             d.addCallback(lambda ign: self._hang(self.servers[3:]))
232             d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
233             d.addCallback(lambda ign: self._download_and_check())
234         return d
235
236     def test_failover_during_stage_4(self):
237         # See #287
238         d = defer.succeed(None)
239         for mutable in [False]:
240             d.addCallback(lambda ign: self._set_up(mutable, "test_failover_during_stage_4"))
241             d.addCallback(lambda ign: self._corrupt_all_shares_in(self.servers[2:3], _corrupt_share_data))
242             d.addCallback(lambda ign: self._set_up(mutable, "test_failover_during_stage_4"))
243             d.addCallback(lambda ign: self._hang(self.servers[3:]))
244             d.addCallback(lambda ign: self._start_download())
245             def _after_starting_download((doned, started4d)):
246                 started4d.addCallback(lambda ign: self._unhang(self.servers[3:4]))
247                 doned.addCallback(self._check)
248                 return doned
249             d.addCallback(_after_starting_download)
250
251         return d