2 from twisted.trial import unittest
3 from cStringIO import StringIO
8 from allmydata.util import fileutil
9 from allmydata.util.fileutil import abspath_expanduser_unicode
10 from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv
11 from allmydata.scripts import cli, backupdb
12 from .common_util import StallMixin
13 from .no_network import GridTestMixin
14 from .test_cli import CLITestMixin, parse_options
16 timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s
18 class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
20 def writeto(self, path, data):
21 full_path = os.path.join(self.basedir, "home", path)
22 fileutil.make_dirs(os.path.dirname(full_path))
23 fileutil.write(full_path, data)
25 def count_output(self, out):
26 mo = re.search(r"(\d)+ files uploaded \((\d+) reused\), "
27 "(\d)+ files skipped, "
28 "(\d+) directories created \((\d+) reused\), "
29 "(\d+) directories skipped", out)
30 return [int(s) for s in mo.groups()]
32 def count_output2(self, out):
33 mo = re.search(r"(\d)+ files checked, (\d+) directories checked", out)
34 return [int(s) for s in mo.groups()]
    def test_backup(self):
        """
        End-to-end exercise of 'tahoe backup': an initial backup uploads
        everything; a second run reuses it all; resetting the backupdb's
        last_checked timestamps forces re-checks; local modifications then
        cause a partial re-upload while old snapshots stay intact.
        """
        self.basedir = "cli/Backup/backup"
        # is the backupdb available? If so, we test that a second backup does
        # not create new directories.
        bdb = backupdb.get_backupdb(os.path.join(self.basedir, "dbtest"),
        # create a small local directory with a couple of files
        source = os.path.join(self.basedir, "home")
        fileutil.make_dirs(os.path.join(source, "empty"))
        self.writeto("parent/subdir/foo.txt", "foo")
        self.writeto("parent/subdir/bar.txt", "bar\n" * 1000)
        self.writeto("parent/blah.txt", "blah")
        # helper: run "tahoe backup [--verbose] ... tahoe:backups"
        def do_backup(verbose=False):
            cmd.append("--verbose")
            cmd.append("tahoe:backups")
            return self.do_cli(*cmd)
        d = self.do_cli("create-alias", "tahoe")
        d.addCallback(lambda res: do_backup())
        # first backup: every file uploaded, every directory created fresh
        def _check0((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            fu, fr, fs, dc, dr, ds = self.count_output(out)
            # foo.txt, bar.txt, blah.txt
            self.failUnlessReallyEqual(fu, 3)
            self.failUnlessReallyEqual(fr, 0)
            self.failUnlessReallyEqual(fs, 0)
            # empty, home, home/parent, home/parent/subdir
            self.failUnlessReallyEqual(dc, 4)
            self.failUnlessReallyEqual(dr, 0)
            self.failUnlessReallyEqual(ds, 0)
        d.addCallback(_check0)
        d.addCallback(lambda res: self.do_cli("ls", "--uri", "tahoe:backups"))
        # the backup root must contain exactly "Archives" and "Latest",
        # with Latest pointing at an immutable (CHK) dirnode
        def _check1((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            lines = out.split("\n")
            children = dict([line.split() for line in lines if line])
            latest_uri = children["Latest"]
            self.failUnless(latest_uri.startswith("URI:DIR2-CHK:"), latest_uri)
            childnames = children.keys()
            self.failUnlessReallyEqual(sorted(childnames), ["Archives", "Latest"])
        d.addCallback(_check1)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest"))
        def _check2((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.failUnlessReallyEqual(sorted(out.split()), ["empty", "parent"])
        d.addCallback(_check2)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest/empty"))
        # the empty source directory must be preserved, and stay empty
        def _check2a((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.failUnlessReallyEqual(out.strip(), "")
        d.addCallback(_check2a)
        d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
        def _check3((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.failUnlessReallyEqual(out, "foo")
        d.addCallback(_check3)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
        # exactly one timestamped snapshot so far; remember it for later
        def _check4((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.old_archives = out.split()
            self.failUnlessReallyEqual(len(self.old_archives), 1)
        d.addCallback(_check4)
        # stall so the next snapshot gets a distinct timestamp name
        d.addCallback(self.stall, 1.1)
        d.addCallback(lambda res: do_backup())
        def _check4a((rc, out, err)):
            # second backup should reuse everything, if the backupdb is
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            fu, fr, fs, dc, dr, ds = self.count_output(out)
            # foo.txt, bar.txt, blah.txt
            self.failUnlessReallyEqual(fu, 0)
            self.failUnlessReallyEqual(fr, 3)
            self.failUnlessReallyEqual(fs, 0)
            # empty, home, home/parent, home/parent/subdir
            self.failUnlessReallyEqual(dc, 0)
            self.failUnlessReallyEqual(dr, 4)
            self.failUnlessReallyEqual(ds, 0)
        d.addCallback(_check4a)

        # sneak into the backupdb, crank back the "last checked"
        # timestamp to force a check on all files
        def _reset_last_checked(res):
            dbfile = os.path.join(self.get_clientdir(),
                                  "private", "backupdb.sqlite")
            self.failUnless(os.path.exists(dbfile), dbfile)
            bdb = backupdb.get_backupdb(dbfile)
            bdb.cursor.execute("UPDATE last_upload SET last_checked=0")
            bdb.cursor.execute("UPDATE directories SET last_checked=0")
            bdb.connection.commit()
        d.addCallback(_reset_last_checked)
        d.addCallback(self.stall, 1.1)
        d.addCallback(lambda res: do_backup(verbose=True))
        def _check4b((rc, out, err)):
            # we should check all files, and re-use all of them. None of
            # the directories should have been changed, so we should
            # re-use all of them too.
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            fu, fr, fs, dc, dr, ds = self.count_output(out)
            fchecked, dchecked = self.count_output2(out)
            self.failUnlessReallyEqual(fchecked, 3)
            self.failUnlessReallyEqual(fu, 0)
            self.failUnlessReallyEqual(fr, 3)
            self.failUnlessReallyEqual(fs, 0)
            self.failUnlessReallyEqual(dchecked, 4)
            self.failUnlessReallyEqual(dc, 0)
            self.failUnlessReallyEqual(dr, 4)
            self.failUnlessReallyEqual(ds, 0)
        d.addCallback(_check4b)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
        def _check5((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.new_archives = out.split()
            self.failUnlessReallyEqual(len(self.new_archives), 3, out)
            # the original backup should still be the oldest (i.e. sorts
            # alphabetically towards the beginning)
            self.failUnlessReallyEqual(sorted(self.new_archives)[0],
                                       self.old_archives[0])
        d.addCallback(_check5)
        d.addCallback(self.stall, 1.1)
        # NOTE(review): the following mutations appear to be the body of the
        # _modify callback registered below -- its "def _modify(res):" line is
        # not visible in this excerpt; confirm against the full file.
        self.writeto("parent/subdir/foo.txt", "FOOF!")
        # and turn a file into a directory
        os.unlink(os.path.join(source, "parent/blah.txt"))
        os.mkdir(os.path.join(source, "parent/blah.txt"))
        self.writeto("parent/blah.txt/surprise file", "surprise")
        self.writeto("parent/blah.txt/surprisedir/subfile", "surprise")
        # turn a directory into a file
        os.rmdir(os.path.join(source, "empty"))
        self.writeto("empty", "imagine nothing being here")
        d.addCallback(_modify)
        def _check5a((rc, out, err)):
            # second backup should reuse bar.txt (if backupdb is available),
            # and upload the rest. None of the directories can be reused.
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            fu, fr, fs, dc, dr, ds = self.count_output(out)
            # new foo.txt, surprise file, subfile, empty
            self.failUnlessReallyEqual(fu, 4)
            self.failUnlessReallyEqual(fr, 1)
            self.failUnlessReallyEqual(fs, 0)
            # home, parent, subdir, blah.txt, surprisedir
            self.failUnlessReallyEqual(dc, 5)
            self.failUnlessReallyEqual(dr, 0)
            self.failUnlessReallyEqual(ds, 0)
        d.addCallback(_check5a)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
        def _check6((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.new_archives = out.split()
            self.failUnlessReallyEqual(len(self.new_archives), 4)
            self.failUnlessReallyEqual(sorted(self.new_archives)[0],
                                       self.old_archives[0])
        d.addCallback(_check6)
        d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
        def _check7((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.failUnlessReallyEqual(out, "FOOF!")
            # the old snapshot should not be modified
            return self.do_cli("get", "tahoe:backups/Archives/%s/parent/subdir/foo.txt" % self.old_archives[0])
        d.addCallback(_check7)
        def _check8((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.failUnlessReallyEqual(out, "foo")
        d.addCallback(_check8)

    # on our old dapper buildslave, this test takes a long time (usually
    # 130s), so we have to bump up the default 120s timeout. The create-alias
    # and initial backup alone take 60s, probably because of the handful of
    # dirnodes being created (RSA key generation). The backup between check4
    # and check4a takes 6s, as does the backup before check4b.
    test_backup.timeout = 3000
241 def _check_filtering(self, filtered, all, included, excluded):
242 filtered = set(filtered)
244 included = set(included)
245 excluded = set(excluded)
246 self.failUnlessReallyEqual(filtered, included)
247 self.failUnlessReallyEqual(all.difference(filtered), excluded)
    def test_exclude_options(self):
        """Exercise --exclude, --exclude-vcs and --exclude-from filtering
        against canned directory listings (option-parsing only; no grid)."""
        root_listdir = (u'lib.a', u'_darcs', u'subdir', u'nice_doc.lyx')
        subdir_listdir = (u'another_doc.lyx', u'run_snake_run.py', u'CVS', u'.svn', u'_darcs')
        basedir = "cli/Backup/exclude_options"
        fileutil.make_dirs(basedir)
        nodeurl_path = os.path.join(basedir, 'node.url')
        # a node.url file is enough for option parsing; no node is contacted
        fileutil.write(nodeurl_path, 'http://example.net:2357/')
        def parse(args): return parse_options(basedir, "backup", args)
        # test simple exclude
        backup_options = parse(['--exclude', '*lyx', 'from', 'to'])
        filtered = list(backup_options.filter_listdir(root_listdir))
        self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
        # multiple --exclude patterns combine
        backup_options = parse(['--exclude', '*lyx', '--exclude', 'lib.?', 'from', 'to'])
        filtered = list(backup_options.filter_listdir(root_listdir))
        self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
                              (u'nice_doc.lyx', u'lib.a'))
        # vcs metadata exclusion
        backup_options = parse(['--exclude-vcs', 'from', 'to'])
        filtered = list(backup_options.filter_listdir(subdir_listdir))
        self._check_filtering(filtered, subdir_listdir, (u'another_doc.lyx', u'run_snake_run.py',),
                              (u'CVS', u'.svn', u'_darcs'))
        # read exclude patterns from file
        exclusion_string = "_darcs\n*py\n.svn"
        excl_filepath = os.path.join(basedir, 'exclusion')
        fileutil.write(excl_filepath, exclusion_string)
        backup_options = parse(['--exclude-from', excl_filepath, 'from', 'to'])
        filtered = list(backup_options.filter_listdir(subdir_listdir))
        self._check_filtering(filtered, subdir_listdir, (u'another_doc.lyx', u'CVS'),
                              (u'.svn', u'_darcs', u'run_snake_run.py'))
        # test BackupConfigurationError
        # NOTE(review): failUnlessRaises needs the callable (presumably
        # `parse`) between the exception class and its argument list --
        # confirm against the full file.
        self.failUnlessRaises(cli.BackupConfigurationError,
                              ['--exclude-from', excl_filepath + '.no', 'from', 'to'])
        # test that an iterator works too
        backup_options = parse(['--exclude', '*lyx', 'from', 'to'])
        filtered = list(backup_options.filter_listdir(iter(root_listdir)))
        self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
    def test_exclude_options_unicode(self):
        """Same filtering checks as test_exclude_options, but with a
        non-ASCII pattern and filename; skipped when the IO encoding
        cannot represent the pattern."""
        nice_doc = u"nice_d\u00F8c.lyx"
        doc_pattern_arg = u"*d\u00F8c*".encode(get_io_encoding())
        # NOTE(review): no matching "try:" is visible for this except-clause
        # -- confirm against the full file.
        except UnicodeEncodeError:
            raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.")
        root_listdir = (u'lib.a', u'_darcs', u'subdir', nice_doc)
        basedir = "cli/Backup/exclude_options_unicode"
        fileutil.make_dirs(basedir)
        nodeurl_path = os.path.join(basedir, 'node.url')
        # a node.url file is enough for option parsing; no node is contacted
        fileutil.write(nodeurl_path, 'http://example.net:2357/')
        def parse(args): return parse_options(basedir, "backup", args)
        # test simple exclude
        backup_options = parse(['--exclude', doc_pattern_arg, 'from', 'to'])
        filtered = list(backup_options.filter_listdir(root_listdir))
        self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
        # multiple --exclude patterns combine
        backup_options = parse(['--exclude', doc_pattern_arg, '--exclude', 'lib.?', 'from', 'to'])
        filtered = list(backup_options.filter_listdir(root_listdir))
        self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
                              (nice_doc, u'lib.a'))
        # read exclude patterns from file
        exclusion_string = doc_pattern_arg + "\nlib.?"
        excl_filepath = os.path.join(basedir, 'exclusion')
        fileutil.write(excl_filepath, exclusion_string)
        backup_options = parse(['--exclude-from', excl_filepath, 'from', 'to'])
        filtered = list(backup_options.filter_listdir(root_listdir))
        self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
                              (nice_doc, u'lib.a'))
        # test that an iterator works too
        backup_options = parse(['--exclude', doc_pattern_arg, 'from', 'to'])
        filtered = list(backup_options.filter_listdir(iter(root_listdir)))
        self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
331 @patch('__builtin__.file')
332 def test_exclude_from_tilde_expansion(self, mock):
333 basedir = "cli/Backup/exclude_from_tilde_expansion"
334 fileutil.make_dirs(basedir)
335 nodeurl_path = os.path.join(basedir, 'node.url')
336 fileutil.write(nodeurl_path, 'http://example.net:2357/')
337 def parse(args): return parse_options(basedir, "backup", args)
339 # ensure that tilde expansion is performed on exclude-from argument
340 exclude_file = u'~/.tahoe/excludes.dummy'
342 mock.return_value = StringIO()
343 parse(['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to'])
344 self.failUnlessIn(((abspath_expanduser_unicode(exclude_file),), {}), mock.call_args_list)
    def test_ignore_symlinks(self):
        """A symlink in the backup tree is warned about and skipped (rc=2);
        the regular file is still uploaded."""
        if not hasattr(os, 'symlink'):
            raise unittest.SkipTest("Symlinks are not supported by Python on this platform.")
        self.basedir = os.path.dirname(self.mktemp())
        source = os.path.join(self.basedir, "home")
        self.writeto("foo.txt", "foo")
        # foo2.txt is a symlink pointing at foo.txt
        os.symlink(os.path.join(source, "foo.txt"), os.path.join(source, "foo2.txt"))
        d = self.do_cli("create-alias", "tahoe")
        d.addCallback(lambda res: self.do_cli("backup", "--verbose", source, "tahoe:test"))
        def _check((rc, out, err)):
            # rc=2: the backup ran but warned about the skipped symlink
            self.failUnlessReallyEqual(rc, 2)
            foo2 = os.path.join(source, "foo2.txt")
            self.failUnlessIn("WARNING: cannot backup symlink ", err)
            self.failUnlessIn(foo2, err)
            fu, fr, fs, dc, dr, ds = self.count_output(out)
            # foo.txt was uploaded; the symlink counts as one skipped file
            self.failUnlessReallyEqual(fu, 1)
            self.failUnlessReallyEqual(fr, 0)
            self.failUnlessReallyEqual(fs, 1)
            self.failUnlessReallyEqual(dc, 1)
            self.failUnlessReallyEqual(dr, 0)
            self.failUnlessReallyEqual(ds, 0)
        d.addCallback(_check)
    def test_ignore_unreadable_file(self):
        """An unreadable (mode 0000) file produces a permission-denied
        warning and is skipped (rc=2)."""
        self.basedir = os.path.dirname(self.mktemp())
        source = os.path.join(self.basedir, "home")
        self.writeto("foo.txt", "foo")
        # make the file unreadable so the backup must skip it
        os.chmod(os.path.join(source, "foo.txt"), 0000)
        d = self.do_cli("create-alias", "tahoe")
        d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:test"))
        def _check((rc, out, err)):
            self.failUnlessReallyEqual(rc, 2)
            self.failUnlessReallyEqual(err, "WARNING: permission denied on file %s\n" % os.path.join(source, "foo.txt"))
            fu, fr, fs, dc, dr, ds = self.count_output(out)
            # nothing uploaded or reused; the unreadable file was skipped
            self.failUnlessReallyEqual(fu, 0)
            self.failUnlessReallyEqual(fr, 0)
            self.failUnlessReallyEqual(fs, 1)
            self.failUnlessReallyEqual(dc, 1)
            self.failUnlessReallyEqual(dr, 0)
            self.failUnlessReallyEqual(ds, 0)
        d.addCallback(_check)
        # This is necessary for the temp files to be correctly removed
        # NOTE(review): this restore is registered below as _cleanup; the
        # "def _cleanup(res):" line is not visible in this excerpt --
        # confirm against the full file.
        os.chmod(os.path.join(source, "foo.txt"), 0644)
        d.addCallback(_cleanup)
        d.addErrback(_cleanup)
    def test_ignore_unreadable_directory(self):
        """An unreadable (mode 0000) directory produces a permission-denied
        warning and is skipped (rc=2)."""
        self.basedir = os.path.dirname(self.mktemp())
        source = os.path.join(self.basedir, "home")
        os.mkdir(os.path.join(source, "test"))
        # make the subdirectory unlistable so the backup must skip it
        os.chmod(os.path.join(source, "test"), 0000)
        d = self.do_cli("create-alias", "tahoe")
        d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:test"))
        def _check((rc, out, err)):
            self.failUnlessReallyEqual(rc, 2)
            self.failUnlessReallyEqual(err, "WARNING: permission denied on directory %s\n" % os.path.join(source, "test"))
            fu, fr, fs, dc, dr, ds = self.count_output(out)
            # no files involved; the unreadable directory counts as skipped
            self.failUnlessReallyEqual(fu, 0)
            self.failUnlessReallyEqual(fr, 0)
            self.failUnlessReallyEqual(fs, 0)
            self.failUnlessReallyEqual(dc, 2)
            self.failUnlessReallyEqual(dr, 0)
            self.failUnlessReallyEqual(ds, 1)
        d.addCallback(_check)
        # This is necessary for the temp files to be correctly removed
        # NOTE(review): this restore is registered below as _cleanup; the
        # "def _cleanup(res):" line is not visible in this excerpt --
        # confirm against the full file.
        os.chmod(os.path.join(source, "test"), 0655)
        d.addCallback(_cleanup)
        d.addErrback(_cleanup)
448 def test_backup_without_alias(self):
449 # 'tahoe backup' should output a sensible error message when invoked
450 # without an alias instead of a stack trace.
451 self.basedir = os.path.dirname(self.mktemp())
453 source = os.path.join(self.basedir, "file1")
454 d = self.do_cli('backup', source, source)
455 def _check((rc, out, err)):
456 self.failUnlessReallyEqual(rc, 1)
457 self.failUnlessIn("error:", err)
458 self.failUnlessReallyEqual(out, "")
459 d.addCallback(_check)
    def test_backup_with_nonexistent_alias(self):
        # 'tahoe backup' should output a sensible error message when invoked
        # with a nonexistent alias.
        self.basedir = os.path.dirname(self.mktemp())
        source = os.path.join(self.basedir, "file1")
        d = self.do_cli("backup", source, "nonexistent:" + source)
        def _check((rc, out, err)):
            # rc=1 plus an "error:" message naming the unknown alias
            self.failUnlessReallyEqual(rc, 1)
            self.failUnlessIn("error:", err)
            self.failUnlessIn("nonexistent", err)
            self.failUnlessReallyEqual(out, "")
        d.addCallback(_check)