# Source: git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/test/test_cli_backup.py
# Blob: 6d1dbe92a78cffddcf269a5c54a8d65f11b785fe
# Path: [tahoe-lafs/tahoe-lafs.git] / src / allmydata / test / test_cli_backup.py
1 import os.path
2 from twisted.trial import unittest
3 from cStringIO import StringIO
4 import re
5
6 from mock import patch
7
8 from allmydata.util import fileutil
9 from allmydata.util.fileutil import abspath_expanduser_unicode
10 from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv
11 from allmydata.scripts import cli, backupdb
12 from .common_util import StallMixin
13 from .no_network import GridTestMixin
14 from .test_cli import CLITestMixin, parse_options
15
# Module-wide test timeout, in seconds. twisted.trial looks up a ``timeout``
# attribute on the test method and then on its enclosing scopes, so this value
# applies to every test in this file that does not set its own (test_backup
# overrides it with a larger value).
timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s
17
18 class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
19
20     def writeto(self, path, data):
21         full_path = os.path.join(self.basedir, "home", path)
22         fileutil.make_dirs(os.path.dirname(full_path))
23         fileutil.write(full_path, data)
24
25     def count_output(self, out):
26         mo = re.search(r"(\d)+ files uploaded \((\d+) reused\), "
27                         "(\d)+ files skipped, "
28                         "(\d+) directories created \((\d+) reused\), "
29                         "(\d+) directories skipped", out)
30         return [int(s) for s in mo.groups()]
31
32     def count_output2(self, out):
33         mo = re.search(r"(\d)+ files checked, (\d+) directories checked", out)
34         return [int(s) for s in mo.groups()]
35
36     def test_backup(self):
37         self.basedir = "cli/Backup/backup"
38         self.set_up_grid()
39
40         # is the backupdb available? If so, we test that a second backup does
41         # not create new directories.
42         hush = StringIO()
43         bdb = backupdb.get_backupdb(os.path.join(self.basedir, "dbtest"),
44                                     hush)
45         self.failUnless(bdb)
46
47         # create a small local directory with a couple of files
48         source = os.path.join(self.basedir, "home")
49         fileutil.make_dirs(os.path.join(source, "empty"))
50         self.writeto("parent/subdir/foo.txt", "foo")
51         self.writeto("parent/subdir/bar.txt", "bar\n" * 1000)
52         self.writeto("parent/blah.txt", "blah")
53
54         def do_backup(verbose=False):
55             cmd = ["backup"]
56             if verbose:
57                 cmd.append("--verbose")
58             cmd.append(source)
59             cmd.append("tahoe:backups")
60             return self.do_cli(*cmd)
61
62         d = self.do_cli("create-alias", "tahoe")
63
64         d.addCallback(lambda res: do_backup())
65         def _check0((rc, out, err)):
66             self.failUnlessReallyEqual(err, "")
67             self.failUnlessReallyEqual(rc, 0)
68             fu, fr, fs, dc, dr, ds = self.count_output(out)
69             # foo.txt, bar.txt, blah.txt
70             self.failUnlessReallyEqual(fu, 3)
71             self.failUnlessReallyEqual(fr, 0)
72             self.failUnlessReallyEqual(fs, 0)
73             # empty, home, home/parent, home/parent/subdir
74             self.failUnlessReallyEqual(dc, 4)
75             self.failUnlessReallyEqual(dr, 0)
76             self.failUnlessReallyEqual(ds, 0)
77         d.addCallback(_check0)
78
79         d.addCallback(lambda res: self.do_cli("ls", "--uri", "tahoe:backups"))
80         def _check1((rc, out, err)):
81             self.failUnlessReallyEqual(err, "")
82             self.failUnlessReallyEqual(rc, 0)
83             lines = out.split("\n")
84             children = dict([line.split() for line in lines if line])
85             latest_uri = children["Latest"]
86             self.failUnless(latest_uri.startswith("URI:DIR2-CHK:"), latest_uri)
87             childnames = children.keys()
88             self.failUnlessReallyEqual(sorted(childnames), ["Archives", "Latest"])
89         d.addCallback(_check1)
90         d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest"))
91         def _check2((rc, out, err)):
92             self.failUnlessReallyEqual(err, "")
93             self.failUnlessReallyEqual(rc, 0)
94             self.failUnlessReallyEqual(sorted(out.split()), ["empty", "parent"])
95         d.addCallback(_check2)
96         d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest/empty"))
97         def _check2a((rc, out, err)):
98             self.failUnlessReallyEqual(err, "")
99             self.failUnlessReallyEqual(rc, 0)
100             self.failUnlessReallyEqual(out.strip(), "")
101         d.addCallback(_check2a)
102         d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
103         def _check3((rc, out, err)):
104             self.failUnlessReallyEqual(err, "")
105             self.failUnlessReallyEqual(rc, 0)
106             self.failUnlessReallyEqual(out, "foo")
107         d.addCallback(_check3)
108         d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
109         def _check4((rc, out, err)):
110             self.failUnlessReallyEqual(err, "")
111             self.failUnlessReallyEqual(rc, 0)
112             self.old_archives = out.split()
113             self.failUnlessReallyEqual(len(self.old_archives), 1)
114         d.addCallback(_check4)
115
116
117         d.addCallback(self.stall, 1.1)
118         d.addCallback(lambda res: do_backup())
119         def _check4a((rc, out, err)):
120             # second backup should reuse everything, if the backupdb is
121             # available
122             self.failUnlessReallyEqual(err, "")
123             self.failUnlessReallyEqual(rc, 0)
124             fu, fr, fs, dc, dr, ds = self.count_output(out)
125             # foo.txt, bar.txt, blah.txt
126             self.failUnlessReallyEqual(fu, 0)
127             self.failUnlessReallyEqual(fr, 3)
128             self.failUnlessReallyEqual(fs, 0)
129             # empty, home, home/parent, home/parent/subdir
130             self.failUnlessReallyEqual(dc, 0)
131             self.failUnlessReallyEqual(dr, 4)
132             self.failUnlessReallyEqual(ds, 0)
133         d.addCallback(_check4a)
134
135         # sneak into the backupdb, crank back the "last checked"
136         # timestamp to force a check on all files
137         def _reset_last_checked(res):
138             dbfile = os.path.join(self.get_clientdir(),
139                                   "private", "backupdb.sqlite")
140             self.failUnless(os.path.exists(dbfile), dbfile)
141             bdb = backupdb.get_backupdb(dbfile)
142             bdb.cursor.execute("UPDATE last_upload SET last_checked=0")
143             bdb.cursor.execute("UPDATE directories SET last_checked=0")
144             bdb.connection.commit()
145
146         d.addCallback(_reset_last_checked)
147
148         d.addCallback(self.stall, 1.1)
149         d.addCallback(lambda res: do_backup(verbose=True))
150         def _check4b((rc, out, err)):
151             # we should check all files, and re-use all of them. None of
152             # the directories should have been changed, so we should
153             # re-use all of them too.
154             self.failUnlessReallyEqual(err, "")
155             self.failUnlessReallyEqual(rc, 0)
156             fu, fr, fs, dc, dr, ds = self.count_output(out)
157             fchecked, dchecked = self.count_output2(out)
158             self.failUnlessReallyEqual(fchecked, 3)
159             self.failUnlessReallyEqual(fu, 0)
160             self.failUnlessReallyEqual(fr, 3)
161             self.failUnlessReallyEqual(fs, 0)
162             self.failUnlessReallyEqual(dchecked, 4)
163             self.failUnlessReallyEqual(dc, 0)
164             self.failUnlessReallyEqual(dr, 4)
165             self.failUnlessReallyEqual(ds, 0)
166         d.addCallback(_check4b)
167
168         d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
169         def _check5((rc, out, err)):
170             self.failUnlessReallyEqual(err, "")
171             self.failUnlessReallyEqual(rc, 0)
172             self.new_archives = out.split()
173             self.failUnlessReallyEqual(len(self.new_archives), 3, out)
174             # the original backup should still be the oldest (i.e. sorts
175             # alphabetically towards the beginning)
176             self.failUnlessReallyEqual(sorted(self.new_archives)[0],
177                                  self.old_archives[0])
178         d.addCallback(_check5)
179
180         d.addCallback(self.stall, 1.1)
181         def _modify(res):
182             self.writeto("parent/subdir/foo.txt", "FOOF!")
183             # and turn a file into a directory
184             os.unlink(os.path.join(source, "parent/blah.txt"))
185             os.mkdir(os.path.join(source, "parent/blah.txt"))
186             self.writeto("parent/blah.txt/surprise file", "surprise")
187             self.writeto("parent/blah.txt/surprisedir/subfile", "surprise")
188             # turn a directory into a file
189             os.rmdir(os.path.join(source, "empty"))
190             self.writeto("empty", "imagine nothing being here")
191             return do_backup()
192         d.addCallback(_modify)
193         def _check5a((rc, out, err)):
194             # second backup should reuse bar.txt (if backupdb is available),
195             # and upload the rest. None of the directories can be reused.
196             self.failUnlessReallyEqual(err, "")
197             self.failUnlessReallyEqual(rc, 0)
198             fu, fr, fs, dc, dr, ds = self.count_output(out)
199             # new foo.txt, surprise file, subfile, empty
200             self.failUnlessReallyEqual(fu, 4)
201             # old bar.txt
202             self.failUnlessReallyEqual(fr, 1)
203             self.failUnlessReallyEqual(fs, 0)
204             # home, parent, subdir, blah.txt, surprisedir
205             self.failUnlessReallyEqual(dc, 5)
206             self.failUnlessReallyEqual(dr, 0)
207             self.failUnlessReallyEqual(ds, 0)
208         d.addCallback(_check5a)
209         d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
210         def _check6((rc, out, err)):
211             self.failUnlessReallyEqual(err, "")
212             self.failUnlessReallyEqual(rc, 0)
213             self.new_archives = out.split()
214             self.failUnlessReallyEqual(len(self.new_archives), 4)
215             self.failUnlessReallyEqual(sorted(self.new_archives)[0],
216                                  self.old_archives[0])
217         d.addCallback(_check6)
218         d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
219         def _check7((rc, out, err)):
220             self.failUnlessReallyEqual(err, "")
221             self.failUnlessReallyEqual(rc, 0)
222             self.failUnlessReallyEqual(out, "FOOF!")
223             # the old snapshot should not be modified
224             return self.do_cli("get", "tahoe:backups/Archives/%s/parent/subdir/foo.txt" % self.old_archives[0])
225         d.addCallback(_check7)
226         def _check8((rc, out, err)):
227             self.failUnlessReallyEqual(err, "")
228             self.failUnlessReallyEqual(rc, 0)
229             self.failUnlessReallyEqual(out, "foo")
230         d.addCallback(_check8)
231
232         return d
233
234     # on our old dapper buildslave, this test takes a long time (usually
235     # 130s), so we have to bump up the default 120s timeout. The create-alias
236     # and initial backup alone take 60s, probably because of the handful of
237     # dirnodes being created (RSA key generation). The backup between check4
238     # and check4a takes 6s, as does the backup before check4b.
239     test_backup.timeout = 3000
240
241     def _check_filtering(self, filtered, all, included, excluded):
242         filtered = set(filtered)
243         all = set(all)
244         included = set(included)
245         excluded = set(excluded)
246         self.failUnlessReallyEqual(filtered, included)
247         self.failUnlessReallyEqual(all.difference(filtered), excluded)
248
249     def test_exclude_options(self):
250         root_listdir = (u'lib.a', u'_darcs', u'subdir', u'nice_doc.lyx')
251         subdir_listdir = (u'another_doc.lyx', u'run_snake_run.py', u'CVS', u'.svn', u'_darcs')
252         basedir = "cli/Backup/exclude_options"
253         fileutil.make_dirs(basedir)
254         nodeurl_path = os.path.join(basedir, 'node.url')
255         fileutil.write(nodeurl_path, 'http://example.net:2357/')
256         def parse(args): return parse_options(basedir, "backup", args)
257
258         # test simple exclude
259         backup_options = parse(['--exclude', '*lyx', 'from', 'to'])
260         filtered = list(backup_options.filter_listdir(root_listdir))
261         self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
262                               (u'nice_doc.lyx',))
263         # multiple exclude
264         backup_options = parse(['--exclude', '*lyx', '--exclude', 'lib.?', 'from', 'to'])
265         filtered = list(backup_options.filter_listdir(root_listdir))
266         self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
267                               (u'nice_doc.lyx', u'lib.a'))
268         # vcs metadata exclusion
269         backup_options = parse(['--exclude-vcs', 'from', 'to'])
270         filtered = list(backup_options.filter_listdir(subdir_listdir))
271         self._check_filtering(filtered, subdir_listdir, (u'another_doc.lyx', u'run_snake_run.py',),
272                               (u'CVS', u'.svn', u'_darcs'))
273         # read exclude patterns from file
274         exclusion_string = "_darcs\n*py\n.svn"
275         excl_filepath = os.path.join(basedir, 'exclusion')
276         fileutil.write(excl_filepath, exclusion_string)
277         backup_options = parse(['--exclude-from', excl_filepath, 'from', 'to'])
278         filtered = list(backup_options.filter_listdir(subdir_listdir))
279         self._check_filtering(filtered, subdir_listdir, (u'another_doc.lyx', u'CVS'),
280                               (u'.svn', u'_darcs', u'run_snake_run.py'))
281         # test BackupConfigurationError
282         self.failUnlessRaises(cli.BackupConfigurationError,
283                               parse,
284                               ['--exclude-from', excl_filepath + '.no', 'from', 'to'])
285
286         # test that an iterator works too
287         backup_options = parse(['--exclude', '*lyx', 'from', 'to'])
288         filtered = list(backup_options.filter_listdir(iter(root_listdir)))
289         self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
290                               (u'nice_doc.lyx',))
291
292     def test_exclude_options_unicode(self):
293         nice_doc = u"nice_d\u00F8c.lyx"
294         try:
295             doc_pattern_arg = u"*d\u00F8c*".encode(get_io_encoding())
296         except UnicodeEncodeError:
297             raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.")
298
299         root_listdir = (u'lib.a', u'_darcs', u'subdir', nice_doc)
300         basedir = "cli/Backup/exclude_options_unicode"
301         fileutil.make_dirs(basedir)
302         nodeurl_path = os.path.join(basedir, 'node.url')
303         fileutil.write(nodeurl_path, 'http://example.net:2357/')
304         def parse(args): return parse_options(basedir, "backup", args)
305
306         # test simple exclude
307         backup_options = parse(['--exclude', doc_pattern_arg, 'from', 'to'])
308         filtered = list(backup_options.filter_listdir(root_listdir))
309         self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
310                               (nice_doc,))
311         # multiple exclude
312         backup_options = parse(['--exclude', doc_pattern_arg, '--exclude', 'lib.?', 'from', 'to'])
313         filtered = list(backup_options.filter_listdir(root_listdir))
314         self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
315                              (nice_doc, u'lib.a'))
316         # read exclude patterns from file
317         exclusion_string = doc_pattern_arg + "\nlib.?"
318         excl_filepath = os.path.join(basedir, 'exclusion')
319         fileutil.write(excl_filepath, exclusion_string)
320         backup_options = parse(['--exclude-from', excl_filepath, 'from', 'to'])
321         filtered = list(backup_options.filter_listdir(root_listdir))
322         self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
323                              (nice_doc, u'lib.a'))
324
325         # test that an iterator works too
326         backup_options = parse(['--exclude', doc_pattern_arg, 'from', 'to'])
327         filtered = list(backup_options.filter_listdir(iter(root_listdir)))
328         self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
329                               (nice_doc,))
330
331     @patch('__builtin__.file')
332     def test_exclude_from_tilde_expansion(self, mock):
333         basedir = "cli/Backup/exclude_from_tilde_expansion"
334         fileutil.make_dirs(basedir)
335         nodeurl_path = os.path.join(basedir, 'node.url')
336         fileutil.write(nodeurl_path, 'http://example.net:2357/')
337         def parse(args): return parse_options(basedir, "backup", args)
338
339         # ensure that tilde expansion is performed on exclude-from argument
340         exclude_file = u'~/.tahoe/excludes.dummy'
341
342         mock.return_value = StringIO()
343         parse(['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to'])
344         self.failUnlessIn(((abspath_expanduser_unicode(exclude_file),), {}), mock.call_args_list)
345
346     def test_ignore_symlinks(self):
347         if not hasattr(os, 'symlink'):
348             raise unittest.SkipTest("Symlinks are not supported by Python on this platform.")
349
350         self.basedir = os.path.dirname(self.mktemp())
351         self.set_up_grid()
352
353         source = os.path.join(self.basedir, "home")
354         self.writeto("foo.txt", "foo")
355         os.symlink(os.path.join(source, "foo.txt"), os.path.join(source, "foo2.txt"))
356
357         d = self.do_cli("create-alias", "tahoe")
358         d.addCallback(lambda res: self.do_cli("backup", "--verbose", source, "tahoe:test"))
359
360         def _check((rc, out, err)):
361             self.failUnlessReallyEqual(rc, 2)
362             foo2 = os.path.join(source, "foo2.txt")
363             self.failUnlessIn("WARNING: cannot backup symlink ", err)
364             self.failUnlessIn(foo2, err)
365
366             fu, fr, fs, dc, dr, ds = self.count_output(out)
367             # foo.txt
368             self.failUnlessReallyEqual(fu, 1)
369             self.failUnlessReallyEqual(fr, 0)
370             # foo2.txt
371             self.failUnlessReallyEqual(fs, 1)
372             # home
373             self.failUnlessReallyEqual(dc, 1)
374             self.failUnlessReallyEqual(dr, 0)
375             self.failUnlessReallyEqual(ds, 0)
376
377         d.addCallback(_check)
378         return d
379
380     def test_ignore_unreadable_file(self):
381         self.basedir = os.path.dirname(self.mktemp())
382         self.set_up_grid()
383
384         source = os.path.join(self.basedir, "home")
385         self.writeto("foo.txt", "foo")
386         os.chmod(os.path.join(source, "foo.txt"), 0000)
387
388         d = self.do_cli("create-alias", "tahoe")
389         d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:test"))
390
391         def _check((rc, out, err)):
392             self.failUnlessReallyEqual(rc, 2)
393             self.failUnlessReallyEqual(err, "WARNING: permission denied on file %s\n" % os.path.join(source, "foo.txt"))
394
395             fu, fr, fs, dc, dr, ds = self.count_output(out)
396             self.failUnlessReallyEqual(fu, 0)
397             self.failUnlessReallyEqual(fr, 0)
398             # foo.txt
399             self.failUnlessReallyEqual(fs, 1)
400             # home
401             self.failUnlessReallyEqual(dc, 1)
402             self.failUnlessReallyEqual(dr, 0)
403             self.failUnlessReallyEqual(ds, 0)
404         d.addCallback(_check)
405
406         # This is necessary for the temp files to be correctly removed
407         def _cleanup(self):
408             os.chmod(os.path.join(source, "foo.txt"), 0644)
409         d.addCallback(_cleanup)
410         d.addErrback(_cleanup)
411
412         return d
413
414     def test_ignore_unreadable_directory(self):
415         self.basedir = os.path.dirname(self.mktemp())
416         self.set_up_grid()
417
418         source = os.path.join(self.basedir, "home")
419         os.mkdir(source)
420         os.mkdir(os.path.join(source, "test"))
421         os.chmod(os.path.join(source, "test"), 0000)
422
423         d = self.do_cli("create-alias", "tahoe")
424         d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:test"))
425
426         def _check((rc, out, err)):
427             self.failUnlessReallyEqual(rc, 2)
428             self.failUnlessReallyEqual(err, "WARNING: permission denied on directory %s\n" % os.path.join(source, "test"))
429
430             fu, fr, fs, dc, dr, ds = self.count_output(out)
431             self.failUnlessReallyEqual(fu, 0)
432             self.failUnlessReallyEqual(fr, 0)
433             self.failUnlessReallyEqual(fs, 0)
434             # home, test
435             self.failUnlessReallyEqual(dc, 2)
436             self.failUnlessReallyEqual(dr, 0)
437             # test
438             self.failUnlessReallyEqual(ds, 1)
439         d.addCallback(_check)
440
441         # This is necessary for the temp files to be correctly removed
442         def _cleanup(self):
443             os.chmod(os.path.join(source, "test"), 0655)
444         d.addCallback(_cleanup)
445         d.addErrback(_cleanup)
446         return d
447
448     def test_backup_without_alias(self):
449         # 'tahoe backup' should output a sensible error message when invoked
450         # without an alias instead of a stack trace.
451         self.basedir = os.path.dirname(self.mktemp())
452         self.set_up_grid()
453         source = os.path.join(self.basedir, "file1")
454         d = self.do_cli('backup', source, source)
455         def _check((rc, out, err)):
456             self.failUnlessReallyEqual(rc, 1)
457             self.failUnlessIn("error:", err)
458             self.failUnlessReallyEqual(out, "")
459         d.addCallback(_check)
460         return d
461
462     def test_backup_with_nonexistent_alias(self):
463         # 'tahoe backup' should output a sensible error message when invoked
464         # with a nonexistent alias.
465         self.basedir = os.path.dirname(self.mktemp())
466         self.set_up_grid()
467         source = os.path.join(self.basedir, "file1")
468         d = self.do_cli("backup", source, "nonexistent:" + source)
469         def _check((rc, out, err)):
470             self.failUnlessReallyEqual(rc, 1)
471             self.failUnlessIn("error:", err)
472             self.failUnlessIn("nonexistent", err)
473             self.failUnlessReallyEqual(out, "")
474         d.addCallback(_check)
475         return d