]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/test/test_cli_backup.py
Eliminate mock dependency.
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / test / test_cli_backup.py
1
2 import os.path
3 from cStringIO import StringIO
4 import re
5
6 from twisted.trial import unittest
7 from twisted.python.monkey import MonkeyPatcher
8
9 import __builtin__
10 from allmydata.util import fileutil
11 from allmydata.util.fileutil import abspath_expanduser_unicode
12 from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv
13 from allmydata.util.namespace import Namespace
14 from allmydata.scripts import cli, backupdb
15 from .common_util import StallMixin
16 from .no_network import GridTestMixin
17 from .test_cli import CLITestMixin, parse_options
18
19 timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s
20
21 class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
22
23     def writeto(self, path, data):
24         full_path = os.path.join(self.basedir, "home", path)
25         fileutil.make_dirs(os.path.dirname(full_path))
26         fileutil.write(full_path, data)
27
28     def count_output(self, out):
29         mo = re.search(r"(\d)+ files uploaded \((\d+) reused\), "
30                         "(\d)+ files skipped, "
31                         "(\d+) directories created \((\d+) reused\), "
32                         "(\d+) directories skipped", out)
33         return [int(s) for s in mo.groups()]
34
35     def count_output2(self, out):
36         mo = re.search(r"(\d)+ files checked, (\d+) directories checked", out)
37         return [int(s) for s in mo.groups()]
38
39     def test_backup(self):
40         self.basedir = "cli/Backup/backup"
41         self.set_up_grid()
42
43         # is the backupdb available? If so, we test that a second backup does
44         # not create new directories.
45         hush = StringIO()
46         bdb = backupdb.get_backupdb(os.path.join(self.basedir, "dbtest"),
47                                     hush)
48         self.failUnless(bdb)
49
50         # create a small local directory with a couple of files
51         source = os.path.join(self.basedir, "home")
52         fileutil.make_dirs(os.path.join(source, "empty"))
53         self.writeto("parent/subdir/foo.txt", "foo")
54         self.writeto("parent/subdir/bar.txt", "bar\n" * 1000)
55         self.writeto("parent/blah.txt", "blah")
56
57         def do_backup(verbose=False):
58             cmd = ["backup"]
59             if verbose:
60                 cmd.append("--verbose")
61             cmd.append(source)
62             cmd.append("tahoe:backups")
63             return self.do_cli(*cmd)
64
65         d = self.do_cli("create-alias", "tahoe")
66
67         d.addCallback(lambda res: do_backup())
68         def _check0((rc, out, err)):
69             self.failUnlessReallyEqual(err, "")
70             self.failUnlessReallyEqual(rc, 0)
71             fu, fr, fs, dc, dr, ds = self.count_output(out)
72             # foo.txt, bar.txt, blah.txt
73             self.failUnlessReallyEqual(fu, 3)
74             self.failUnlessReallyEqual(fr, 0)
75             self.failUnlessReallyEqual(fs, 0)
76             # empty, home, home/parent, home/parent/subdir
77             self.failUnlessReallyEqual(dc, 4)
78             self.failUnlessReallyEqual(dr, 0)
79             self.failUnlessReallyEqual(ds, 0)
80         d.addCallback(_check0)
81
82         d.addCallback(lambda res: self.do_cli("ls", "--uri", "tahoe:backups"))
83         def _check1((rc, out, err)):
84             self.failUnlessReallyEqual(err, "")
85             self.failUnlessReallyEqual(rc, 0)
86             lines = out.split("\n")
87             children = dict([line.split() for line in lines if line])
88             latest_uri = children["Latest"]
89             self.failUnless(latest_uri.startswith("URI:DIR2-CHK:"), latest_uri)
90             childnames = children.keys()
91             self.failUnlessReallyEqual(sorted(childnames), ["Archives", "Latest"])
92         d.addCallback(_check1)
93         d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest"))
94         def _check2((rc, out, err)):
95             self.failUnlessReallyEqual(err, "")
96             self.failUnlessReallyEqual(rc, 0)
97             self.failUnlessReallyEqual(sorted(out.split()), ["empty", "parent"])
98         d.addCallback(_check2)
99         d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest/empty"))
100         def _check2a((rc, out, err)):
101             self.failUnlessReallyEqual(err, "")
102             self.failUnlessReallyEqual(rc, 0)
103             self.failUnlessReallyEqual(out.strip(), "")
104         d.addCallback(_check2a)
105         d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
106         def _check3((rc, out, err)):
107             self.failUnlessReallyEqual(err, "")
108             self.failUnlessReallyEqual(rc, 0)
109             self.failUnlessReallyEqual(out, "foo")
110         d.addCallback(_check3)
111         d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
112         def _check4((rc, out, err)):
113             self.failUnlessReallyEqual(err, "")
114             self.failUnlessReallyEqual(rc, 0)
115             self.old_archives = out.split()
116             self.failUnlessReallyEqual(len(self.old_archives), 1)
117         d.addCallback(_check4)
118
119
120         d.addCallback(self.stall, 1.1)
121         d.addCallback(lambda res: do_backup())
122         def _check4a((rc, out, err)):
123             # second backup should reuse everything, if the backupdb is
124             # available
125             self.failUnlessReallyEqual(err, "")
126             self.failUnlessReallyEqual(rc, 0)
127             fu, fr, fs, dc, dr, ds = self.count_output(out)
128             # foo.txt, bar.txt, blah.txt
129             self.failUnlessReallyEqual(fu, 0)
130             self.failUnlessReallyEqual(fr, 3)
131             self.failUnlessReallyEqual(fs, 0)
132             # empty, home, home/parent, home/parent/subdir
133             self.failUnlessReallyEqual(dc, 0)
134             self.failUnlessReallyEqual(dr, 4)
135             self.failUnlessReallyEqual(ds, 0)
136         d.addCallback(_check4a)
137
138         # sneak into the backupdb, crank back the "last checked"
139         # timestamp to force a check on all files
140         def _reset_last_checked(res):
141             dbfile = os.path.join(self.get_clientdir(),
142                                   "private", "backupdb.sqlite")
143             self.failUnless(os.path.exists(dbfile), dbfile)
144             bdb = backupdb.get_backupdb(dbfile)
145             bdb.cursor.execute("UPDATE last_upload SET last_checked=0")
146             bdb.cursor.execute("UPDATE directories SET last_checked=0")
147             bdb.connection.commit()
148
149         d.addCallback(_reset_last_checked)
150
151         d.addCallback(self.stall, 1.1)
152         d.addCallback(lambda res: do_backup(verbose=True))
153         def _check4b((rc, out, err)):
154             # we should check all files, and re-use all of them. None of
155             # the directories should have been changed, so we should
156             # re-use all of them too.
157             self.failUnlessReallyEqual(err, "")
158             self.failUnlessReallyEqual(rc, 0)
159             fu, fr, fs, dc, dr, ds = self.count_output(out)
160             fchecked, dchecked = self.count_output2(out)
161             self.failUnlessReallyEqual(fchecked, 3)
162             self.failUnlessReallyEqual(fu, 0)
163             self.failUnlessReallyEqual(fr, 3)
164             self.failUnlessReallyEqual(fs, 0)
165             self.failUnlessReallyEqual(dchecked, 4)
166             self.failUnlessReallyEqual(dc, 0)
167             self.failUnlessReallyEqual(dr, 4)
168             self.failUnlessReallyEqual(ds, 0)
169         d.addCallback(_check4b)
170
171         d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
172         def _check5((rc, out, err)):
173             self.failUnlessReallyEqual(err, "")
174             self.failUnlessReallyEqual(rc, 0)
175             self.new_archives = out.split()
176             self.failUnlessReallyEqual(len(self.new_archives), 3, out)
177             # the original backup should still be the oldest (i.e. sorts
178             # alphabetically towards the beginning)
179             self.failUnlessReallyEqual(sorted(self.new_archives)[0],
180                                  self.old_archives[0])
181         d.addCallback(_check5)
182
183         d.addCallback(self.stall, 1.1)
184         def _modify(res):
185             self.writeto("parent/subdir/foo.txt", "FOOF!")
186             # and turn a file into a directory
187             os.unlink(os.path.join(source, "parent/blah.txt"))
188             os.mkdir(os.path.join(source, "parent/blah.txt"))
189             self.writeto("parent/blah.txt/surprise file", "surprise")
190             self.writeto("parent/blah.txt/surprisedir/subfile", "surprise")
191             # turn a directory into a file
192             os.rmdir(os.path.join(source, "empty"))
193             self.writeto("empty", "imagine nothing being here")
194             return do_backup()
195         d.addCallback(_modify)
196         def _check5a((rc, out, err)):
197             # second backup should reuse bar.txt (if backupdb is available),
198             # and upload the rest. None of the directories can be reused.
199             self.failUnlessReallyEqual(err, "")
200             self.failUnlessReallyEqual(rc, 0)
201             fu, fr, fs, dc, dr, ds = self.count_output(out)
202             # new foo.txt, surprise file, subfile, empty
203             self.failUnlessReallyEqual(fu, 4)
204             # old bar.txt
205             self.failUnlessReallyEqual(fr, 1)
206             self.failUnlessReallyEqual(fs, 0)
207             # home, parent, subdir, blah.txt, surprisedir
208             self.failUnlessReallyEqual(dc, 5)
209             self.failUnlessReallyEqual(dr, 0)
210             self.failUnlessReallyEqual(ds, 0)
211         d.addCallback(_check5a)
212         d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
213         def _check6((rc, out, err)):
214             self.failUnlessReallyEqual(err, "")
215             self.failUnlessReallyEqual(rc, 0)
216             self.new_archives = out.split()
217             self.failUnlessReallyEqual(len(self.new_archives), 4)
218             self.failUnlessReallyEqual(sorted(self.new_archives)[0],
219                                  self.old_archives[0])
220         d.addCallback(_check6)
221         d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
222         def _check7((rc, out, err)):
223             self.failUnlessReallyEqual(err, "")
224             self.failUnlessReallyEqual(rc, 0)
225             self.failUnlessReallyEqual(out, "FOOF!")
226             # the old snapshot should not be modified
227             return self.do_cli("get", "tahoe:backups/Archives/%s/parent/subdir/foo.txt" % self.old_archives[0])
228         d.addCallback(_check7)
229         def _check8((rc, out, err)):
230             self.failUnlessReallyEqual(err, "")
231             self.failUnlessReallyEqual(rc, 0)
232             self.failUnlessReallyEqual(out, "foo")
233         d.addCallback(_check8)
234
235         return d
236
237     # on our old dapper buildslave, this test takes a long time (usually
238     # 130s), so we have to bump up the default 120s timeout. The create-alias
239     # and initial backup alone take 60s, probably because of the handful of
240     # dirnodes being created (RSA key generation). The backup between check4
241     # and check4a takes 6s, as does the backup before check4b.
242     test_backup.timeout = 3000
243
244     def _check_filtering(self, filtered, all, included, excluded):
245         filtered = set(filtered)
246         all = set(all)
247         included = set(included)
248         excluded = set(excluded)
249         self.failUnlessReallyEqual(filtered, included)
250         self.failUnlessReallyEqual(all.difference(filtered), excluded)
251
252     def test_exclude_options(self):
253         root_listdir = (u'lib.a', u'_darcs', u'subdir', u'nice_doc.lyx')
254         subdir_listdir = (u'another_doc.lyx', u'run_snake_run.py', u'CVS', u'.svn', u'_darcs')
255         basedir = "cli/Backup/exclude_options"
256         fileutil.make_dirs(basedir)
257         nodeurl_path = os.path.join(basedir, 'node.url')
258         fileutil.write(nodeurl_path, 'http://example.net:2357/')
259         def parse(args): return parse_options(basedir, "backup", args)
260
261         # test simple exclude
262         backup_options = parse(['--exclude', '*lyx', 'from', 'to'])
263         filtered = list(backup_options.filter_listdir(root_listdir))
264         self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
265                               (u'nice_doc.lyx',))
266         # multiple exclude
267         backup_options = parse(['--exclude', '*lyx', '--exclude', 'lib.?', 'from', 'to'])
268         filtered = list(backup_options.filter_listdir(root_listdir))
269         self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
270                               (u'nice_doc.lyx', u'lib.a'))
271         # vcs metadata exclusion
272         backup_options = parse(['--exclude-vcs', 'from', 'to'])
273         filtered = list(backup_options.filter_listdir(subdir_listdir))
274         self._check_filtering(filtered, subdir_listdir, (u'another_doc.lyx', u'run_snake_run.py',),
275                               (u'CVS', u'.svn', u'_darcs'))
276         # read exclude patterns from file
277         exclusion_string = "_darcs\n*py\n.svn"
278         excl_filepath = os.path.join(basedir, 'exclusion')
279         fileutil.write(excl_filepath, exclusion_string)
280         backup_options = parse(['--exclude-from', excl_filepath, 'from', 'to'])
281         filtered = list(backup_options.filter_listdir(subdir_listdir))
282         self._check_filtering(filtered, subdir_listdir, (u'another_doc.lyx', u'CVS'),
283                               (u'.svn', u'_darcs', u'run_snake_run.py'))
284         # test BackupConfigurationError
285         self.failUnlessRaises(cli.BackupConfigurationError,
286                               parse,
287                               ['--exclude-from', excl_filepath + '.no', 'from', 'to'])
288
289         # test that an iterator works too
290         backup_options = parse(['--exclude', '*lyx', 'from', 'to'])
291         filtered = list(backup_options.filter_listdir(iter(root_listdir)))
292         self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
293                               (u'nice_doc.lyx',))
294
295     def test_exclude_options_unicode(self):
296         nice_doc = u"nice_d\u00F8c.lyx"
297         try:
298             doc_pattern_arg = u"*d\u00F8c*".encode(get_io_encoding())
299         except UnicodeEncodeError:
300             raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.")
301
302         root_listdir = (u'lib.a', u'_darcs', u'subdir', nice_doc)
303         basedir = "cli/Backup/exclude_options_unicode"
304         fileutil.make_dirs(basedir)
305         nodeurl_path = os.path.join(basedir, 'node.url')
306         fileutil.write(nodeurl_path, 'http://example.net:2357/')
307         def parse(args): return parse_options(basedir, "backup", args)
308
309         # test simple exclude
310         backup_options = parse(['--exclude', doc_pattern_arg, 'from', 'to'])
311         filtered = list(backup_options.filter_listdir(root_listdir))
312         self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
313                               (nice_doc,))
314         # multiple exclude
315         backup_options = parse(['--exclude', doc_pattern_arg, '--exclude', 'lib.?', 'from', 'to'])
316         filtered = list(backup_options.filter_listdir(root_listdir))
317         self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
318                              (nice_doc, u'lib.a'))
319         # read exclude patterns from file
320         exclusion_string = doc_pattern_arg + "\nlib.?"
321         excl_filepath = os.path.join(basedir, 'exclusion')
322         fileutil.write(excl_filepath, exclusion_string)
323         backup_options = parse(['--exclude-from', excl_filepath, 'from', 'to'])
324         filtered = list(backup_options.filter_listdir(root_listdir))
325         self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
326                              (nice_doc, u'lib.a'))
327
328         # test that an iterator works too
329         backup_options = parse(['--exclude', doc_pattern_arg, 'from', 'to'])
330         filtered = list(backup_options.filter_listdir(iter(root_listdir)))
331         self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
332                               (nice_doc,))
333
334     def test_exclude_from_tilde_expansion(self):
335         basedir = "cli/Backup/exclude_from_tilde_expansion"
336         fileutil.make_dirs(basedir)
337         nodeurl_path = os.path.join(basedir, 'node.url')
338         fileutil.write(nodeurl_path, 'http://example.net:2357/')
339
340         # ensure that tilde expansion is performed on exclude-from argument
341         exclude_file = u'~/.tahoe/excludes.dummy'
342
343         ns = Namespace()
344         ns.called = False
345         def call_file(name, *args):
346             ns.called = True
347             self.failUnlessEqual(name, abspath_expanduser_unicode(exclude_file))
348             return StringIO()
349
350         patcher = MonkeyPatcher((__builtin__, 'file', call_file))
351         patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to'])
352         self.failUnless(ns.called)
353
354     def test_ignore_symlinks(self):
355         if not hasattr(os, 'symlink'):
356             raise unittest.SkipTest("Symlinks are not supported by Python on this platform.")
357
358         self.basedir = os.path.dirname(self.mktemp())
359         self.set_up_grid()
360
361         source = os.path.join(self.basedir, "home")
362         self.writeto("foo.txt", "foo")
363         os.symlink(os.path.join(source, "foo.txt"), os.path.join(source, "foo2.txt"))
364
365         d = self.do_cli("create-alias", "tahoe")
366         d.addCallback(lambda res: self.do_cli("backup", "--verbose", source, "tahoe:test"))
367
368         def _check((rc, out, err)):
369             self.failUnlessReallyEqual(rc, 2)
370             foo2 = os.path.join(source, "foo2.txt")
371             self.failUnlessIn("WARNING: cannot backup symlink ", err)
372             self.failUnlessIn(foo2, err)
373
374             fu, fr, fs, dc, dr, ds = self.count_output(out)
375             # foo.txt
376             self.failUnlessReallyEqual(fu, 1)
377             self.failUnlessReallyEqual(fr, 0)
378             # foo2.txt
379             self.failUnlessReallyEqual(fs, 1)
380             # home
381             self.failUnlessReallyEqual(dc, 1)
382             self.failUnlessReallyEqual(dr, 0)
383             self.failUnlessReallyEqual(ds, 0)
384
385         d.addCallback(_check)
386         return d
387
388     def test_ignore_unreadable_file(self):
389         self.basedir = os.path.dirname(self.mktemp())
390         self.set_up_grid()
391
392         source = os.path.join(self.basedir, "home")
393         self.writeto("foo.txt", "foo")
394         os.chmod(os.path.join(source, "foo.txt"), 0000)
395
396         d = self.do_cli("create-alias", "tahoe")
397         d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:test"))
398
399         def _check((rc, out, err)):
400             self.failUnlessReallyEqual(rc, 2)
401             self.failUnlessReallyEqual(err, "WARNING: permission denied on file %s\n" % os.path.join(source, "foo.txt"))
402
403             fu, fr, fs, dc, dr, ds = self.count_output(out)
404             self.failUnlessReallyEqual(fu, 0)
405             self.failUnlessReallyEqual(fr, 0)
406             # foo.txt
407             self.failUnlessReallyEqual(fs, 1)
408             # home
409             self.failUnlessReallyEqual(dc, 1)
410             self.failUnlessReallyEqual(dr, 0)
411             self.failUnlessReallyEqual(ds, 0)
412         d.addCallback(_check)
413
414         # This is necessary for the temp files to be correctly removed
415         def _cleanup(self):
416             os.chmod(os.path.join(source, "foo.txt"), 0644)
417         d.addCallback(_cleanup)
418         d.addErrback(_cleanup)
419
420         return d
421
422     def test_ignore_unreadable_directory(self):
423         self.basedir = os.path.dirname(self.mktemp())
424         self.set_up_grid()
425
426         source = os.path.join(self.basedir, "home")
427         os.mkdir(source)
428         os.mkdir(os.path.join(source, "test"))
429         os.chmod(os.path.join(source, "test"), 0000)
430
431         d = self.do_cli("create-alias", "tahoe")
432         d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:test"))
433
434         def _check((rc, out, err)):
435             self.failUnlessReallyEqual(rc, 2)
436             self.failUnlessReallyEqual(err, "WARNING: permission denied on directory %s\n" % os.path.join(source, "test"))
437
438             fu, fr, fs, dc, dr, ds = self.count_output(out)
439             self.failUnlessReallyEqual(fu, 0)
440             self.failUnlessReallyEqual(fr, 0)
441             self.failUnlessReallyEqual(fs, 0)
442             # home, test
443             self.failUnlessReallyEqual(dc, 2)
444             self.failUnlessReallyEqual(dr, 0)
445             # test
446             self.failUnlessReallyEqual(ds, 1)
447         d.addCallback(_check)
448
449         # This is necessary for the temp files to be correctly removed
450         def _cleanup(self):
451             os.chmod(os.path.join(source, "test"), 0655)
452         d.addCallback(_cleanup)
453         d.addErrback(_cleanup)
454         return d
455
456     def test_backup_without_alias(self):
457         # 'tahoe backup' should output a sensible error message when invoked
458         # without an alias instead of a stack trace.
459         self.basedir = os.path.dirname(self.mktemp())
460         self.set_up_grid()
461         source = os.path.join(self.basedir, "file1")
462         d = self.do_cli('backup', source, source)
463         def _check((rc, out, err)):
464             self.failUnlessReallyEqual(rc, 1)
465             self.failUnlessIn("error:", err)
466             self.failUnlessReallyEqual(out, "")
467         d.addCallback(_check)
468         return d
469
470     def test_backup_with_nonexistent_alias(self):
471         # 'tahoe backup' should output a sensible error message when invoked
472         # with a nonexistent alias.
473         self.basedir = os.path.dirname(self.mktemp())
474         self.set_up_grid()
475         source = os.path.join(self.basedir, "file1")
476         d = self.do_cli("backup", source, "nonexistent:" + source)
477         def _check((rc, out, err)):
478             self.failUnlessReallyEqual(rc, 1)
479             self.failUnlessIn("error:", err)
480             self.failUnlessIn("nonexistent", err)
481             self.failUnlessReallyEqual(out, "")
482         d.addCallback(_check)
483         return d