3 from cStringIO import StringIO
6 from twisted.trial import unittest
7 from twisted.python.monkey import MonkeyPatcher
10 from allmydata.util import fileutil
11 from allmydata.util.fileutil import abspath_expanduser_unicode
12 from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv
13 from allmydata.util.namespace import Namespace
14 from allmydata.scripts import cli, backupdb
15 from .common_util import StallMixin
16 from .no_network import GridTestMixin
17 from .test_cli import CLITestMixin, parse_options
# Module-level trial timeout (seconds) applied to every test in this module.
timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s
21 class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
23 def writeto(self, path, data):
24 full_path = os.path.join(self.basedir, "home", path)
25 fileutil.make_dirs(os.path.dirname(full_path))
26 fileutil.write(full_path, data)
28 def count_output(self, out):
29 mo = re.search(r"(\d)+ files uploaded \((\d+) reused\), "
30 "(\d)+ files skipped, "
31 "(\d+) directories created \((\d+) reused\), "
32 "(\d+) directories skipped", out)
33 return [int(s) for s in mo.groups()]
35 def count_output2(self, out):
36 mo = re.search(r"(\d)+ files checked, (\d+) directories checked", out)
37 return [int(s) for s in mo.groups()]
    def test_backup(self):
        """End-to-end exercise of 'tahoe backup' against a no-network grid:
        initial upload, an unchanged re-backup (everything reused), a forced
        re-check by zeroing backupdb timestamps, and a backup after local
        modifications (files and directories swapping roles)."""
        self.basedir = "cli/Backup/backup"

        # is the backupdb available? If so, we test that a second backup does
        # not create new directories.
        bdb = backupdb.get_backupdb(os.path.join(self.basedir, "dbtest"),

        # create a small local directory with a couple of files
        source = os.path.join(self.basedir, "home")
        fileutil.make_dirs(os.path.join(source, "empty"))
        self.writeto("parent/subdir/foo.txt", "foo")
        self.writeto("parent/subdir/bar.txt", "bar\n" * 1000)
        self.writeto("parent/blah.txt", "blah")

        def do_backup(verbose=False):
            # Run 'tahoe backup [--verbose] ... tahoe:backups'.
            # NOTE(review): 'cmd' is presumably initialized (with "backup"
            # and the source path) just above these appends — verify.
            cmd.append("--verbose")
            cmd.append("tahoe:backups")
            return self.do_cli(*cmd)

        d = self.do_cli("create-alias", "tahoe")
        d.addCallback(lambda res: do_backup())
        def _check0((rc, out, err)):
            # first backup: everything must be freshly uploaded/created
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            fu, fr, fs, dc, dr, ds = self.count_output(out)
            # foo.txt, bar.txt, blah.txt
            self.failUnlessReallyEqual(fu, 3)
            self.failUnlessReallyEqual(fr, 0)
            self.failUnlessReallyEqual(fs, 0)
            # empty, home, home/parent, home/parent/subdir
            self.failUnlessReallyEqual(dc, 4)
            self.failUnlessReallyEqual(dr, 0)
            self.failUnlessReallyEqual(ds, 0)
        d.addCallback(_check0)

        d.addCallback(lambda res: self.do_cli("ls", "--uri", "tahoe:backups"))
        def _check1((rc, out, err)):
            # backups root must hold exactly "Archives" and "Latest",
            # with Latest pointing at an immutable (CHK) dirnode
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            lines = out.split("\n")
            children = dict([line.split() for line in lines if line])
            latest_uri = children["Latest"]
            self.failUnless(latest_uri.startswith("URI:DIR2-CHK:"), latest_uri)
            childnames = children.keys()
            self.failUnlessReallyEqual(sorted(childnames), ["Archives", "Latest"])
        d.addCallback(_check1)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest"))
        def _check2((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.failUnlessReallyEqual(sorted(out.split()), ["empty", "parent"])
        d.addCallback(_check2)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Latest/empty"))
        def _check2a((rc, out, err)):
            # the empty directory must have been preserved as empty
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.failUnlessReallyEqual(out.strip(), "")
        d.addCallback(_check2a)
        d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
        def _check3((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.failUnlessReallyEqual(out, "foo")
        d.addCallback(_check3)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
        def _check4((rc, out, err)):
            # remember the single snapshot name for later ordering checks
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.old_archives = out.split()
            self.failUnlessReallyEqual(len(self.old_archives), 1)
        d.addCallback(_check4)

        # stall so the next snapshot gets a distinct (later) timestamp name
        d.addCallback(self.stall, 1.1)
        d.addCallback(lambda res: do_backup())
        def _check4a((rc, out, err)):
            # second backup should reuse everything, if the backupdb is
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            fu, fr, fs, dc, dr, ds = self.count_output(out)
            # foo.txt, bar.txt, blah.txt
            self.failUnlessReallyEqual(fu, 0)
            self.failUnlessReallyEqual(fr, 3)
            self.failUnlessReallyEqual(fs, 0)
            # empty, home, home/parent, home/parent/subdir
            self.failUnlessReallyEqual(dc, 0)
            self.failUnlessReallyEqual(dr, 4)
            self.failUnlessReallyEqual(ds, 0)
        d.addCallback(_check4a)

        # sneak into the backupdb, crank back the "last checked"
        # timestamp to force a check on all files
        def _reset_last_checked(res):
            dbfile = os.path.join(self.get_clientdir(),
                                  "private", "backupdb.sqlite")
            self.failUnless(os.path.exists(dbfile), dbfile)
            bdb = backupdb.get_backupdb(dbfile)
            bdb.cursor.execute("UPDATE last_upload SET last_checked=0")
            bdb.cursor.execute("UPDATE directories SET last_checked=0")
            bdb.connection.commit()
        d.addCallback(_reset_last_checked)

        d.addCallback(self.stall, 1.1)
        d.addCallback(lambda res: do_backup(verbose=True))
        def _check4b((rc, out, err)):
            # we should check all files, and re-use all of them. None of
            # the directories should have been changed, so we should
            # re-use all of them too.
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            fu, fr, fs, dc, dr, ds = self.count_output(out)
            fchecked, dchecked = self.count_output2(out)
            self.failUnlessReallyEqual(fchecked, 3)
            self.failUnlessReallyEqual(fu, 0)
            self.failUnlessReallyEqual(fr, 3)
            self.failUnlessReallyEqual(fs, 0)
            self.failUnlessReallyEqual(dchecked, 4)
            self.failUnlessReallyEqual(dc, 0)
            self.failUnlessReallyEqual(dr, 4)
            self.failUnlessReallyEqual(ds, 0)
        d.addCallback(_check4b)

        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
        def _check5((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.new_archives = out.split()
            self.failUnlessReallyEqual(len(self.new_archives), 3, out)
            # the original backup should still be the oldest (i.e. sorts
            # alphabetically towards the beginning)
            self.failUnlessReallyEqual(sorted(self.new_archives)[0],
                                       self.old_archives[0])
        d.addCallback(_check5)

        d.addCallback(self.stall, 1.1)
        # NOTE(review): the following statements form the body of a
        # _modify callback (see d.addCallback(_modify) below) that mutates
        # the local source tree before the next backup.
            self.writeto("parent/subdir/foo.txt", "FOOF!")
            # and turn a file into a directory
            os.unlink(os.path.join(source, "parent/blah.txt"))
            os.mkdir(os.path.join(source, "parent/blah.txt"))
            self.writeto("parent/blah.txt/surprise file", "surprise")
            self.writeto("parent/blah.txt/surprisedir/subfile", "surprise")
            # turn a directory into a file
            os.rmdir(os.path.join(source, "empty"))
            self.writeto("empty", "imagine nothing being here")

        d.addCallback(_modify)
        def _check5a((rc, out, err)):
            # second backup should reuse bar.txt (if backupdb is available),
            # and upload the rest. None of the directories can be reused.
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            fu, fr, fs, dc, dr, ds = self.count_output(out)
            # new foo.txt, surprise file, subfile, empty
            self.failUnlessReallyEqual(fu, 4)
            self.failUnlessReallyEqual(fr, 1)
            self.failUnlessReallyEqual(fs, 0)
            # home, parent, subdir, blah.txt, surprisedir
            self.failUnlessReallyEqual(dc, 5)
            self.failUnlessReallyEqual(dr, 0)
            self.failUnlessReallyEqual(ds, 0)
        d.addCallback(_check5a)
        d.addCallback(lambda res: self.do_cli("ls", "tahoe:backups/Archives"))
        def _check6((rc, out, err)):
            # a fourth snapshot now exists; ordering is still preserved
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.new_archives = out.split()
            self.failUnlessReallyEqual(len(self.new_archives), 4)
            self.failUnlessReallyEqual(sorted(self.new_archives)[0],
                                       self.old_archives[0])
        d.addCallback(_check6)
        d.addCallback(lambda res: self.do_cli("get", "tahoe:backups/Latest/parent/subdir/foo.txt"))
        def _check7((rc, out, err)):
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.failUnlessReallyEqual(out, "FOOF!")
            # the old snapshot should not be modified
            return self.do_cli("get", "tahoe:backups/Archives/%s/parent/subdir/foo.txt" % self.old_archives[0])
        d.addCallback(_check7)
        def _check8((rc, out, err)):
            # the original snapshot still serves the original content
            self.failUnlessReallyEqual(err, "")
            self.failUnlessReallyEqual(rc, 0)
            self.failUnlessReallyEqual(out, "foo")
        d.addCallback(_check8)

    # on our old dapper buildslave, this test takes a long time (usually
    # 130s), so we have to bump up the default 120s timeout. The create-alias
    # and initial backup alone take 60s, probably because of the handful of
    # dirnodes being created (RSA key generation). The backup between check4
    # and check4a takes 6s, as does the backup before check4b.
    test_backup.timeout = 3000
    def _check_filtering(self, filtered, all, included, excluded):
        """Assert that the filter kept exactly *included* and that the
        remainder of *all* is exactly *excluded*.

        NOTE(review): 'all' shadows the builtin and must support
        .difference() below (i.e. behave like a set); callers pass tuples,
        so a set() conversion is presumably performed — verify.
        """
        filtered = set(filtered)
        included = set(included)
        excluded = set(excluded)
        self.failUnlessReallyEqual(filtered, included)
        self.failUnlessReallyEqual(all.difference(filtered), excluded)
    def test_exclude_options(self):
        """Exercise --exclude, --exclude-vcs and --exclude-from filtering of
        a simulated directory listing, without touching a grid."""
        root_listdir = (u'lib.a', u'_darcs', u'subdir', u'nice_doc.lyx')
        subdir_listdir = (u'another_doc.lyx', u'run_snake_run.py', u'CVS', u'.svn', u'_darcs')
        basedir = "cli/Backup/exclude_options"
        fileutil.make_dirs(basedir)
        # parse_options needs a node.url to be present in the node directory
        nodeurl_path = os.path.join(basedir, 'node.url')
        fileutil.write(nodeurl_path, 'http://example.net:2357/')
        def parse(args): return parse_options(basedir, "backup", args)

        # test simple exclude
        backup_options = parse(['--exclude', '*lyx', 'from', 'to'])
        filtered = list(backup_options.filter_listdir(root_listdir))
        self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
        # multiple patterns accumulate
        backup_options = parse(['--exclude', '*lyx', '--exclude', 'lib.?', 'from', 'to'])
        filtered = list(backup_options.filter_listdir(root_listdir))
        self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
                              (u'nice_doc.lyx', u'lib.a'))
        # vcs metadata exclusion
        backup_options = parse(['--exclude-vcs', 'from', 'to'])
        filtered = list(backup_options.filter_listdir(subdir_listdir))
        self._check_filtering(filtered, subdir_listdir, (u'another_doc.lyx', u'run_snake_run.py',),
                              (u'CVS', u'.svn', u'_darcs'))
        # read exclude patterns from file
        exclusion_string = "_darcs\n*py\n.svn"
        excl_filepath = os.path.join(basedir, 'exclusion')
        fileutil.write(excl_filepath, exclusion_string)
        backup_options = parse(['--exclude-from', excl_filepath, 'from', 'to'])
        filtered = list(backup_options.filter_listdir(subdir_listdir))
        self._check_filtering(filtered, subdir_listdir, (u'another_doc.lyx', u'CVS'),
                              (u'.svn', u'_darcs', u'run_snake_run.py'))
        # test BackupConfigurationError (nonexistent exclude-from file)
        self.failUnlessRaises(cli.BackupConfigurationError,
                              ['--exclude-from', excl_filepath + '.no', 'from', 'to'])

        # test that an iterator works too
        backup_options = parse(['--exclude', '*lyx', 'from', 'to'])
        filtered = list(backup_options.filter_listdir(iter(root_listdir)))
        self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
    def test_exclude_options_unicode(self):
        """Same filtering checks as test_exclude_options, but with a
        non-ASCII filename and exclude pattern."""
        nice_doc = u"nice_d\u00F8c.lyx"
            # encode may fail on an ASCII-only io encoding; skip in that case
            doc_pattern_arg = u"*d\u00F8c*".encode(get_io_encoding())
        except UnicodeEncodeError:
            raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.")

        root_listdir = (u'lib.a', u'_darcs', u'subdir', nice_doc)
        basedir = "cli/Backup/exclude_options_unicode"
        fileutil.make_dirs(basedir)
        # parse_options needs a node.url to be present in the node directory
        nodeurl_path = os.path.join(basedir, 'node.url')
        fileutil.write(nodeurl_path, 'http://example.net:2357/')
        def parse(args): return parse_options(basedir, "backup", args)

        # test simple exclude
        backup_options = parse(['--exclude', doc_pattern_arg, 'from', 'to'])
        filtered = list(backup_options.filter_listdir(root_listdir))
        self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
        # multiple patterns accumulate
        backup_options = parse(['--exclude', doc_pattern_arg, '--exclude', 'lib.?', 'from', 'to'])
        filtered = list(backup_options.filter_listdir(root_listdir))
        self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
                              (nice_doc, u'lib.a'))
        # read exclude patterns from file
        exclusion_string = doc_pattern_arg + "\nlib.?"
        excl_filepath = os.path.join(basedir, 'exclusion')
        fileutil.write(excl_filepath, exclusion_string)
        backup_options = parse(['--exclude-from', excl_filepath, 'from', 'to'])
        filtered = list(backup_options.filter_listdir(root_listdir))
        self._check_filtering(filtered, root_listdir, (u'_darcs', u'subdir'),
                              (nice_doc, u'lib.a'))

        # test that an iterator works too
        backup_options = parse(['--exclude', doc_pattern_arg, 'from', 'to'])
        filtered = list(backup_options.filter_listdir(iter(root_listdir)))
        self._check_filtering(filtered, root_listdir, (u'lib.a', u'_darcs', u'subdir'),
    def test_exclude_from_tilde_expansion(self):
        """--exclude-from must expand a leading '~' before opening the file.
        Verified by monkey-patching the builtin 'file' and checking the
        path it is asked to open."""
        basedir = "cli/Backup/exclude_from_tilde_expansion"
        fileutil.make_dirs(basedir)
        # parse_options needs a node.url to be present in the node directory
        nodeurl_path = os.path.join(basedir, 'node.url')
        fileutil.write(nodeurl_path, 'http://example.net:2357/')

        # ensure that tilde expansion is performed on exclude-from argument
        exclude_file = u'~/.tahoe/excludes.dummy'

        def call_file(name, *args):
            # stand-in for the builtin file(): assert the expanded path
            # NOTE(review): 'ns' is presumably a Namespace whose .called
            # flag is set here — verify against the elided setup lines
            self.failUnlessEqual(name, abspath_expanduser_unicode(exclude_file))
        patcher = MonkeyPatcher((__builtin__, 'file', call_file))
        patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to'])
        self.failUnless(ns.called)
    def test_ignore_symlinks(self):
        """A symlink in the source tree produces a warning on stderr,
        rc=2, and is counted as a skipped file."""
        if not hasattr(os, 'symlink'):
            raise unittest.SkipTest("Symlinks are not supported by Python on this platform.")
        self.basedir = os.path.dirname(self.mktemp())
        source = os.path.join(self.basedir, "home")
        self.writeto("foo.txt", "foo")
        os.symlink(os.path.join(source, "foo.txt"), os.path.join(source, "foo2.txt"))

        d = self.do_cli("create-alias", "tahoe")
        d.addCallback(lambda res: self.do_cli("backup", "--verbose", source, "tahoe:test"))

        def _check((rc, out, err)):
            # rc=2 signals "completed with warnings"
            self.failUnlessReallyEqual(rc, 2)
            foo2 = os.path.join(source, "foo2.txt")
            self.failUnlessIn("WARNING: cannot backup symlink ", err)
            self.failUnlessIn(foo2, err)

            fu, fr, fs, dc, dr, ds = self.count_output(out)
            # foo.txt was uploaded
            self.failUnlessReallyEqual(fu, 1)
            self.failUnlessReallyEqual(fr, 0)
            # foo2.txt (the symlink) was skipped
            self.failUnlessReallyEqual(fs, 1)
            # home was created
            self.failUnlessReallyEqual(dc, 1)
            self.failUnlessReallyEqual(dr, 0)
            self.failUnlessReallyEqual(ds, 0)

        d.addCallback(_check)
    def test_ignore_unreadable_file(self):
        """An unreadable (mode 0000) file produces a permission-denied
        warning, rc=2, and is counted as a skipped file."""
        self.basedir = os.path.dirname(self.mktemp())
        source = os.path.join(self.basedir, "home")
        self.writeto("foo.txt", "foo")
        os.chmod(os.path.join(source, "foo.txt"), 0000)

        d = self.do_cli("create-alias", "tahoe")
        d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:test"))

        def _check((rc, out, err)):
            # rc=2 signals "completed with warnings"
            self.failUnlessReallyEqual(rc, 2)
            self.failUnlessReallyEqual(err, "WARNING: permission denied on file %s\n" % os.path.join(source, "foo.txt"))

            fu, fr, fs, dc, dr, ds = self.count_output(out)
            self.failUnlessReallyEqual(fu, 0)
            self.failUnlessReallyEqual(fr, 0)
            # foo.txt was skipped
            self.failUnlessReallyEqual(fs, 1)
            # home was created
            self.failUnlessReallyEqual(dc, 1)
            self.failUnlessReallyEqual(dr, 0)
            self.failUnlessReallyEqual(ds, 0)
        d.addCallback(_check)

        # This is necessary for the temp files to be correctly removed
        # NOTE(review): this chmod is the body of a _cleanup callback,
        # registered on both the callback and errback chains below
            os.chmod(os.path.join(source, "foo.txt"), 0644)
        d.addCallback(_cleanup)
        d.addErrback(_cleanup)
    def test_ignore_unreadable_directory(self):
        """An unreadable (mode 0000) directory produces a permission-denied
        warning, rc=2, and is counted as a skipped directory."""
        self.basedir = os.path.dirname(self.mktemp())
        source = os.path.join(self.basedir, "home")
        os.mkdir(os.path.join(source, "test"))
        os.chmod(os.path.join(source, "test"), 0000)

        d = self.do_cli("create-alias", "tahoe")
        d.addCallback(lambda res: self.do_cli("backup", source, "tahoe:test"))

        def _check((rc, out, err)):
            # rc=2 signals "completed with warnings"
            self.failUnlessReallyEqual(rc, 2)
            self.failUnlessReallyEqual(err, "WARNING: permission denied on directory %s\n" % os.path.join(source, "test"))

            fu, fr, fs, dc, dr, ds = self.count_output(out)
            self.failUnlessReallyEqual(fu, 0)
            self.failUnlessReallyEqual(fr, 0)
            self.failUnlessReallyEqual(fs, 0)
            # home, test
            self.failUnlessReallyEqual(dc, 2)
            self.failUnlessReallyEqual(dr, 0)
            # test was skipped
            self.failUnlessReallyEqual(ds, 1)
        d.addCallback(_check)

        # This is necessary for the temp files to be correctly removed
        # NOTE(review): this chmod is the body of a _cleanup callback,
        # registered on both the callback and errback chains below
            os.chmod(os.path.join(source, "test"), 0655)
        d.addCallback(_cleanup)
        d.addErrback(_cleanup)
    def test_backup_without_alias(self):
        # 'tahoe backup' should output a sensible error message when invoked
        # without an alias instead of a stack trace.
        self.basedir = os.path.dirname(self.mktemp())
        source = os.path.join(self.basedir, "file1")
        d = self.do_cli('backup', source, source)
        def _check((rc, out, err)):
            # rc=1 with an "error:" message on stderr, nothing on stdout
            self.failUnlessReallyEqual(rc, 1)
            self.failUnlessIn("error:", err)
            self.failUnlessReallyEqual(out, "")
        d.addCallback(_check)
    def test_backup_with_nonexistent_alias(self):
        # 'tahoe backup' should output a sensible error message when invoked
        # with a nonexistent alias.
        self.basedir = os.path.dirname(self.mktemp())
        source = os.path.join(self.basedir, "file1")
        d = self.do_cli("backup", source, "nonexistent:" + source)
        def _check((rc, out, err)):
            # rc=1, and the error message must name the offending alias
            self.failUnlessReallyEqual(rc, 1)
            self.failUnlessIn("error:", err)
            self.failUnlessIn("nonexistent", err)
            self.failUnlessReallyEqual(out, "")
        d.addCallback(_check)