]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/test/test_consolidate.py
d1562a6f3992615e43c997baca9176c60e146131
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / test / test_consolidate.py
1
2 import os
3 from cStringIO import StringIO
4 import pickle
5 from twisted.trial import unittest
6 from allmydata.test.no_network import GridTestMixin
7 from allmydata.util import fileutil
8 from allmydata.scripts import runner, debug
9 from allmydata.scripts.common import get_aliases
10 from twisted.internet import defer, threads # CLI tests use deferToThread
11 from allmydata.interfaces import IDirectoryNode
12
13
class CLITestMixin:
    """Mixin for GridTestMixin-based tests that need to drive the tahoe CLI.

    Relies on the host test class providing get_clientdir().
    """

    def do_cli(self, verb, *args, **kwargs):
        """Run the CLI command `verb` (plus `args`) against this test's node.

        The command runs in a thread via deferToThread so the blocking CLI
        code does not stall the reactor.  An optional `stdin` keyword supplies
        input text.  Returns a Deferred that fires with a three-tuple of
        (exit code, stdout text, stderr text).
        """
        node_opts = ["--node-directory", self.get_clientdir()]
        # 'debug' takes a sub-command which must precede the node options;
        # every other verb takes the node options immediately after the verb.
        if verb == "debug":
            command_line = [verb, args[0]] + node_opts + list(args[1:])
        else:
            command_line = [verb] + node_opts + list(args)
        input_text = kwargs.get("stdin", "")
        out_buf = StringIO()
        err_buf = StringIO()
        d = threads.deferToThread(runner.runner, command_line,
                                  run_by_human=False,
                                  stdin=StringIO(input_text),
                                  stdout=out_buf, stderr=err_buf)
        def _collect(rc):
            # Fold the captured streams in with the exit code.
            return rc, out_buf.getvalue(), err_buf.getvalue()
        d.addCallback(_collect)
        return d
32
class Consolidate(GridTestMixin, CLITestMixin, unittest.TestCase):
    """Tests for the 'tahoe debug consolidate' command.

    Builds a local directory tree of fake backup "snapshots", copies it into
    a test grid, runs the consolidator, and checks that identical snapshot
    subtrees end up sharing directories.  The main end-to-end test is
    disabled (see OFF_test_basic below for the rationale).
    """

    def writeto(self, path, data):
        """Write `data` to <basedir>/home/<path>, creating parent dirs."""
        d = os.path.dirname(os.path.join(self.basedir, "home", path))
        fileutil.make_dirs(d)
        f = open(os.path.join(self.basedir, "home", path), "w")
        f.write(data)
        f.close()

    def writeto_snapshot(self, sn, path, data):
        """Write `data` to `path` inside the sn'th dated snapshot directory."""
        p = "Backups/fluxx/Archives/2009-03-%02d 01.01.01/%s" % (sn, path)
        return self.writeto(p, data)

    def do_cli_good(self, verb, *args, **kwargs):
        """Like do_cli, but assert success (rc==0, empty stderr).

        Returns a Deferred firing with the command's stdout text only.
        """
        d = self.do_cli(verb, *args, **kwargs)
        def _check((rc,out,err)):
            self.failUnlessEqual(err, "", verb)
            self.failUnlessEqual(rc, 0, verb)
            return out
        d.addCallback(_check)
        return d

    def test_arg_parsing(self):
        """ConsolidateOptions should accept and record all of its flags."""
        self.basedir = "consolidate/Consolidate/arg_parsing"
        self.set_up_grid(num_clients=1, num_servers=1)
        co = debug.ConsolidateOptions()
        co.parseOptions(["--node-directory", self.get_clientdir(),
                         "--dbfile", "foo.db", "--backupfile", "backup", "--really",
                         "URI:DIR2:foo"])
        self.failUnlessEqual(co["dbfile"], "foo.db")
        self.failUnlessEqual(co["backupfile"], "backup")
        self.failUnless(co["really"])
        self.failUnlessEqual(co.where, "URI:DIR2:foo")

    def OFF_test_basic(self):
        # rename this method to enable the test. I've disabled it because, in
        # my opinion:
        #
        #  1: 'tahoe debug consolidate' is useful enough to include in trunk,
        #     but not useful enough justify a lot of compatibility effort or
        #     extra test time
        #  2: it requires sqlite3; I did not bother to make it work with
        #     pysqlite, nor did I bother making it fail gracefully when
        #     sqlite3 is not available
        #  3: this test takes 30 seconds to run on my workstation, and it likely
        #     to take several minutes on the old slow dapper buildslave
        #  4: I don't want other folks to see a SkipTest and wonder "oh no, what
        #     did I do wrong to not allow this test to run"
        #
        # These may not be strong arguments: I welcome feedback. In particular,
        # this command may be more suitable for a plugin of some sort, if we
        # had plugins of some sort. -warner 12-Mar-09

        self.basedir = "consolidate/Consolidate/basic"
        self.set_up_grid(num_clients=1)

        fileutil.make_dirs(os.path.join(self.basedir, "home/Backups/nonsystem"))
        fileutil.make_dirs(os.path.join(self.basedir, "home/Backups/fluxx/Latest"))
        # NOTE(review): writeto() already prefixes its argument with
        # <basedir>/home/, so passing an os.path.join(self.basedir, "home/...")
        # path here double-prefixes it and the "nondir" file lands under
        # <basedir>/home/<basedir>/home/... -- confirm whether the intent was
        # to pass just "Backups/fluxx/Archives/nondir".
        self.writeto(os.path.join(self.basedir,
                                  "home/Backups/fluxx/Archives/nondir"),
                     "not a directory: ignore me")

        # set up a number of non-shared "snapshots"
        for i in range(1,8):
            self.writeto_snapshot(i, "parent/README", "README")
            self.writeto_snapshot(i, "parent/foo.txt", "foo")
            self.writeto_snapshot(i, "parent/subdir1/bar.txt", "bar")
            self.writeto_snapshot(i, "parent/subdir1/baz.txt", "baz")
            self.writeto_snapshot(i, "parent/subdir2/yoy.txt", "yoy")
            self.writeto_snapshot(i, "parent/subdir2/hola.txt", "hola")

            if i >= 1:
                pass # initial snapshot
            if i >= 2:
                pass # second snapshot: same as the first
            if i >= 3:
                # modify a file
                self.writeto_snapshot(i, "parent/foo.txt", "FOOF!")
            if i >= 4:
                # foo.txt goes back to normal
                self.writeto_snapshot(i, "parent/foo.txt", "foo")
            if i >= 5:
                # new file
                self.writeto_snapshot(i, "parent/subdir1/new.txt", "new")
            if i >= 6:
                # copy parent/subdir1 to parent/subdir2/copy1
                self.writeto_snapshot(i, "parent/subdir2/copy1/bar.txt", "bar")
                self.writeto_snapshot(i, "parent/subdir2/copy1/baz.txt", "baz")
                self.writeto_snapshot(i, "parent/subdir2/copy1/new.txt", "new")
            if i >= 7:
                # the last snapshot shall remain untouched
                pass

        # now copy the whole thing into tahoe
        d = self.do_cli_good("create-alias", "tahoe")
        d.addCallback(lambda ign:
                      self.do_cli_good("cp", "-r",
                                       os.path.join(self.basedir, "home/Backups"),
                                       "tahoe:Backups"))
        def _copied(res):
            rootcap = get_aliases(self.get_clientdir())["tahoe"]
            # now scan the initial directory structure
            n = self.g.clients[0].create_node_from_uri(rootcap)
            return n.get_child_at_path([u"Backups", u"fluxx", u"Archives"])
        d.addCallback(_copied)
        # caches keyed by short name, shared by the closures below
        self.nodes = {}
        self.caps = {}
        def stash(node, name):
            # remember both the node and its cap for later comparison
            self.nodes[name] = node
            self.caps[name] = node.get_uri()
            return node
        d.addCallback(stash, "Archives")
        # manifests["start"/"finish"]: {path-tuple: cap} before/after consolidation
        self.manifests = {}
        def stash_manifest(manifest, which):
            self.manifests[which] = dict(manifest)
        d.addCallback(lambda ignored: self.build_manifest(self.nodes["Archives"]))
        d.addCallback(stash_manifest, "start")
        def c(n):
            # Map a shorthand like "3-bps1" or "3-bps1-start" to the cap
            # recorded in the corresponding manifest: "<snapshot>-<which
            # subdir>[-<start|finish>]", defaulting to the "finish" manifest.
            pieces = n.split("-")
            which = "finish"
            if len(pieces) == 3:
                which = pieces[-1]
            sn = int(pieces[0])
            name = pieces[1]
            path = [u"2009-03-%02d 01.01.01" % sn]
            path.extend( {"b": [],
                          "bp": [u"parent"],
                          "bps1": [u"parent", u"subdir1"],
                          "bps2": [u"parent", u"subdir2"],
                          "bps2c1": [u"parent", u"subdir2", u"copy1"],
                          }[name] )
            return self.manifests[which][tuple(path)]

        dbfile = os.path.join(self.basedir, "dirhash.db")
        backupfile = os.path.join(self.basedir, "backup.pickle")

        # first pass: --verbose but not --really, so nothing is modified yet
        d.addCallback(lambda ign:
                      self.do_cli_good("debug", "consolidate",
                                       "--dbfile", dbfile,
                                       "--backupfile", backupfile,
                                       "--verbose",
                                       "tahoe:"))
        def _check_consolidate_output1(out):
            lines = out.splitlines()
            last = lines[-1]
            self.failUnlessEqual(last.strip(),
                                 "system done, dircounts: "
                                 "25/12 seen/used, 7 created, 2 as-is, 13 reused")
            self.failUnless(os.path.exists(dbfile))
            self.failUnless(os.path.exists(backupfile))
            self.first_backup = backup = pickle.load(open(backupfile, "rb"))
            self.failUnless(u"fluxx" in backup["systems"])
            self.failUnless(u"fluxx" in backup["archives"])
            adata = backup["archives"]["fluxx"]
            kids = adata[u"children"]
            self.failUnlessEqual(str(kids[u"2009-03-01 01.01.01"][1][u"rw_uri"]),
                                 c("1-b-start"))
        d.addCallback(_check_consolidate_output1)
        # second pass: with --really; everything was already consolidated, so
        # the run should be a no-op and the backupfile should be rotated
        d.addCallback(lambda ign:
                      self.do_cli_good("debug", "consolidate",
                                       "--dbfile", dbfile,
                                       "--backupfile", backupfile,
                                       "--really", "tahoe:"))
        def _check_consolidate_output2(out):
            lines = out.splitlines()
            last = lines[-1]
            self.failUnlessEqual(last.strip(),
                                 "system done, dircounts: "
                                 "0/0 seen/used, 0 created, 0 as-is, 0 reused")
            backup = pickle.load(open(backupfile, "rb"))
            self.failUnlessEqual(backup, self.first_backup)
            self.failUnless(os.path.exists(backupfile + ".0"))
        d.addCallback(_check_consolidate_output2)

        d.addCallback(lambda ignored: self.build_manifest(self.nodes["Archives"]))
        d.addCallback(stash_manifest, "finish")

        def check_consolidation(ignored):
            #for which in ("finish",):
            #    for path in sorted(self.manifests[which].keys()):
            #        print "%s %s %s" % (which, "/".join(path),
            #                            self.manifests[which][path])

            # last snapshot should be untouched
            self.failUnlessEqual(c("7-b"), c("7-b-start"))

            # first snapshot should be a readonly form of the original
            from allmydata.scripts.tahoe_backup import readonly
            self.failUnlessEqual(c("1-b-finish"), readonly(c("1-b-start")))
            self.failUnlessEqual(c("1-bp-finish"), readonly(c("1-bp-start")))
            self.failUnlessEqual(c("1-bps1-finish"), readonly(c("1-bps1-start")))
            self.failUnlessEqual(c("1-bps2-finish"), readonly(c("1-bps2-start")))

            # new directories should be different than the old ones
            self.failIfEqual(c("1-b"), c("1-b-start"))
            self.failIfEqual(c("1-bp"), c("1-bp-start"))
            self.failIfEqual(c("1-bps1"), c("1-bps1-start"))
            self.failIfEqual(c("1-bps2"), c("1-bps2-start"))
            self.failIfEqual(c("2-b"), c("2-b-start"))
            self.failIfEqual(c("2-bp"), c("2-bp-start"))
            self.failIfEqual(c("2-bps1"), c("2-bps1-start"))
            self.failIfEqual(c("2-bps2"), c("2-bps2-start"))
            self.failIfEqual(c("3-b"), c("3-b-start"))
            self.failIfEqual(c("3-bp"), c("3-bp-start"))
            self.failIfEqual(c("3-bps1"), c("3-bps1-start"))
            self.failIfEqual(c("3-bps2"), c("3-bps2-start"))
            self.failIfEqual(c("4-b"), c("4-b-start"))
            self.failIfEqual(c("4-bp"), c("4-bp-start"))
            self.failIfEqual(c("4-bps1"), c("4-bps1-start"))
            self.failIfEqual(c("4-bps2"), c("4-bps2-start"))
            self.failIfEqual(c("5-b"), c("5-b-start"))
            self.failIfEqual(c("5-bp"), c("5-bp-start"))
            self.failIfEqual(c("5-bps1"), c("5-bps1-start"))
            self.failIfEqual(c("5-bps2"), c("5-bps2-start"))

            # snapshot 1 and snapshot 2 should be identical
            self.failUnlessEqual(c("2-b"), c("1-b"))

            # snapshot 3 modified a file underneath parent/
            self.failIfEqual(c("3-b"), c("2-b")) # 3 modified a file
            self.failIfEqual(c("3-bp"), c("2-bp"))
            # but the subdirs are the same
            self.failUnlessEqual(c("3-bps1"), c("2-bps1"))
            self.failUnlessEqual(c("3-bps2"), c("2-bps2"))

            # snapshot 4 should be the same as 2
            self.failUnlessEqual(c("4-b"), c("2-b"))
            self.failUnlessEqual(c("4-bp"), c("2-bp"))
            self.failUnlessEqual(c("4-bps1"), c("2-bps1"))
            self.failUnlessEqual(c("4-bps2"), c("2-bps2"))

            # snapshot 5 added a file under subdir1
            self.failIfEqual(c("5-b"), c("4-b"))
            self.failIfEqual(c("5-bp"), c("4-bp"))
            self.failIfEqual(c("5-bps1"), c("4-bps1"))
            self.failUnlessEqual(c("5-bps2"), c("4-bps2"))

            # snapshot 6 copied a directory - it should be shared
            self.failIfEqual(c("6-b"), c("5-b"))
            self.failIfEqual(c("6-bp"), c("5-bp"))
            self.failUnlessEqual(c("6-bps1"), c("5-bps1"))
            self.failIfEqual(c("6-bps2"), c("5-bps2"))
            self.failUnlessEqual(c("6-bps2c1"), c("6-bps1"))

        d.addCallback(check_consolidation)

        return d

    def build_manifest(self, root):
        # like dirnode.build_manifest, but this one doesn't skip duplicate
        # nodes (i.e. it is not cycle-resistant).
        # Returns a Deferred firing with a list of (path-tuple, cap) pairs,
        # starting with the root itself at the empty path.
        manifest = []
        manifest.append( ( (), root.get_uri() ) )
        d = self.manifest_of(None, root, manifest, () )
        d.addCallback(lambda ign: manifest)
        return d

    def manifest_of(self, ignored, dirnode, manifest, path):
        """Recursively append (path, cap) entries for dirnode's children.

        Mutates the shared `manifest` list in place; `ignored` exists so this
        can sit directly in a Deferred callback chain.
        """
        d = dirnode.list()
        def _got_children(children):
            # chain the recursive descents serially on one Deferred
            d = defer.succeed(None)
            for name, (child, metadata) in children.iteritems():
                childpath = path + (name,)
                manifest.append( (childpath, child.get_uri()) )
                if IDirectoryNode.providedBy(child):
                    d.addCallback(self.manifest_of, child, manifest, childpath)
            return d
        d.addCallback(_got_children)
        return d