From: david-sarah Date: Mon, 7 Jun 2010 18:37:57 +0000 (-0700) Subject: Remove the 'tahoe debug consolidate' subcommand. X-Git-Url: https://git.rkrishnan.org/%5B/%5D%20/reliability?a=commitdiff_plain;h=7092de1b6f6eb5f614568a6b370720aee37ab3fc;p=tahoe-lafs%2Ftahoe-lafs.git Remove the 'tahoe debug consolidate' subcommand. --- diff --git a/NEWS b/NEWS index 158eaa4e..cb3e8d76 100644 --- a/NEWS +++ b/NEWS @@ -20,6 +20,11 @@ as 'convmv' before using Tahoe CLI. All CLI commands have been improved to support non-ASCII parameters such as filenames and aliases on all supported Operating Systems. +** Removals + +The 'tahoe debug consolidate' subcommand (for converting old allmydata Windows +client backups to a newer format) has been removed. + ** dependency updates no python-2.4.2 or 2.4.3 (2.4.4 is ok) diff --git a/src/allmydata/scripts/consolidate.py b/src/allmydata/scripts/consolidate.py deleted file mode 100644 index da0252fa..00000000 --- a/src/allmydata/scripts/consolidate.py +++ /dev/null @@ -1,456 +0,0 @@ - -import os, pickle, time -import sqlite3 as sqlite - -import urllib -import simplejson -from allmydata.scripts.common_http import do_http, HTTPError -from allmydata.util import hashutil, base32, time_format -from allmydata.util.stringutils import to_str, quote_output, quote_path -from allmydata.util.netstring import netstring -from allmydata.scripts.common import get_alias, DEFAULT_ALIAS -from allmydata import uri - - -def readonly(writedircap): - return uri.from_string_dirnode(writedircap).get_readonly().to_string() - -def parse_old_timestamp(s, options): - try: - if not s.endswith("Z"): - raise ValueError - # This returns seconds-since-epoch for an ISO-8601-ish-formatted UTC - # time string. This might raise ValueError if the string is not in the - # right format. - when = time_format.iso_utc_time_to_seconds(s[:-1]) - return when - except ValueError: - pass - - try: - # "2008-11-16 10.34 PM" (localtime) - if s[-3:] in (" AM", " PM"): - # this might raise ValueError - when = time.strptime(s[:-3], "%Y-%m-%d %I.%M") - if s[-3:] == "PM": - when += 12*60*60 - return when - except ValueError: - pass - - try: - # "2008-11-16 10.34.56 PM" (localtime) - if s[-3:] in (" AM", " PM"): - # this might raise ValueError - when = time.strptime(s[:-3], "%Y-%m-%d %I.%M.%S") - if s[-3:] == "PM": - when += 12*60*60 - return when - except ValueError: - pass - - try: - # "2008-12-31 18.21.43" - when = time.strptime(s, "%Y-%m-%d %H.%M.%S") - return when - except ValueError: - pass - - print >>options.stderr, "unable to parse old timestamp '%s', ignoring" % s - return None - - -TAG = "consolidator_dirhash_v1" - -class CycleDetected(Exception): - pass - - -class Consolidator: - def __init__(self, options): - self.options = options - self.rootcap, path = get_alias(options.aliases, options.where, - DEFAULT_ALIAS) - assert path == "" - # TODO: allow dbfile and backupfile to be Unicode - self.dbfile = options["dbfile"] - assert self.dbfile, "--dbfile is required" - self.backupfile = options["backupfile"] - assert self.backupfile, "--backupfile is required" - self.nodeurl = options["node-url"] - if not self.nodeurl.endswith("/"): - self.nodeurl += "/" - self.must_rescan_readonly_snapshots = not os.path.exists(self.dbfile) - self.db = sqlite.connect(self.dbfile) - self.cursor = self.db.cursor() - try: - self.cursor.execute("CREATE TABLE dirhashes" - "(" - " dirhash TEXT PRIMARY KEY," - " dircap TEXT" - ")") - except sqlite.OperationalError, e: - if "table dirhashes already exists" not in str(e): - raise - - def read_directory_json(self, dircap): - url = self.nodeurl + "uri/%s?t=json" % urllib.quote(dircap) - resp = do_http("GET", url) - if resp.status != 200: - raise HTTPError("Error during directory GET", resp) - jd = simplejson.load(resp) - ntype, ndata = jd - if ntype != "dirnode": - return None - return ndata - - def msg(self, text): - print >>self.options.stdout, text - self.options.stdout.flush() - def err(self, text): - print >>self.options.stderr, text - self.options.stderr.flush() - - def consolidate(self): - try: - data = self.read_directory_json(self.rootcap + "/Backups") - except HTTPError: - self.err("Unable to list /Backups, maybe this account has none?") - return 1 - kids = data["children"] - potential_systems = {} - for (childname, (childtype, childdata)) in kids.items(): - if childtype != "dirnode": - continue - if "rw_uri" not in childdata: - self.msg("%s: not writeable" % quote_output(childname)) - continue - potential_systems[childname] = to_str(childdata["rw_uri"]) - backup_data = {"Backups": data, "systems": {}, "archives": {}} - systems = {} - for name, sdircap in potential_systems.items(): - sdata = self.read_directory_json(sdircap) - kids = sdata["children"] - if not u"Archives" in kids and not u"Latest Backup" in kids: - self.msg("%s: not a backupdir, no 'Archives' and 'Latest'" % quote_output(name)) - continue - archives_capdata = kids[u"Archives"][1] - if "rw_uri" not in archives_capdata: - self.msg("%s: /Archives is not writeable" % quote_output(name)) - continue - self.msg("%s is a system" % quote_output(name)) - backup_data["systems"][name] = sdata - archives_dircap = to_str(archives_capdata["rw_uri"]) - archives_data = self.read_directory_json(archives_dircap) - backup_data["archives"][name] = archives_data - systems[name] = archives_dircap - if not systems: - self.msg("No systems under /Backups, nothing to consolidate") - return 0 - backupfile = self.backupfile - counter = 0 - while os.path.exists(backupfile): - backupfile = self.backupfile + "." + str(counter) - counter += 1 - f = open(backupfile, "wb") - pickle.dump(backup_data, f) - f.close() - - for name, archives_dircap in sorted(systems.items()): - self.do_system(name, archives_dircap) - return 0 - - def do_system(self, system_name, archives_dircap): - # first we walk through the Archives list, looking for the existing - # snapshots. Each one will have a $NAME like "2008-11-16 10.34 PM" - # (in various forms: we use tahoe_backup.parse_old_timestamp to - # interpret it). At first, they'll all have $NAME and be writecaps. - # As we run, we will create $NAME-readonly (with a readcap) for each - # one (the first one will just be the readonly equivalent of the - # oldest snapshot: the others will be constructed out of shared - # directories). When we're done we'll have a $NAME-readonly for - # everything except the latest snapshot (to avoid any danger of - # modifying a backup that's already in progress). The very last step, - # which won't be enabled until we're sure that everything is working - # correctly, will replace each $NAME with $NAME-readonly. - - # We maintain a table that maps dirhash (hash of directory contents) - # to a directory readcap which contains those contents. We use this - # to decide if we can link to an existing directory, or if we must - # create a brand new one. Usually we add to this table in two places: - # when we scan the oldest snapshot (which we've just converted to - # readonly form), and when we must create a brand new one. If the - # table doesn't exist (probably because we've manually deleted it), - # we will scan *all* the existing readonly snapshots, and repopulate - # the table. We keep this table in a SQLite database (rather than a - # pickle) because we want to update it persistently after every - # directory creation, and writing out a 10k entry pickle takes about - # 250ms - - # 'snapshots' maps timestamp to [rwname, writecap, roname, readcap]. - # The possibilities are: - # [$NAME, writecap, None, None] : haven't touched it - # [$NAME, writecap, $NAME-readonly, readcap] : processed, not replaced - # [None, None, $NAME, readcap] : processed and replaced - - self.msg("consolidating system %s" % quote_output(system_name)) - self.directories_reused = 0 - self.directories_used_as_is = 0 - self.directories_created = 0 - self.directories_seen = set() - self.directories_used = set() - - data = self.read_directory_json(archives_dircap) - snapshots = {} - - children = sorted(data["children"].items()) - for i, (childname, (childtype, childdata)) in enumerate(children): - if childtype != "dirnode": - self.msg("non-dirnode %s in Archives/" % quote_output(childname)) - continue - timename = to_str(childname) - if timename.endswith("-readonly"): - timename = timename[:-len("-readonly")] - timestamp = parse_old_timestamp(timename, self.options) - assert timestamp is not None, timename - snapshots.setdefault(timestamp, [None, None, None, None]) - # if the snapshot is readonly (i.e. it has no rw_uri), we might - # need to re-scan it - is_readonly = not childdata.has_key("rw_uri") - if is_readonly: - readcap = to_str(childdata["ro_uri"]) - if self.must_rescan_readonly_snapshots: - self.msg(" scanning old %s (%d/%d)" % - (quote_output(childname), i+1, len(children))) - self.scan_old_directory(to_str(childdata["ro_uri"])) - snapshots[timestamp][2] = childname - snapshots[timestamp][3] = readcap - else: - writecap = to_str(childdata["rw_uri"]) - snapshots[timestamp][0] = childname - snapshots[timestamp][1] = writecap - snapshots = [ [timestamp] + values - for (timestamp, values) in snapshots.items() ] - # now 'snapshots' is [timestamp, rwname, writecap, roname, readcap], - # which makes it easier to process in temporal order - snapshots.sort() - self.msg(" %d snapshots" % len(snapshots)) - # we always ignore the last one, for safety - snapshots = snapshots[:-1] - - first_snapshot = True - for i,(timestamp, rwname, writecap, roname, readcap) in enumerate(snapshots): - eta = "?" - start_created = self.directories_created - start_used_as_is = self.directories_used_as_is - start_reused = self.directories_reused - - # [None, None, $NAME, readcap] : processed and replaced - # [$NAME, writecap, $NAME-readonly, readcap] : processed, not replaced - # [$NAME, writecap, None, None] : haven't touched it - - if readcap and not writecap: - # skip past anything we've already processed and replaced - assert roname - assert not rwname - first_snapshot = False - self.msg(" %s already readonly" % quote_output(roname)) - continue - if readcap and writecap: - # we've processed it, creating a -readonly version, but we - # haven't replaced it. - assert roname - assert rwname - first_snapshot = False - self.msg(" %s processed but not yet replaced" % quote_output(roname)) - if self.options["really"]: - self.msg(" replacing %s with %s" % (quote_output(rwname), quote_output(roname))) - self.put_child(archives_dircap, rwname, readcap) - self.delete_child(archives_dircap, roname) - continue - assert writecap - assert rwname - assert not readcap - assert not roname - roname = rwname + "-readonly" - # for the oldest snapshot, we can do a simple readonly conversion - if first_snapshot: - first_snapshot = False - readcap = readonly(writecap) - self.directories_used_as_is += 1 - self.msg(" %s: oldest snapshot, using as-is" % quote_output(rwname)) - self.scan_old_directory(readcap) - else: - # for the others, we must scan their contents and build up a new - # readonly directory (which shares common subdirs with previous - # backups) - self.msg(" %s: processing (%d/%d)" % (quote_output(rwname), i+1, len(snapshots))) - started = time.time() - readcap = self.process_directory(readonly(writecap), (rwname,)) - elapsed = time.time() - started - eta = "%ds" % (elapsed * (len(snapshots) - i-1)) - if self.options["really"]: - self.msg(" replaced %s" % quote_output(rwname)) - self.put_child(archives_dircap, rwname, readcap) - else: - self.msg(" created %s" % quote_output(roname)) - self.put_child(archives_dircap, roname, readcap) - - snapshot_created = self.directories_created - start_created - snapshot_used_as_is = self.directories_used_as_is - start_used_as_is - snapshot_reused = self.directories_reused - start_reused - self.msg(" %s: done: %d dirs created, %d used as-is, %d reused, eta %s" - % (quote_output(rwname), - snapshot_created, snapshot_used_as_is, snapshot_reused, - eta)) - # done! - self.msg(" system done, dircounts: %d/%d seen/used, %d created, %d as-is, %d reused" \ - % (len(self.directories_seen), len(self.directories_used), - self.directories_created, self.directories_used_as_is, - self.directories_reused)) - - def process_directory(self, readcap, path): - # I walk all my children (recursing over any subdirectories), build - # up a table of my contents, then see if I can re-use an old - # directory with the same contents. If not, I create a new directory - # for my contents. In all cases I return a directory readcap that - # points to my contents. - - readcap = to_str(readcap) - self.directories_seen.add(readcap) - - # build up contents to pass to mkdir() (which uses t=set_children) - contents = {} # childname -> (type, rocap, metadata) - data = self.read_directory_json(readcap) - assert data is not None - hashkids = [] - children_modified = False - for (childname, (childtype, childdata)) in sorted(data["children"].items()): - if childtype == "dirnode": - childpath = path + (childname,) - old_childcap = to_str(childdata["ro_uri"]) - childcap = self.process_directory(old_childcap, childpath) - if childcap != old_childcap: - children_modified = True - contents[childname] = ("dirnode", childcap, None) - else: - childcap = to_str(childdata["ro_uri"]) - contents[childname] = (childtype, childcap, None) - hashkids.append( (childname, childcap) ) - - dirhash = self.hash_directory_contents(hashkids) - old_dircap = self.get_old_dirhash(dirhash) - if old_dircap: - if self.options["verbose"]: - self.msg(" %s: reused" % quote_path(path)) - assert isinstance(old_dircap, str) - self.directories_reused += 1 - self.directories_used.add(old_dircap) - return old_dircap - if not children_modified: - # we're allowed to use this directory as-is - if self.options["verbose"]: - self.msg(" %s: used as-is" % quote_path(path)) - new_dircap = readonly(readcap) - assert isinstance(new_dircap, str) - self.store_dirhash(dirhash, new_dircap) - self.directories_used_as_is += 1 - self.directories_used.add(new_dircap) - return new_dircap - # otherwise, we need to create a new directory - if self.options["verbose"]: - self.msg(" %s: created" % quote_path(path)) - new_dircap = readonly(self.mkdir(contents)) - assert isinstance(new_dircap, str) - self.store_dirhash(dirhash, new_dircap) - self.directories_created += 1 - self.directories_used.add(new_dircap) - return new_dircap - - def put_child(self, dircap, childname, childcap): - url = self.nodeurl + "uri/%s/%s?t=uri" % (urllib.quote(dircap), - urllib.quote(childname)) - resp = do_http("PUT", url, childcap) - if resp.status not in (200, 201): - raise HTTPError("Error during put_child", resp) - - def delete_child(self, dircap, childname): - url = self.nodeurl + "uri/%s/%s" % (urllib.quote(dircap), - urllib.quote(childname)) - resp = do_http("DELETE", url) - if resp.status not in (200, 201): - raise HTTPError("Error during delete_child", resp) - - def mkdir(self, contents): - url = self.nodeurl + "uri?t=mkdir" - resp = do_http("POST", url) - if resp.status < 200 or resp.status >= 300: - raise HTTPError("Error during mkdir", resp) - dircap = to_str(resp.read().strip()) - url = self.nodeurl + "uri/%s?t=set_children" % urllib.quote(dircap) - body = dict([ (childname, (contents[childname][0], - {"ro_uri": contents[childname][1], - "metadata": contents[childname][2], - })) - for childname in contents - ]) - resp = do_http("POST", url, simplejson.dumps(body)) - if resp.status != 200: - raise HTTPError("Error during set_children", resp) - return dircap - - def scan_old_directory(self, dircap, ancestors=()): - # scan this directory (recursively) and stash a hash of its contents - # in the DB. This assumes that all subdirs can be used as-is (i.e. - # they've already been declared immutable) - dircap = readonly(dircap) - if dircap in ancestors: - raise CycleDetected - ancestors = ancestors + (dircap,) - #self.visited.add(dircap) - # TODO: we could use self.visited as a mapping from dircap to dirhash, - # to avoid re-scanning old shared directories multiple times - self.directories_seen.add(dircap) - self.directories_used.add(dircap) - data = self.read_directory_json(dircap) - kids = [] - for (childname, (childtype, childdata)) in data["children"].items(): - childcap = to_str(childdata["ro_uri"]) - if childtype == "dirnode": - self.scan_old_directory(childcap, ancestors) - kids.append( (childname, childcap) ) - dirhash = self.hash_directory_contents(kids) - self.store_dirhash(dirhash, dircap) - return dirhash - - def hash_directory_contents(self, kids): - kids.sort() - s = "".join([netstring(to_str(childname))+netstring(childcap) - for (childname, childcap) in kids]) - return hashutil.tagged_hash(TAG, s) - - def store_dirhash(self, dirhash, dircap): - assert isinstance(dircap, str) - # existing items should prevail - try: - c = self.cursor - c.execute("INSERT INTO dirhashes (dirhash, dircap) VALUES (?,?)", - (base32.b2a(dirhash), dircap)) - self.db.commit() - except sqlite.IntegrityError: - # already present - pass - - def get_old_dirhash(self, dirhash): - self.cursor.execute("SELECT dircap FROM dirhashes WHERE dirhash=?", - (base32.b2a(dirhash),)) - row = self.cursor.fetchone() - if not row: - return None - (dircap,) = row - return str(dircap) - - -def main(options): - c = Consolidator(options) - return c.consolidate() diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index 0af308f6..3c17c7ba 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -4,8 +4,6 @@ import struct, time, os from twisted.python import usage, failure from twisted.internet import defer -from allmydata.scripts.cli import VDriveOptions -from allmydata.util.stringutils import argv_to_unicode class DumpOptions(usage.Options): def getSynopsis(self): @@ -759,23 +757,6 @@ def repl(options): return code.interact() -class ConsolidateOptions(VDriveOptions): - optParameters = [ - ("dbfile", None, None, "persistent file for reusable dirhashes"), - ("backupfile", "b", None, "file to store backup of Archives/ contents"), - ] - optFlags = [ - ("really", None, "Really remove old snapshot directories"), - ("verbose", "v", "Emit a line for every directory examined"), - ] - def parseArgs(self, where): - self.where = argv_to_unicode(where) - -def consolidate(options): - from allmydata.scripts.consolidate import main - return main(options) - - class DebugCommand(usage.Options): subCommands = [ ["dump-share", None, DumpOptions, @@ -785,7 +766,6 @@ class DebugCommand(usage.Options): ["catalog-shares", None, CatalogSharesOptions, "Describe shares in node dirs"], ["corrupt-share", None, CorruptShareOptions, "Corrupt a share"], ["repl", None, ReplOptions, "Open a python interpreter"], - ["consolidate", None, ConsolidateOptions, "Consolidate non-shared backups"], ] def postOptions(self): if not hasattr(self, 'subOptions'): @@ -801,7 +781,6 @@ Subcommands: tahoe debug find-shares Locate sharefiles in node directories tahoe debug catalog-shares Describe all shares in node dirs tahoe debug corrupt-share Corrupt a share by flipping a bit. - tahoe debug consolidate Consolidate old non-shared backups into shared ones. Please run e.g. 'tahoe debug dump-share --help' for more details on each subcommand. @@ -815,7 +794,6 @@ subDispatch = { "catalog-shares": catalog_shares, "corrupt-share": corrupt_share, "repl": repl, - "consolidate": consolidate, } diff --git a/src/allmydata/test/test_consolidate.py b/src/allmydata/test/test_consolidate.py deleted file mode 100644 index e02aa50e..00000000 --- a/src/allmydata/test/test_consolidate.py +++ /dev/null @@ -1,298 +0,0 @@ -# -*- coding: utf-8 -*- - -import os -from cStringIO import StringIO -import pickle -from twisted.trial import unittest -from allmydata.test.no_network import GridTestMixin -from allmydata.test.common_util import ReallyEqualMixin -from allmydata.util import fileutil -from allmydata.scripts import runner, debug -from allmydata.scripts.common import get_aliases -from twisted.internet import defer, threads # CLI tests use deferToThread -from allmydata.interfaces import IDirectoryNode - -have_sqlite3 = False -try: - import sqlite3 - sqlite3 # hush pyflakes - have_sqlite3 = True -except ImportError: - pass -else: - from allmydata.scripts import consolidate - - -class CLITestMixin: - def do_cli(self, verb, *args, **kwargs): - nodeargs = [ - "--node-directory", self.get_clientdir(), - ] - if verb == "debug": - argv = [verb, args[0]] + nodeargs + list(args[1:]) - else: - argv = [verb] + nodeargs + list(args) - stdin = kwargs.get("stdin", "") - stdout, stderr = StringIO(), StringIO() - d = threads.deferToThread(runner.runner, argv, run_by_human=False, - stdin=StringIO(stdin), - stdout=stdout, stderr=stderr) - def _done(rc): - return rc, stdout.getvalue(), stderr.getvalue() - d.addCallback(_done) - return d - -class Consolidate(GridTestMixin, CLITestMixin, ReallyEqualMixin, unittest.TestCase): - - def writeto(self, path, data): - d = os.path.dirname(os.path.join(self.basedir, "home", path)) - fileutil.make_dirs(d) - f = open(os.path.join(self.basedir, "home", path), "w") - f.write(data) - f.close() - - def writeto_snapshot(self, sn, path, data): - p = "Backups/fluxx/Archives/2009-03-%02d 01.01.01/%s" % (sn, path) - return self.writeto(p, data) - - def do_cli_good(self, verb, *args, **kwargs): - d = self.do_cli(verb, *args, **kwargs) - def _check((rc,out,err)): - self.failUnlessReallyEqual(err, "", verb) - self.failUnlessReallyEqual(rc, 0, verb) - return out - d.addCallback(_check) - return d - - def test_arg_parsing(self): - self.basedir = "consolidate/Consolidate/arg_parsing" - self.set_up_grid(num_clients=1, num_servers=1) - co = debug.ConsolidateOptions() - co.parseOptions(["--node-directory", self.get_clientdir(), - "--dbfile", "foo.db", "--backupfile", "backup", "--really", - "URI:DIR2:foo"]) - self.failUnlessReallyEqual(co["dbfile"], "foo.db") - self.failUnlessReallyEqual(co["backupfile"], "backup") - self.failUnless(co["really"]) - self.failUnlessReallyEqual(co.where, u"URI:DIR2:foo") - - def test_basic(self): - if not have_sqlite3: - raise unittest.SkipTest("'tahoe debug consolidate' is not supported because sqlite3 is not available.") - - self.basedir = "consolidate/Consolidate/basic" - self.set_up_grid(num_clients=1) - - fileutil.make_dirs(os.path.join(self.basedir, "home/Backups/nonsystem")) - fileutil.make_dirs(os.path.join(self.basedir, "home/Backups/fluxx/Latest")) - self.writeto(os.path.join(self.basedir, - "home/Backups/fluxx/Archives/nondir"), - "not a directory: ignore me") - - # set up a number of non-shared "snapshots" - for i in range(1,8): - self.writeto_snapshot(i, "parent/README", "README") - self.writeto_snapshot(i, "parent/foo.txt", "foo") - self.writeto_snapshot(i, "parent/subdir1/bar.txt", "bar") - self.writeto_snapshot(i, "parent/subdir1/baz.txt", "baz") - self.writeto_snapshot(i, "parent/subdir2/yoy.txt", "yoy") - self.writeto_snapshot(i, "parent/subdir2/hola.txt", "hola") - - if i >= 1: - pass # initial snapshot - if i >= 2: - pass # second snapshot: same as the first - if i >= 3: - # modify a file - self.writeto_snapshot(i, "parent/foo.txt", "FOOF!") - if i >= 4: - # foo.txt goes back to normal - self.writeto_snapshot(i, "parent/foo.txt", "foo") - if i >= 5: - # new file - self.writeto_snapshot(i, "parent/subdir1/new.txt", "new") - if i >= 6: - # copy parent/subdir1 to parent/subdir2/copy1 - self.writeto_snapshot(i, "parent/subdir2/copy1/bar.txt", "bar") - self.writeto_snapshot(i, "parent/subdir2/copy1/baz.txt", "baz") - self.writeto_snapshot(i, "parent/subdir2/copy1/new.txt", "new") - if i >= 7: - # the last snapshot shall remain untouched - pass - - # now copy the whole thing into tahoe - d = self.do_cli_good("create-alias", "tahoe") - d.addCallback(lambda ign: - self.do_cli_good("cp", "-r", - os.path.join(self.basedir, "home/Backups"), - "tahoe:Backups")) - def _copied(res): - rootcap = get_aliases(self.get_clientdir())["tahoe"] - # now scan the initial directory structure - n = self.g.clients[0].create_node_from_uri(rootcap) - return n.get_child_at_path([u"Backups", u"fluxx", u"Archives"]) - d.addCallback(_copied) - self.nodes = {} - self.caps = {} - def stash(node, name): - self.nodes[name] = node - self.caps[name] = node.get_uri() - return node - d.addCallback(stash, "Archives") - self.manifests = {} - def stash_manifest(manifest, which): - self.manifests[which] = dict(manifest) - d.addCallback(lambda ignored: self.build_manifest(self.nodes["Archives"])) - d.addCallback(stash_manifest, "start") - def c(n): - pieces = n.split("-") - which = "finish" - if len(pieces) == 3: - which = pieces[-1] - sn = int(pieces[0]) - name = pieces[1] - path = [u"2009-03-%02d 01.01.01" % sn] - path.extend( {"b": [], - "bp": [u"parent"], - "bps1": [u"parent", u"subdir1"], - "bps2": [u"parent", u"subdir2"], - "bps2c1": [u"parent", u"subdir2", u"copy1"], - }[name] ) - return self.manifests[which][tuple(path)] - - dbfile = os.path.join(self.basedir, "dirhash.db") - backupfile = os.path.join(self.basedir, "backup.pickle") - - d.addCallback(lambda ign: - self.do_cli_good("debug", "consolidate", - "--dbfile", dbfile, - "--backupfile", backupfile, - "--verbose", - "tahoe:")) - def _check_consolidate_output1(out): - lines = out.splitlines() - last = lines[-1] - self.failUnlessReallyEqual(last.strip(), - "system done, dircounts: " - "25/12 seen/used, 7 created, 2 as-is, 13 reused") - self.failUnless(os.path.exists(dbfile)) - self.failUnless(os.path.exists(backupfile)) - self.first_backup = backup = pickle.load(open(backupfile, "rb")) - self.failUnless(u"fluxx" in backup["systems"]) - self.failUnless(u"fluxx" in backup["archives"]) - adata = backup["archives"]["fluxx"] - kids = adata[u"children"] - self.failUnlessReallyEqual(str(kids[u"2009-03-01 01.01.01"][1][u"rw_uri"]), - c("1-b-start")) - d.addCallback(_check_consolidate_output1) - d.addCallback(lambda ign: - self.do_cli_good("debug", "consolidate", - "--dbfile", dbfile, - "--backupfile", backupfile, - "--really", "tahoe:")) - def _check_consolidate_output2(out): - lines = out.splitlines() - last = lines[-1] - self.failUnlessReallyEqual(last.strip(), - "system done, dircounts: " - "0/0 seen/used, 0 created, 0 as-is, 0 reused") - backup = pickle.load(open(backupfile, "rb")) - self.failUnlessReallyEqual(backup, self.first_backup) - self.failUnless(os.path.exists(backupfile + ".0")) - d.addCallback(_check_consolidate_output2) - - d.addCallback(lambda ignored: self.build_manifest(self.nodes["Archives"])) - d.addCallback(stash_manifest, "finish") - - def check_consolidation(ignored): - #for which in ("finish",): - # for path in sorted(self.manifests[which].keys()): - # print "%s %s %s" % (which, "/".join(path), - # self.manifests[which][path]) - - # last snapshot should be untouched - self.failUnlessReallyEqual(c("7-b"), c("7-b-start")) - - # first snapshot should be a readonly form of the original - self.failUnlessReallyEqual(c("1-b-finish"), consolidate.readonly(c("1-b-start"))) - self.failUnlessReallyEqual(c("1-bp-finish"), consolidate.readonly(c("1-bp-start"))) - self.failUnlessReallyEqual(c("1-bps1-finish"), consolidate.readonly(c("1-bps1-start"))) - self.failUnlessReallyEqual(c("1-bps2-finish"), consolidate.readonly(c("1-bps2-start"))) - - # new directories should be different than the old ones - self.failIfEqual(c("1-b"), c("1-b-start")) - self.failIfEqual(c("1-bp"), c("1-bp-start")) - self.failIfEqual(c("1-bps1"), c("1-bps1-start")) - self.failIfEqual(c("1-bps2"), c("1-bps2-start")) - self.failIfEqual(c("2-b"), c("2-b-start")) - self.failIfEqual(c("2-bp"), c("2-bp-start")) - self.failIfEqual(c("2-bps1"), c("2-bps1-start")) - self.failIfEqual(c("2-bps2"), c("2-bps2-start")) - self.failIfEqual(c("3-b"), c("3-b-start")) - self.failIfEqual(c("3-bp"), c("3-bp-start")) - self.failIfEqual(c("3-bps1"), c("3-bps1-start")) - self.failIfEqual(c("3-bps2"), c("3-bps2-start")) - self.failIfEqual(c("4-b"), c("4-b-start")) - self.failIfEqual(c("4-bp"), c("4-bp-start")) - self.failIfEqual(c("4-bps1"), c("4-bps1-start")) - self.failIfEqual(c("4-bps2"), c("4-bps2-start")) - self.failIfEqual(c("5-b"), c("5-b-start")) - self.failIfEqual(c("5-bp"), c("5-bp-start")) - self.failIfEqual(c("5-bps1"), c("5-bps1-start")) - self.failIfEqual(c("5-bps2"), c("5-bps2-start")) - - # snapshot 1 and snapshot 2 should be identical - self.failUnlessReallyEqual(c("2-b"), c("1-b")) - - # snapshot 3 modified a file underneath parent/ - self.failIfEqual(c("3-b"), c("2-b")) # 3 modified a file - self.failIfEqual(c("3-bp"), c("2-bp")) - # but the subdirs are the same - self.failUnlessReallyEqual(c("3-bps1"), c("2-bps1")) - self.failUnlessReallyEqual(c("3-bps2"), c("2-bps2")) - - # snapshot 4 should be the same as 2 - self.failUnlessReallyEqual(c("4-b"), c("2-b")) - self.failUnlessReallyEqual(c("4-bp"), c("2-bp")) - self.failUnlessReallyEqual(c("4-bps1"), c("2-bps1")) - self.failUnlessReallyEqual(c("4-bps2"), c("2-bps2")) - - # snapshot 5 added a file under subdir1 - self.failIfEqual(c("5-b"), c("4-b")) - self.failIfEqual(c("5-bp"), c("4-bp")) - self.failIfEqual(c("5-bps1"), c("4-bps1")) - self.failUnlessReallyEqual(c("5-bps2"), c("4-bps2")) - - # snapshot 6 copied a directory-it should be shared - self.failIfEqual(c("6-b"), c("5-b")) - self.failIfEqual(c("6-bp"), c("5-bp")) - self.failUnlessReallyEqual(c("6-bps1"), c("5-bps1")) - self.failIfEqual(c("6-bps2"), c("5-bps2")) - self.failUnlessReallyEqual(c("6-bps2c1"), c("6-bps1")) - - d.addCallback(check_consolidation) - - return d - test_basic.timeout = 28800 # It took more than 7200 seconds on François's ARM - - def build_manifest(self, root): - # like dirnode.build_manifest, but this one doesn't skip duplicate - # nodes (i.e. it is not cycle-resistant). - manifest = [] - manifest.append( ( (), root.get_uri() ) ) - d = self.manifest_of(None, root, manifest, () ) - d.addCallback(lambda ign: manifest) - return d - - def manifest_of(self, ignored, dirnode, manifest, path): - d = dirnode.list() - def _got_children(children): - d = defer.succeed(None) - for name, (child, metadata) in children.iteritems(): - childpath = path + (name,) - manifest.append( (childpath, child.get_uri()) ) - if IDirectoryNode.providedBy(child): - d.addCallback(self.manifest_of, child, manifest, childpath) - return d - d.addCallback(_got_children) - return d