+++ /dev/null
-
-import os, pickle, time
-import sqlite3 as sqlite
-
-import urllib
-import simplejson
-from allmydata.scripts.common_http import do_http, HTTPError
-from allmydata.util import hashutil, base32, time_format
-from allmydata.util.stringutils import to_str, quote_output, quote_path
-from allmydata.util.netstring import netstring
-from allmydata.scripts.common import get_alias, DEFAULT_ALIAS
-from allmydata import uri
-
-
-def readonly(writedircap):
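-    # convert a directory writecap into its readonly (readcap) form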
- return uri.from_string_dirnode(writedircap).get_readonly().to_string()
-
-def parse_old_timestamp(s, options):
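-    # Return seconds-since-epoch for the various timestamp formats that old
-    # snapshot names have used ("...Z", "... AM"/"... PM", and
-    # "YYYY-MM-DD HH.MM.SS"), or None if the string cannot be parsed.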
- try:
- if not s.endswith("Z"):
- raise ValueError
- # This returns seconds-since-epoch for an ISO-8601-ish-formatted UTC
- # time string. This might raise ValueError if the string is not in the
- # right format.
- when = time_format.iso_utc_time_to_seconds(s[:-1])
- return when
- except ValueError:
- pass
-
-    try:
-        # "2008-11-16 10.34 PM" (localtime)
-        if s[-3:] in (" AM", " PM"):
-            # this might raise ValueError
-            when = time.mktime(time.strptime(s[:-3], "%Y-%m-%d %I.%M"))
-            if s[-3:] == " PM":
-                when += 12*60*60
-            return when
-    except ValueError:
-        pass
-
-    try:
-        # "2008-11-16 10.34.56 PM" (localtime)
-        if s[-3:] in (" AM", " PM"):
-            # this might raise ValueError
-            when = time.mktime(time.strptime(s[:-3], "%Y-%m-%d %I.%M.%S"))
-            if s[-3:] == " PM":
-                when += 12*60*60
-            return when
-    except ValueError:
-        pass
-
-    try:
-        # "2008-12-31 18.21.43" (localtime)
-        when = time.mktime(time.strptime(s, "%Y-%m-%d %H.%M.%S"))
-        return when
-    except ValueError:
-        pass
-
- print >>options.stderr, "unable to parse old timestamp '%s', ignoring" % s
- return None
-
-
-TAG = "consolidator_dirhash_v1"
-
-class CycleDetected(Exception):
- pass
-
-
-class Consolidator:
- def __init__(self, options):
- self.options = options
- self.rootcap, path = get_alias(options.aliases, options.where,
- DEFAULT_ALIAS)
- assert path == ""
- # TODO: allow dbfile and backupfile to be Unicode
- self.dbfile = options["dbfile"]
- assert self.dbfile, "--dbfile is required"
- self.backupfile = options["backupfile"]
- assert self.backupfile, "--backupfile is required"
- self.nodeurl = options["node-url"]
- if not self.nodeurl.endswith("/"):
- self.nodeurl += "/"
- self.must_rescan_readonly_snapshots = not os.path.exists(self.dbfile)
- self.db = sqlite.connect(self.dbfile)
- self.cursor = self.db.cursor()
- try:
- self.cursor.execute("CREATE TABLE dirhashes"
- "("
- " dirhash TEXT PRIMARY KEY,"
- " dircap TEXT"
- ")")
- except sqlite.OperationalError, e:
- if "table dirhashes already exists" not in str(e):
- raise
-
- def read_directory_json(self, dircap):
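-        # GET the ?t=json form of 'dircap' and return the decoded dirnode
-        # data (the dict holding 'children', 'ro_uri', etc), or None if the
-        # cap does not refer to a directory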
- url = self.nodeurl + "uri/%s?t=json" % urllib.quote(dircap)
- resp = do_http("GET", url)
- if resp.status != 200:
- raise HTTPError("Error during directory GET", resp)
- jd = simplejson.load(resp)
- ntype, ndata = jd
- if ntype != "dirnode":
- return None
- return ndata
-
- def msg(self, text):
- print >>self.options.stdout, text
- self.options.stdout.flush()
- def err(self, text):
- print >>self.options.stderr, text
- self.options.stderr.flush()
-
- def consolidate(self):
- try:
- data = self.read_directory_json(self.rootcap + "/Backups")
- except HTTPError:
- self.err("Unable to list /Backups, maybe this account has none?")
- return 1
- kids = data["children"]
- potential_systems = {}
- for (childname, (childtype, childdata)) in kids.items():
- if childtype != "dirnode":
- continue
- if "rw_uri" not in childdata:
- self.msg("%s: not writeable" % quote_output(childname))
- continue
- potential_systems[childname] = to_str(childdata["rw_uri"])
- backup_data = {"Backups": data, "systems": {}, "archives": {}}
- systems = {}
- for name, sdircap in potential_systems.items():
- sdata = self.read_directory_json(sdircap)
- kids = sdata["children"]
- if not u"Archives" in kids and not u"Latest Backup" in kids:
- self.msg("%s: not a backupdir, no 'Archives' and 'Latest'" % quote_output(name))
- continue
- archives_capdata = kids[u"Archives"][1]
- if "rw_uri" not in archives_capdata:
- self.msg("%s: /Archives is not writeable" % quote_output(name))
- continue
- self.msg("%s is a system" % quote_output(name))
- backup_data["systems"][name] = sdata
- archives_dircap = to_str(archives_capdata["rw_uri"])
- archives_data = self.read_directory_json(archives_dircap)
- backup_data["archives"][name] = archives_data
- systems[name] = archives_dircap
- if not systems:
- self.msg("No systems under /Backups, nothing to consolidate")
- return 0
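-        # Before touching anything, save everything we just read under
-        # /Backups to a local pickle. Never overwrite an existing backup
-        # file: append .0, .1, ... instead.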
- backupfile = self.backupfile
- counter = 0
- while os.path.exists(backupfile):
- backupfile = self.backupfile + "." + str(counter)
- counter += 1
- f = open(backupfile, "wb")
- pickle.dump(backup_data, f)
- f.close()
-
- for name, archives_dircap in sorted(systems.items()):
- self.do_system(name, archives_dircap)
- return 0
-
- def do_system(self, system_name, archives_dircap):
- # first we walk through the Archives list, looking for the existing
- # snapshots. Each one will have a $NAME like "2008-11-16 10.34 PM"
- # (in various forms: we use tahoe_backup.parse_old_timestamp to
- # interpret it). At first, they'll all have $NAME and be writecaps.
- # As we run, we will create $NAME-readonly (with a readcap) for each
- # one (the first one will just be the readonly equivalent of the
- # oldest snapshot: the others will be constructed out of shared
- # directories). When we're done we'll have a $NAME-readonly for
- # everything except the latest snapshot (to avoid any danger of
- # modifying a backup that's already in progress). The very last step,
- # which won't be enabled until we're sure that everything is working
- # correctly, will replace each $NAME with $NAME-readonly.
-
- # We maintain a table that maps dirhash (hash of directory contents)
- # to a directory readcap which contains those contents. We use this
- # to decide if we can link to an existing directory, or if we must
- # create a brand new one. Usually we add to this table in two places:
- # when we scan the oldest snapshot (which we've just converted to
- # readonly form), and when we must create a brand new one. If the
- # table doesn't exist (probably because we've manually deleted it),
- # we will scan *all* the existing readonly snapshots, and repopulate
- # the table. We keep this table in a SQLite database (rather than a
- # pickle) because we want to update it persistently after every
- # directory creation, and writing out a 10k entry pickle takes about
- # 250ms
-
- # 'snapshots' maps timestamp to [rwname, writecap, roname, readcap].
- # The possibilities are:
- # [$NAME, writecap, None, None] : haven't touched it
- # [$NAME, writecap, $NAME-readonly, readcap] : processed, not replaced
- # [None, None, $NAME, readcap] : processed and replaced
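-        #
-        # For example (caps and timestamps abbreviated, for illustration):
-        #   snapshots[<seconds>] = ["2008-11-16 10.34 PM", <writecap>,
-        #                           None, None]
-        # becomes, once processed but not yet replaced,
-        #   snapshots[<seconds>] = ["2008-11-16 10.34 PM", <writecap>,
-        #                           "2008-11-16 10.34 PM-readonly", <readcap>]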
-
- self.msg("consolidating system %s" % quote_output(system_name))
- self.directories_reused = 0
- self.directories_used_as_is = 0
- self.directories_created = 0
- self.directories_seen = set()
- self.directories_used = set()
-
- data = self.read_directory_json(archives_dircap)
- snapshots = {}
-
- children = sorted(data["children"].items())
- for i, (childname, (childtype, childdata)) in enumerate(children):
- if childtype != "dirnode":
- self.msg("non-dirnode %s in Archives/" % quote_output(childname))
- continue
- timename = to_str(childname)
- if timename.endswith("-readonly"):
- timename = timename[:-len("-readonly")]
- timestamp = parse_old_timestamp(timename, self.options)
- assert timestamp is not None, timename
- snapshots.setdefault(timestamp, [None, None, None, None])
-            # if the snapshot is readonly (i.e. it has no rw_uri), we might
-            # need to re-scan it
-            is_readonly = "rw_uri" not in childdata
-            if is_readonly:
-                readcap = to_str(childdata["ro_uri"])
-                if self.must_rescan_readonly_snapshots:
-                    self.msg(" scanning old %s (%d/%d)" %
-                             (quote_output(childname), i+1, len(children)))
-                    self.scan_old_directory(readcap)
- snapshots[timestamp][2] = childname
- snapshots[timestamp][3] = readcap
- else:
- writecap = to_str(childdata["rw_uri"])
- snapshots[timestamp][0] = childname
- snapshots[timestamp][1] = writecap
- snapshots = [ [timestamp] + values
- for (timestamp, values) in snapshots.items() ]
- # now 'snapshots' is [timestamp, rwname, writecap, roname, readcap],
- # which makes it easier to process in temporal order
- snapshots.sort()
- self.msg(" %d snapshots" % len(snapshots))
- # we always ignore the last one, for safety
- snapshots = snapshots[:-1]
-
- first_snapshot = True
- for i,(timestamp, rwname, writecap, roname, readcap) in enumerate(snapshots):
- eta = "?"
- start_created = self.directories_created
- start_used_as_is = self.directories_used_as_is
- start_reused = self.directories_reused
-
- # [None, None, $NAME, readcap] : processed and replaced
- # [$NAME, writecap, $NAME-readonly, readcap] : processed, not replaced
- # [$NAME, writecap, None, None] : haven't touched it
-
- if readcap and not writecap:
- # skip past anything we've already processed and replaced
- assert roname
- assert not rwname
- first_snapshot = False
- self.msg(" %s already readonly" % quote_output(roname))
- continue
- if readcap and writecap:
- # we've processed it, creating a -readonly version, but we
- # haven't replaced it.
- assert roname
- assert rwname
- first_snapshot = False
- self.msg(" %s processed but not yet replaced" % quote_output(roname))
- if self.options["really"]:
- self.msg(" replacing %s with %s" % (quote_output(rwname), quote_output(roname)))
- self.put_child(archives_dircap, rwname, readcap)
- self.delete_child(archives_dircap, roname)
- continue
- assert writecap
- assert rwname
- assert not readcap
- assert not roname
- roname = rwname + "-readonly"
- # for the oldest snapshot, we can do a simple readonly conversion
- if first_snapshot:
- first_snapshot = False
- readcap = readonly(writecap)
- self.directories_used_as_is += 1
- self.msg(" %s: oldest snapshot, using as-is" % quote_output(rwname))
- self.scan_old_directory(readcap)
- else:
- # for the others, we must scan their contents and build up a new
- # readonly directory (which shares common subdirs with previous
- # backups)
- self.msg(" %s: processing (%d/%d)" % (quote_output(rwname), i+1, len(snapshots)))
- started = time.time()
- readcap = self.process_directory(readonly(writecap), (rwname,))
- elapsed = time.time() - started
- eta = "%ds" % (elapsed * (len(snapshots) - i-1))
- if self.options["really"]:
- self.msg(" replaced %s" % quote_output(rwname))
- self.put_child(archives_dircap, rwname, readcap)
- else:
- self.msg(" created %s" % quote_output(roname))
- self.put_child(archives_dircap, roname, readcap)
-
- snapshot_created = self.directories_created - start_created
- snapshot_used_as_is = self.directories_used_as_is - start_used_as_is
- snapshot_reused = self.directories_reused - start_reused
- self.msg(" %s: done: %d dirs created, %d used as-is, %d reused, eta %s"
- % (quote_output(rwname),
- snapshot_created, snapshot_used_as_is, snapshot_reused,
- eta))
- # done!
- self.msg(" system done, dircounts: %d/%d seen/used, %d created, %d as-is, %d reused" \
- % (len(self.directories_seen), len(self.directories_used),
- self.directories_created, self.directories_used_as_is,
- self.directories_reused))
-
- def process_directory(self, readcap, path):
- # I walk all my children (recursing over any subdirectories), build
- # up a table of my contents, then see if I can re-use an old
- # directory with the same contents. If not, I create a new directory
- # for my contents. In all cases I return a directory readcap that
- # points to my contents.
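-        # For example, if the only change since the previous snapshot is a
-        # new file under parent/subdir1/, new directories are created for
-        # parent/subdir1, parent, and the snapshot root, while an existing
-        # readonly parent/subdir2 is re-used via the dirhash table.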
-
- readcap = to_str(readcap)
- self.directories_seen.add(readcap)
-
- # build up contents to pass to mkdir() (which uses t=set_children)
- contents = {} # childname -> (type, rocap, metadata)
- data = self.read_directory_json(readcap)
- assert data is not None
- hashkids = []
- children_modified = False
- for (childname, (childtype, childdata)) in sorted(data["children"].items()):
- if childtype == "dirnode":
- childpath = path + (childname,)
- old_childcap = to_str(childdata["ro_uri"])
- childcap = self.process_directory(old_childcap, childpath)
- if childcap != old_childcap:
- children_modified = True
- contents[childname] = ("dirnode", childcap, None)
- else:
- childcap = to_str(childdata["ro_uri"])
- contents[childname] = (childtype, childcap, None)
- hashkids.append( (childname, childcap) )
-
- dirhash = self.hash_directory_contents(hashkids)
- old_dircap = self.get_old_dirhash(dirhash)
- if old_dircap:
- if self.options["verbose"]:
- self.msg(" %s: reused" % quote_path(path))
- assert isinstance(old_dircap, str)
- self.directories_reused += 1
- self.directories_used.add(old_dircap)
- return old_dircap
- if not children_modified:
- # we're allowed to use this directory as-is
- if self.options["verbose"]:
- self.msg(" %s: used as-is" % quote_path(path))
- new_dircap = readonly(readcap)
- assert isinstance(new_dircap, str)
- self.store_dirhash(dirhash, new_dircap)
- self.directories_used_as_is += 1
- self.directories_used.add(new_dircap)
- return new_dircap
- # otherwise, we need to create a new directory
- if self.options["verbose"]:
- self.msg(" %s: created" % quote_path(path))
- new_dircap = readonly(self.mkdir(contents))
- assert isinstance(new_dircap, str)
- self.store_dirhash(dirhash, new_dircap)
- self.directories_created += 1
- self.directories_used.add(new_dircap)
- return new_dircap
-
- def put_child(self, dircap, childname, childcap):
- url = self.nodeurl + "uri/%s/%s?t=uri" % (urllib.quote(dircap),
- urllib.quote(childname))
- resp = do_http("PUT", url, childcap)
- if resp.status not in (200, 201):
- raise HTTPError("Error during put_child", resp)
-
- def delete_child(self, dircap, childname):
- url = self.nodeurl + "uri/%s/%s" % (urllib.quote(dircap),
- urllib.quote(childname))
- resp = do_http("DELETE", url)
- if resp.status not in (200, 201):
- raise HTTPError("Error during delete_child", resp)
-
- def mkdir(self, contents):
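-        # create an empty unlinked directory, then populate it with a single
-        # t=set_children request, so each new directory costs two HTTP
-        # round-trips no matter how many children it has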
- url = self.nodeurl + "uri?t=mkdir"
- resp = do_http("POST", url)
- if resp.status < 200 or resp.status >= 300:
- raise HTTPError("Error during mkdir", resp)
- dircap = to_str(resp.read().strip())
- url = self.nodeurl + "uri/%s?t=set_children" % urllib.quote(dircap)
- body = dict([ (childname, (contents[childname][0],
- {"ro_uri": contents[childname][1],
- "metadata": contents[childname][2],
- }))
- for childname in contents
- ])
- resp = do_http("POST", url, simplejson.dumps(body))
- if resp.status != 200:
- raise HTTPError("Error during set_children", resp)
- return dircap
-
- def scan_old_directory(self, dircap, ancestors=()):
- # scan this directory (recursively) and stash a hash of its contents
- # in the DB. This assumes that all subdirs can be used as-is (i.e.
- # they've already been declared immutable)
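-        # 'ancestors' is the chain of readcaps leading down to this
-        # directory; seeing one of them again means the graph has a cycle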
- dircap = readonly(dircap)
- if dircap in ancestors:
- raise CycleDetected
- ancestors = ancestors + (dircap,)
- #self.visited.add(dircap)
- # TODO: we could use self.visited as a mapping from dircap to dirhash,
- # to avoid re-scanning old shared directories multiple times
- self.directories_seen.add(dircap)
- self.directories_used.add(dircap)
- data = self.read_directory_json(dircap)
- kids = []
- for (childname, (childtype, childdata)) in data["children"].items():
- childcap = to_str(childdata["ro_uri"])
- if childtype == "dirnode":
- self.scan_old_directory(childcap, ancestors)
- kids.append( (childname, childcap) )
- dirhash = self.hash_directory_contents(kids)
- self.store_dirhash(dirhash, dircap)
- return dirhash
-
- def hash_directory_contents(self, kids):
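-        # canonical form: sort the (childname, childcap) pairs, netstring-
-        # encode each field, concatenate, and hash the result under TAG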
- kids.sort()
- s = "".join([netstring(to_str(childname))+netstring(childcap)
- for (childname, childcap) in kids])
- return hashutil.tagged_hash(TAG, s)
-
- def store_dirhash(self, dirhash, dircap):
- assert isinstance(dircap, str)
- # existing items should prevail
- try:
- c = self.cursor
- c.execute("INSERT INTO dirhashes (dirhash, dircap) VALUES (?,?)",
- (base32.b2a(dirhash), dircap))
- self.db.commit()
- except sqlite.IntegrityError:
- # already present
- pass
-
- def get_old_dirhash(self, dirhash):
- self.cursor.execute("SELECT dircap FROM dirhashes WHERE dirhash=?",
- (base32.b2a(dirhash),))
- row = self.cursor.fetchone()
- if not row:
- return None
- (dircap,) = row
- return str(dircap)
-
-
-def main(options):
- c = Consolidator(options)
- return c.consolidate()
+++ /dev/null
-# -*- coding: utf-8 -*-
-
-import os
-from cStringIO import StringIO
-import pickle
-from twisted.trial import unittest
-from allmydata.test.no_network import GridTestMixin
-from allmydata.test.common_util import ReallyEqualMixin
-from allmydata.util import fileutil
-from allmydata.scripts import runner, debug
-from allmydata.scripts.common import get_aliases
-from twisted.internet import defer, threads # CLI tests use deferToThread
-from allmydata.interfaces import IDirectoryNode
-
-have_sqlite3 = False
-try:
- import sqlite3
- sqlite3 # hush pyflakes
- have_sqlite3 = True
-except ImportError:
- pass
-else:
- from allmydata.scripts import consolidate
-
-
-class CLITestMixin:
- def do_cli(self, verb, *args, **kwargs):
- nodeargs = [
- "--node-directory", self.get_clientdir(),
- ]
- if verb == "debug":
- argv = [verb, args[0]] + nodeargs + list(args[1:])
- else:
- argv = [verb] + nodeargs + list(args)
- stdin = kwargs.get("stdin", "")
- stdout, stderr = StringIO(), StringIO()
- d = threads.deferToThread(runner.runner, argv, run_by_human=False,
- stdin=StringIO(stdin),
- stdout=stdout, stderr=stderr)
- def _done(rc):
- return rc, stdout.getvalue(), stderr.getvalue()
- d.addCallback(_done)
- return d
-
-class Consolidate(GridTestMixin, CLITestMixin, ReallyEqualMixin, unittest.TestCase):
-
- def writeto(self, path, data):
- d = os.path.dirname(os.path.join(self.basedir, "home", path))
- fileutil.make_dirs(d)
- f = open(os.path.join(self.basedir, "home", path), "w")
- f.write(data)
- f.close()
-
- def writeto_snapshot(self, sn, path, data):
- p = "Backups/fluxx/Archives/2009-03-%02d 01.01.01/%s" % (sn, path)
- return self.writeto(p, data)
-
- def do_cli_good(self, verb, *args, **kwargs):
- d = self.do_cli(verb, *args, **kwargs)
- def _check((rc,out,err)):
- self.failUnlessReallyEqual(err, "", verb)
- self.failUnlessReallyEqual(rc, 0, verb)
- return out
- d.addCallback(_check)
- return d
-
- def test_arg_parsing(self):
- self.basedir = "consolidate/Consolidate/arg_parsing"
- self.set_up_grid(num_clients=1, num_servers=1)
- co = debug.ConsolidateOptions()
- co.parseOptions(["--node-directory", self.get_clientdir(),
- "--dbfile", "foo.db", "--backupfile", "backup", "--really",
- "URI:DIR2:foo"])
- self.failUnlessReallyEqual(co["dbfile"], "foo.db")
- self.failUnlessReallyEqual(co["backupfile"], "backup")
- self.failUnless(co["really"])
- self.failUnlessReallyEqual(co.where, u"URI:DIR2:foo")
-
- def test_basic(self):
- if not have_sqlite3:
- raise unittest.SkipTest("'tahoe debug consolidate' is not supported because sqlite3 is not available.")
-
- self.basedir = "consolidate/Consolidate/basic"
- self.set_up_grid(num_clients=1)
-
- fileutil.make_dirs(os.path.join(self.basedir, "home/Backups/nonsystem"))
- fileutil.make_dirs(os.path.join(self.basedir, "home/Backups/fluxx/Latest"))
-        self.writeto("Backups/fluxx/Archives/nondir",
-                     "not a directory: ignore me")
-
- # set up a number of non-shared "snapshots"
- for i in range(1,8):
- self.writeto_snapshot(i, "parent/README", "README")
- self.writeto_snapshot(i, "parent/foo.txt", "foo")
- self.writeto_snapshot(i, "parent/subdir1/bar.txt", "bar")
- self.writeto_snapshot(i, "parent/subdir1/baz.txt", "baz")
- self.writeto_snapshot(i, "parent/subdir2/yoy.txt", "yoy")
- self.writeto_snapshot(i, "parent/subdir2/hola.txt", "hola")
-
- if i >= 1:
- pass # initial snapshot
- if i >= 2:
- pass # second snapshot: same as the first
- if i >= 3:
- # modify a file
- self.writeto_snapshot(i, "parent/foo.txt", "FOOF!")
- if i >= 4:
- # foo.txt goes back to normal
- self.writeto_snapshot(i, "parent/foo.txt", "foo")
- if i >= 5:
- # new file
- self.writeto_snapshot(i, "parent/subdir1/new.txt", "new")
- if i >= 6:
- # copy parent/subdir1 to parent/subdir2/copy1
- self.writeto_snapshot(i, "parent/subdir2/copy1/bar.txt", "bar")
- self.writeto_snapshot(i, "parent/subdir2/copy1/baz.txt", "baz")
- self.writeto_snapshot(i, "parent/subdir2/copy1/new.txt", "new")
- if i >= 7:
- # the last snapshot shall remain untouched
- pass
-
- # now copy the whole thing into tahoe
- d = self.do_cli_good("create-alias", "tahoe")
- d.addCallback(lambda ign:
- self.do_cli_good("cp", "-r",
- os.path.join(self.basedir, "home/Backups"),
- "tahoe:Backups"))
- def _copied(res):
- rootcap = get_aliases(self.get_clientdir())["tahoe"]
- # now scan the initial directory structure
- n = self.g.clients[0].create_node_from_uri(rootcap)
- return n.get_child_at_path([u"Backups", u"fluxx", u"Archives"])
- d.addCallback(_copied)
- self.nodes = {}
- self.caps = {}
- def stash(node, name):
- self.nodes[name] = node
- self.caps[name] = node.get_uri()
- return node
- d.addCallback(stash, "Archives")
- self.manifests = {}
- def stash_manifest(manifest, which):
- self.manifests[which] = dict(manifest)
- d.addCallback(lambda ignored: self.build_manifest(self.nodes["Archives"]))
- d.addCallback(stash_manifest, "start")
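-        # c("SN-AREA[-WHICH]") looks up the cap recorded for one directory:
-        # SN is the snapshot number, AREA picks a path inside that snapshot
-        # (b=snapshot root, bp=parent, bps1=parent/subdir1, ...), and WHICH
-        # selects the "start" or (default) "finish" manifest.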
- def c(n):
- pieces = n.split("-")
- which = "finish"
- if len(pieces) == 3:
- which = pieces[-1]
- sn = int(pieces[0])
- name = pieces[1]
- path = [u"2009-03-%02d 01.01.01" % sn]
- path.extend( {"b": [],
- "bp": [u"parent"],
- "bps1": [u"parent", u"subdir1"],
- "bps2": [u"parent", u"subdir2"],
- "bps2c1": [u"parent", u"subdir2", u"copy1"],
- }[name] )
- return self.manifests[which][tuple(path)]
-
- dbfile = os.path.join(self.basedir, "dirhash.db")
- backupfile = os.path.join(self.basedir, "backup.pickle")
-
- d.addCallback(lambda ign:
- self.do_cli_good("debug", "consolidate",
- "--dbfile", dbfile,
- "--backupfile", backupfile,
- "--verbose",
- "tahoe:"))
- def _check_consolidate_output1(out):
- lines = out.splitlines()
- last = lines[-1]
- self.failUnlessReallyEqual(last.strip(),
- "system done, dircounts: "
- "25/12 seen/used, 7 created, 2 as-is, 13 reused")
- self.failUnless(os.path.exists(dbfile))
- self.failUnless(os.path.exists(backupfile))
- self.first_backup = backup = pickle.load(open(backupfile, "rb"))
- self.failUnless(u"fluxx" in backup["systems"])
- self.failUnless(u"fluxx" in backup["archives"])
- adata = backup["archives"]["fluxx"]
- kids = adata[u"children"]
- self.failUnlessReallyEqual(str(kids[u"2009-03-01 01.01.01"][1][u"rw_uri"]),
- c("1-b-start"))
- d.addCallback(_check_consolidate_output1)
- d.addCallback(lambda ign:
- self.do_cli_good("debug", "consolidate",
- "--dbfile", dbfile,
- "--backupfile", backupfile,
- "--really", "tahoe:"))
- def _check_consolidate_output2(out):
- lines = out.splitlines()
- last = lines[-1]
- self.failUnlessReallyEqual(last.strip(),
- "system done, dircounts: "
- "0/0 seen/used, 0 created, 0 as-is, 0 reused")
- backup = pickle.load(open(backupfile, "rb"))
- self.failUnlessReallyEqual(backup, self.first_backup)
- self.failUnless(os.path.exists(backupfile + ".0"))
- d.addCallback(_check_consolidate_output2)
-
- d.addCallback(lambda ignored: self.build_manifest(self.nodes["Archives"]))
- d.addCallback(stash_manifest, "finish")
-
- def check_consolidation(ignored):
- #for which in ("finish",):
- # for path in sorted(self.manifests[which].keys()):
- # print "%s %s %s" % (which, "/".join(path),
- # self.manifests[which][path])
-
- # last snapshot should be untouched
- self.failUnlessReallyEqual(c("7-b"), c("7-b-start"))
-
- # first snapshot should be a readonly form of the original
- self.failUnlessReallyEqual(c("1-b-finish"), consolidate.readonly(c("1-b-start")))
- self.failUnlessReallyEqual(c("1-bp-finish"), consolidate.readonly(c("1-bp-start")))
- self.failUnlessReallyEqual(c("1-bps1-finish"), consolidate.readonly(c("1-bps1-start")))
- self.failUnlessReallyEqual(c("1-bps2-finish"), consolidate.readonly(c("1-bps2-start")))
-
- # new directories should be different than the old ones
- self.failIfEqual(c("1-b"), c("1-b-start"))
- self.failIfEqual(c("1-bp"), c("1-bp-start"))
- self.failIfEqual(c("1-bps1"), c("1-bps1-start"))
- self.failIfEqual(c("1-bps2"), c("1-bps2-start"))
- self.failIfEqual(c("2-b"), c("2-b-start"))
- self.failIfEqual(c("2-bp"), c("2-bp-start"))
- self.failIfEqual(c("2-bps1"), c("2-bps1-start"))
- self.failIfEqual(c("2-bps2"), c("2-bps2-start"))
- self.failIfEqual(c("3-b"), c("3-b-start"))
- self.failIfEqual(c("3-bp"), c("3-bp-start"))
- self.failIfEqual(c("3-bps1"), c("3-bps1-start"))
- self.failIfEqual(c("3-bps2"), c("3-bps2-start"))
- self.failIfEqual(c("4-b"), c("4-b-start"))
- self.failIfEqual(c("4-bp"), c("4-bp-start"))
- self.failIfEqual(c("4-bps1"), c("4-bps1-start"))
- self.failIfEqual(c("4-bps2"), c("4-bps2-start"))
- self.failIfEqual(c("5-b"), c("5-b-start"))
- self.failIfEqual(c("5-bp"), c("5-bp-start"))
- self.failIfEqual(c("5-bps1"), c("5-bps1-start"))
- self.failIfEqual(c("5-bps2"), c("5-bps2-start"))
-
- # snapshot 1 and snapshot 2 should be identical
- self.failUnlessReallyEqual(c("2-b"), c("1-b"))
-
- # snapshot 3 modified a file underneath parent/
- self.failIfEqual(c("3-b"), c("2-b")) # 3 modified a file
- self.failIfEqual(c("3-bp"), c("2-bp"))
- # but the subdirs are the same
- self.failUnlessReallyEqual(c("3-bps1"), c("2-bps1"))
- self.failUnlessReallyEqual(c("3-bps2"), c("2-bps2"))
-
- # snapshot 4 should be the same as 2
- self.failUnlessReallyEqual(c("4-b"), c("2-b"))
- self.failUnlessReallyEqual(c("4-bp"), c("2-bp"))
- self.failUnlessReallyEqual(c("4-bps1"), c("2-bps1"))
- self.failUnlessReallyEqual(c("4-bps2"), c("2-bps2"))
-
- # snapshot 5 added a file under subdir1
- self.failIfEqual(c("5-b"), c("4-b"))
- self.failIfEqual(c("5-bp"), c("4-bp"))
- self.failIfEqual(c("5-bps1"), c("4-bps1"))
- self.failUnlessReallyEqual(c("5-bps2"), c("4-bps2"))
-
-            # snapshot 6 copied a directory, so it should be shared
- self.failIfEqual(c("6-b"), c("5-b"))
- self.failIfEqual(c("6-bp"), c("5-bp"))
- self.failUnlessReallyEqual(c("6-bps1"), c("5-bps1"))
- self.failIfEqual(c("6-bps2"), c("5-bps2"))
- self.failUnlessReallyEqual(c("6-bps2c1"), c("6-bps1"))
-
- d.addCallback(check_consolidation)
-
- return d
- test_basic.timeout = 28800 # It took more than 7200 seconds on François's ARM
-
- def build_manifest(self, root):
- # like dirnode.build_manifest, but this one doesn't skip duplicate
- # nodes (i.e. it is not cycle-resistant).
- manifest = []
- manifest.append( ( (), root.get_uri() ) )
- d = self.manifest_of(None, root, manifest, () )
- d.addCallback(lambda ign: manifest)
- return d
-
- def manifest_of(self, ignored, dirnode, manifest, path):
- d = dirnode.list()
- def _got_children(children):
- d = defer.succeed(None)
- for name, (child, metadata) in children.iteritems():
- childpath = path + (name,)
- manifest.append( (childpath, child.get_uri()) )
- if IDirectoryNode.providedBy(child):
- d.addCallback(self.manifest_of, child, manifest, childpath)
- return d
- d.addCallback(_got_children)
- return d