From: Daira Hopwood <daira@jacaranda.org>
Date: Thu, 8 Oct 2015 15:01:46 +0000 (+0100)
Subject: Add magicfolderdb.py.
X-Git-Url: https://git.rkrishnan.org/%5B/%5D%20/file/reliability?a=commitdiff_plain;h=2502a843c0a580b0206da25a637fe00d28ebd4dc;p=tahoe-lafs%2Ftahoe-lafs.git

Add magicfolderdb.py.

Signed-off-by: Daira Hopwood <daira@jacaranda.org>
---

diff --git a/src/allmydata/backupdb.py b/src/allmydata/backupdb.py
deleted file mode 100644
index 316ebf51..00000000
--- a/src/allmydata/backupdb.py
+++ /dev/null
@@ -1,363 +0,0 @@
-
-import os.path, sys, time, random, stat
-
-from allmydata.util.netstring import netstring
-from allmydata.util.hashutil import backupdb_dirhash
-from allmydata.util import base32
-from allmydata.util.fileutil import abspath_expanduser_unicode
-from allmydata.util.encodingutil import to_str
-from allmydata.util.dbutil import get_db, DBError
-
-
-DAY = 24*60*60
-MONTH = 30*DAY
-
-MAIN_SCHEMA = """
-CREATE TABLE version
-(
- version INTEGER  -- contains one row, set to %s
-);
-
-CREATE TABLE local_files
-(
- path  VARCHAR(1024) PRIMARY KEY, -- index, this is an absolute UTF-8-encoded local filename
- -- note that size is before mtime and ctime here, but after in function parameters
- size  INTEGER,       -- os.stat(fn)[stat.ST_SIZE]   (NULL if the file has been deleted)
- mtime NUMBER,        -- os.stat(fn)[stat.ST_MTIME]
- ctime NUMBER,        -- os.stat(fn)[stat.ST_CTIME]
- fileid INTEGER%s
-);
-
-CREATE TABLE caps
-(
- fileid INTEGER PRIMARY KEY AUTOINCREMENT,
- filecap VARCHAR(256) UNIQUE       -- URI:CHK:...
-);
-
-CREATE TABLE last_upload
-(
- fileid INTEGER PRIMARY KEY,
- last_uploaded TIMESTAMP,
- last_checked TIMESTAMP
-);
-
-"""
-
-SCHEMA_v1 = MAIN_SCHEMA % (1, "")
-
-TABLE_DIRECTORY = """
-
-CREATE TABLE directories -- added in v2
-(
- dirhash varchar(256) PRIMARY KEY,  -- base32(dirhash)
- dircap varchar(256),               -- URI:DIR2-CHK:...
- last_uploaded TIMESTAMP,
- last_checked TIMESTAMP
-);
-
-"""
-
-SCHEMA_v2 = MAIN_SCHEMA % (2, "") + TABLE_DIRECTORY
-
-UPDATE_v1_to_v2 = TABLE_DIRECTORY + """
-UPDATE version SET version=2;
-"""
-
-UPDATERS = {
-    2: UPDATE_v1_to_v2,
-}
-
-
-def get_backupdb(dbfile, stderr=sys.stderr,
-                 create_version=(SCHEMA_v2, 2), just_create=False):
-    # Open or create the given backupdb file. The parent directory must
-    # exist.
-    try:
-        (sqlite3, db) = get_db(dbfile, stderr, create_version, updaters=UPDATERS,
-                               just_create=just_create, dbname="backupdb")
-        if create_version[1] in (1, 2):
-            return BackupDB(sqlite3, db)
-        else:
-            print >>stderr, "invalid db schema version specified"
-            return None
-    except DBError, e:
-        print >>stderr, e
-        return None
-
-
-class FileResult:
-    def __init__(self, bdb, filecap, should_check,
-                 path, mtime, ctime, size):
-        self.bdb = bdb
-        self.filecap = filecap
-        self.should_check_p = should_check
-
-        self.path = path
-        self.mtime = mtime
-        self.ctime = ctime
-        self.size = size
-
-    def was_uploaded(self):
-        if self.filecap:
-            return self.filecap
-        return False
-
-    def did_upload(self, filecap):
-        self.bdb.did_upload_file(filecap, self.path,
-                                 self.mtime, self.ctime, self.size)
-
-    def should_check(self):
-        return self.should_check_p
-
-    def did_check_healthy(self, results):
-        self.bdb.did_check_file_healthy(self.filecap, results)
-
-
-class DirectoryResult:
-    def __init__(self, bdb, dirhash, dircap, should_check):
-        self.bdb = bdb
-        self.dircap = dircap
-        self.should_check_p = should_check
-        self.dirhash = dirhash
-
-    def was_created(self):
-        if self.dircap:
-            return self.dircap
-        return False
-
-    def did_create(self, dircap):
-        self.bdb.did_create_directory(dircap, self.dirhash)
-
-    def should_check(self):
-        return self.should_check_p
-
-    def did_check_healthy(self, results):
-        self.bdb.did_check_directory_healthy(self.dircap, results)
-
-
-class BackupDB:
-    VERSION = 2
-    NO_CHECK_BEFORE = 1*MONTH
-    ALWAYS_CHECK_AFTER = 2*MONTH
-
-    def __init__(self, sqlite_module, connection):
-        self.sqlite_module = sqlite_module
-        self.connection = connection
-        self.cursor = connection.cursor()
-
-    def check_file_db_exists(self, path):
-        """I will tell you if a given file has an entry in my database or not
-        by returning True or False.
-        """
-        c = self.cursor
-        c.execute("SELECT size,mtime,ctime,fileid"
-                  " FROM local_files"
-                  " WHERE path=?",
-                  (path,))
-        row = self.cursor.fetchone()
-        if not row:
-            return False
-        else:
-            return True
-
-    def check_file(self, path, use_timestamps=True):
-        """I will tell you if a given local file needs to be uploaded or not,
-        by looking in a database and seeing if I have a record of this file
-        having been uploaded earlier.
-
-        I return a FileResults object, synchronously. If r.was_uploaded()
-        returns False, you should upload the file. When you are finished
-        uploading it, call r.did_upload(filecap), so I can update my
-        database.
-
-        If was_uploaded() returns a filecap, you might be able to avoid an
-        upload. Call r.should_check(), and if it says False, you can skip the
-        upload and use the filecap returned by was_uploaded().
-
-        If should_check() returns True, you should perform a filecheck on the
-        filecap returned by was_uploaded(). If the check indicates the file
-        is healthy, please call r.did_check_healthy(checker_results) so I can
-        update the database, using the de-JSONized response from the webapi
-        t=check call for 'checker_results'. If the check indicates the file
-        is not healthy, please upload the file and call r.did_upload(filecap)
-        when you're done.
-
-        If use_timestamps=True (the default), I will compare mtime and ctime
-        of the local file against an entry in my database, and consider the
-        file to be unchanged if mtime, ctime, and filesize are all the same
-        as the earlier version. If use_timestamps=False, I will not trust the
-        timestamps, so more files (perhaps all) will be marked as needing
-        upload. A future version of this database may hash the file to make
-        equality decisions, in which case use_timestamps=False will not
-        always imply r.must_upload()==True.
-
-        'path' points to a local file on disk, possibly relative to the
-        current working directory. The database stores absolute pathnames.
-        """
-
-        path = abspath_expanduser_unicode(path)
-
-        # XXX consider using get_pathinfo
-        s = os.stat(path)
-        size = s[stat.ST_SIZE]
-        mtime = s[stat.ST_MTIME]
-        ctime = s[stat.ST_CTIME]
-
-        now = time.time()
-        c = self.cursor
-
-        c.execute("SELECT size,mtime,ctime,fileid"
-                  " FROM local_files"
-                  " WHERE path=?",
-                  (path,))
-        row = self.cursor.fetchone()
-        if not row:
-            return FileResult(self, None, False, path, mtime, ctime, size)
-        (last_size,last_mtime,last_ctime,last_fileid) = row
-
-        c.execute("SELECT caps.filecap, last_upload.last_checked"
-                  " FROM caps,last_upload"
-                  " WHERE caps.fileid=? AND last_upload.fileid=?",
-                  (last_fileid, last_fileid))
-        row2 = c.fetchone()
-
-        if ((last_size != size
-             or not use_timestamps
-             or last_mtime != mtime
-             or last_ctime != ctime) # the file has been changed
-            or (not row2) # we somehow forgot where we put the file last time
-            ):
-            c.execute("DELETE FROM local_files WHERE path=?", (path,))
-            self.connection.commit()
-            return FileResult(self, None, False, path, mtime, ctime, size)
-
-        # at this point, we're allowed to assume the file hasn't been changed
-        (filecap, last_checked) = row2
-        age = now - last_checked
-
-        probability = ((age - self.NO_CHECK_BEFORE) /
-                       (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
-        probability = min(max(probability, 0.0), 1.0)
-        should_check = bool(random.random() < probability)
-
-        return FileResult(self, to_str(filecap), should_check,
-                          path, mtime, ctime, size)
-
-    def get_or_allocate_fileid_for_cap(self, filecap):
-        # find an existing fileid for this filecap, or insert a new one. The
-        # caller is required to commit() afterwards.
-
-        # mysql has "INSERT ... ON DUPLICATE KEY UPDATE", but not sqlite
-        # sqlite has "INSERT ON CONFLICT REPLACE", but not mysql
-        # So we use INSERT, ignore any error, then a SELECT
-        c = self.cursor
-        try:
-            c.execute("INSERT INTO caps (filecap) VALUES (?)", (filecap,))
-        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
-            # sqlite3 on sid gives IntegrityError
-            # pysqlite2 (which we don't use, so maybe no longer relevant) on dapper gives OperationalError
-            pass
-        c.execute("SELECT fileid FROM caps WHERE filecap=?", (filecap,))
-        foundrow = c.fetchone()
-        assert foundrow
-        fileid = foundrow[0]
-        return fileid
-
-    def did_upload_file(self, filecap, path, mtime, ctime, size):
-        now = time.time()
-        fileid = self.get_or_allocate_fileid_for_cap(filecap)
-        try:
-            self.cursor.execute("INSERT INTO last_upload VALUES (?,?,?)",
-                                (fileid, now, now))
-        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
-            self.cursor.execute("UPDATE last_upload"
-                                " SET last_uploaded=?, last_checked=?"
-                                " WHERE fileid=?",
-                                (now, now, fileid))
-        try:
-            self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?)",
-                                (path, size, mtime, ctime, fileid))
-        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
-            self.cursor.execute("UPDATE local_files"
-                                " SET size=?, mtime=?, ctime=?, fileid=?"
-                                " WHERE path=?",
-                                (size, mtime, ctime, fileid, path))
-        self.connection.commit()
-
-    def did_check_file_healthy(self, filecap, results):
-        now = time.time()
-        fileid = self.get_or_allocate_fileid_for_cap(filecap)
-        self.cursor.execute("UPDATE last_upload"
-                            " SET last_checked=?"
-                            " WHERE fileid=?",
-                            (now, fileid))
-        self.connection.commit()
-
-    def check_directory(self, contents):
-        """I will tell you if a new directory needs to be created for a given
-        set of directory contents, or if I know of an existing (immutable)
-        directory that can be used instead.
-
-        'contents' should be a dictionary that maps from child name (a single
-        unicode string) to immutable childcap (filecap or dircap).
-
-        I return a DirectoryResult object, synchronously. If r.was_created()
-        returns False, you should create the directory (with
-        t=mkdir-immutable). When you are finished, call r.did_create(dircap)
-        so I can update my database.
-
-        If was_created() returns a dircap, you might be able to avoid the
-        mkdir. Call r.should_check(), and if it says False, you can skip the
-        mkdir and use the dircap returned by was_created().
-
-        If should_check() returns True, you should perform a check operation
-        on the dircap returned by was_created(). If the check indicates the
-        directory is healthy, please call
-        r.did_check_healthy(checker_results) so I can update the database,
-        using the de-JSONized response from the webapi t=check call for
-        'checker_results'. If the check indicates the directory is not
-        healthy, please repair or re-create the directory and call
-        r.did_create(dircap) when you're done.
-        """
-
-        now = time.time()
-        entries = []
-        for name in contents:
-            entries.append( [name.encode("utf-8"), contents[name]] )
-        entries.sort()
-        data = "".join([netstring(name_utf8)+netstring(cap)
-                        for (name_utf8,cap) in entries])
-        dirhash = backupdb_dirhash(data)
-        dirhash_s = base32.b2a(dirhash)
-        c = self.cursor
-        c.execute("SELECT dircap, last_checked"
-                  " FROM directories WHERE dirhash=?", (dirhash_s,))
-        row = c.fetchone()
-        if not row:
-            return DirectoryResult(self, dirhash_s, None, False)
-        (dircap, last_checked) = row
-        age = now - last_checked
-
-        probability = ((age - self.NO_CHECK_BEFORE) /
-                       (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
-        probability = min(max(probability, 0.0), 1.0)
-        should_check = bool(random.random() < probability)
-
-        return DirectoryResult(self, dirhash_s, to_str(dircap), should_check)
-
-    def did_create_directory(self, dircap, dirhash):
-        now = time.time()
-        # if the dirhash is already present (i.e. we've re-uploaded an
-        # existing directory, possibly replacing the dircap with a new one),
-        # update the record in place. Otherwise create a new record.)
-        self.cursor.execute("REPLACE INTO directories VALUES (?,?,?,?)",
-                            (dirhash, dircap, now, now))
-        self.connection.commit()
-
-    def did_check_directory_healthy(self, dircap, results):
-        now = time.time()
-        self.cursor.execute("UPDATE directories"
-                            " SET last_checked=?"
-                            " WHERE dircap=?",
-                            (now, dircap))
-        self.connection.commit()
diff --git a/src/allmydata/frontends/magic_folder.py b/src/allmydata/frontends/magic_folder.py
index 16cdaad0..89b981a5 100644
--- a/src/allmydata/frontends/magic_folder.py
+++ b/src/allmydata/frontends/magic_folder.py
@@ -19,7 +19,7 @@ from allmydata.util.encodingutil import listdir_filepath, to_filepath, \
      extend_filepath, unicode_from_filepath, unicode_segments_from, \
      quote_filepath, quote_local_unicode_path, quote_output, FilenameEncodingError
 from allmydata.immutable.upload import FileName, Data
-from allmydata import backupdb, magicpath
+from allmydata import magicfolderdb, magicpath
 
 
 IN_EXCL_UNLINK = 0x04000000L
@@ -31,13 +31,13 @@ def get_inotify_module():
         elif runtime.platform.supportsINotify():
             from twisted.internet import inotify
         else:
-            raise NotImplementedError("filesystem notification needed for drop-upload is not supported.\n"
+            raise NotImplementedError("filesystem notification needed for Magic Folder is not supported.\n"
                                       "This currently requires Linux or Windows.")
         return inotify
     except (ImportError, AttributeError) as e:
         log.msg(e)
         if sys.platform == "win32":
-            raise NotImplementedError("filesystem notification needed for drop-upload is not supported.\n"
+            raise NotImplementedError("filesystem notification needed for Magic Folder is not supported.\n"
                                       "Windows support requires at least Vista, and has only been tested on Windows 7.")
         raise
 
@@ -51,7 +51,7 @@ class MagicFolder(service.MultiService):
 
         service.MultiService.__init__(self)
 
-        db = backupdb.get_backupdb(dbfile, create_version=(backupdb.MAGIC_FOLDER_SCHEMA_v3, 3))
+        db = magicfolderdb.get_magicfolderdb(dbfile, create_version=(magicfolderdb.SCHEMA_v1, 1))
         if db is None:
             return Failure(Exception('ERROR: Unable to load magic folder db.'))
 
diff --git a/src/allmydata/magicfolderdb.py b/src/allmydata/magicfolderdb.py
new file mode 100644
index 00000000..187941b8
--- /dev/null
+++ b/src/allmydata/magicfolderdb.py
@@ -0,0 +1,139 @@
+
+import sys
+
+from allmydata.util.dbutil import get_db, DBError
+
+
+# magic-folder db schema version 1
+SCHEMA_v1 = """
+CREATE TABLE version
+(
+ version INTEGER  -- contains one row, set to 1
+);
+
+CREATE TABLE local_files
+(
+ path                VARCHAR(1024) PRIMARY KEY, -- UTF-8 filename relative to local magic folder dir
+ -- note that size is before mtime and ctime here, but after in function parameters
+ size                INTEGER,                   -- ST_SIZE, or NULL if the file has been deleted
+ mtime               REAL,                      -- ST_MTIME
+ ctime               REAL,                      -- ST_CTIME
+ version             INTEGER,
+ last_uploaded_uri   VARCHAR(256) UNIQUE,       -- URI:CHK:...
+ last_downloaded_uri VARCHAR(256) UNIQUE,       -- URI:CHK:...
+ last_downloaded_timestamp REAL
+);
+"""
+
+
+def get_magicfolderdb(dbfile, stderr=sys.stderr,
+                      create_version=(SCHEMA_v1, 1), just_create=False):
+    # Open or create the given backupdb file. The parent directory must
+    # exist.
+    try:
+        (sqlite3, db) = get_db(dbfile, stderr, create_version,
+                               just_create=just_create, dbname="magicfolderdb")
+        if create_version[1] in (1, 2):
+            return MagicFolderDB(sqlite3, db)
+        else:
+            print >>stderr, "invalid magicfolderdb schema version specified"
+            return None
+    except DBError, e:
+        print >>stderr, e
+        return None
+
+
+class MagicFolderDB(object):
+    VERSION = 1
+
+    def __init__(self, sqlite_module, connection):
+        self.sqlite_module = sqlite_module
+        self.connection = connection
+        self.cursor = connection.cursor()
+
+    def check_file_db_exists(self, path):
+        """I will tell you if a given file has an entry in my database or not
+        by returning True or False.
+        """
+        c = self.cursor
+        c.execute("SELECT size,mtime,ctime"
+                  " FROM local_files"
+                  " WHERE path=?",
+                  (path,))
+        row = self.cursor.fetchone()
+        if not row:
+            return False
+        else:
+            return True
+
+    def get_all_relpaths(self):
+        """
+        Retrieve a set of all relpaths of files that have had an entry in magic folder db
+        (i.e. that have been downloaded at least once).
+        """
+        self.cursor.execute("SELECT path FROM local_files")
+        rows = self.cursor.fetchall()
+        return set([r[0] for r in rows])
+
+    def get_last_downloaded_uri(self, relpath_u):
+        """
+        Return the last downloaded uri recorded in the magic folder db.
+        If none are found then return None.
+        """
+        c = self.cursor
+        c.execute("SELECT last_downloaded_uri"
+                  " FROM local_files"
+                  " WHERE path=?",
+                  (relpath_u,))
+        row = self.cursor.fetchone()
+        if not row:
+            return None
+        else:
+            return row[0]
+
+    def get_local_file_version(self, relpath_u):
+        """
+        Return the version of a local file tracked by our magic folder db.
+        If no db entry is found then return None.
+        """
+        c = self.cursor
+        c.execute("SELECT version"
+                  " FROM local_files"
+                  " WHERE path=?",
+                  (relpath_u,))
+        row = self.cursor.fetchone()
+        if not row:
+            return None
+        else:
+            return row[0]
+
+    def did_upload_version(self, filecap, relpath_u, version, pathinfo):
+        print "did_upload_version(%r, %r, %r, %r)" % (filecap, relpath_u, version, pathinfo)
+        try:
+            print "insert"
+            self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?,?)",
+                                (relpath_u, pathinfo.size, pathinfo.mtime, pathinfo.ctime, version, filecap, pathinfo.mtime))
+        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
+            print "err... update"
+            self.cursor.execute("UPDATE local_files"
+                                " SET size=?, mtime=?, ctime=?, version=?, last_downloaded_uri=?, last_downloaded_timestamp=?"
+                                " WHERE path=?",
+                                (pathinfo.size, pathinfo.mtime, pathinfo.ctime, version, filecap, pathinfo.mtime, relpath_u))
+        self.connection.commit()
+        print "commited"
+
+    def is_new_file(self, pathinfo, relpath_u):
+        """
+        Returns true if the file's current pathinfo (size, mtime, and ctime) has
+        changed from the pathinfo previously stored in the db.
+        """
+        #print "is_new_file(%r, %r)" % (pathinfo, relpath_u)
+        c = self.cursor
+        c.execute("SELECT size, mtime, ctime"
+                  " FROM local_files"
+                  " WHERE path=?",
+                  (relpath_u,))
+        row = self.cursor.fetchone()
+        if not row:
+            return True
+        return (pathinfo.size, pathinfo.mtime, pathinfo.ctime) != row
diff --git a/src/allmydata/scripts/backupdb.py b/src/allmydata/scripts/backupdb.py
new file mode 100644
index 00000000..e859597c
--- /dev/null
+++ b/src/allmydata/scripts/backupdb.py
@@ -0,0 +1,340 @@
+
+import os.path, sys, time, random, stat
+
+from allmydata.util.netstring import netstring
+from allmydata.util.hashutil import backupdb_dirhash
+from allmydata.util import base32
+from allmydata.util.fileutil import abspath_expanduser_unicode
+from allmydata.util.encodingutil import to_str
+from allmydata.util.dbutil import get_db, DBError
+
+
+DAY = 24*60*60
+MONTH = 30*DAY
+
+SCHEMA_v1 = """
+CREATE TABLE version -- added in v1
+(
+ version INTEGER  -- contains one row, set to 2
+);
+
+CREATE TABLE local_files -- added in v1
+(
+ path  VARCHAR(1024) PRIMARY KEY, -- index, this is an absolute UTF-8-encoded local filename
+ size  INTEGER,       -- os.stat(fn)[stat.ST_SIZE]
+ mtime NUMBER,        -- os.stat(fn)[stat.ST_MTIME]
+ ctime NUMBER,        -- os.stat(fn)[stat.ST_CTIME]
+ fileid INTEGER
+);
+
+CREATE TABLE caps -- added in v1
+(
+ fileid INTEGER PRIMARY KEY AUTOINCREMENT,
+ filecap VARCHAR(256) UNIQUE       -- URI:CHK:...
+);
+
+CREATE TABLE last_upload -- added in v1
+(
+ fileid INTEGER PRIMARY KEY,
+ last_uploaded TIMESTAMP,
+ last_checked TIMESTAMP
+);
+
+"""
+
+TABLE_DIRECTORY = """
+
+CREATE TABLE directories -- added in v2
+(
+ dirhash varchar(256) PRIMARY KEY,  -- base32(dirhash)
+ dircap varchar(256),               -- URI:DIR2-CHK:...
+ last_uploaded TIMESTAMP,
+ last_checked TIMESTAMP
+);
+
+"""
+
+SCHEMA_v2 = SCHEMA_v1 + TABLE_DIRECTORY
+
+UPDATE_v1_to_v2 = TABLE_DIRECTORY + """
+UPDATE version SET version=2;
+"""
+
+UPDATERS = {
+    2: UPDATE_v1_to_v2,
+}
+
+def get_backupdb(dbfile, stderr=sys.stderr,
+                 create_version=(SCHEMA_v2, 2), just_create=False):
+    # Open or create the given backupdb file. The parent directory must
+    # exist.
+    try:
+        (sqlite3, db) = get_db(dbfile, stderr, create_version, updaters=UPDATERS,
+                               just_create=just_create, dbname="backupdb")
+        return BackupDB_v2(sqlite3, db)
+    except DBError, e:
+        print >>stderr, e
+        return None
+
+
+class FileResult:
+    def __init__(self, bdb, filecap, should_check,
+                 path, mtime, ctime, size):
+        self.bdb = bdb
+        self.filecap = filecap
+        self.should_check_p = should_check
+
+        self.path = path
+        self.mtime = mtime
+        self.ctime = ctime
+        self.size = size
+
+    def was_uploaded(self):
+        if self.filecap:
+            return self.filecap
+        return False
+
+    def did_upload(self, filecap):
+        self.bdb.did_upload_file(filecap, self.path,
+                                 self.mtime, self.ctime, self.size)
+
+    def should_check(self):
+        return self.should_check_p
+
+    def did_check_healthy(self, results):
+        self.bdb.did_check_file_healthy(self.filecap, results)
+
+
+class DirectoryResult:
+    def __init__(self, bdb, dirhash, dircap, should_check):
+        self.bdb = bdb
+        self.dircap = dircap
+        self.should_check_p = should_check
+        self.dirhash = dirhash
+
+    def was_created(self):
+        if self.dircap:
+            return self.dircap
+        return False
+
+    def did_create(self, dircap):
+        self.bdb.did_create_directory(dircap, self.dirhash)
+
+    def should_check(self):
+        return self.should_check_p
+
+    def did_check_healthy(self, results):
+        self.bdb.did_check_directory_healthy(self.dircap, results)
+
+
+class BackupDB_v2:
+    VERSION = 2
+    NO_CHECK_BEFORE = 1*MONTH
+    ALWAYS_CHECK_AFTER = 2*MONTH
+
+    def __init__(self, sqlite_module, connection):
+        self.sqlite_module = sqlite_module
+        self.connection = connection
+        self.cursor = connection.cursor()
+
+    def check_file(self, path, use_timestamps=True):
+        """I will tell you if a given local file needs to be uploaded or not,
+        by looking in a database and seeing if I have a record of this file
+        having been uploaded earlier.
+
+        I return a FileResults object, synchronously. If r.was_uploaded()
+        returns False, you should upload the file. When you are finished
+        uploading it, call r.did_upload(filecap), so I can update my
+        database.
+
+        If was_uploaded() returns a filecap, you might be able to avoid an
+        upload. Call r.should_check(), and if it says False, you can skip the
+        upload and use the filecap returned by was_uploaded().
+
+        If should_check() returns True, you should perform a filecheck on the
+        filecap returned by was_uploaded(). If the check indicates the file
+        is healthy, please call r.did_check_healthy(checker_results) so I can
+        update the database, using the de-JSONized response from the webapi
+        t=check call for 'checker_results'. If the check indicates the file
+        is not healthy, please upload the file and call r.did_upload(filecap)
+        when you're done.
+
+        If use_timestamps=True (the default), I will compare ctime and mtime
+        of the local file against an entry in my database, and consider the
+        file to be unchanged if ctime, mtime, and filesize are all the same
+        as the earlier version. If use_timestamps=False, I will not trust the
+        timestamps, so more files (perhaps all) will be marked as needing
+        upload. A future version of this database may hash the file to make
+        equality decisions, in which case use_timestamps=False will not
+        always imply r.must_upload()==True.
+
+        'path' points to a local file on disk, possibly relative to the
+        current working directory. The database stores absolute pathnames.
+        """
+
+        path = abspath_expanduser_unicode(path)
+
+        # XXX consider using get_pathinfo
+        s = os.stat(path)
+        size = s[stat.ST_SIZE]
+        ctime = s[stat.ST_CTIME]
+        mtime = s[stat.ST_MTIME]
+
+        now = time.time()
+        c = self.cursor
+
+        c.execute("SELECT size,mtime,ctime,fileid"
+                  " FROM local_files"
+                  " WHERE path=?",
+                  (path,))
+        row = self.cursor.fetchone()
+        if not row:
+            return FileResult(self, None, False, path, mtime, ctime, size)
+        (last_size,last_mtime,last_ctime,last_fileid) = row
+
+        c.execute("SELECT caps.filecap, last_upload.last_checked"
+                  " FROM caps,last_upload"
+                  " WHERE caps.fileid=? AND last_upload.fileid=?",
+                  (last_fileid, last_fileid))
+        row2 = c.fetchone()
+
+        if ((last_size != size
+             or not use_timestamps
+             or last_mtime != mtime
+             or last_ctime != ctime) # the file has been changed
+            or (not row2) # we somehow forgot where we put the file last time
+            ):
+            c.execute("DELETE FROM local_files WHERE path=?", (path,))
+            self.connection.commit()
+            return FileResult(self, None, False, path, mtime, ctime, size)
+
+        # at this point, we're allowed to assume the file hasn't been changed
+        (filecap, last_checked) = row2
+        age = now - last_checked
+
+        probability = ((age - self.NO_CHECK_BEFORE) /
+                       (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
+        probability = min(max(probability, 0.0), 1.0)
+        should_check = bool(random.random() < probability)
+
+        return FileResult(self, to_str(filecap), should_check,
+                          path, mtime, ctime, size)
+
+    def get_or_allocate_fileid_for_cap(self, filecap):
+        # find an existing fileid for this filecap, or insert a new one. The
+        # caller is required to commit() afterwards.
+
+        # mysql has "INSERT ... ON DUPLICATE KEY UPDATE", but not sqlite
+        # sqlite has "INSERT ON CONFLICT REPLACE", but not mysql
+        # So we use INSERT, ignore any error, then a SELECT
+        c = self.cursor
+        try:
+            c.execute("INSERT INTO caps (filecap) VALUES (?)", (filecap,))
+        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
+            # sqlite3 on sid gives IntegrityError
+            # pysqlite2 (which we don't use, so maybe no longer relevant) on dapper gives OperationalError
+            pass
+        c.execute("SELECT fileid FROM caps WHERE filecap=?", (filecap,))
+        foundrow = c.fetchone()
+        assert foundrow
+        fileid = foundrow[0]
+        return fileid
+
+    def did_upload_file(self, filecap, path, mtime, ctime, size):
+        now = time.time()
+        fileid = self.get_or_allocate_fileid_for_cap(filecap)
+        try:
+            self.cursor.execute("INSERT INTO last_upload VALUES (?,?,?)",
+                                (fileid, now, now))
+        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
+            self.cursor.execute("UPDATE last_upload"
+                                " SET last_uploaded=?, last_checked=?"
+                                " WHERE fileid=?",
+                                (now, now, fileid))
+        try:
+            self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?)",
+                                (path, size, mtime, ctime, fileid))
+        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
+            self.cursor.execute("UPDATE local_files"
+                                " SET size=?, mtime=?, ctime=?, fileid=?"
+                                " WHERE path=?",
+                                (size, mtime, ctime, fileid, path))
+        self.connection.commit()
+
+    def did_check_file_healthy(self, filecap, results):
+        now = time.time()
+        fileid = self.get_or_allocate_fileid_for_cap(filecap)
+        self.cursor.execute("UPDATE last_upload"
+                            " SET last_checked=?"
+                            " WHERE fileid=?",
+                            (now, fileid))
+        self.connection.commit()
+
+    def check_directory(self, contents):
+        """I will tell you if a new directory needs to be created for a given
+        set of directory contents, or if I know of an existing (immutable)
+        directory that can be used instead.
+
+        'contents' should be a dictionary that maps from child name (a single
+        unicode string) to immutable childcap (filecap or dircap).
+
+        I return a DirectoryResult object, synchronously. If r.was_created()
+        returns False, you should create the directory (with
+        t=mkdir-immutable). When you are finished, call r.did_create(dircap)
+        so I can update my database.
+
+        If was_created() returns a dircap, you might be able to avoid the
+        mkdir. Call r.should_check(), and if it says False, you can skip the
+        mkdir and use the dircap returned by was_created().
+
+        If should_check() returns True, you should perform a check operation
+        on the dircap returned by was_created(). If the check indicates the
+        directory is healthy, please call
+        r.did_check_healthy(checker_results) so I can update the database,
+        using the de-JSONized response from the webapi t=check call for
+        'checker_results'. If the check indicates the directory is not
+        healthy, please repair or re-create the directory and call
+        r.did_create(dircap) when you're done.
+        """
+
+        now = time.time()
+        entries = []
+        for name in contents:
+            entries.append( [name.encode("utf-8"), contents[name]] )
+        entries.sort()
+        data = "".join([netstring(name_utf8)+netstring(cap)
+                        for (name_utf8,cap) in entries])
+        dirhash = backupdb_dirhash(data)
+        dirhash_s = base32.b2a(dirhash)
+        c = self.cursor
+        c.execute("SELECT dircap, last_checked"
+                  " FROM directories WHERE dirhash=?", (dirhash_s,))
+        row = c.fetchone()
+        if not row:
+            return DirectoryResult(self, dirhash_s, None, False)
+        (dircap, last_checked) = row
+        age = now - last_checked
+
+        probability = ((age - self.NO_CHECK_BEFORE) /
+                       (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
+        probability = min(max(probability, 0.0), 1.0)
+        should_check = bool(random.random() < probability)
+
+        return DirectoryResult(self, dirhash_s, to_str(dircap), should_check)
+
+    def did_create_directory(self, dircap, dirhash):
+        now = time.time()
+        # if the dirhash is already present (i.e. we've re-uploaded an
+        # existing directory, possibly replacing the dircap with a new one),
+        # update the record in place. Otherwise create a new record.)
+        self.cursor.execute("REPLACE INTO directories VALUES (?,?,?,?)",
+                            (dirhash, dircap, now, now))
+        self.connection.commit()
+
+    def did_check_directory_healthy(self, dircap, results):
+        now = time.time()
+        self.cursor.execute("UPDATE directories"
+                            " SET last_checked=?"
+                            " WHERE dircap=?",
+                            (now, dircap))
+        self.connection.commit()
diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py
index a029b34a..af7bad01 100644
--- a/src/allmydata/scripts/runner.py
+++ b/src/allmydata/scripts/runner.py
@@ -5,8 +5,8 @@ from cStringIO import StringIO
 from twisted.python import usage
 
 from allmydata.scripts.common import get_default_nodedir
-from allmydata.scripts import debug, create_node, startstop_node, cli, keygen, stats_gatherer, admin, \
-magic_folder_cli
+from allmydata.scripts import debug, create_node, startstop_node, cli, keygen, stats_gatherer, \
+    admin, magic_folder_cli
 from allmydata.util.encodingutil import quote_output, quote_local_unicode_path, get_io_encoding
 
 def GROUP(s):
diff --git a/src/allmydata/scripts/tahoe_backup.py b/src/allmydata/scripts/tahoe_backup.py
index e2276406..f12b3171 100644
--- a/src/allmydata/scripts/tahoe_backup.py
+++ b/src/allmydata/scripts/tahoe_backup.py
@@ -8,7 +8,7 @@ from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS, \
                                      UnknownAliasError
 from allmydata.scripts.common_http import do_http, HTTPError, format_http_error
 from allmydata.util import time_format
-from allmydata import backupdb
+from allmydata.scripts import backupdb
 from allmydata.util.encodingutil import listdir_unicode, quote_output, \
      quote_local_unicode_path, to_str, FilenameEncodingError, unicode_to_url
 from allmydata.util.assertutil import precondition
diff --git a/src/allmydata/scripts/tahoe_ls.py b/src/allmydata/scripts/tahoe_ls.py
index 9a8ce240..78eea1f2 100644
--- a/src/allmydata/scripts/tahoe_ls.py
+++ b/src/allmydata/scripts/tahoe_ls.py
@@ -151,7 +151,9 @@ def list(options):
             line.append(uri)
         if options["readonly-uri"]:
             line.append(quote_output(ro_uri or "-", quotemarks=False))
+
         rows.append((encoding_error, line))
+
     max_widths = []
     left_justifys = []
     for (encoding_error, row) in rows:
diff --git a/src/allmydata/test/test_backupdb.py b/src/allmydata/test/test_backupdb.py
index dddfb821..835e2531 100644
--- a/src/allmydata/test/test_backupdb.py
+++ b/src/allmydata/test/test_backupdb.py
@@ -6,7 +6,7 @@ from twisted.trial import unittest
 from allmydata.util import fileutil
 from allmydata.util.encodingutil import listdir_unicode, get_filesystem_encoding, unicode_platform
 from allmydata.util.assertutil import precondition
-from allmydata import backupdb
+from allmydata.scripts import backupdb
 
 class BackupDB(unittest.TestCase):
     def create(self, dbfile):
diff --git a/src/allmydata/test/test_cli_backup.py b/src/allmydata/test/test_cli_backup.py
index a48295de..3bd2a614 100644
--- a/src/allmydata/test/test_cli_backup.py
+++ b/src/allmydata/test/test_cli_backup.py
@@ -11,8 +11,7 @@ from allmydata.util import fileutil
 from allmydata.util.fileutil import abspath_expanduser_unicode
 from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv
 from allmydata.util.namespace import Namespace
-from allmydata.scripts import cli
-from allmydata import backupdb
+from allmydata.scripts import cli, backupdb
 from .common_util import StallMixin
 from .no_network import GridTestMixin
 from .test_cli import CLITestMixin, parse_options
diff --git a/src/allmydata/test/test_magic_folder.py b/src/allmydata/test/test_magic_folder.py
index 4a6b60eb..7ebcf928 100644
--- a/src/allmydata/test/test_magic_folder.py
+++ b/src/allmydata/test/test_magic_folder.py
@@ -16,7 +16,7 @@ from .test_cli_magic_folder import MagicFolderCLITestMixin
 
 from allmydata.frontends import magic_folder
 from allmydata.frontends.magic_folder import MagicFolder, Downloader
-from allmydata import backupdb, magicpath
+from allmydata import magicfolderdb, magicpath
 from allmydata.util.fileutil import abspath_expanduser_unicode
 
 
@@ -39,10 +39,10 @@ class MagicFolderTestMixin(MagicFolderCLITestMixin, ShouldFailMixin, ReallyEqual
 
     def _createdb(self):
         dbfile = abspath_expanduser_unicode(u"magicfolderdb.sqlite", base=self.basedir)
-        bdb = backupdb.get_backupdb(dbfile, create_version=(backupdb.MAGIC_FOLDER_SCHEMA_v3, 3))
-        self.failUnless(bdb, "unable to create backupdb from %r" % (dbfile,))
-        self.failUnlessEqual(bdb.VERSION, 3)
-        return bdb
+        mdb = magicfolderdb.get_magicfolderdb(dbfile, create_version=(magicfolderdb.SCHEMA_v1, 1))
+        self.failUnless(mdb, "unable to create magicfolderdb from %r" % (dbfile,))
+        self.failUnlessEqual(mdb.VERSION, 1)
+        return mdb
 
     def _restart_client(self, ign):
         #print "_restart_client"
diff --git a/src/allmydata/util/fileutil.py b/src/allmydata/util/fileutil.py
index 8194fcd4..a0e4ce4a 100644
--- a/src/allmydata/util/fileutil.py
+++ b/src/allmydata/util/fileutil.py
@@ -6,6 +6,11 @@ import sys, exceptions, os, stat, tempfile, time, binascii
 from collections import namedtuple
 from errno import ENOENT
 
+if sys.platform == "win32":
+    from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, c_ulonglong, \
+        create_unicode_buffer, get_last_error
+    from ctypes.wintypes import BOOL, DWORD, LPCWSTR, LPWSTR, LPVOID, HANDLE
+
 from twisted.python import log
 
 from pycryptopp.cipher.aes import AES
@@ -520,8 +525,6 @@ def get_available_space(whichdir, reserved_space):
 
 
 if sys.platform == "win32":
-    from ctypes.wintypes import BOOL, HANDLE, DWORD, LPCWSTR, LPVOID, WinError, get_last_error
-
     # <http://msdn.microsoft.com/en-us/library/aa363858%28v=vs.85%29.aspx>
     CreateFileW = WINFUNCTYPE(HANDLE, LPCWSTR, DWORD, DWORD, LPVOID, DWORD, DWORD, HANDLE) \
                       (("CreateFileW", windll.kernel32))