-# the backupdb is only available if sqlite3 is available. Python-2.5.x and
-# beyond include sqlite3 in the standard library. For python-2.4, the
-# "pysqlite2" package (which, despite the confusing name, uses sqlite3) must
-# be installed. On debian, install python-pysqlite2
-
import os.path, sys, time, random, stat
+from allmydata.util.netstring import netstring
+from allmydata.util.hashutil import backupdb_dirhash
+from allmydata.util import base32
+from allmydata.util.fileutil import abspath_expanduser_unicode
+from allmydata.util.encodingutil import to_str
+from allmydata.util.dbutil import get_db, DBError
+
+
DAY = 24*60*60
MONTH = 30*DAY
SCHEMA_v1 = """
-CREATE TABLE version
+CREATE TABLE version -- added in v1
(
- version INTEGER -- contains one row, set to 1
+ version INTEGER -- contains one row, set to 2
);
-CREATE TABLE local_files
+CREATE TABLE local_files -- added in v1
(
- path VARCHAR(1024) PRIMARY KEY, -- index, this is os.path.abspath(fn)
+ path VARCHAR(1024) PRIMARY KEY, -- index, this is an absolute UTF-8-encoded local filename
size INTEGER, -- os.stat(fn)[stat.ST_SIZE]
mtime NUMBER, -- os.stat(fn)[stat.ST_MTIME]
ctime NUMBER, -- os.stat(fn)[stat.ST_CTIME]
fileid INTEGER
);
-CREATE TABLE caps
+CREATE TABLE caps -- added in v1
(
fileid INTEGER PRIMARY KEY AUTOINCREMENT,
filecap VARCHAR(256) UNIQUE -- URI:CHK:...
);
-CREATE TABLE last_upload
+CREATE TABLE last_upload -- added in v1
(
fileid INTEGER PRIMARY KEY,
last_uploaded TIMESTAMP,
last_checked TIMESTAMP
);
"""
-def get_backupdb(dbfile, stderr=sys.stderr):
- # open or create the given backupdb file. The parent directory must
- # exist.
- try:
- import sqlite3
- sqlite = sqlite3 # pyflakes whines about 'import sqlite3 as sqlite' ..
- except ImportError:
- try:
- from pysqlite2 import dbapi2
- sqlite = dbapi2 # .. when this clause does it too
- except ImportError:
- print >>stderr, "sqlite unavailable, not using backupdb"
- return None
- must_create = not os.path.exists(dbfile)
- try:
- db = sqlite.connect(dbfile)
- except (EnvironmentError, sqlite.OperationalError), e:
- print >>stderr, "Unable to create/open backupdb file %s: %s" % (dbfile, e)
- return None
- c = db.cursor()
- if must_create:
- c.executescript(SCHEMA_v1)
- c.execute("INSERT INTO version (version) VALUES (1)")
- db.commit()
+TABLE_DIRECTORY = """
+CREATE TABLE directories -- added in v2
+(
+ dirhash varchar(256) PRIMARY KEY, -- base32(dirhash)
+ dircap varchar(256), -- URI:DIR2-CHK:...
+ last_uploaded TIMESTAMP,
+ last_checked TIMESTAMP
+);
+"""
+
+SCHEMA_v2 = SCHEMA_v1 + TABLE_DIRECTORY
+
+UPDATE_v1_to_v2 = TABLE_DIRECTORY + """
+UPDATE version SET version=2;
+"""
+UPDATERS = {
+ 2: UPDATE_v1_to_v2,
+}
+
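+# Note: get_db() lives in allmydata.util.dbutil and is not part of this
+# patch. A minimal sketch of the contract assumed here (create the schema
+# on first open, then replay UPDATERS in version order on older databases;
+# the body below is illustrative, not dbutil's actual code):
+#
+#   def get_db(dbfile, stderr, create_version, updaters={},
+#              just_create=False, dbname="db"):
+#       import sqlite3
+#       must_create = not os.path.exists(dbfile)
+#       db = sqlite3.connect(dbfile)
+#       c = db.cursor()
+#       if must_create:
+#           (schema, target_version) = create_version
+#           c.executescript(schema)
+#           c.execute("INSERT INTO version (version) VALUES (?)",
+#                     (target_version,))
+#           db.commit()
+#       c.execute("SELECT version FROM version")
+#       version = c.fetchone()[0]
+#       while version+1 in updaters: # apply v1->v2, v2->v3, ... in order
+#           c.executescript(updaters[version+1])
+#           db.commit()
+#           version += 1
+#       return (sqlite3, db)
+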
+def get_backupdb(dbfile, stderr=sys.stderr,
+ create_version=(SCHEMA_v2, 2), just_create=False):
+ # open or create the given backupdb file. The parent directory must
+ # exist.
try:
- c.execute("SELECT version FROM version")
- version = c.fetchone()[0]
- except sqlite.DatabaseError, e:
- # this indicates that the file is not a compatible database format.
- # Perhaps it was created with an old version, or it might be junk.
- print >>stderr, "backupdb file is unusable: %s" % e
+ (sqlite3, db) = get_db(dbfile, stderr, create_version, updaters=UPDATERS,
+ just_create=just_create, dbname="backupdb")
+ return BackupDB_v2(sqlite3, db)
+ except DBError, e:
+ print >>stderr, e
return None
- if version == 1:
- return BackupDB_v1(sqlite, db)
- print >>stderr, "Unable to handle backupdb version %s" % version
- return None
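+
+# Illustrative usage (not part of this patch): open or create the backupdb
+# at its conventional location under a client's base directory, and fall
+# back gracefully if it cannot be opened:
+#
+#   dbfile = os.path.join(basedir, "private", "backupdb.sqlite")
+#   bdb = get_backupdb(dbfile)
+#   if bdb is None:
+#       print >>sys.stderr, "backupdb unavailable, uploading everything"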
-MUST_UPLOAD, ALREADY_UPLOADED = range(2)
-class Result:
+class FileResult:
def __init__(self, bdb, filecap, should_check,
path, mtime, ctime, size):
self.bdb = bdb
self.filecap = filecap
self.should_check_p = should_check
self.path = path
self.mtime = mtime
self.ctime = ctime
self.size = size

def was_uploaded(self):
if self.filecap:
return self.filecap
return False
def did_upload(self, filecap):
- self.bdb.did_upload(filecap,
- self.path,
- self.mtime, self.ctime, self.size)
+ self.bdb.did_upload_file(filecap, self.path,
+ self.mtime, self.ctime, self.size)
def should_check(self):
return self.should_check_p
def did_check_healthy(self, results):
- self.bdb.did_check_healthy(self.filecap, results)
+ self.bdb.did_check_file_healthy(self.filecap, results)
+
+class DirectoryResult:
+ def __init__(self, bdb, dirhash, dircap, should_check):
+ self.bdb = bdb
+ self.dircap = dircap
+ self.should_check_p = should_check
+ self.dirhash = dirhash
+
+ def was_created(self):
+ if self.dircap:
+ return self.dircap
+ return False
+
+ def did_create(self, dircap):
+ self.bdb.did_create_directory(dircap, self.dirhash)
+
+ def should_check(self):
+ return self.should_check_p
+
+ def did_check_healthy(self, results):
+ self.bdb.did_check_directory_healthy(self.dircap, results)
+
-class BackupDB_v1:
- VERSION = 1
+class BackupDB_v2:
+ VERSION = 2
NO_CHECK_BEFORE = 1*MONTH
ALWAYS_CHECK_AFTER = 2*MONTH
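+ # The re-check probability computed in check_file() and check_directory()
+ # ramps linearly with the age of the last check:
+ # p = (age - NO_CHECK_BEFORE) / (ALWAYS_CHECK_AFTER - NO_CHECK_BEFORE),
+ # clamped to [0.0, 1.0]. E.g. a record last checked 45 days ago gets
+ # p = (45*DAY - 1*MONTH) / (2*MONTH - 1*MONTH) = 0.5, so it is checked
+ # about half the time.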
def check_file(self, path):
"""I will tell you if a given local file needs to be uploaded or not,
by looking in a database and seeing if I have a record of this file
having been uploaded earlier.
- I return a Results object, synchronously. If r.was_uploaded() returns
- False, you should upload the file. When you are finished uploading
- it, call r.did_upload(filecap), so I can update my database.
+ I return a FileResults object, synchronously. If r.was_uploaded()
+ returns False, you should upload the file. When you are finished
+ uploading it, call r.did_upload(filecap), so I can update my
+ database.
If was_uploaded() returns a filecap, you might be able to avoid an
upload. Call r.should_check(), and if it says False, you can skip the
upload and use the filecap returned by was_uploaded().

'path' points to a local file on disk, possibly relative to the
current working directory. The database stores absolute pathnames.
"""
- path = os.path.abspath(path)
+ path = abspath_expanduser_unicode(path)
s = os.stat(path)
size = s[stat.ST_SIZE]
ctime = s[stat.ST_CTIME]
mtime = s[stat.ST_MTIME]

now = time.time()
c = self.cursor

c.execute("SELECT size,mtime,ctime,fileid"
" FROM local_files"
" WHERE path=?",
(path,))
row = self.cursor.fetchone()
if not row:
- return Result(self, None, False, path, mtime, ctime, size)
+ return FileResult(self, None, False, path, mtime, ctime, size)
(last_size,last_mtime,last_ctime,last_fileid) = row
c.execute("SELECT caps.filecap, last_upload.last_checked"
):
c.execute("DELETE FROM local_files WHERE path=?", (path,))
self.connection.commit()
- return Result(self, None, False, path, mtime, ctime, size)
+ return FileResult(self, None, False, path, mtime, ctime, size)
# at this point, we're allowed to assume the file hasn't been changed
(filecap, last_checked) = row2
age = now - last_checked

probability = ((age - self.NO_CHECK_BEFORE) /
(self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
probability = min(max(probability, 0.0), 1.0)
should_check = bool(random.random() < probability)
- return Result(self, filecap, should_check, path, mtime, ctime, size)
+ return FileResult(self, to_str(filecap), should_check,
+ path, mtime, ctime, size)
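+
+ # Illustrative caller loop for the protocol described in check_file()'s
+ # docstring. upload() and check() are hypothetical stand-ins for the
+ # caller's grid operations; the response shape assumes the de-JSONized
+ # webapi t=check results:
+ #
+ #   r = bdb.check_file(path)
+ #   filecap = r.was_uploaded()
+ #   if not filecap:
+ #       filecap = upload(path) # must upload
+ #       r.did_upload(filecap)
+ #   elif r.should_check():
+ #       results = check(filecap)
+ #       if results["results"]["healthy"]:
+ #           r.did_check_healthy(results)
+ #       else:
+ #           filecap = upload(path) # repair by re-uploading
+ #           r.did_upload(filecap)
+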
def get_or_allocate_fileid_for_cap(self, filecap):
# find an existing fileid for this filecap, or insert a new one. The
# caller is required to commit() afterwards.
c = self.cursor
try:
c.execute("INSERT INTO caps (filecap) VALUES (?)", (filecap,))
- except self.sqlite_module.IntegrityError:
+ except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
+ # sqlite3 on sid gives IntegrityError
+ # pysqlite2 (which we no longer use, so this may be moot) on dapper
+ # gives OperationalError
pass
c.execute("SELECT fileid FROM caps WHERE filecap=?", (filecap,))
foundrow = c.fetchone()
fileid = foundrow[0]
return fileid
- def did_upload(self, filecap, path, mtime, ctime, size):
+ def did_upload_file(self, filecap, path, mtime, ctime, size):
now = time.time()
fileid = self.get_or_allocate_fileid_for_cap(filecap)
try:
self.cursor.execute("INSERT INTO last_upload VALUES (?,?,?)",
(fileid, now, now))
- except self.sqlite_module.IntegrityError:
+ except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
self.cursor.execute("UPDATE last_upload"
" SET last_uploaded=?, last_checked=?"
" WHERE fileid=?",
try:
self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?)",
(path, size, mtime, ctime, fileid))
- except self.sqlite_module.IntegrityError:
+ except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
self.cursor.execute("UPDATE local_files"
" SET size=?, mtime=?, ctime=?, fileid=?"
" WHERE path=?",
(size, mtime, ctime, fileid, path))
self.connection.commit()
- def did_check_healthy(self, filecap, results):
+ def did_check_file_healthy(self, filecap, results):
now = time.time()
fileid = self.get_or_allocate_fileid_for_cap(filecap)
self.cursor.execute("UPDATE last_upload"
" SET last_checked=?"
" WHERE fileid=?",
(now, fileid))
self.connection.commit()
+
+ def check_directory(self, contents):
+ """I will tell you if a new directory needs to be created for a given
+ set of directory contents, or if I know of an existing (immutable)
+ directory that can be used instead.
+
+ 'contents' should be a dictionary that maps from child name (a single
+ unicode string) to immutable childcap (filecap or dircap).
+
+ I return a DirectoryResult object, synchronously. If r.was_created()
+ returns False, you should create the directory (with
+ t=mkdir-immutable). When you are finished, call r.did_create(dircap)
+ so I can update my database.
+
+ If was_created() returns a dircap, you might be able to avoid the
+ mkdir. Call r.should_check(), and if it says False, you can skip the
+ mkdir and use the dircap returned by was_created().
+
+ If should_check() returns True, you should perform a check operation
+ on the dircap returned by was_created(). If the check indicates the
+ directory is healthy, please call
+ r.did_check_healthy(checker_results) so I can update the database,
+ using the de-JSONized response from the webapi t=check call for
+ 'checker_results'. If the check indicates the directory is not
+ healthy, please repair or re-create the directory and call
+ r.did_create(dircap) when you're done.
+ """
+
+ now = time.time()
+ entries = []
+ for name in contents:
+ entries.append( [name.encode("utf-8"), contents[name]] )
+ entries.sort()
+ data = "".join([netstring(name_utf8)+netstring(cap)
+ for (name_utf8,cap) in entries])
+ dirhash = backupdb_dirhash(data)
+ dirhash_s = base32.b2a(dirhash)
+ c = self.cursor
+ c.execute("SELECT dircap, last_checked"
+ " FROM directories WHERE dirhash=?", (dirhash_s,))
+ row = c.fetchone()
+ if not row:
+ return DirectoryResult(self, dirhash_s, None, False)
+ (dircap, last_checked) = row
+ age = now - last_checked
+
+ probability = ((age - self.NO_CHECK_BEFORE) /
+ (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
+ probability = min(max(probability, 0.0), 1.0)
+ should_check = bool(random.random() < probability)
+
+ return DirectoryResult(self, dirhash_s, to_str(dircap), should_check)
+
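+ # Illustrative caller loop for the directory protocol described above.
+ # mkdir_immutable() and check() are hypothetical stand-ins for the
+ # caller's webapi operations (t=mkdir-immutable and t=check):
+ #
+ #   r = bdb.check_directory(contents) # {unicode childname: immutable cap}
+ #   dircap = r.was_created()
+ #   if not dircap:
+ #       dircap = mkdir_immutable(contents)
+ #       r.did_create(dircap)
+ #   elif r.should_check():
+ #       results = check(dircap)
+ #       if results["results"]["healthy"]:
+ #           r.did_check_healthy(results)
+ #       else:
+ #           dircap = mkdir_immutable(contents) # re-create the directory
+ #           r.did_create(dircap)
+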
+ def did_create_directory(self, dircap, dirhash):
+ now = time.time()
+ # if the dirhash is already present (i.e. we've re-uploaded an
+ # existing directory, possibly replacing the dircap with a new one),
+ # update the record in place. Otherwise create a new record.
+ self.cursor.execute("REPLACE INTO directories VALUES (?,?,?,?)",
+ (dirhash, dircap, now, now))
+ self.connection.commit()
+
+ def did_check_directory_healthy(self, dircap, results):
+ now = time.time()
+ self.cursor.execute("UPDATE directories"
+ " SET last_checked=?"
+ " WHERE dircap=?",
+ (now, dircap))
+ self.connection.commit()