From: Daira Hopwood Date: Thu, 8 Oct 2015 15:01:46 +0000 (+0100) Subject: Add magicfolderdb.py. X-Git-Url: https://git.rkrishnan.org/pf/reliability?a=commitdiff_plain;h=2502a843c0a580b0206da25a637fe00d28ebd4dc;p=tahoe-lafs%2Ftahoe-lafs.git Add magicfolderdb.py. Signed-off-by: Daira Hopwood --- diff --git a/src/allmydata/backupdb.py b/src/allmydata/backupdb.py deleted file mode 100644 index 316ebf51..00000000 --- a/src/allmydata/backupdb.py +++ /dev/null @@ -1,363 +0,0 @@ - -import os.path, sys, time, random, stat - -from allmydata.util.netstring import netstring -from allmydata.util.hashutil import backupdb_dirhash -from allmydata.util import base32 -from allmydata.util.fileutil import abspath_expanduser_unicode -from allmydata.util.encodingutil import to_str -from allmydata.util.dbutil import get_db, DBError - - -DAY = 24*60*60 -MONTH = 30*DAY - -MAIN_SCHEMA = """ -CREATE TABLE version -( - version INTEGER -- contains one row, set to %s -); - -CREATE TABLE local_files -( - path VARCHAR(1024) PRIMARY KEY, -- index, this is an absolute UTF-8-encoded local filename - -- note that size is before mtime and ctime here, but after in function parameters - size INTEGER, -- os.stat(fn)[stat.ST_SIZE] (NULL if the file has been deleted) - mtime NUMBER, -- os.stat(fn)[stat.ST_MTIME] - ctime NUMBER, -- os.stat(fn)[stat.ST_CTIME] - fileid INTEGER%s -); - -CREATE TABLE caps -( - fileid INTEGER PRIMARY KEY AUTOINCREMENT, - filecap VARCHAR(256) UNIQUE -- URI:CHK:... -); - -CREATE TABLE last_upload -( - fileid INTEGER PRIMARY KEY, - last_uploaded TIMESTAMP, - last_checked TIMESTAMP -); - -""" - -SCHEMA_v1 = MAIN_SCHEMA % (1, "") - -TABLE_DIRECTORY = """ - -CREATE TABLE directories -- added in v2 -( - dirhash varchar(256) PRIMARY KEY, -- base32(dirhash) - dircap varchar(256), -- URI:DIR2-CHK:... - last_uploaded TIMESTAMP, - last_checked TIMESTAMP -); - -""" - -SCHEMA_v2 = MAIN_SCHEMA % (2, "") + TABLE_DIRECTORY - -UPDATE_v1_to_v2 = TABLE_DIRECTORY + """ -UPDATE version SET version=2; -""" - -UPDATERS = { - 2: UPDATE_v1_to_v2, -} - - -def get_backupdb(dbfile, stderr=sys.stderr, - create_version=(SCHEMA_v2, 2), just_create=False): - # Open or create the given backupdb file. The parent directory must - # exist. - try: - (sqlite3, db) = get_db(dbfile, stderr, create_version, updaters=UPDATERS, - just_create=just_create, dbname="backupdb") - if create_version[1] in (1, 2): - return BackupDB(sqlite3, db) - else: - print >>stderr, "invalid db schema version specified" - return None - except DBError, e: - print >>stderr, e - return None - - -class FileResult: - def __init__(self, bdb, filecap, should_check, - path, mtime, ctime, size): - self.bdb = bdb - self.filecap = filecap - self.should_check_p = should_check - - self.path = path - self.mtime = mtime - self.ctime = ctime - self.size = size - - def was_uploaded(self): - if self.filecap: - return self.filecap - return False - - def did_upload(self, filecap): - self.bdb.did_upload_file(filecap, self.path, - self.mtime, self.ctime, self.size) - - def should_check(self): - return self.should_check_p - - def did_check_healthy(self, results): - self.bdb.did_check_file_healthy(self.filecap, results) - - -class DirectoryResult: - def __init__(self, bdb, dirhash, dircap, should_check): - self.bdb = bdb - self.dircap = dircap - self.should_check_p = should_check - self.dirhash = dirhash - - def was_created(self): - if self.dircap: - return self.dircap - return False - - def did_create(self, dircap): - self.bdb.did_create_directory(dircap, self.dirhash) - - def should_check(self): - return self.should_check_p - - def did_check_healthy(self, results): - self.bdb.did_check_directory_healthy(self.dircap, results) - - -class BackupDB: - VERSION = 2 - NO_CHECK_BEFORE = 1*MONTH - ALWAYS_CHECK_AFTER = 2*MONTH - - def __init__(self, sqlite_module, connection): - self.sqlite_module = sqlite_module - self.connection = connection - self.cursor = connection.cursor() - - def check_file_db_exists(self, path): - """I will tell you if a given file has an entry in my database or not - by returning True or False. - """ - c = self.cursor - c.execute("SELECT size,mtime,ctime,fileid" - " FROM local_files" - " WHERE path=?", - (path,)) - row = self.cursor.fetchone() - if not row: - return False - else: - return True - - def check_file(self, path, use_timestamps=True): - """I will tell you if a given local file needs to be uploaded or not, - by looking in a database and seeing if I have a record of this file - having been uploaded earlier. - - I return a FileResults object, synchronously. If r.was_uploaded() - returns False, you should upload the file. When you are finished - uploading it, call r.did_upload(filecap), so I can update my - database. - - If was_uploaded() returns a filecap, you might be able to avoid an - upload. Call r.should_check(), and if it says False, you can skip the - upload and use the filecap returned by was_uploaded(). - - If should_check() returns True, you should perform a filecheck on the - filecap returned by was_uploaded(). If the check indicates the file - is healthy, please call r.did_check_healthy(checker_results) so I can - update the database, using the de-JSONized response from the webapi - t=check call for 'checker_results'. If the check indicates the file - is not healthy, please upload the file and call r.did_upload(filecap) - when you're done. - - If use_timestamps=True (the default), I will compare mtime and ctime - of the local file against an entry in my database, and consider the - file to be unchanged if mtime, ctime, and filesize are all the same - as the earlier version. If use_timestamps=False, I will not trust the - timestamps, so more files (perhaps all) will be marked as needing - upload. A future version of this database may hash the file to make - equality decisions, in which case use_timestamps=False will not - always imply r.must_upload()==True. - - 'path' points to a local file on disk, possibly relative to the - current working directory. The database stores absolute pathnames. - """ - - path = abspath_expanduser_unicode(path) - - # XXX consider using get_pathinfo - s = os.stat(path) - size = s[stat.ST_SIZE] - mtime = s[stat.ST_MTIME] - ctime = s[stat.ST_CTIME] - - now = time.time() - c = self.cursor - - c.execute("SELECT size,mtime,ctime,fileid" - " FROM local_files" - " WHERE path=?", - (path,)) - row = self.cursor.fetchone() - if not row: - return FileResult(self, None, False, path, mtime, ctime, size) - (last_size,last_mtime,last_ctime,last_fileid) = row - - c.execute("SELECT caps.filecap, last_upload.last_checked" - " FROM caps,last_upload" - " WHERE caps.fileid=? AND last_upload.fileid=?", - (last_fileid, last_fileid)) - row2 = c.fetchone() - - if ((last_size != size - or not use_timestamps - or last_mtime != mtime - or last_ctime != ctime) # the file has been changed - or (not row2) # we somehow forgot where we put the file last time - ): - c.execute("DELETE FROM local_files WHERE path=?", (path,)) - self.connection.commit() - return FileResult(self, None, False, path, mtime, ctime, size) - - # at this point, we're allowed to assume the file hasn't been changed - (filecap, last_checked) = row2 - age = now - last_checked - - probability = ((age - self.NO_CHECK_BEFORE) / - (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE)) - probability = min(max(probability, 0.0), 1.0) - should_check = bool(random.random() < probability) - - return FileResult(self, to_str(filecap), should_check, - path, mtime, ctime, size) - - def get_or_allocate_fileid_for_cap(self, filecap): - # find an existing fileid for this filecap, or insert a new one. The - # caller is required to commit() afterwards. - - # mysql has "INSERT ... ON DUPLICATE KEY UPDATE", but not sqlite - # sqlite has "INSERT ON CONFLICT REPLACE", but not mysql - # So we use INSERT, ignore any error, then a SELECT - c = self.cursor - try: - c.execute("INSERT INTO caps (filecap) VALUES (?)", (filecap,)) - except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError): - # sqlite3 on sid gives IntegrityError - # pysqlite2 (which we don't use, so maybe no longer relevant) on dapper gives OperationalError - pass - c.execute("SELECT fileid FROM caps WHERE filecap=?", (filecap,)) - foundrow = c.fetchone() - assert foundrow - fileid = foundrow[0] - return fileid - - def did_upload_file(self, filecap, path, mtime, ctime, size): - now = time.time() - fileid = self.get_or_allocate_fileid_for_cap(filecap) - try: - self.cursor.execute("INSERT INTO last_upload VALUES (?,?,?)", - (fileid, now, now)) - except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError): - self.cursor.execute("UPDATE last_upload" - " SET last_uploaded=?, last_checked=?" - " WHERE fileid=?", - (now, now, fileid)) - try: - self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?)", - (path, size, mtime, ctime, fileid)) - except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError): - self.cursor.execute("UPDATE local_files" - " SET size=?, mtime=?, ctime=?, fileid=?" - " WHERE path=?", - (size, mtime, ctime, fileid, path)) - self.connection.commit() - - def did_check_file_healthy(self, filecap, results): - now = time.time() - fileid = self.get_or_allocate_fileid_for_cap(filecap) - self.cursor.execute("UPDATE last_upload" - " SET last_checked=?" - " WHERE fileid=?", - (now, fileid)) - self.connection.commit() - - def check_directory(self, contents): - """I will tell you if a new directory needs to be created for a given - set of directory contents, or if I know of an existing (immutable) - directory that can be used instead. - - 'contents' should be a dictionary that maps from child name (a single - unicode string) to immutable childcap (filecap or dircap). - - I return a DirectoryResult object, synchronously. If r.was_created() - returns False, you should create the directory (with - t=mkdir-immutable). When you are finished, call r.did_create(dircap) - so I can update my database. - - If was_created() returns a dircap, you might be able to avoid the - mkdir. Call r.should_check(), and if it says False, you can skip the - mkdir and use the dircap returned by was_created(). - - If should_check() returns True, you should perform a check operation - on the dircap returned by was_created(). If the check indicates the - directory is healthy, please call - r.did_check_healthy(checker_results) so I can update the database, - using the de-JSONized response from the webapi t=check call for - 'checker_results'. If the check indicates the directory is not - healthy, please repair or re-create the directory and call - r.did_create(dircap) when you're done. - """ - - now = time.time() - entries = [] - for name in contents: - entries.append( [name.encode("utf-8"), contents[name]] ) - entries.sort() - data = "".join([netstring(name_utf8)+netstring(cap) - for (name_utf8,cap) in entries]) - dirhash = backupdb_dirhash(data) - dirhash_s = base32.b2a(dirhash) - c = self.cursor - c.execute("SELECT dircap, last_checked" - " FROM directories WHERE dirhash=?", (dirhash_s,)) - row = c.fetchone() - if not row: - return DirectoryResult(self, dirhash_s, None, False) - (dircap, last_checked) = row - age = now - last_checked - - probability = ((age - self.NO_CHECK_BEFORE) / - (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE)) - probability = min(max(probability, 0.0), 1.0) - should_check = bool(random.random() < probability) - - return DirectoryResult(self, dirhash_s, to_str(dircap), should_check) - - def did_create_directory(self, dircap, dirhash): - now = time.time() - # if the dirhash is already present (i.e. we've re-uploaded an - # existing directory, possibly replacing the dircap with a new one), - # update the record in place. Otherwise create a new record.) - self.cursor.execute("REPLACE INTO directories VALUES (?,?,?,?)", - (dirhash, dircap, now, now)) - self.connection.commit() - - def did_check_directory_healthy(self, dircap, results): - now = time.time() - self.cursor.execute("UPDATE directories" - " SET last_checked=?" - " WHERE dircap=?", - (now, dircap)) - self.connection.commit() diff --git a/src/allmydata/frontends/magic_folder.py b/src/allmydata/frontends/magic_folder.py index 16cdaad0..89b981a5 100644 --- a/src/allmydata/frontends/magic_folder.py +++ b/src/allmydata/frontends/magic_folder.py @@ -19,7 +19,7 @@ from allmydata.util.encodingutil import listdir_filepath, to_filepath, \ extend_filepath, unicode_from_filepath, unicode_segments_from, \ quote_filepath, quote_local_unicode_path, quote_output, FilenameEncodingError from allmydata.immutable.upload import FileName, Data -from allmydata import backupdb, magicpath +from allmydata import magicfolderdb, magicpath IN_EXCL_UNLINK = 0x04000000L @@ -31,13 +31,13 @@ def get_inotify_module(): elif runtime.platform.supportsINotify(): from twisted.internet import inotify else: - raise NotImplementedError("filesystem notification needed for drop-upload is not supported.\n" + raise NotImplementedError("filesystem notification needed for Magic Folder is not supported.\n" "This currently requires Linux or Windows.") return inotify except (ImportError, AttributeError) as e: log.msg(e) if sys.platform == "win32": - raise NotImplementedError("filesystem notification needed for drop-upload is not supported.\n" + raise NotImplementedError("filesystem notification needed for Magic Folder is not supported.\n" "Windows support requires at least Vista, and has only been tested on Windows 7.") raise @@ -51,7 +51,7 @@ class MagicFolder(service.MultiService): service.MultiService.__init__(self) - db = backupdb.get_backupdb(dbfile, create_version=(backupdb.MAGIC_FOLDER_SCHEMA_v3, 3)) + db = magicfolderdb.get_magicfolderdb(dbfile, create_version=(magicfolderdb.SCHEMA_v1, 1)) if db is None: return Failure(Exception('ERROR: Unable to load magic folder db.')) diff --git a/src/allmydata/magicfolderdb.py b/src/allmydata/magicfolderdb.py new file mode 100644 index 00000000..187941b8 --- /dev/null +++ b/src/allmydata/magicfolderdb.py @@ -0,0 +1,139 @@ + +import sys + +from allmydata.util.dbutil import get_db, DBError + + +# magic-folder db schema version 1 +SCHEMA_v1 = """ +CREATE TABLE version +( + version INTEGER -- contains one row, set to 1 +); + +CREATE TABLE local_files +( + path VARCHAR(1024) PRIMARY KEY, -- UTF-8 filename relative to local magic folder dir + -- note that size is before mtime and ctime here, but after in function parameters + size INTEGER, -- ST_SIZE, or NULL if the file has been deleted + mtime REAL, -- ST_MTIME + ctime REAL, -- ST_CTIME + version INTEGER, + last_uploaded_uri VARCHAR(256) UNIQUE, -- URI:CHK:... + last_downloaded_uri VARCHAR(256) UNIQUE, -- URI:CHK:... + last_downloaded_timestamp REAL +); +""" + + +def get_magicfolderdb(dbfile, stderr=sys.stderr, + create_version=(SCHEMA_v1, 1), just_create=False): + # Open or create the given backupdb file. The parent directory must + # exist. + try: + (sqlite3, db) = get_db(dbfile, stderr, create_version, + just_create=just_create, dbname="magicfolderdb") + if create_version[1] in (1, 2): + return MagicFolderDB(sqlite3, db) + else: + print >>stderr, "invalid magicfolderdb schema version specified" + return None + except DBError, e: + print >>stderr, e + return None + + +class MagicFolderDB(object): + VERSION = 1 + + def __init__(self, sqlite_module, connection): + self.sqlite_module = sqlite_module + self.connection = connection + self.cursor = connection.cursor() + + def check_file_db_exists(self, path): + """I will tell you if a given file has an entry in my database or not + by returning True or False. + """ + c = self.cursor + c.execute("SELECT size,mtime,ctime" + " FROM local_files" + " WHERE path=?", + (path,)) + row = self.cursor.fetchone() + if not row: + return False + else: + return True + + def get_all_relpaths(self): + """ + Retrieve a set of all relpaths of files that have had an entry in magic folder db + (i.e. that have been downloaded at least once). + """ + self.cursor.execute("SELECT path FROM local_files") + rows = self.cursor.fetchall() + return set([r[0] for r in rows]) + + def get_last_downloaded_uri(self, relpath_u): + """ + Return the last downloaded uri recorded in the magic folder db. + If none are found then return None. + """ + c = self.cursor + c.execute("SELECT last_downloaded_uri" + " FROM local_files" + " WHERE path=?", + (relpath_u,)) + row = self.cursor.fetchone() + if not row: + return None + else: + return row[0] + + def get_local_file_version(self, relpath_u): + """ + Return the version of a local file tracked by our magic folder db. + If no db entry is found then return None. + """ + c = self.cursor + c.execute("SELECT version" + " FROM local_files" + " WHERE path=?", + (relpath_u,)) + row = self.cursor.fetchone() + if not row: + return None + else: + return row[0] + + def did_upload_version(self, filecap, relpath_u, version, pathinfo): + print "did_upload_version(%r, %r, %r, %r)" % (filecap, relpath_u, version, pathinfo) + try: + print "insert" + self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?,?)", + (relpath_u, pathinfo.size, pathinfo.mtime, pathinfo.ctime, version, filecap, pathinfo.mtime)) + except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError): + print "err... update" + self.cursor.execute("UPDATE local_files" + " SET size=?, mtime=?, ctime=?, version=?, last_downloaded_uri=?, last_downloaded_timestamp=?" + " WHERE path=?", + (pathinfo.size, pathinfo.mtime, pathinfo.ctime, version, filecap, pathinfo.mtime, relpath_u)) + self.connection.commit() + print "commited" + + def is_new_file(self, pathinfo, relpath_u): + """ + Returns true if the file's current pathinfo (size, mtime, and ctime) has + changed from the pathinfo previously stored in the db. + """ + #print "is_new_file(%r, %r)" % (pathinfo, relpath_u) + c = self.cursor + c.execute("SELECT size, mtime, ctime" + " FROM local_files" + " WHERE path=?", + (relpath_u,)) + row = self.cursor.fetchone() + if not row: + return True + return (pathinfo.size, pathinfo.mtime, pathinfo.ctime) != row diff --git a/src/allmydata/scripts/backupdb.py b/src/allmydata/scripts/backupdb.py new file mode 100644 index 00000000..e859597c --- /dev/null +++ b/src/allmydata/scripts/backupdb.py @@ -0,0 +1,340 @@ + +import os.path, sys, time, random, stat + +from allmydata.util.netstring import netstring +from allmydata.util.hashutil import backupdb_dirhash +from allmydata.util import base32 +from allmydata.util.fileutil import abspath_expanduser_unicode +from allmydata.util.encodingutil import to_str +from allmydata.util.dbutil import get_db, DBError + + +DAY = 24*60*60 +MONTH = 30*DAY + +SCHEMA_v1 = """ +CREATE TABLE version -- added in v1 +( + version INTEGER -- contains one row, set to 2 +); + +CREATE TABLE local_files -- added in v1 +( + path VARCHAR(1024) PRIMARY KEY, -- index, this is an absolute UTF-8-encoded local filename + size INTEGER, -- os.stat(fn)[stat.ST_SIZE] + mtime NUMBER, -- os.stat(fn)[stat.ST_MTIME] + ctime NUMBER, -- os.stat(fn)[stat.ST_CTIME] + fileid INTEGER +); + +CREATE TABLE caps -- added in v1 +( + fileid INTEGER PRIMARY KEY AUTOINCREMENT, + filecap VARCHAR(256) UNIQUE -- URI:CHK:... +); + +CREATE TABLE last_upload -- added in v1 +( + fileid INTEGER PRIMARY KEY, + last_uploaded TIMESTAMP, + last_checked TIMESTAMP +); + +""" + +TABLE_DIRECTORY = """ + +CREATE TABLE directories -- added in v2 +( + dirhash varchar(256) PRIMARY KEY, -- base32(dirhash) + dircap varchar(256), -- URI:DIR2-CHK:... + last_uploaded TIMESTAMP, + last_checked TIMESTAMP +); + +""" + +SCHEMA_v2 = SCHEMA_v1 + TABLE_DIRECTORY + +UPDATE_v1_to_v2 = TABLE_DIRECTORY + """ +UPDATE version SET version=2; +""" + +UPDATERS = { + 2: UPDATE_v1_to_v2, +} + +def get_backupdb(dbfile, stderr=sys.stderr, + create_version=(SCHEMA_v2, 2), just_create=False): + # Open or create the given backupdb file. The parent directory must + # exist. + try: + (sqlite3, db) = get_db(dbfile, stderr, create_version, updaters=UPDATERS, + just_create=just_create, dbname="backupdb") + return BackupDB_v2(sqlite3, db) + except DBError, e: + print >>stderr, e + return None + + +class FileResult: + def __init__(self, bdb, filecap, should_check, + path, mtime, ctime, size): + self.bdb = bdb + self.filecap = filecap + self.should_check_p = should_check + + self.path = path + self.mtime = mtime + self.ctime = ctime + self.size = size + + def was_uploaded(self): + if self.filecap: + return self.filecap + return False + + def did_upload(self, filecap): + self.bdb.did_upload_file(filecap, self.path, + self.mtime, self.ctime, self.size) + + def should_check(self): + return self.should_check_p + + def did_check_healthy(self, results): + self.bdb.did_check_file_healthy(self.filecap, results) + + +class DirectoryResult: + def __init__(self, bdb, dirhash, dircap, should_check): + self.bdb = bdb + self.dircap = dircap + self.should_check_p = should_check + self.dirhash = dirhash + + def was_created(self): + if self.dircap: + return self.dircap + return False + + def did_create(self, dircap): + self.bdb.did_create_directory(dircap, self.dirhash) + + def should_check(self): + return self.should_check_p + + def did_check_healthy(self, results): + self.bdb.did_check_directory_healthy(self.dircap, results) + + +class BackupDB_v2: + VERSION = 2 + NO_CHECK_BEFORE = 1*MONTH + ALWAYS_CHECK_AFTER = 2*MONTH + + def __init__(self, sqlite_module, connection): + self.sqlite_module = sqlite_module + self.connection = connection + self.cursor = connection.cursor() + + def check_file(self, path, use_timestamps=True): + """I will tell you if a given local file needs to be uploaded or not, + by looking in a database and seeing if I have a record of this file + having been uploaded earlier. + + I return a FileResults object, synchronously. If r.was_uploaded() + returns False, you should upload the file. When you are finished + uploading it, call r.did_upload(filecap), so I can update my + database. + + If was_uploaded() returns a filecap, you might be able to avoid an + upload. Call r.should_check(), and if it says False, you can skip the + upload and use the filecap returned by was_uploaded(). + + If should_check() returns True, you should perform a filecheck on the + filecap returned by was_uploaded(). If the check indicates the file + is healthy, please call r.did_check_healthy(checker_results) so I can + update the database, using the de-JSONized response from the webapi + t=check call for 'checker_results'. If the check indicates the file + is not healthy, please upload the file and call r.did_upload(filecap) + when you're done. + + If use_timestamps=True (the default), I will compare ctime and mtime + of the local file against an entry in my database, and consider the + file to be unchanged if ctime, mtime, and filesize are all the same + as the earlier version. If use_timestamps=False, I will not trust the + timestamps, so more files (perhaps all) will be marked as needing + upload. A future version of this database may hash the file to make + equality decisions, in which case use_timestamps=False will not + always imply r.must_upload()==True. + + 'path' points to a local file on disk, possibly relative to the + current working directory. The database stores absolute pathnames. + """ + + path = abspath_expanduser_unicode(path) + + # XXX consider using get_pathinfo + s = os.stat(path) + size = s[stat.ST_SIZE] + ctime = s[stat.ST_CTIME] + mtime = s[stat.ST_MTIME] + + now = time.time() + c = self.cursor + + c.execute("SELECT size,mtime,ctime,fileid" + " FROM local_files" + " WHERE path=?", + (path,)) + row = self.cursor.fetchone() + if not row: + return FileResult(self, None, False, path, mtime, ctime, size) + (last_size,last_mtime,last_ctime,last_fileid) = row + + c.execute("SELECT caps.filecap, last_upload.last_checked" + " FROM caps,last_upload" + " WHERE caps.fileid=? AND last_upload.fileid=?", + (last_fileid, last_fileid)) + row2 = c.fetchone() + + if ((last_size != size + or not use_timestamps + or last_mtime != mtime + or last_ctime != ctime) # the file has been changed + or (not row2) # we somehow forgot where we put the file last time + ): + c.execute("DELETE FROM local_files WHERE path=?", (path,)) + self.connection.commit() + return FileResult(self, None, False, path, mtime, ctime, size) + + # at this point, we're allowed to assume the file hasn't been changed + (filecap, last_checked) = row2 + age = now - last_checked + + probability = ((age - self.NO_CHECK_BEFORE) / + (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE)) + probability = min(max(probability, 0.0), 1.0) + should_check = bool(random.random() < probability) + + return FileResult(self, to_str(filecap), should_check, + path, mtime, ctime, size) + + def get_or_allocate_fileid_for_cap(self, filecap): + # find an existing fileid for this filecap, or insert a new one. The + # caller is required to commit() afterwards. + + # mysql has "INSERT ... ON DUPLICATE KEY UPDATE", but not sqlite + # sqlite has "INSERT ON CONFLICT REPLACE", but not mysql + # So we use INSERT, ignore any error, then a SELECT + c = self.cursor + try: + c.execute("INSERT INTO caps (filecap) VALUES (?)", (filecap,)) + except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError): + # sqlite3 on sid gives IntegrityError + # pysqlite2 (which we don't use, so maybe no longer relevant) on dapper gives OperationalError + pass + c.execute("SELECT fileid FROM caps WHERE filecap=?", (filecap,)) + foundrow = c.fetchone() + assert foundrow + fileid = foundrow[0] + return fileid + + def did_upload_file(self, filecap, path, mtime, ctime, size): + now = time.time() + fileid = self.get_or_allocate_fileid_for_cap(filecap) + try: + self.cursor.execute("INSERT INTO last_upload VALUES (?,?,?)", + (fileid, now, now)) + except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError): + self.cursor.execute("UPDATE last_upload" + " SET last_uploaded=?, last_checked=?" + " WHERE fileid=?", + (now, now, fileid)) + try: + self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?)", + (path, size, mtime, ctime, fileid)) + except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError): + self.cursor.execute("UPDATE local_files" + " SET size=?, mtime=?, ctime=?, fileid=?" + " WHERE path=?", + (size, mtime, ctime, fileid, path)) + self.connection.commit() + + def did_check_file_healthy(self, filecap, results): + now = time.time() + fileid = self.get_or_allocate_fileid_for_cap(filecap) + self.cursor.execute("UPDATE last_upload" + " SET last_checked=?" + " WHERE fileid=?", + (now, fileid)) + self.connection.commit() + + def check_directory(self, contents): + """I will tell you if a new directory needs to be created for a given + set of directory contents, or if I know of an existing (immutable) + directory that can be used instead. + + 'contents' should be a dictionary that maps from child name (a single + unicode string) to immutable childcap (filecap or dircap). + + I return a DirectoryResult object, synchronously. If r.was_created() + returns False, you should create the directory (with + t=mkdir-immutable). When you are finished, call r.did_create(dircap) + so I can update my database. + + If was_created() returns a dircap, you might be able to avoid the + mkdir. Call r.should_check(), and if it says False, you can skip the + mkdir and use the dircap returned by was_created(). + + If should_check() returns True, you should perform a check operation + on the dircap returned by was_created(). If the check indicates the + directory is healthy, please call + r.did_check_healthy(checker_results) so I can update the database, + using the de-JSONized response from the webapi t=check call for + 'checker_results'. If the check indicates the directory is not + healthy, please repair or re-create the directory and call + r.did_create(dircap) when you're done. + """ + + now = time.time() + entries = [] + for name in contents: + entries.append( [name.encode("utf-8"), contents[name]] ) + entries.sort() + data = "".join([netstring(name_utf8)+netstring(cap) + for (name_utf8,cap) in entries]) + dirhash = backupdb_dirhash(data) + dirhash_s = base32.b2a(dirhash) + c = self.cursor + c.execute("SELECT dircap, last_checked" + " FROM directories WHERE dirhash=?", (dirhash_s,)) + row = c.fetchone() + if not row: + return DirectoryResult(self, dirhash_s, None, False) + (dircap, last_checked) = row + age = now - last_checked + + probability = ((age - self.NO_CHECK_BEFORE) / + (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE)) + probability = min(max(probability, 0.0), 1.0) + should_check = bool(random.random() < probability) + + return DirectoryResult(self, dirhash_s, to_str(dircap), should_check) + + def did_create_directory(self, dircap, dirhash): + now = time.time() + # if the dirhash is already present (i.e. we've re-uploaded an + # existing directory, possibly replacing the dircap with a new one), + # update the record in place. Otherwise create a new record.) + self.cursor.execute("REPLACE INTO directories VALUES (?,?,?,?)", + (dirhash, dircap, now, now)) + self.connection.commit() + + def did_check_directory_healthy(self, dircap, results): + now = time.time() + self.cursor.execute("UPDATE directories" + " SET last_checked=?" + " WHERE dircap=?", + (now, dircap)) + self.connection.commit() diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index a029b34a..af7bad01 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -5,8 +5,8 @@ from cStringIO import StringIO from twisted.python import usage from allmydata.scripts.common import get_default_nodedir -from allmydata.scripts import debug, create_node, startstop_node, cli, keygen, stats_gatherer, admin, \ -magic_folder_cli +from allmydata.scripts import debug, create_node, startstop_node, cli, keygen, stats_gatherer, \ + admin, magic_folder_cli from allmydata.util.encodingutil import quote_output, quote_local_unicode_path, get_io_encoding def GROUP(s): diff --git a/src/allmydata/scripts/tahoe_backup.py b/src/allmydata/scripts/tahoe_backup.py index e2276406..f12b3171 100644 --- a/src/allmydata/scripts/tahoe_backup.py +++ b/src/allmydata/scripts/tahoe_backup.py @@ -8,7 +8,7 @@ from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS, \ UnknownAliasError from allmydata.scripts.common_http import do_http, HTTPError, format_http_error from allmydata.util import time_format -from allmydata import backupdb +from allmydata.scripts import backupdb from allmydata.util.encodingutil import listdir_unicode, quote_output, \ quote_local_unicode_path, to_str, FilenameEncodingError, unicode_to_url from allmydata.util.assertutil import precondition diff --git a/src/allmydata/scripts/tahoe_ls.py b/src/allmydata/scripts/tahoe_ls.py index 9a8ce240..78eea1f2 100644 --- a/src/allmydata/scripts/tahoe_ls.py +++ b/src/allmydata/scripts/tahoe_ls.py @@ -151,7 +151,9 @@ def list(options): line.append(uri) if options["readonly-uri"]: line.append(quote_output(ro_uri or "-", quotemarks=False)) + rows.append((encoding_error, line)) + max_widths = [] left_justifys = [] for (encoding_error, row) in rows: diff --git a/src/allmydata/test/test_backupdb.py b/src/allmydata/test/test_backupdb.py index dddfb821..835e2531 100644 --- a/src/allmydata/test/test_backupdb.py +++ b/src/allmydata/test/test_backupdb.py @@ -6,7 +6,7 @@ from twisted.trial import unittest from allmydata.util import fileutil from allmydata.util.encodingutil import listdir_unicode, get_filesystem_encoding, unicode_platform from allmydata.util.assertutil import precondition -from allmydata import backupdb +from allmydata.scripts import backupdb class BackupDB(unittest.TestCase): def create(self, dbfile): diff --git a/src/allmydata/test/test_cli_backup.py b/src/allmydata/test/test_cli_backup.py index a48295de..3bd2a614 100644 --- a/src/allmydata/test/test_cli_backup.py +++ b/src/allmydata/test/test_cli_backup.py @@ -11,8 +11,7 @@ from allmydata.util import fileutil from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv from allmydata.util.namespace import Namespace -from allmydata.scripts import cli -from allmydata import backupdb +from allmydata.scripts import cli, backupdb from .common_util import StallMixin from .no_network import GridTestMixin from .test_cli import CLITestMixin, parse_options diff --git a/src/allmydata/test/test_magic_folder.py b/src/allmydata/test/test_magic_folder.py index 4a6b60eb..7ebcf928 100644 --- a/src/allmydata/test/test_magic_folder.py +++ b/src/allmydata/test/test_magic_folder.py @@ -16,7 +16,7 @@ from .test_cli_magic_folder import MagicFolderCLITestMixin from allmydata.frontends import magic_folder from allmydata.frontends.magic_folder import MagicFolder, Downloader -from allmydata import backupdb, magicpath +from allmydata import magicfolderdb, magicpath from allmydata.util.fileutil import abspath_expanduser_unicode @@ -39,10 +39,10 @@ class MagicFolderTestMixin(MagicFolderCLITestMixin, ShouldFailMixin, ReallyEqual def _createdb(self): dbfile = abspath_expanduser_unicode(u"magicfolderdb.sqlite", base=self.basedir) - bdb = backupdb.get_backupdb(dbfile, create_version=(backupdb.MAGIC_FOLDER_SCHEMA_v3, 3)) - self.failUnless(bdb, "unable to create backupdb from %r" % (dbfile,)) - self.failUnlessEqual(bdb.VERSION, 3) - return bdb + mdb = magicfolderdb.get_magicfolderdb(dbfile, create_version=(magicfolderdb.SCHEMA_v1, 1)) + self.failUnless(mdb, "unable to create magicfolderdb from %r" % (dbfile,)) + self.failUnlessEqual(mdb.VERSION, 1) + return mdb def _restart_client(self, ign): #print "_restart_client" diff --git a/src/allmydata/util/fileutil.py b/src/allmydata/util/fileutil.py index 8194fcd4..a0e4ce4a 100644 --- a/src/allmydata/util/fileutil.py +++ b/src/allmydata/util/fileutil.py @@ -6,6 +6,11 @@ import sys, exceptions, os, stat, tempfile, time, binascii from collections import namedtuple from errno import ENOENT +if sys.platform == "win32": + from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, c_ulonglong, \ + create_unicode_buffer, get_last_error + from ctypes.wintypes import BOOL, DWORD, LPCWSTR, LPWSTR, LPVOID, HANDLE + from twisted.python import log from pycryptopp.cipher.aes import AES @@ -520,8 +525,6 @@ def get_available_space(whichdir, reserved_space): if sys.platform == "win32": - from ctypes.wintypes import BOOL, HANDLE, DWORD, LPCWSTR, LPVOID, WinError, get_last_error - # CreateFileW = WINFUNCTYPE(HANDLE, LPCWSTR, DWORD, DWORD, LPVOID, DWORD, DWORD, HANDLE) \ (("CreateFileW", windll.kernel32))