2 import os.path, sys, time, random, stat
4 from allmydata.util.netstring import netstring
5 from allmydata.util.hashutil import backupdb_dirhash
6 from allmydata.util import base32
7 from allmydata.util.fileutil import abspath_expanduser_unicode
8 from allmydata.util.encodingutil import to_str
9 from allmydata.util.dbutil import get_db, DBError
18 version INTEGER -- contains one row, set to %s
21 CREATE TABLE local_files
23 path VARCHAR(1024) PRIMARY KEY, -- index, this is an absolute UTF-8-encoded local filename
24 -- note that size is before mtime and ctime here, but after in function parameters
25 size INTEGER, -- os.stat(fn)[stat.ST_SIZE] (NULL if the file has been deleted)
26 mtime NUMBER, -- os.stat(fn)[stat.ST_MTIME]
27 ctime NUMBER, -- os.stat(fn)[stat.ST_CTIME]
33 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
34 filecap VARCHAR(256) UNIQUE -- URI:CHK:...
37 CREATE TABLE last_upload
39 fileid INTEGER PRIMARY KEY,
40 last_uploaded TIMESTAMP,
41 last_checked TIMESTAMP
46 SCHEMA_v1 = MAIN_SCHEMA % (1, "")
50 CREATE TABLE directories -- added in v2
52 dirhash varchar(256) PRIMARY KEY, -- base32(dirhash)
53 dircap varchar(256), -- URI:DIR2-CHK:...
54 last_uploaded TIMESTAMP,
55 last_checked TIMESTAMP
60 SCHEMA_v2 = MAIN_SCHEMA % (2, "") + TABLE_DIRECTORY
62 UPDATE_v1_to_v2 = TABLE_DIRECTORY + """
63 UPDATE version SET version=2;
71 # magic-folder db schema version 3
72 MAGIC_FOLDER_SCHEMA_v3 = """
75 version INTEGER -- contains one row, set to %s
78 CREATE TABLE local_files
80 path VARCHAR(1024) PRIMARY KEY, -- index, this is an absolute UTF-8-encoded local filename
81 -- note that size is before mtime and ctime here, but after in function parameters
82 size INTEGER, -- os.stat(fn)[stat.ST_SIZE] (NULL if the file has been deleted)
83 mtime NUMBER, -- os.stat(fn)[stat.ST_MTIME]
84 ctime NUMBER, -- os.stat(fn)[stat.ST_CTIME]
86 last_downloaded_uri VARCHAR(256) UNIQUE, -- URI:CHK:...
87 last_downloaded_timestamp NUMBER
def get_backupdb(dbfile, stderr=sys.stderr,
                 create_version=(SCHEMA_v2, 2), just_create=False):
    # Open or create the given backupdb file. The parent directory must
    # already exist.
    # NOTE(review): get_db can raise DBError (imported above) - confirm how
    # that is handled around this call in the full source.
    (sqlite3, db) = get_db(dbfile, stderr, create_version, updaters=UPDATERS,
                           just_create=just_create, dbname="backupdb")
    # schema v1/v2 -> classic backupdb wrapper; v3 -> magic-folder db wrapper
    if create_version[1] in (1, 2):
        return BackupDB(sqlite3, db)
    elif create_version[1] == 3:
        return MagicFolderDB(sqlite3, db)
    # any other requested schema version is rejected (returns None implicitly)
    print >>stderr, "invalid db schema version specified"
    def __init__(self, bdb, filecap, should_check,
                 path, mtime, ctime, size):
        """Capture the outcome of BackupDB.check_file() for one local file."""
        self.filecap = filecap              # cap from a previous upload, or None
        self.should_check_p = should_check  # True if a health check is advised
        # NOTE(review): did_upload() also reads self.bdb/.path/.mtime/.ctime/
        # .size, which must be assigned here as well - confirm against the
        # full source.
123 def was_uploaded(self):
128 def did_upload(self, filecap):
129 self.bdb.did_upload_file(filecap, self.path,
130 self.mtime, self.ctime, self.size)
132 def should_check(self):
133 return self.should_check_p
135 def did_check_healthy(self, results):
136 self.bdb.did_check_file_healthy(self.filecap, results)
class DirectoryResult:
    """Outcome of BackupDB.check_directory() for one set of directory contents."""
    def __init__(self, bdb, dirhash, dircap, should_check):
        self.should_check_p = should_check  # True if a health check is advised
        self.dirhash = dirhash              # base32 hash identifying the contents
        # NOTE(review): did_create()/did_check_healthy() also read self.bdb and
        # self.dircap, which must be assigned here as well - confirm against
        # the full source.
146 def was_created(self):
151 def did_create(self, dircap):
152 self.bdb.did_create_directory(dircap, self.dirhash)
154 def should_check(self):
155 return self.should_check_p
157 def did_check_healthy(self, results):
158 self.bdb.did_check_directory_healthy(self.dircap, results)
    # A recorded cap is re-checked with a probability that ramps linearly from
    # 0 when it was last checked less than NO_CHECK_BEFORE ago, up to 1 once
    # ALWAYS_CHECK_AFTER has elapsed (see the probability computation in
    # check_file/check_directory below).
    NO_CHECK_BEFORE = 1*MONTH
    ALWAYS_CHECK_AFTER = 2*MONTH
166 def __init__(self, sqlite_module, connection):
167 self.sqlite_module = sqlite_module
168 self.connection = connection
169 self.cursor = connection.cursor()
    def check_file_db_exists(self, path):
        """I will tell you if a given file has an entry in my database or not
        by returning True or False.
        """
        c.execute("SELECT size,mtime,ctime,fileid"
        row = self.cursor.fetchone()
        # NOTE(review): a non-None local_files row for this path presumably
        # yields True here - confirm against the full implementation.
    def check_file(self, path, use_timestamps=True):
        """I will tell you if a given local file needs to be uploaded or not,
        by looking in a database and seeing if I have a record of this file
        having been uploaded earlier.

        I return a FileResults object, synchronously. If r.was_uploaded()
        returns False, you should upload the file. When you are finished
        uploading it, call r.did_upload(filecap), so I can update my

        If was_uploaded() returns a filecap, you might be able to avoid an
        upload. Call r.should_check(), and if it says False, you can skip the
        upload and use the filecap returned by was_uploaded().

        If should_check() returns True, you should perform a filecheck on the
        filecap returned by was_uploaded(). If the check indicates the file
        is healthy, please call r.did_check_healthy(checker_results) so I can
        update the database, using the de-JSONized response from the webapi
        t=check call for 'checker_results'. If the check indicates the file
        is not healthy, please upload the file and call r.did_upload(filecap)

        If use_timestamps=True (the default), I will compare mtime and ctime
        of the local file against an entry in my database, and consider the
        file to be unchanged if mtime, ctime, and filesize are all the same
        as the earlier version. If use_timestamps=False, I will not trust the
        timestamps, so more files (perhaps all) will be marked as needing
        upload. A future version of this database may hash the file to make
        equality decisions, in which case use_timestamps=False will not
        always imply r.must_upload()==True.

        'path' points to a local file on disk, possibly relative to the
        current working directory. The database stores absolute pathnames.
        """
        path = abspath_expanduser_unicode(path)
        # XXX consider using get_pathinfo
        # the file's identity for change detection is (size, mtime, ctime)
        # NOTE(review): 's' is presumably the result of os.stat(path) - confirm
        size = s[stat.ST_SIZE]
        mtime = s[stat.ST_MTIME]
        ctime = s[stat.ST_CTIME]
        c.execute("SELECT size,mtime,ctime,fileid"
        row = self.cursor.fetchone()
        # no local_files row at all: the file definitely needs an upload
            return FileResult(self, None, False, path, mtime, ctime, size)
        (last_size,last_mtime,last_ctime,last_fileid) = row
        # we know this path; look up its cap and when we last checked it
        c.execute("SELECT caps.filecap, last_upload.last_checked"
                  " FROM caps,last_upload"
                  " WHERE caps.fileid=? AND last_upload.fileid=?",
                  (last_fileid, last_fileid))
        if ((last_size != size
             or not use_timestamps
             or last_mtime != mtime
             or last_ctime != ctime) # the file has been changed
            or (not row2) # we somehow forgot where we put the file last time
            # stale record: drop it and tell the caller to re-upload
            c.execute("DELETE FROM local_files WHERE path=?", (path,))
            self.connection.commit()
            return FileResult(self, None, False, path, mtime, ctime, size)

        # at this point, we're allowed to assume the file hasn't been changed
        (filecap, last_checked) = row2
        age = now - last_checked
        # re-check probability ramps linearly from 0 at NO_CHECK_BEFORE to 1
        # at ALWAYS_CHECK_AFTER, then is clamped into [0, 1]
        probability = ((age - self.NO_CHECK_BEFORE) /
                       (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
        probability = min(max(probability, 0.0), 1.0)
        should_check = bool(random.random() < probability)
        return FileResult(self, to_str(filecap), should_check,
                          path, mtime, ctime, size)
    def get_or_allocate_fileid_for_cap(self, filecap):
        """Return the caps-table fileid for *filecap*, inserting a row if needed.

        The caller is responsible for calling commit() afterwards.
        """
        # find an existing fileid for this filecap, or insert a new one. The
        # caller is required to commit() afterwards.
        # mysql has "INSERT ... ON DUPLICATE KEY UPDATE", but not sqlite
        # sqlite has "INSERT ON CONFLICT REPLACE", but not mysql
        # So we use INSERT, ignore any error, then a SELECT
            c.execute("INSERT INTO caps (filecap) VALUES (?)", (filecap,))
        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
            # sqlite3 on sid gives IntegrityError
            # pysqlite2 (which we don't use, so maybe no longer relevant) on dapper gives OperationalError
        c.execute("SELECT fileid FROM caps WHERE filecap=?", (filecap,))
        foundrow = c.fetchone()
        # NOTE(review): foundrow[0] is presumably returned as the fileid -
        # confirm against the full source.
    def did_upload_file(self, filecap, path, mtime, ctime, size):
        """Record a successful upload of *path* as *filecap* (insert-or-update).

        Note: size is before mtime/ctime in the local_files columns, but after
        them in this parameter list (see the schema comment above).
        """
        fileid = self.get_or_allocate_fileid_for_cap(filecap)
            # attempt an INSERT first; on a duplicate-key error, UPDATE instead
            self.cursor.execute("INSERT INTO last_upload VALUES (?,?,?)",
        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
            self.cursor.execute("UPDATE last_upload"
                                " SET last_uploaded=?, last_checked=?"
            # same INSERT-else-UPDATE dance for the local_files row
            self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?)",
                                (path, size, mtime, ctime, fileid))
        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
            self.cursor.execute("UPDATE local_files"
                                " SET size=?, mtime=?, ctime=?, fileid=?"
                                (size, mtime, ctime, fileid, path))
        self.connection.commit()
    def did_check_file_healthy(self, filecap, results):
        """Refresh last_checked for *filecap* after a passing filecheck."""
        fileid = self.get_or_allocate_fileid_for_cap(filecap)
        self.cursor.execute("UPDATE last_upload"
                            " SET last_checked=?"
        # NOTE(review): presumably filtered by fileid and bound with a fresh
        # timestamp - confirm against the full source.
        self.connection.commit()
    def check_directory(self, contents):
        """I will tell you if a new directory needs to be created for a given
        set of directory contents, or if I know of an existing (immutable)
        directory that can be used instead.

        'contents' should be a dictionary that maps from child name (a single
        unicode string) to immutable childcap (filecap or dircap).

        I return a DirectoryResult object, synchronously. If r.was_created()
        returns False, you should create the directory (with
        t=mkdir-immutable). When you are finished, call r.did_create(dircap)
        so I can update my database.

        If was_created() returns a dircap, you might be able to avoid the
        mkdir. Call r.should_check(), and if it says False, you can skip the
        mkdir and use the dircap returned by was_created().

        If should_check() returns True, you should perform a check operation
        on the dircap returned by was_created(). If the check indicates the
        directory is healthy, please call
        r.did_check_healthy(checker_results) so I can update the database,
        using the de-JSONized response from the webapi t=check call for
        'checker_results'. If the check indicates the directory is not
        healthy, please repair or re-create the directory and call
        r.did_create(dircap) when you're done.
        """
        # serialize the contents into netstrings so identical contents always
        # hash to the same directories-table key
        for name in contents:
            entries.append( [name.encode("utf-8"), contents[name]] )
        data = "".join([netstring(name_utf8)+netstring(cap)
                        for (name_utf8,cap) in entries])
        dirhash = backupdb_dirhash(data)
        dirhash_s = base32.b2a(dirhash)
        c.execute("SELECT dircap, last_checked"
                  " FROM directories WHERE dirhash=?", (dirhash_s,))
        # unknown contents: the caller must create the directory
            return DirectoryResult(self, dirhash_s, None, False)
        (dircap, last_checked) = row
        age = now - last_checked
        # same linear re-check ramp as check_file, clamped into [0, 1]
        probability = ((age - self.NO_CHECK_BEFORE) /
                       (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
        probability = min(max(probability, 0.0), 1.0)
        should_check = bool(random.random() < probability)
        return DirectoryResult(self, dirhash_s, to_str(dircap), should_check)
371 def did_create_directory(self, dircap, dirhash):
373 # if the dirhash is already present (i.e. we've re-uploaded an
374 # existing directory, possibly replacing the dircap with a new one),
375 # update the record in place. Otherwise create a new record.)
376 self.cursor.execute("REPLACE INTO directories VALUES (?,?,?,?)",
377 (dirhash, dircap, now, now))
378 self.connection.commit()
    def did_check_directory_healthy(self, dircap, results):
        """Refresh last_checked for *dircap* after a passing directory check."""
        self.cursor.execute("UPDATE directories"
                            " SET last_checked=?"
        # NOTE(review): presumably filtered by dircap and bound with a fresh
        # timestamp - confirm against the full source.
        self.connection.commit()
class MagicFolderDB():
    """Database of local files tracked by a magic-folder (schema v3)."""
    def __init__(self, sqlite_module, connection):
        """Hold the DB-API module, the open connection, and a shared cursor."""
        self.sqlite_module = sqlite_module
        self.connection = connection
        # one cursor is reused for every query this object issues
        self.cursor = connection.cursor()
    def check_file_db_exists(self, path):
        """I will tell you if a given file has an entry in my database or not
        by returning True or False.
        """
        c.execute("SELECT size,mtime,ctime"
        row = self.cursor.fetchone()
        # NOTE(review): a non-None local_files row presumably yields True -
        # confirm against the full implementation.
412 def get_all_relpaths(self):
414 Retrieve a set of all relpaths of files that have had an entry in magic folder db
415 (i.e. that have been downloaded at least once).
417 self.cursor.execute("SELECT path FROM local_files")
418 rows = self.cursor.fetchall()
419 return set([r[0] for r in rows])
    def get_last_downloaded_uri(self, relpath_u):
        """
        Return the last downloaded uri recorded in the magic folder db.
        If none are found then return None.
        """
        c.execute("SELECT last_downloaded_uri"
        row = self.cursor.fetchone()
        # NOTE(review): row[0] for a match, None otherwise - confirm against
        # the full source.
    def get_local_file_version(self, relpath_u):
        """
        Return the version of a local file tracked by our magic folder db.
        If no db entry is found then return None.
        """
        c.execute("SELECT version"
        row = self.cursor.fetchone()
        # NOTE(review): row[0] for a match, None otherwise - confirm against
        # the full source.
    def did_upload_version(self, filecap, relpath_u, version, pathinfo):
        """Record (insert-or-update) the local_files row for *relpath_u*
        after uploading *version* as *filecap*.
        """
        # NOTE(review): these bare debug prints (Python 2 syntax) should go
        # through a logger instead of stdout.
        print "did_upload_version(%r, %r, %r, %r)" % (filecap, relpath_u, version, pathinfo)
            # BUG(review): 6 '?' placeholders but 7 bound values below - the
            # sqlite3 module raises ProgrammingError for a binding-count
            # mismatch, and ProgrammingError is NOT in the except clause, so
            # this would propagate; the INSERT likely needs (?,?,?,?,?,?,?).
            # Confirm against the v3 schema's full column list.
            self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?,?)",
                                (relpath_u, pathinfo.size, pathinfo.mtime, pathinfo.ctime, version, filecap, pathinfo.mtime))
        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
            print "err... update"
            # duplicate path: update the existing row in place
            self.cursor.execute("UPDATE local_files"
                                " SET size=?, mtime=?, ctime=?, version=?, last_downloaded_uri=?, last_downloaded_timestamp=?"
                                (pathinfo.size, pathinfo.mtime, pathinfo.ctime, version, filecap, pathinfo.mtime, relpath_u))
        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
        self.connection.commit()
    def is_new_file(self, pathinfo, relpath_u):
        """
        Returns true if the file's current pathinfo (size, mtime, and ctime) has
        changed from the pathinfo previously stored in the db.
        """
        #print "is_new_file(%r, %r)" % (pathinfo, relpath_u)
        c.execute("SELECT size, mtime, ctime"
        row = self.cursor.fetchone()
        # a differing (size, mtime, ctime) triple means the file changed on disk
        # NOTE(review): if no row exists the comparison below is True (treated
        # as new) - confirm the missing-row case against the full source.
        return (pathinfo.size, pathinfo.mtime, pathinfo.ctime) != row