2 import os.path, sys, time, random, stat
4 from allmydata.util.netstring import netstring
5 from allmydata.util.hashutil import backupdb_dirhash
6 from allmydata.util import base32
7 from allmydata.util.fileutil import abspath_expanduser_unicode
8 from allmydata.util.encodingutil import to_str
9 from allmydata.util.dbutil import get_db, DBError
# SQL schema for the backupdb. v1 tracked individual files; v2 added the
# 'directories' table so immutable directories can be deduplicated too.
SCHEMA_v1 = """
CREATE TABLE version -- added in v1
(
 version INTEGER  -- contains one row, set to 2
);

CREATE TABLE local_files -- added in v1
(
 path VARCHAR(1024) PRIMARY KEY, -- index, this is an absolute UTF-8-encoded local filename
 size INTEGER,       -- os.stat(fn)[stat.ST_SIZE]
 mtime NUMBER,       -- os.stat(fn)[stat.ST_MTIME]
 ctime NUMBER,       -- os.stat(fn)[stat.ST_CTIME]
 fileid INTEGER      -- foreign key into 'caps'; see check_file()/did_upload_file()
);

CREATE TABLE caps -- added in v1
(
 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
 filecap VARCHAR(256) UNIQUE       -- URI:CHK:...
);

CREATE TABLE last_upload -- added in v1
(
 fileid INTEGER PRIMARY KEY,
 last_uploaded TIMESTAMP,
 last_checked TIMESTAMP
);
"""

TABLE_DIRECTORY = """
CREATE TABLE directories -- added in v2
(
 dirhash varchar(256) PRIMARY KEY, -- base32(dirhash)
 dircap varchar(256),              -- URI:DIR2-CHK:...
 last_uploaded TIMESTAMP,
 last_checked TIMESTAMP
);
"""

SCHEMA_v2 = SCHEMA_v1 + TABLE_DIRECTORY

UPDATE_v1_to_v2 = TABLE_DIRECTORY + """
UPDATE version SET version=2;
"""

# maps target schema version to the SQL that upgrades the previous version;
# consumed by get_db() via the updaters= argument in get_backupdb() below.
UPDATERS = {
    2: UPDATE_v1_to_v2,
}
def get_backupdb(dbfile, stderr=sys.stderr,
                 create_version=(SCHEMA_v2, 2), just_create=False):
    """Open (or create) the backupdb at 'dbfile' and return a BackupDB_v2
    wrapping it, or None if the database could not be opened.

    The parent directory of 'dbfile' must already exist. 'create_version'
    is the (schema, version) pair used when creating a fresh database;
    older databases are upgraded via UPDATERS.
    """
    try:
        (sqlite3, db) = get_db(dbfile, stderr, create_version, updaters=UPDATERS,
                               just_create=just_create, dbname="backupdb")
        return BackupDB_v2(sqlite3, db)
    except DBError as e:
        # NOTE(review): error path reconstructed — the excerpt is truncated
        # here, but DBError is imported at the top of the file and this is
        # its only plausible consumer. Report the problem and return None so
        # the caller can proceed without a backupdb. TODO confirm against
        # upstream history.
        stderr.write(str(e) + "\n")
        return None
class FileResult:
    """Answer from BackupDB_v2.check_file() about one local file.

    was_uploaded() returns the previously-recorded filecap (or False if the
    file needs uploading); should_check() says whether that filecap is due
    for a health check. After acting, the caller reports back via
    did_upload() or did_check_healthy().
    """
    # NOTE(review): the 'class FileResult:' header and the attribute
    # assignments in __init__ fall in a truncated part of the excerpt; they
    # are reconstructed from the constructor call in check_file() and from
    # the attributes the methods below read.
    def __init__(self, bdb, filecap, should_check,
                 path, mtime, ctime, size):
        self.bdb = bdb
        self.filecap = filecap
        self.should_check_p = should_check
        self.path = path
        self.mtime = mtime
        self.ctime = ctime
        self.size = size

    def was_uploaded(self):
        # the filecap recorded by an earlier upload, or False if none
        if self.filecap:
            return self.filecap
        return False

    def did_upload(self, filecap):
        # record a successful upload (with the stat values captured when
        # check_file() examined the file) so future runs can skip it
        self.bdb.did_upload_file(filecap, self.path,
                                 self.mtime, self.ctime, self.size)

    def should_check(self):
        return self.should_check_p

    def did_check_healthy(self, results):
        # 'results' is the de-JSONized webapi t=check response
        self.bdb.did_check_file_healthy(self.filecap, results)
class DirectoryResult:
    """Answer from BackupDB_v2.check_directory() about one directory's
    contents.

    was_created() returns the previously-recorded dircap (or False if the
    directory must be created); should_check() says whether that dircap is
    due for a health check. The caller reports back via did_create() or
    did_check_healthy().
    """
    # NOTE(review): the self.bdb/self.dircap assignments and the body of
    # was_created() fall in truncated lines of the excerpt; they are
    # reconstructed from the attributes the other methods read.
    def __init__(self, bdb, dirhash, dircap, should_check):
        self.bdb = bdb
        self.dircap = dircap
        self.should_check_p = should_check
        self.dirhash = dirhash

    def was_created(self):
        # the dircap recorded by an earlier mkdir-immutable, or False
        if self.dircap:
            return self.dircap
        return False

    def did_create(self, dircap):
        self.bdb.did_create_directory(dircap, self.dirhash)

    def should_check(self):
        return self.should_check_p

    def did_check_healthy(self, results):
        # 'results' is the de-JSONized webapi t=check response
        self.bdb.did_check_directory_healthy(self.dircap, results)
# --- BackupDB_v2 class attributes and constructor (the 'class' statement
# itself lies in a truncated part of this excerpt). MONTH is a module-level
# constant defined above this excerpt — TODO confirm (presumably 30 days in
# seconds).
# Re-check policy: a recorded cap is never re-checked within NO_CHECK_BEFORE
# of its last check, always re-checked after ALWAYS_CHECK_AFTER, and with
# linearly increasing probability in between (see check_file()).
NO_CHECK_BEFORE = 1*MONTH
ALWAYS_CHECK_AFTER = 2*MONTH

def __init__(self, sqlite_module, connection):
    # sqlite_module: the DB-API module (kept so methods can catch its
    # IntegrityError/OperationalError); connection: an open DB connection.
    self.sqlite_module = sqlite_module
    self.connection = connection
    self.cursor = connection.cursor()
def check_file(self, path, use_timestamps=True):
    """I will tell you if a given local file needs to be uploaded or not,
    by looking in a database and seeing if I have a record of this file
    having been uploaded earlier.

    I return a FileResult object, synchronously. If r.was_uploaded()
    returns False, you should upload the file. When you are finished
    uploading it, call r.did_upload(filecap), so I can update my
    database.

    If was_uploaded() returns a filecap, you might be able to avoid an
    upload. Call r.should_check(), and if it says False, you can skip the
    upload and use the filecap returned by was_uploaded().

    If should_check() returns True, you should perform a filecheck on the
    filecap returned by was_uploaded(). If the check indicates the file
    is healthy, please call r.did_check_healthy(checker_results) so I can
    update the database, using the de-JSONized response from the webapi
    t=check call for 'checker_results'. If the check indicates the file
    is not healthy, please upload the file and call r.did_upload(filecap)
    when you're done.

    If use_timestamps=True (the default), I will compare ctime and mtime
    of the local file against an entry in my database, and consider the
    file to be unchanged if ctime, mtime, and filesize are all the same
    as the earlier version. If use_timestamps=False, I will not trust the
    timestamps, so more files (perhaps all) will be marked as needing
    upload. A future version of this database may hash the file to make
    equality decisions, in which case use_timestamps=False will not
    always imply r.must_upload()==True.

    'path' points to a local file on disk, possibly relative to the
    current working directory. The database stores absolute pathnames.
    """
    path = abspath_expanduser_unicode(path)
    # TODO: consider using get_pathinfo.
    s = os.stat(path)
    size = s[stat.ST_SIZE]
    ctime = s[stat.ST_CTIME]
    mtime = s[stat.ST_MTIME]

    now = time.time()
    c = self.cursor

    c.execute("SELECT size,mtime,ctime,fileid"
              " FROM local_files"
              " WHERE path=?",
              (path,))
    row = self.cursor.fetchone()
    if not row:
        # never seen this path before: definitely needs uploading
        return FileResult(self, None, False, path, mtime, ctime, size)
    (last_size,last_mtime,last_ctime,last_fileid) = row

    c.execute("SELECT caps.filecap, last_upload.last_checked"
              " FROM caps,last_upload"
              " WHERE caps.fileid=? AND last_upload.fileid=?",
              (last_fileid, last_fileid))
    row2 = c.fetchone()

    if ((last_size != size
         or not use_timestamps
         or last_mtime != mtime
         or last_ctime != ctime) # the file has been changed
        or (not row2) # we somehow forgot where we put the file last time
        ):
        # drop the stale record; the caller will re-upload and did_upload()
        # will insert a fresh one
        c.execute("DELETE FROM local_files WHERE path=?", (path,))
        self.connection.commit()
        return FileResult(self, None, False, path, mtime, ctime, size)

    # at this point, we're allowed to assume the file hasn't been changed
    (filecap, last_checked) = row2
    age = now - last_checked

    # linearly ramp the re-check probability from 0 (at NO_CHECK_BEFORE)
    # to 1 (at ALWAYS_CHECK_AFTER), clamped to [0, 1]
    probability = ((age - self.NO_CHECK_BEFORE) /
                   (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
    probability = min(max(probability, 0.0), 1.0)
    should_check = bool(random.random() < probability)

    return FileResult(self, to_str(filecap), should_check,
                      path, mtime, ctime, size)
def get_or_allocate_fileid_for_cap(self, filecap):
    # find an existing fileid for this filecap, or insert a new one. The
    # caller is required to commit() afterwards.

    # mysql has "INSERT ... ON DUPLICATE KEY UPDATE", but not sqlite
    # sqlite has "INSERT ON CONFLICT REPLACE", but not mysql
    # So we use INSERT, ignore any error, then a SELECT
    c = self.cursor
    try:
        c.execute("INSERT INTO caps (filecap) VALUES (?)", (filecap,))
    except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
        # sqlite3 on sid gives IntegrityError
        # pysqlite2 (which we don't use, so maybe no longer relevant) on dapper gives OperationalError
        # either way: the cap was already present, which is fine
        pass
    c.execute("SELECT fileid FROM caps WHERE filecap=?", (filecap,))
    foundrow = c.fetchone()
    # the SELECT must succeed: either the INSERT just added the row, or the
    # UNIQUE conflict proved it was already there
    assert foundrow
    fileid = foundrow[0]
    return fileid
def did_upload_file(self, filecap, path, mtime, ctime, size):
    # Record a successful upload of 'path' as 'filecap', stamping both
    # last_uploaded and last_checked with the current time. Uses the same
    # INSERT-then-UPDATE-on-conflict dance as get_or_allocate_fileid_for_cap
    # to stay portable across DB backends.
    now = time.time()
    fileid = self.get_or_allocate_fileid_for_cap(filecap)
    try:
        self.cursor.execute("INSERT INTO last_upload VALUES (?,?,?)",
                            (fileid, now, now))
    except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
        # row already exists for this fileid: refresh its timestamps
        self.cursor.execute("UPDATE last_upload"
                            " SET last_uploaded=?, last_checked=?"
                            " WHERE fileid=?",
                            (now, now, fileid))
    try:
        self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?)",
                            (path, size, mtime, ctime, fileid))
    except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
        # path already recorded: point it at the (possibly new) fileid
        self.cursor.execute("UPDATE local_files"
                            " SET size=?, mtime=?, ctime=?, fileid=?"
                            " WHERE path=?",
                            (size, mtime, ctime, fileid, path))
    self.connection.commit()
def did_check_file_healthy(self, filecap, results):
    # A filecheck on 'filecap' reported it healthy: push last_checked
    # forward so check_file() won't ask for another check soon.
    # 'results' (the de-JSONized t=check response) is currently unused.
    now = time.time()
    fileid = self.get_or_allocate_fileid_for_cap(filecap)
    self.cursor.execute("UPDATE last_upload"
                        " SET last_checked=?"
                        " WHERE fileid=?",
                        (now, fileid))
    self.connection.commit()
def check_directory(self, contents):
    """I will tell you if a new directory needs to be created for a given
    set of directory contents, or if I know of an existing (immutable)
    directory that can be used instead.

    'contents' should be a dictionary that maps from child name (a single
    unicode string) to immutable childcap (filecap or dircap).

    I return a DirectoryResult object, synchronously. If r.was_created()
    returns False, you should create the directory (with
    t=mkdir-immutable). When you are finished, call r.did_create(dircap)
    so I can update my database.

    If was_created() returns a dircap, you might be able to avoid the
    mkdir. Call r.should_check(), and if it says False, you can skip the
    mkdir and use the dircap returned by was_created().

    If should_check() returns True, you should perform a check operation
    on the dircap returned by was_created(). If the check indicates the
    directory is healthy, please call
    r.did_check_healthy(checker_results) so I can update the database,
    using the de-JSONized response from the webapi t=check call for
    'checker_results'. If the check indicates the directory is not
    healthy, please repair or re-create the directory and call
    r.did_create(dircap) when you're done.
    """
    now = time.time()
    entries = []
    for name in contents:
        entries.append( [name.encode("utf-8"), contents[name]] )
    # canonical ordering, so identical contents always hash identically
    # regardless of dict iteration order
    entries.sort()
    data = "".join([netstring(name_utf8)+netstring(cap)
                    for (name_utf8,cap) in entries])
    dirhash = backupdb_dirhash(data)
    dirhash_s = base32.b2a(dirhash)
    c = self.cursor
    c.execute("SELECT dircap, last_checked"
              " FROM directories WHERE dirhash=?", (dirhash_s,))
    row = c.fetchone()
    if not row:
        # no directory with these exact contents on record
        return DirectoryResult(self, dirhash_s, None, False)
    (dircap, last_checked) = row
    age = now - last_checked

    # same linear re-check ramp as check_file()
    probability = ((age - self.NO_CHECK_BEFORE) /
                   (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
    probability = min(max(probability, 0.0), 1.0)
    should_check = bool(random.random() < probability)

    return DirectoryResult(self, dirhash_s, to_str(dircap), should_check)
def did_create_directory(self, dircap, dirhash):
    now = time.time()
    # if the dirhash is already present (i.e. we've re-uploaded an
    # existing directory, possibly replacing the dircap with a new one),
    # update the record in place. Otherwise create a new record.)
    self.cursor.execute("REPLACE INTO directories VALUES (?,?,?,?)",
                        (dirhash, dircap, now, now))
    self.connection.commit()
def did_check_directory_healthy(self, dircap, results):
    # A check on 'dircap' reported it healthy: push last_checked forward.
    # 'results' (the de-JSONized t=check response) is currently unused.
    now = time.time()
    self.cursor.execute("UPDATE directories"
                        " SET last_checked=?"
                        " WHERE dircap=?",
                        (now, dircap))
    self.connection.commit()