# The backupdb is only available if sqlite3 is available. Python-2.5.x and
# beyond include sqlite3 in the standard library. For python-2.4, the
# "pysqlite2" package (which, despite the confusing name, uses sqlite3) must
# be installed. On Debian, install python-pysqlite2.
import os.path
import random
import stat
import sys
import time
# SQL executed via cursor.executescript() to lay out a freshly created
# version-1 backupdb.
#
# NOTE(review): rebuilt from a lossy listing. The assignment line, the string
# delimiters, the parentheses, the "CREATE TABLE version" and "CREATE TABLE
# caps" headers and the local_files.fileid column were missing. They are
# restored from the queries used elsewhere in this file ("SELECT version FROM
# version", "INSERT INTO caps (filecap)", and the five-value "INSERT INTO
# local_files"). Confirm against the upstream file.
SCHEMA_v1 = """
CREATE TABLE version
(
 version INTEGER  -- contains one row, set to 1
);

CREATE TABLE local_files
(
 path  VARCHAR(1024) PRIMARY KEY, -- index, this is os.path.abspath(fn)
 size  INTEGER,       -- os.stat(fn)[stat.ST_SIZE]
 mtime NUMBER,        -- os.stat(fn)[stat.ST_MTIME]
 ctime NUMBER,        -- os.stat(fn)[stat.ST_CTIME]
 fileid INTEGER
);

CREATE TABLE caps
(
 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
 filecap VARCHAR(256) UNIQUE       -- URI:CHK:...
);

CREATE TABLE last_upload
(
 fileid INTEGER PRIMARY KEY,
 last_uploaded TIMESTAMP,
 last_checked TIMESTAMP
);

"""
42 def get_backupdb(dbfile, stderr=sys.stderr):
43 # open or create the given backupdb file. The parent directory must
47 sqlite = sqlite3 # pyflakes whines about 'import sqlite3 as sqlite' ..
50 from pysqlite2 import dbapi2
51 sqlite = dbapi2 # .. when this clause does it too
53 print >>stderr, "sqlite unavailable, not using backupdb"
56 must_create = not os.path.exists(dbfile)
58 db = sqlite.connect(dbfile)
59 except (EnvironmentError, sqlite.OperationalError), e:
60 print >>stderr, "Unable to create/open backupdb file %s: %s" % (dbfile, e)
65 c.executescript(SCHEMA_v1)
66 c.execute("INSERT INTO version (version) VALUES (1)")
70 c.execute("SELECT version FROM version")
71 version = c.fetchone()[0]
72 except sqlite.DatabaseError, e:
73 # this indicates that the file is not a compatible database format.
74 # Perhaps it was created with an old version, or it might be junk.
75 print >>stderr, "backupdb file is unusable: %s" % e
79 return BackupDB_v1(sqlite, db)
80 print >>stderr, "Unable to handle backupdb version %s" % version
# Two status codes, numbered 0 and 1.
# NOTE(review): not referenced anywhere in the visible part of this file.
MUST_UPLOAD, ALREADY_UPLOADED = range(2)
class Result:
    """Answer for one local file, plus hooks to report back what the caller did.

    'bdb' is the database object that is notified by did_upload() and
    did_check_healthy().

    NOTE(review): rebuilt from a lossy listing. The 'class' line (name taken
    from the Result(...) calls elsewhere in this file), the bdb/path/mtime/
    ctime/size assignments in __init__, the body of was_uploaded() and the
    'self.path' argument in did_upload() were missing. They are restored from
    how the attributes are used below and from the BackupDB_v1.did_upload
    signature. Confirm against the upstream file.
    """

    def __init__(self, bdb, filecap, should_check,
                 path, mtime, ctime, size):
        self.bdb = bdb
        self.filecap = filecap
        self.should_check_p = should_check

        # Remembered so did_upload() can record exactly what was examined.
        self.path = path
        self.mtime = mtime
        self.ctime = ctime
        self.size = size

    def was_uploaded(self):
        """Return the recorded filecap, or False if there is none."""
        if self.filecap:
            return self.filecap
        return False

    def did_upload(self, filecap):
        """Record that this file was uploaded and now lives at 'filecap'."""
        self.bdb.did_upload(filecap,
                            self.path,
                            self.mtime, self.ctime, self.size)

    def should_check(self):
        """Return the should_check flag this Result was created with."""
        return self.should_check_p

    def did_check_healthy(self, results):
        """Record that a check of the stored filecap came back healthy."""
        self.bdb.did_check_healthy(self.filecap, results)
class BackupDB_v1:
    """Version-1 backupdb: remembers which local files were uploaded where.

    NOTE(review): rebuilt from a lossy listing. The 'class' line (name taken
    from get_backupdb), every 'try:'/'pass', the 'now'/'c' locals, the
    FROM/WHERE tails of several queries, their parameter tuples and the end
    of get_or_allocate_fileid_for_cap() were missing. They are restored from
    the visible queries and the table layout. Confirm against upstream.
    """

    # NOTE(review): the listing drops one short line between the class header
    # and NO_CHECK_BEFORE (embedded number 113). It could not be recovered
    # from the visible code; restore it from the upstream file.

    # MONTH is a module-level constant defined outside the visible listing.
    # Age is measured from the last recorded check. Below NO_CHECK_BEFORE a
    # file is never re-checked, above ALWAYS_CHECK_AFTER it always is, and
    # in between the chance rises linearly.
    NO_CHECK_BEFORE = 1*MONTH
    ALWAYS_CHECK_AFTER = 2*MONTH

    def __init__(self, sqlite_module, connection):
        # The module is kept so its exception classes can be caught later.
        self.sqlite_module = sqlite_module
        self.connection = connection
        self.cursor = connection.cursor()

    def check_file(self, path, use_timestamps=True):
        """I will tell you if a given local file needs to be uploaded or not,
        by looking in a database and seeing if I have a record of this file
        having been uploaded earlier.

        I return a Results object, synchronously. If r.was_uploaded() returns
        False, you should upload the file. When you are finished uploading
        it, call r.did_upload(filecap), so I can update my database.

        If was_uploaded() returns a filecap, you might be able to avoid an
        upload. Call r.should_check(), and if it says False, you can skip the
        upload and use the filecap returned by was_uploaded().

        If should_check() returns True, you should perform a filecheck on the
        filecap returned by was_uploaded(). If the check indicates the file
        is healthy, please call r.did_check_healthy(checker_results) so I can
        update the database, using the de-JSONized response from the webapi
        t=check call for 'checker_results'. If the check indicates the file
        is not healthy, please upload the file and call r.did_upload(filecap)
        when you are done.

        If use_timestamps=True (the default), I will compare ctime and mtime
        of the local file against an entry in my database, and consider the
        file to be unchanged if ctime, mtime, and filesize are all the same
        as the earlier version. If use_timestamps=False, I will not trust the
        timestamps, so more files (perhaps all) will be marked as needing
        upload. A future version of this database may hash the file to make
        equality decisions, in which case use_timestamps=False will not
        always imply r.was_uploaded()==False.

        'path' points to a local file on disk, possibly relative to the
        current working directory. The database stores absolute pathnames.
        """
        path = os.path.abspath(path)
        s = os.stat(path)
        size = s[stat.ST_SIZE]
        ctime = s[stat.ST_CTIME]
        mtime = s[stat.ST_MTIME]

        now = time.time()
        c = self.cursor

        c.execute("SELECT size,mtime,ctime,fileid"
                  " FROM local_files"
                  " WHERE path=?",
                  (path,))
        row = c.fetchone()
        if not row:
            # never seen this path before
            return Result(self, None, False, path, mtime, ctime, size)
        (last_size, last_mtime, last_ctime, last_fileid) = row

        c.execute("SELECT caps.filecap, last_upload.last_checked"
                  " FROM caps,last_upload"
                  " WHERE caps.fileid=? AND last_upload.fileid=?",
                  (last_fileid, last_fileid))
        row2 = c.fetchone()

        # the file has been changed (or we were told not to trust timestamps)
        changed = (last_size != size
                   or not use_timestamps
                   or last_mtime != mtime
                   or last_ctime != ctime)
        # 'not row2': we somehow forgot where we put the file last time
        if changed or not row2:
            c.execute("DELETE FROM local_files WHERE path=?", (path,))
            self.connection.commit()
            return Result(self, None, False, path, mtime, ctime, size)

        # at this point, we're allowed to assume the file hasn't been changed
        (filecap, last_checked) = row2
        age = now - last_checked

        # Linear ramp from 0 at NO_CHECK_BEFORE to 1 at ALWAYS_CHECK_AFTER,
        # clamped to [0, 1]; a random draw then decides.
        probability = ((age - self.NO_CHECK_BEFORE) /
                       (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
        probability = min(max(probability, 0.0), 1.0)
        should_check = bool(random.random() < probability)

        return Result(self, filecap, should_check, path, mtime, ctime, size)

    def get_or_allocate_fileid_for_cap(self, filecap):
        """Return the fileid for 'filecap', inserting a caps row if needed."""
        # find an existing fileid for this filecap, or insert a new one. The
        # caller is required to commit() afterwards.

        # mysql has "INSERT ... ON DUPLICATE KEY UPDATE", but not sqlite
        # sqlite has "INSERT ON CONFLICT REPLACE", but not mysql
        # So we use INSERT, ignore any error, then a SELECT
        c = self.cursor
        try:
            c.execute("INSERT INTO caps (filecap) VALUES (?)", (filecap,))
        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
            # sqlite3 on sid gives IntegrityError
            # pysqlite2 on dapper gives OperationalError
            pass
        c.execute("SELECT fileid FROM caps WHERE filecap=?", (filecap,))
        foundrow = c.fetchone()
        # NOTE(review): the three lines after fetchone() were lost in the
        # listing; callers use the result as a fileid, so column 0 is
        # returned. Confirm against upstream.
        return foundrow[0]

    def did_upload(self, filecap, path, mtime, ctime, size):
        """Record that 'path' (with these stat values) was uploaded as 'filecap'."""
        # NOTE(review): 'now' and the parameter tuples of the last_upload
        # statements were lost in the listing; the current time is assumed
        # for both last_uploaded and last_checked.
        now = time.time()
        fileid = self.get_or_allocate_fileid_for_cap(filecap)
        # INSERT first, fall back to UPDATE when the row already exists.
        try:
            self.cursor.execute("INSERT INTO last_upload VALUES (?,?,?)",
                                (fileid, now, now))
        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
            self.cursor.execute("UPDATE last_upload"
                                " SET last_uploaded=?, last_checked=?"
                                " WHERE fileid=?",
                                (now, now, fileid))
        try:
            self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?)",
                                (path, size, mtime, ctime, fileid))
        except (self.sqlite_module.IntegrityError, self.sqlite_module.OperationalError):
            self.cursor.execute("UPDATE local_files"
                                " SET size=?, mtime=?, ctime=?, fileid=?"
                                " WHERE path=?",
                                (size, mtime, ctime, fileid, path))
        self.connection.commit()

    def did_check_healthy(self, filecap, results):
        """Record that 'filecap' was just checked and found healthy."""
        # 'results' is not used by the visible code.
        # NOTE(review): 'now', the WHERE clause and the parameter tuple were
        # lost in the listing and are restored by analogy with did_upload().
        now = time.time()
        fileid = self.get_or_allocate_fileid_for_cap(filecap)
        self.cursor.execute("UPDATE last_upload"
                            " SET last_checked=?"
                            " WHERE fileid=?",
                            (now, fileid))
        self.connection.commit()