2 # the backupdb is only available if sqlite3 is available. Python-2.5.x and
3 # beyond include sqlite3 in the standard library. For python-2.4, the
4 # "pysqlite2" package (which, despite the confusing name, uses sqlite3) must
5 # be installed. On debian, install python-pysqlite2
7 import os.path, sys, time, random, stat
15 version INTEGER -- contains one row, set to 1
18 CREATE TABLE local_files
20 path VARCHAR(1024) PRIMARY KEY, -- index, this is os.path.abspath(fn)
21 size INTEGER, -- os.stat(fn)[stat.ST_SIZE]
22 mtime NUMBER, -- os.stat(fn)[stat.ST_MTIME]
23 ctime NUMBER, -- os.stat(fn)[stat.ST_CTIME]
29 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
30 filecap VARCHAR(256) UNIQUE -- URI:CHK:...
33 CREATE TABLE last_upload
35 fileid INTEGER PRIMARY KEY,
36 last_uploaded TIMESTAMP,
37 last_checked TIMESTAMP
# Open the backup database stored in `dbfile` and hand back a wrapper for it.
# Every failure path visible here reports its problem by printing to `stderr`
# rather than raising. What is returned after such a diagnostic is not visible
# in this excerpt -- TODO confirm against the full file.
42 def get_backupdb(dbfile, stderr=sys.stderr):
43 # open or create the given backupdb file. The parent directory must
# `sqlite` ends up bound to whichever DB-API module is importable: sqlite3,
# or pysqlite2's dbapi2 as the fallback for older Pythons.
47 sqlite = sqlite3 # pyflakes whines about 'import sqlite3 as sqlite' ..
50 from pysqlite2 import dbapi2
51 sqlite = dbapi2 # .. when this clause does it too
53 print >>stderr, "sqlite unavailable, not using backupdb"
# Sampled before connect() is called, so it records whether the file existed
# on entry. Presumably this gates the SCHEMA_v1 setup below; the guarding
# condition is not visible here -- TODO confirm.
56 must_create = not os.path.exists(dbfile)
58 db = sqlite.connect(dbfile)
59 except (EnvironmentError, sqlite.OperationalError), e:
60 print >>stderr, "Unable to create/open backupdb file %s: %s" % (dbfile, e)
# Run the SCHEMA_v1 script, then record schema version 1 in the `version`
# table.
65 c.executescript(SCHEMA_v1)
66 c.execute("INSERT INTO version (version) VALUES (1)")
# Read the stored schema version back out of the database.
70 c.execute("SELECT version FROM version")
71 version = c.fetchone()[0]
72 except sqlite.DatabaseError, e:
73 # this indicates that the file is not a compatible database format.
74 # Perhaps it was created with an old version, or it might be junk.
75 print >>stderr, "backupdb file is unusable: %s" % e
# The wrapper receives both the DB-API module (it needs the module's exception
# classes later) and the open connection.
79 return BackupDB_v1(sqlite, db)
80 print >>stderr, "Unable to handle backupdb version %s" % version
83 MUST_UPLOAD, ALREADY_UPLOADED = range(2)
# Build the result object handed out by check_file(). `bdb` is the database
# wrapper the result reports back to; `filecap` is None when no usable upload
# record was found (see the check_file() call sites). The flag is stored as
# `should_check_p`, and the should_check() method returns it. The remaining
# assignments (bdb, path, mtime, ctime, size) are not visible in this excerpt,
# but did_upload() reads self.bdb, self.mtime, self.ctime and self.size.
85 def __init__(self, bdb, filecap, should_check,
86 path, mtime, ctime, size):
88 self.filecap = filecap
89 self.should_check_p = should_check
# Per the check_file() docstring: returns False when the file must be
# uploaded, otherwise the filecap of the earlier upload. The body is not
# visible in this excerpt -- TODO confirm.
96 def was_uploaded(self):
# Tell the database that the file was uploaded and now lives at `filecap`.
# Forwards to the database wrapper's did_upload() together with the
# mtime/ctime/size captured when this result was created. The argument on the
# missing line is presumably self.path -- TODO confirm.
101 def did_upload(self, filecap):
102 self.bdb.did_upload(filecap,
104 self.mtime, self.ctime, self.size)
# Return the should_check flag that was passed to the constructor. True means
# the caller should run a filecheck on the filecap (see the check_file()
# docstring).
106 def should_check(self):
107 return self.should_check_p
# Report a successful filecheck of this result's filecap back to the database
# wrapper. `results` is passed through unchanged.
109 def did_check_healthy(self, results):
110 self.bdb.did_check_healthy(self.filecap, results)
# Bounds of the check-probability ramp used by check_file(): the probability
# is (age - NO_CHECK_BEFORE) / (ALWAYS_CHECK_AFTER - NO_CHECK_BEFORE), clamped
# to [0.0, 1.0], where age is the time since last_checked. A file checked more
# recently than NO_CHECK_BEFORE is never re-checked; one older than
# ALWAYS_CHECK_AFTER always is. MONTH is defined outside this excerpt.
114 NO_CHECK_BEFORE = 1*MONTH
115 ALWAYS_CHECK_AFTER = 2*MONTH
# Wrap an already-open database connection. `sqlite_module` is kept so that
# methods can catch its IntegrityError; `connection` is kept for commit().
# One cursor is created here and stored on the instance.
117 def __init__(self, sqlite_module, connection):
118 self.sqlite_module = sqlite_module
119 self.connection = connection
120 self.cursor = connection.cursor()
122 def check_file(self, path, use_timestamps=True):
123 """I will tell you if a given local file needs to be uploaded or not,
124 by looking in a database and seeing if I have a record of this file
125 having been uploaded earlier.
127 I return a Results object, synchronously. If r.was_uploaded() returns
128 False, you should upload the file. When you are finished uploading
129 it, call r.did_upload(filecap), so I can update my database.
131 If was_uploaded() returns a filecap, you might be able to avoid an
132 upload. Call r.should_check(), and if it says False, you can skip the
133 upload and use the filecap returned by was_uploaded().
135 If should_check() returns True, you should perform a filecheck on the
136 filecap returned by was_uploaded(). If the check indicates the file
137 is healthy, please call r.did_check_healthy(checker_results) so I can
138 update the database. If the check indicates the file is not healthy,
139 please upload the file and call r.did_upload(filecap) when you're
142 If use_timestamps=True (the default), I will compare ctime and mtime
143 of the local file against an entry in my database, and consider the
144 file to be unchanged if ctime, mtime, and filesize are all the same
145 as the earlier version. If use_timestamps=False, I will not trust the
146 timestamps, so more files (perhaps all) will be marked as needing
147 upload. A future version of this database may hash the file to make
148 equality decisions, in which case use_timestamps=False will not
149 always imply r.was_uploaded()==False.
151 'path' points to a local file on disk, possibly relative to the
152 current working directory. The database stores absolute pathnames.
155 path = os.path.abspath(path)
157 size = s[stat.ST_SIZE]
158 ctime = s[stat.ST_CTIME]
159 mtime = s[stat.ST_MTIME]
164 c.execute("SELECT size,mtime,ctime,fileid"
168 row = self.cursor.fetchone()
170 return Result(self, None, False, path, mtime, ctime, size)
171 (last_size,last_mtime,last_ctime,last_fileid) = row
173 c.execute("SELECT caps.filecap, last_upload.last_checked"
174 " FROM caps,last_upload"
175 " WHERE caps.fileid=? AND last_upload.fileid=?",
176 (last_fileid, last_fileid))
179 if ((last_size != size
180 or not use_timestamps
181 or last_mtime != mtime
182 or last_ctime != ctime) # the file has been changed
183 or (not row2) # we somehow forgot where we put the file last time
185 c.execute("DELETE FROM local_files WHERE path=?", (path,))
186 self.connection.commit()
187 return Result(self, None, False, path, mtime, ctime, size)
189 # at this point, we're allowed to assume the file hasn't been changed
190 (filecap, last_checked) = row2
191 age = now - last_checked
193 probability = ((age - self.NO_CHECK_BEFORE) /
194 (self.ALWAYS_CHECK_AFTER - self.NO_CHECK_BEFORE))
195 probability = min(max(probability, 0.0), 1.0)
196 should_check = bool(random.random() < probability)
198 return Result(self, filecap, should_check, path, mtime, ctime, size)
# Map `filecap` to its fileid in the `caps` table, adding a row when needed.
# Callers in this file assign the result to `fileid`; the return statement
# itself is not visible in this excerpt.
200 def get_or_allocate_fileid_for_cap(self, filecap):
201 # find an existing fileid for this filecap, or insert a new one. The
202 # caller is required to commit() afterwards.
204 # mysql has "INSERT ... ON DUPLICATE KEY UPDATE", but not sqlite
205 # sqlite has "INSERT ON CONFLICT REPLACE", but not mysql
206 # So we use INSERT, ignore any error, then a SELECT
# The schema declares caps.filecap UNIQUE, so inserting a filecap that is
# already present raises IntegrityError. That is the one error caught here.
209 c.execute("INSERT INTO caps (filecap) VALUES (?)", (filecap,))
210 except self.sqlite_module.IntegrityError:
# Whether or not the INSERT succeeded, the row now exists, so look it up.
212 c.execute("SELECT fileid FROM caps WHERE filecap=?", (filecap,))
213 foundrow = c.fetchone()
# Record that the local file at `path` (with the given mtime/ctime/size) was
# uploaded and is stored at `filecap`. Both tables are written with the same
# pattern: try INSERT, and if the primary key already exists (IntegrityError)
# fall back to UPDATE. Everything is committed once at the end.
218 def did_upload(self, filecap, path, mtime, ctime, size):
220 fileid = self.get_or_allocate_fileid_for_cap(filecap)
# last_upload is keyed by fileid. The bound values for these two statements
# are on lines not visible in this excerpt.
222 self.cursor.execute("INSERT INTO last_upload VALUES (?,?,?)",
224 except self.sqlite_module.IntegrityError:
225 self.cursor.execute("UPDATE last_upload"
226 " SET last_uploaded=?, last_checked=?"
# local_files is keyed by path, so an existing row for this path is updated
# in place to point at the new fileid.
230 self.cursor.execute("INSERT INTO local_files VALUES (?,?,?,?,?)",
231 (path, size, mtime, ctime, fileid))
232 except self.sqlite_module.IntegrityError:
233 self.cursor.execute("UPDATE local_files"
234 " SET size=?, mtime=?, ctime=?, fileid=?"
236 (size, mtime, ctime, fileid, path))
# Also commits the caps row that get_or_allocate_fileid_for_cap() may have
# inserted; that helper leaves the commit to its caller.
237 self.connection.commit()
# Record that a filecheck found `filecap` healthy by updating last_checked on
# its last_upload row, then commit. `results` is not used in the lines visible
# here. The WHERE clause and bound values are on lines missing from this
# excerpt -- TODO confirm.
239 def did_check_healthy(self, filecap, results):
241 fileid = self.get_or_allocate_fileid_for_cap(filecap)
242 self.cursor.execute("UPDATE last_upload"
243 " SET last_checked=?"
246 self.connection.commit()