From: Brian Warner Date: Fri, 6 Feb 2009 02:56:40 +0000 (-0700) Subject: #598: add backupdb to 'tahoe backup' command, enable it by default X-Git-Tag: allmydata-tahoe-1.3.0~76 X-Git-Url: https://git.rkrishnan.org/webapi.txt?a=commitdiff_plain;h=add0e1f2f893c72321f69deeb92d88da1da9d30e;p=tahoe-lafs%2Ftahoe-lafs.git #598: add backupdb to 'tahoe backup' command, enable it by default --- diff --git a/src/allmydata/scripts/backupdb.py b/src/allmydata/scripts/backupdb.py index b93270c3..02cf7eb6 100644 --- a/src/allmydata/scripts/backupdb.py +++ b/src/allmydata/scripts/backupdb.py @@ -129,15 +129,16 @@ class BackupDB_v1: it, call r.did_upload(filecap), so I can update my database. If was_uploaded() returns a filecap, you might be able to avoid an - upload. Call r.must_check(), and if it says False, you can skip the + upload. Call r.should_check(), and if it says False, you can skip the upload and use the filecap returned by was_uploaded(). If should_check() returns True, you should perform a filecheck on the filecap returned by was_uploaded(). If the check indicates the file is healthy, please call r.did_check_healthy(checker_results) so I can - update the database. If the check indicates the file is not healthy, - please upload the file and call r.did_upload(filecap) when you're - done. + update the database, using the de-JSONized response from the webapi + t=check call for 'checker_results'. If the check indicates the file + is not healthy, please upload the file and call r.did_upload(filecap) + when you're done. I use_timestamps=True (the default), I will compare ctime and mtime of the local file against an entry in my database, and consider the diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index aad9978b..de5df036 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -193,6 +193,8 @@ class LnOptions(VDriveOptions): class BackupOptions(VDriveOptions): optFlags = [ ("verbose", "v", "Be noisy about what is happening."), + ("no-backupdb", None, "Do not use the backup-database (always upload all files)."), + ("ignore-timestamps", None, "Do not use backupdb timestamps to decide if a local file is unchanged."), ] def parseArgs(self, localdir, topath): diff --git a/src/allmydata/scripts/tahoe_backup.py b/src/allmydata/scripts/tahoe_backup.py index f68cc7fb..205c25ff 100644 --- a/src/allmydata/scripts/tahoe_backup.py +++ b/src/allmydata/scripts/tahoe_backup.py @@ -7,6 +7,7 @@ from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS from allmydata.scripts.common_http import do_http from allmydata import uri from allmydata.util import time_format +from allmydata.scripts import backupdb def raiseHTTPError(msg, resp): msg = msg + ": %s %s %s" % (resp.status, resp.reason, resp.read()) @@ -27,17 +28,27 @@ def parse_old_timestamp(s, options): return when except ValueError: pass + try: # "2008-11-16 10.34 PM" (localtime) if s[-3:] in (" AM", " PM"): # this might raise ValueError - when = time.strptime(s[:-3], "%Y-%m-%d %H.%M") + when = time.strptime(s[:-3], "%Y-%m-%d %I.%M") if s[-3:] == "PM": when += 12*60*60 return when except ValueError: pass + + try: + # "2008-12-31 18.21.43" + when = time.strptime(s, "%Y-%m-%d %H.%M.%S") + return when + except ValueError: + pass + print >>options.stderr, "unable to parse old timestamp '%s', ignoring" % s + return None def readdir(dircap, options): # returns a dict of (childname: (type, readcap, metadata)), or None if the @@ -132,6 +143,14 @@ def backup(options): stdout = options.stdout stderr = options.stderr + use_backupdb = not options["no-backupdb"] + options.backupdb = None + if use_backupdb: + bdbfile = os.path.join(options["node-directory"], + "private", "backupdb.sqlite") + bdbfile = os.path.abspath(bdbfile) + options.backupdb = backupdb.get_backupdb(bdbfile) + rootcap, path = get_alias(options.aliases, options.to_dir, DEFAULT_ALIAS) to_url = nodeurl + "uri/%s/" % urllib.quote(rootcap) if path: @@ -241,24 +260,65 @@ class Node: n = self.__class__() return n.process(localpath, olddircap, self.options) + + def check_backupdb(self, childpath): + if not self.options.backupdb: + return True, None + use_timestamps = not self.options["ignore-timestamps"] + bdb = self.options.backupdb + r = bdb.check_file(childpath, use_timestamps) + + if not r.was_uploaded(): + return True, r + + if not r.should_check(): + # the file was uploaded or checked recently, so we can just use + # it + return False, r + + # we must check the file before using the results + filecap = r.was_uploaded() + self.verboseprint("checking %s" % filecap) + nodeurl = self.options['node-url'] + checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(filecap) + resp = do_http("POST", checkurl) + if resp.status != 200: + # can't check, so we must assume it's bad + return True, r + + cr = simplejson.loads(resp.read()) + healthy = cr["results"]["healthy"] + if not healthy: + # must upload + return True, r + # file is healthy, no need to upload + r.did_check_healthy(cr) + return False, r + def upload(self, childpath): - self.verboseprint("uploading %s.." % childpath) + #self.verboseprint("uploading %s.." % childpath) + metadata = get_local_metadata(childpath) + # we can use the backupdb here - #s = os.stat(childpath) - # ... - # if we go with the old file, we're obligated to use the old - # metadata, to make sure it matches the metadata for this child in - # the old parent directory - # return oldcap, old_metadata + must_upload, bdb_results = self.check_backupdb(childpath) - metadata = get_local_metadata(childpath) - infileobj = open(os.path.expanduser(childpath), "rb") - url = self.options['node-url'] + "uri" - resp = do_http("PUT", url, infileobj) - if resp.status not in (200, 201): - raiseHTTPError("Error during file PUT", resp) - filecap = resp.read().strip() - self.verboseprint(" %s -> %s" % (childpath, filecap)) - self.verboseprint(" metadata: %s" % (metadata,)) - return filecap, metadata + if must_upload: + self.verboseprint("uploading %s.." % childpath) + infileobj = open(os.path.expanduser(childpath), "rb") + url = self.options['node-url'] + "uri" + resp = do_http("PUT", url, infileobj) + if resp.status not in (200, 201): + raiseHTTPError("Error during file PUT", resp) + filecap = resp.read().strip() + self.verboseprint(" %s -> %s" % (childpath, filecap)) + #self.verboseprint(" metadata: %s" % (metadata,)) + + if bdb_results: + bdb_results.did_upload(filecap) + + return filecap, metadata + + else: + self.verboseprint("skipping %s.." % childpath) + return bdb_results.was_uploaded(), metadata