From: Brian Warner Date: Fri, 13 Mar 2009 04:22:29 +0000 (-0700) Subject: consolidator: fix cycle detection to not trigger on merely shared directories, add... X-Git-Tag: allmydata-tahoe-1.4.0~60 X-Git-Url: https://git.rkrishnan.org/tahoe_css?a=commitdiff_plain;h=ddb2f1a6e32a1aae50c0f4b9bca6934814dce783;p=tahoe-lafs%2Ftahoe-lafs.git consolidator: fix cycle detection to not trigger on merely shared directories, add snapshot counter to progress --- diff --git a/src/allmydata/scripts/consolidate.py b/src/allmydata/scripts/consolidate.py index 63c4405c..335bfeb8 100644 --- a/src/allmydata/scripts/consolidate.py +++ b/src/allmydata/scripts/consolidate.py @@ -42,7 +42,6 @@ class Consolidator: except sqlite.OperationalError, e: if "table dirhashes already exists" not in str(e): raise - self.visited = set() def read_directory_json(self, dircap): url = self.nodeurl + "uri/%s?t=json" % urllib.quote(dircap) @@ -180,7 +179,7 @@ class Consolidator: snapshots = snapshots[:-1] first_snapshot = True - for (timestamp, rwname, writecap, roname, readcap) in snapshots: + for i,(timestamp, rwname, writecap, roname, readcap) in enumerate(snapshots): start_created = self.directories_created start_used_as_is = self.directories_used_as_is start_reused = self.directories_reused @@ -224,7 +223,7 @@ class Consolidator: # for the others, we must scan their contents and build up a new # readonly directory (which shares common subdirs with previous # backups) - self.msg(" %s: processing" % rwname) + self.msg(" %s: processing (%d/%d)" % (rwname, i+1, len(snapshots))) readcap = self.process_directory(readonly(writecap), (rwname,)) if self.options["really"]: self.msg(" replaced %s" % rwname) @@ -335,14 +334,17 @@ class Consolidator: raiseHTTPError("error during set_children", resp) return dircap - def scan_old_directory(self, dircap): + def scan_old_directory(self, dircap, ancestors=()): # scan this directory (recursively) and stash a hash of its contents # in the DB. This assumes that all subdirs can be used as-is (i.e. # they've already been declared immutable) dircap = readonly(dircap) - if dircap in self.visited: + if dircap in ancestors: raise CycleDetected - self.visited.add(dircap) + ancestors = ancestors + (dircap,) + #self.visited.add(dircap) + # TODO: we could use self.visited as a mapping from dircap to dirhash, + # to avoid re-scanning old shared directories multiple times self.directories_seen.add(dircap) self.directories_used.add(dircap) data = self.read_directory_json(dircap) @@ -350,7 +352,7 @@ class Consolidator: for (childname, (childtype, childdata)) in data["children"].items(): childcap = str(childdata["ro_uri"]) if childtype == "dirnode": - self.scan_old_directory(childcap) + self.scan_old_directory(childcap, ancestors) kids.append( (childname, childcap) ) dirhash = self.hash_directory_contents(kids) self.store_dirhash(dirhash, dircap)