From ddc9a7ae74005b972acdf2708b32023b19293c10 Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@allmydata.com>
Date: Thu, 12 Mar 2009 20:48:01 -0700
Subject: [PATCH] consolidator: re-use more directories, add total directories
 seen-vs-used counts

---
 src/allmydata/scripts/consolidate.py   | 27 +++++++++++++++++++-------
 src/allmydata/test/test_consolidate.py |  8 ++++----
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/src/allmydata/scripts/consolidate.py b/src/allmydata/scripts/consolidate.py
index ef5c22e2..63c4405c 100644
--- a/src/allmydata/scripts/consolidate.py
+++ b/src/allmydata/scripts/consolidate.py
@@ -139,6 +139,9 @@ class Consolidator:
         self.directories_reused = 0
         self.directories_used_as_is = 0
         self.directories_created = 0
+        self.directories_seen = set()
+        self.directories_used = set()
+
         data = self.read_directory_json(archives_dircap)
         snapshots = {}
 
@@ -237,8 +240,9 @@ class Consolidator:
                      % (rwname,
                         snapshot_created, snapshot_used_as_is, snapshot_reused))
         # done!
-        self.msg(" system done, %d dirs created, %d used as-is, %d reused" \
-                 % (self.directories_created, self.directories_used_as_is,
+        self.msg(" system done, dircounts: %d/%d seen/used, %d created, %d as-is, %d reused" \
+                 % (len(self.directories_seen), len(self.directories_used),
+                    self.directories_created, self.directories_used_as_is,
                     self.directories_reused))
 
     def process_directory(self, readcap, path):
@@ -248,18 +252,22 @@ class Consolidator:
         # for my contents. In all cases I return a directory readcap that
         # points to my contents.
 
+        assert isinstance(readcap, str)
+        self.directories_seen.add(readcap)
+
         # build up contents to pass to mkdir() (which uses t=set_children)
         contents = {} # childname -> (type, rocap, metadata)
         data = self.read_directory_json(readcap)
         assert data is not None
         hashkids = []
-        num_dirs = 0
+        children_modified = False
         for (childname, (childtype, childdata)) in sorted(data["children"].items()):
             if childtype == "dirnode":
-                num_dirs += 1
                 childpath = path + (childname,)
-                childcap = self.process_directory(str(childdata["ro_uri"]),
-                                                  childpath)
+                old_childcap = str(childdata["ro_uri"])
+                childcap = self.process_directory(old_childcap, childpath)
+                if childcap != old_childcap:
+                    children_modified = True
                 contents[childname] = ("dirnode", childcap, None)
             else:
                 childcap = str(childdata["ro_uri"])
@@ -273,8 +281,9 @@ class Consolidator:
                 self.msg("   %s: reused" % "/".join(path))
             assert isinstance(old_dircap, str)
             self.directories_reused += 1
+            self.directories_used.add(old_dircap)
             return old_dircap
-        if num_dirs == 0:
+        if not children_modified:
             # we're allowed to use this directory as-is
             if self.options["verbose"]:
                 self.msg("   %s: used as-is" % "/".join(path))
@@ -282,6 +291,7 @@ class Consolidator:
             assert isinstance(new_dircap, str)
             self.store_dirhash(dirhash, new_dircap)
             self.directories_used_as_is += 1
+            self.directories_used.add(new_dircap)
             return new_dircap
         # otherwise, we need to create a new directory
         if self.options["verbose"]:
@@ -290,6 +300,7 @@ class Consolidator:
         assert isinstance(new_dircap, str)
         self.store_dirhash(dirhash, new_dircap)
         self.directories_created += 1
+        self.directories_used.add(new_dircap)
         return new_dircap
 
     def put_child(self, dircap, childname, childcap):
@@ -332,6 +343,8 @@ class Consolidator:
         if dircap in self.visited:
             raise CycleDetected
         self.visited.add(dircap)
+        self.directories_seen.add(dircap)
+        self.directories_used.add(dircap)
         data = self.read_directory_json(dircap)
         kids = []
         for (childname, (childtype, childdata)) in data["children"].items():
diff --git a/src/allmydata/test/test_consolidate.py b/src/allmydata/test/test_consolidate.py
index 60f1441e..d1562a6f 100644
--- a/src/allmydata/test/test_consolidate.py
+++ b/src/allmydata/test/test_consolidate.py
@@ -176,8 +176,8 @@ class Consolidate(GridTestMixin, CLITestMixin, unittest.TestCase):
             lines = out.splitlines()
             last = lines[-1]
             self.failUnlessEqual(last.strip(),
-                                 "system done, "
-                                 "7 dirs created, 2 used as-is, 13 reused")
+                                 "system done, dircounts: "
+                                 "25/12 seen/used, 7 created, 2 as-is, 13 reused")
             self.failUnless(os.path.exists(dbfile))
             self.failUnless(os.path.exists(backupfile))
             self.first_backup = backup = pickle.load(open(backupfile, "rb"))
@@ -197,8 +197,8 @@ class Consolidate(GridTestMixin, CLITestMixin, unittest.TestCase):
             lines = out.splitlines()
             last = lines[-1]
             self.failUnlessEqual(last.strip(),
-                                 "system done, "
-                                 "0 dirs created, 0 used as-is, 0 reused")
+                                 "system done, dircounts: "
+                                 "0/0 seen/used, 0 created, 0 as-is, 0 reused")
             backup = pickle.load(open(backupfile, "rb"))
             self.failUnlessEqual(backup, self.first_backup)
             self.failUnless(os.path.exists(backupfile + ".0"))
-- 
2.45.2