From 10778263576f05244f7fee15ebb959dfa3c33c14 Mon Sep 17 00:00:00 2001
From: Brian Warner <>
Date: Fri, 20 Feb 2009 21:46:06 -0700
Subject: [PATCH] BucketCountingCrawler: rename status and state keys to use
 'bucket' instead of 'share', because the former is more accurate

 src/allmydata/storage/crawler.py       | 26 ++++++++++++++------------
 src/allmydata/storage/server.py        |  5 +++++
 src/allmydata/test/test_storage.py     |  4 ++--
 src/allmydata/web/storage.py           |  8 ++++----
 src/allmydata/web/storage_status.xhtml |  2 +-
 5 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py
index 2daeeba3..80c23257 100644
--- a/src/allmydata/storage/crawler.py
+++ b/src/allmydata/storage/crawler.py
@@ -336,7 +336,9 @@ class BucketCountingCrawler(ShareCrawler):
     which I am providing storage. The actual number of files+directories in
     the full grid is probably higher (especially when there are more servers
     than 'N', the number of generated shares), because some files+directories
-    will have shares on other servers instead of me.
+    will have shares on other servers instead of me. Also note that the
+    number of buckets will differ from the number of shares in small grids,
+    when more than one share is placed on a single server.
     minimum_cycle_time = 60*60 # we don't need this more than once an hour
@@ -346,13 +348,13 @@ class BucketCountingCrawler(ShareCrawler):
         self.num_sample_prefixes = num_sample_prefixes
     def add_initial_state(self):
-        # ["share-counts"][cyclenum][prefix] = number
+        # ["bucket-counts"][cyclenum][prefix] = number
         # ["last-complete-cycle"] = cyclenum # maintained by base class
-        # ["last-complete-share-count"] = number
+        # ["last-complete-bucket-count"] = number
         # ["storage-index-samples"][prefix] = (cyclenum,
         #                                      list of SI strings (base32))
-        self.state.setdefault("share-counts", {})
-        self.state.setdefault("last-complete-share-count", None)
+        self.state.setdefault("bucket-counts", {})
+        self.state.setdefault("last-complete-bucket-count", None)
         self.state.setdefault("storage-index-samples", {})
     def process_prefixdir(self, cycle, prefix, prefixdir, buckets, start_slice):
@@ -360,22 +362,22 @@ class BucketCountingCrawler(ShareCrawler):
         # the individual buckets. We'll save state after each one. On my
         # laptop, a mostly-empty storage server can process about 70
         # prefixdirs in a 1.0s slice.
-        if cycle not in self.state["share-counts"]:
-            self.state["share-counts"][cycle] = {}
-        self.state["share-counts"][cycle][prefix] = len(buckets)
+        if cycle not in self.state["bucket-counts"]:
+            self.state["bucket-counts"][cycle] = {}
+        self.state["bucket-counts"][cycle][prefix] = len(buckets)
         if prefix in self.prefixes[:self.num_sample_prefixes]:
             self.state["storage-index-samples"][prefix] = (cycle, buckets)
     def finished_cycle(self, cycle):
-        last_counts = self.state["share-counts"].get(cycle, [])
+        last_counts = self.state["bucket-counts"].get(cycle, [])
         if len(last_counts) == len(self.prefixes):
             # great, we have a whole cycle.
             num_buckets = sum(last_counts.values())
-            self.state["last-complete-share-count"] = (cycle, num_buckets)
+            self.state["last-complete-bucket-count"] = (cycle, num_buckets)
             # get rid of old counts
-            for old_cycle in list(self.state["share-counts"].keys()):
+            for old_cycle in list(self.state["bucket-counts"].keys()):
                 if old_cycle != cycle:
-                    del self.state["share-counts"][old_cycle]
+                    del self.state["bucket-counts"][old_cycle]
         # get rid of old samples too
         for prefix in list(self.state["storage-index-samples"].keys()):
             old_cycle,buckets = self.state["storage-index-samples"][prefix]
diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py
index b5f83f4e..f0db0a54 100644
--- a/src/allmydata/storage/server.py
+++ b/src/allmydata/storage/server.py
@@ -170,6 +170,11 @@ class StorageServer(service.MultiService, Referenceable):
             # os.statvfs is available only on unix
         stats["storage_server.accepting_immutable_shares"] = int(writeable)
+        s = self.bucket_counter.get_state()
+        bucket_count = s.get("last-complete-bucket-count")
+        if bucket_count:
+            cycle, count = bucket_count
+            stats["storage_server.total_bucket_count"] = count
         return stats
diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py
index 435aac20..92c24744 100644
--- a/src/allmydata/test/test_storage.py
+++ b/src/allmydata/test/test_storage.py
@@ -1376,7 +1376,7 @@ class BucketCounter(unittest.TestCase, pollmixin.PollMixin):
             state = ss.bucket_counter.state
-            state["share-counts"][-12] = {}
+            state["bucket-counts"][-12] = {}
             state["storage-index-samples"]["bogusprefix!"] = (-12, [])
@@ -1388,7 +1388,7 @@ class BucketCounter(unittest.TestCase, pollmixin.PollMixin):
         def _check2(ignored):
             ss.bucket_counter.cpu_slice = orig_cpu_slice
             s = ss.bucket_counter.get_state()
-            self.failIf(-12 in s["share-counts"], s["share-counts"].keys())
+            self.failIf(-12 in s["bucket-counts"], s["bucket-counts"].keys())
             self.failIf("bogusprefix!" in s["storage-index-samples"],
diff --git a/src/allmydata/web/storage.py b/src/allmydata/web/storage.py
index a759fec9..f3dd37c6 100644
--- a/src/allmydata/web/storage.py
+++ b/src/allmydata/web/storage.py
@@ -63,12 +63,12 @@ class StorageStatus(rend.Page):
         d.setdefault("disk_avail", None)
         return d
-    def data_last_complete_share_count(self, ctx, data):
+    def data_last_complete_bucket_count(self, ctx, data):
         s = self.storage.bucket_counter.get_state()
-        lcsc = s.get("last-complete-share-count")
-        if lcsc is None:
+        lcbc = s.get("last-complete-bucket-count")
+        if lcbc is None:
             return "Not computed yet"
-        cycle, count = lcsc
+        cycle, count = lcbc
         return count
     def render_count_crawler_status(self, ctx, storage):
diff --git a/src/allmydata/web/storage_status.xhtml b/src/allmydata/web/storage_status.xhtml
index c5683e17..379b23cd 100644
--- a/src/allmydata/web/storage_status.xhtml
+++ b/src/allmydata/web/storage_status.xhtml
@@ -38,7 +38,7 @@
     <li>Total buckets:
-       <span n:render="string" n:data="last_complete_share_count" />
+       <span n:render="string" n:data="last_complete_bucket_count" />
        (the number of files and directories for which this server is holding
         a share)