From: Brian Warner <warner@lothar.com>
Date: Sat, 21 Feb 2009 04:46:06 +0000 (-0700)
Subject: BucketCountingCrawler: rename status and state keys to use 'bucket' instead of 'share', because the former is more accurate
X-Git-Tag: allmydata-tahoe-1.4.0~163
X-Git-Url: https://git.rkrishnan.org/components/com_hotproperty/%22doc.html/running.html?a=commitdiff_plain;h=10778263576f05244f7fee15ebb959dfa3c33c14;p=tahoe-lafs%2Ftahoe-lafs.git

BucketCountingCrawler: rename status and state keys to use 'bucket' instead of 'share', because the former is more accurate
---

diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py
index 2daeeba3..80c23257 100644
--- a/src/allmydata/storage/crawler.py
+++ b/src/allmydata/storage/crawler.py
@@ -336,7 +336,9 @@ class BucketCountingCrawler(ShareCrawler):
     which I am providing storage. The actual number of files+directories in
     the full grid is probably higher (especially when there are more servers
     than 'N', the number of generated shares), because some files+directories
-    will have shares on other servers instead of me.
+    will have shares on other servers instead of me. Also note that the
+    number of buckets will differ from the number of shares in small grids,
+    when more than one share is placed on a single server.
     """
 
     minimum_cycle_time = 60*60 # we don't need this more than once an hour
@@ -346,13 +348,13 @@ class BucketCountingCrawler(ShareCrawler):
         self.num_sample_prefixes = num_sample_prefixes
 
     def add_initial_state(self):
-        # ["share-counts"][cyclenum][prefix] = number
+        # ["bucket-counts"][cyclenum][prefix] = number
         # ["last-complete-cycle"] = cyclenum # maintained by base class
-        # ["last-complete-share-count"] = number
+        # ["last-complete-bucket-count"] = number
         # ["storage-index-samples"][prefix] = (cyclenum,
         #                                      list of SI strings (base32))
-        self.state.setdefault("share-counts", {})
-        self.state.setdefault("last-complete-share-count", None)
+        self.state.setdefault("bucket-counts", {})
+        self.state.setdefault("last-complete-bucket-count", None)
         self.state.setdefault("storage-index-samples", {})
 
     def process_prefixdir(self, cycle, prefix, prefixdir, buckets, start_slice):
@@ -360,22 +362,22 @@ class BucketCountingCrawler(ShareCrawler):
         # the individual buckets. We'll save state after each one. On my
         # laptop, a mostly-empty storage server can process about 70
         # prefixdirs in a 1.0s slice.
-        if cycle not in self.state["share-counts"]:
-            self.state["share-counts"][cycle] = {}
-        self.state["share-counts"][cycle][prefix] = len(buckets)
+        if cycle not in self.state["bucket-counts"]:
+            self.state["bucket-counts"][cycle] = {}
+        self.state["bucket-counts"][cycle][prefix] = len(buckets)
         if prefix in self.prefixes[:self.num_sample_prefixes]:
             self.state["storage-index-samples"][prefix] = (cycle, buckets)
 
     def finished_cycle(self, cycle):
-        last_counts = self.state["share-counts"].get(cycle, [])
+        last_counts = self.state["bucket-counts"].get(cycle, [])
         if len(last_counts) == len(self.prefixes):
             # great, we have a whole cycle.
             num_buckets = sum(last_counts.values())
-            self.state["last-complete-share-count"] = (cycle, num_buckets)
+            self.state["last-complete-bucket-count"] = (cycle, num_buckets)
             # get rid of old counts
-            for old_cycle in list(self.state["share-counts"].keys()):
+            for old_cycle in list(self.state["bucket-counts"].keys()):
                 if old_cycle != cycle:
-                    del self.state["share-counts"][old_cycle]
+                    del self.state["bucket-counts"][old_cycle]
         # get rid of old samples too
         for prefix in list(self.state["storage-index-samples"].keys()):
             old_cycle,buckets = self.state["storage-index-samples"][prefix]
diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py
index b5f83f4e..f0db0a54 100644
--- a/src/allmydata/storage/server.py
+++ b/src/allmydata/storage/server.py
@@ -170,6 +170,11 @@ class StorageServer(service.MultiService, Referenceable):
             # os.statvfs is available only on unix
             pass
         stats["storage_server.accepting_immutable_shares"] = int(writeable)
+        s = self.bucket_counter.get_state()
+        bucket_count = s.get("last-complete-bucket-count")
+        if bucket_count:
+            cycle, count = bucket_count
+            stats["storage_server.total_bucket_count"] = count
         return stats
 
 
diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py
index 435aac20..92c24744 100644
--- a/src/allmydata/test/test_storage.py
+++ b/src/allmydata/test/test_storage.py
@@ -1376,7 +1376,7 @@ class BucketCounter(unittest.TestCase, pollmixin.PollMixin):
             state = ss.bucket_counter.state
             self.failUnlessEqual(state["last-complete-prefix"],
                                  ss.bucket_counter.prefixes[0])
-            state["share-counts"][-12] = {}
+            state["bucket-counts"][-12] = {}
             state["storage-index-samples"]["bogusprefix!"] = (-12, [])
             ss.bucket_counter.save_state()
         d.addCallback(_after_first_prefix)
@@ -1388,7 +1388,7 @@ class BucketCounter(unittest.TestCase, pollmixin.PollMixin):
         def _check2(ignored):
             ss.bucket_counter.cpu_slice = orig_cpu_slice
             s = ss.bucket_counter.get_state()
-            self.failIf(-12 in s["share-counts"], s["share-counts"].keys())
+            self.failIf(-12 in s["bucket-counts"], s["bucket-counts"].keys())
             self.failIf("bogusprefix!" in s["storage-index-samples"],
                         s["storage-index-samples"].keys())
         d.addCallback(_check2)
diff --git a/src/allmydata/web/storage.py b/src/allmydata/web/storage.py
index a759fec9..f3dd37c6 100644
--- a/src/allmydata/web/storage.py
+++ b/src/allmydata/web/storage.py
@@ -63,12 +63,12 @@ class StorageStatus(rend.Page):
         d.setdefault("disk_avail", None)
         return d
 
-    def data_last_complete_share_count(self, ctx, data):
+    def data_last_complete_bucket_count(self, ctx, data):
         s = self.storage.bucket_counter.get_state()
-        lcsc = s.get("last-complete-share-count")
-        if lcsc is None:
+        lcbc = s.get("last-complete-bucket-count")
+        if lcbc is None:
             return "Not computed yet"
-        cycle, count = lcsc
+        cycle, count = lcbc
         return count
 
     def render_count_crawler_status(self, ctx, storage):
diff --git a/src/allmydata/web/storage_status.xhtml b/src/allmydata/web/storage_status.xhtml
index c5683e17..379b23cd 100644
--- a/src/allmydata/web/storage_status.xhtml
+++ b/src/allmydata/web/storage_status.xhtml
@@ -38,7 +38,7 @@
 
   <ul>
     <li>Total buckets:
-       <span n:render="string" n:data="last_complete_share_count" />
+       <span n:render="string" n:data="last_complete_bucket_count" />
        (the number of files and directories for which this server is holding
         a share)
       <ul>