From 10778263576f05244f7fee15ebb959dfa3c33c14 Mon Sep 17 00:00:00 2001 From: Brian Warner Date: Fri, 20 Feb 2009 21:46:06 -0700 Subject: [PATCH] BucketCountingCrawler: rename status and state keys to use 'bucket' instead of 'share', because the former is more accurate --- src/allmydata/storage/crawler.py | 26 ++++++++++++++------------ src/allmydata/storage/server.py | 5 +++++ src/allmydata/test/test_storage.py | 4 ++-- src/allmydata/web/storage.py | 8 ++++---- src/allmydata/web/storage_status.xhtml | 2 +- 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 2daeeba3..80c23257 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -336,7 +336,9 @@ class BucketCountingCrawler(ShareCrawler): which I am providing storage. The actual number of files+directories in the full grid is probably higher (especially when there are more servers than 'N', the number of generated shares), because some files+directories - will have shares on other servers instead of me. + will have shares on other servers instead of me. Also note that the + number of buckets will differ from the number of shares in small grids, + when more than one share is placed on a single server. """ minimum_cycle_time = 60*60 # we don't need this more than once an hour @@ -346,13 +348,13 @@ class BucketCountingCrawler(ShareCrawler): self.num_sample_prefixes = num_sample_prefixes def add_initial_state(self): - # ["share-counts"][cyclenum][prefix] = number + # ["bucket-counts"][cyclenum][prefix] = number # ["last-complete-cycle"] = cyclenum # maintained by base class - # ["last-complete-share-count"] = number + # ["last-complete-bucket-count"] = number # ["storage-index-samples"][prefix] = (cyclenum, # list of SI strings (base32)) - self.state.setdefault("share-counts", {}) - self.state.setdefault("last-complete-share-count", None) + self.state.setdefault("bucket-counts", {}) + self.state.setdefault("last-complete-bucket-count", None) self.state.setdefault("storage-index-samples", {}) def process_prefixdir(self, cycle, prefix, prefixdir, buckets, start_slice): @@ -360,22 +362,22 @@ class BucketCountingCrawler(ShareCrawler): # the individual buckets. We'll save state after each one. On my # laptop, a mostly-empty storage server can process about 70 # prefixdirs in a 1.0s slice. - if cycle not in self.state["share-counts"]: - self.state["share-counts"][cycle] = {} - self.state["share-counts"][cycle][prefix] = len(buckets) + if cycle not in self.state["bucket-counts"]: + self.state["bucket-counts"][cycle] = {} + self.state["bucket-counts"][cycle][prefix] = len(buckets) if prefix in self.prefixes[:self.num_sample_prefixes]: self.state["storage-index-samples"][prefix] = (cycle, buckets) def finished_cycle(self, cycle): - last_counts = self.state["share-counts"].get(cycle, []) + last_counts = self.state["bucket-counts"].get(cycle, []) if len(last_counts) == len(self.prefixes): # great, we have a whole cycle. num_buckets = sum(last_counts.values()) - self.state["last-complete-share-count"] = (cycle, num_buckets) + self.state["last-complete-bucket-count"] = (cycle, num_buckets) # get rid of old counts - for old_cycle in list(self.state["share-counts"].keys()): + for old_cycle in list(self.state["bucket-counts"].keys()): if old_cycle != cycle: - del self.state["share-counts"][old_cycle] + del self.state["bucket-counts"][old_cycle] # get rid of old samples too for prefix in list(self.state["storage-index-samples"].keys()): old_cycle,buckets = self.state["storage-index-samples"][prefix] diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index b5f83f4e..f0db0a54 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -170,6 +170,11 @@ class StorageServer(service.MultiService, Referenceable): # os.statvfs is available only on unix pass stats["storage_server.accepting_immutable_shares"] = int(writeable) + s = self.bucket_counter.get_state() + bucket_count = s.get("last-complete-bucket-count") + if bucket_count: + cycle, count = bucket_count + stats["storage_server.total_bucket_count"] = count return stats diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 435aac20..92c24744 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -1376,7 +1376,7 @@ class BucketCounter(unittest.TestCase, pollmixin.PollMixin): state = ss.bucket_counter.state self.failUnlessEqual(state["last-complete-prefix"], ss.bucket_counter.prefixes[0]) - state["share-counts"][-12] = {} + state["bucket-counts"][-12] = {} state["storage-index-samples"]["bogusprefix!"] = (-12, []) ss.bucket_counter.save_state() d.addCallback(_after_first_prefix) @@ -1388,7 +1388,7 @@ class BucketCounter(unittest.TestCase, pollmixin.PollMixin): def _check2(ignored): ss.bucket_counter.cpu_slice = orig_cpu_slice s = ss.bucket_counter.get_state() - self.failIf(-12 in s["share-counts"], s["share-counts"].keys()) + self.failIf(-12 in s["bucket-counts"], s["bucket-counts"].keys()) self.failIf("bogusprefix!" in s["storage-index-samples"], s["storage-index-samples"].keys()) d.addCallback(_check2) diff --git a/src/allmydata/web/storage.py b/src/allmydata/web/storage.py index a759fec9..f3dd37c6 100644 --- a/src/allmydata/web/storage.py +++ b/src/allmydata/web/storage.py @@ -63,12 +63,12 @@ class StorageStatus(rend.Page): d.setdefault("disk_avail", None) return d - def data_last_complete_share_count(self, ctx, data): + def data_last_complete_bucket_count(self, ctx, data): s = self.storage.bucket_counter.get_state() - lcsc = s.get("last-complete-share-count") - if lcsc is None: + lcbc = s.get("last-complete-bucket-count") + if lcbc is None: return "Not computed yet" - cycle, count = lcsc + cycle, count = lcbc return count def render_count_crawler_status(self, ctx, storage): diff --git a/src/allmydata/web/storage_status.xhtml b/src/allmydata/web/storage_status.xhtml index c5683e17..379b23cd 100644 --- a/src/allmydata/web/storage_status.xhtml +++ b/src/allmydata/web/storage_status.xhtml @@ -38,7 +38,7 @@