From 0a819e29127a0855a70ebc258a9f24cd6ba7d2a8 Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@allmydata.com>
Date: Mon, 17 Nov 2008 21:45:16 -0700
Subject: [PATCH] diskwatcher: record total-space (the size of the disk as
 reported by df) in the db, report it to HTTP clients. This will involve a
 50-item-per-second upgrade process when it is first used on old data

---
 misc/spacetime/diskwatcher.py  | 26 +++++++++++--
 misc/spacetime/diskwatcher.tac | 67 ++++++++++++++++++++++++++++++++--
 2 files changed, 85 insertions(+), 8 deletions(-)

diff --git a/misc/spacetime/diskwatcher.py b/misc/spacetime/diskwatcher.py
index d408a150..05a68ac9 100644
--- a/misc/spacetime/diskwatcher.py
+++ b/misc/spacetime/diskwatcher.py
@@ -1,14 +1,32 @@
 
-# put this definition in a separate file, because axiom uses the
-# fully-qualified classname as a database table name, so __builtin__ is kinda
-# ugly.
-
 from axiom.item import Item
 from axiom.attributes import text, integer, timestamp
 
+
 class Sample(Item):
+    # we didn't originally set typeName, so it was generated from the
+    # fully-qualified classname ("diskwatcher.Sample"), then Axiom
+    # automatically lowercases and un-dot-ifies it to get
+    # "diskwatcher_sample". Now we explicitly provide a name.
+    typeName = "diskwatcher_sample"
+
+    # version 2 added the 'total' field
+    schemaVersion = 2
+
     url = text(indexed=True)
     when = timestamp(indexed=True)
+    total = integer()
     used = integer()
     avail = integer()
 
+def upgradeSample1to2(old):
+    # version-1 rows never recorded the disk size, so 'total' starts at 0
+    return old.upgradeVersion("diskwatcher_sample", 1, 2,
+                              url=old.url,
+                              when=old.when,
+                              total=0,
+                              used=old.used,
+                              avail=old.avail)
+
+from axiom.upgrade import registerUpgrader
+registerUpgrader(upgradeSample1to2, "diskwatcher_sample", 1, 2)
diff --git a/misc/spacetime/diskwatcher.tac b/misc/spacetime/diskwatcher.tac
index 5ebedf34..74839e24 100644
--- a/misc/spacetime/diskwatcher.tac
+++ b/misc/spacetime/diskwatcher.tac
@@ -75,9 +75,14 @@ class DiskWatcher(service.MultiService, resource.Resource):
         service.MultiService.__init__(self)
         resource.Resource.__init__(self)
         self.store = Store("history.axiom")
+        self.store.whenFullyUpgraded().addCallback(self._upgrade_complete)
+        service.IService(self.store).setServiceParent(self) # let upgrader run
         ts = internet.TimerService(self.POLL_INTERVAL, self.poll)
         ts.setServiceParent(self)
 
+    def _upgrade_complete(self, ignored):  # fired by store.whenFullyUpgraded()
+        print "Axiom store upgrade complete"
+
     def startService(self):
         service.MultiService.startService(self)
 
@@ -152,7 +157,7 @@ class DiskWatcher(service.MultiService, resource.Resource):
         print "%s : total=%s, used=%s, avail=%s" % (url,
                                                     total, used, avail)
         Sample(store=self.store,
-               url=unicode(url), when=when, used=used, avail=avail)
+               url=unicode(url), when=when, total=total, used=used, avail=avail)
 
     def calculate_growth_timeleft(self):
         timespans = []
@@ -170,6 +175,23 @@ class DiskWatcher(service.MultiService, resource.Resource):
                 timespans.append( (name, timespan, growth, timeleft) )
         return timespans
 
+    def find_total_space(self):
+        # this returns the sum of disk-total stats for all servers that 1)
+        # are listed in urls.txt and 2) have responded recently.
+        now = extime.Time()
+        recent = now - timedelta(seconds=2*self.POLL_INTERVAL)
+        total_space = 0
+        for url in self.get_urls():
+            url = unicode(url)
+            latest = list(self.store.query(Sample,
+                                           AND(Sample.url == url,
+                                               Sample.when > recent),
+                                           sort=Sample.when.descending,
+                                           limit=1))
+            if latest:
+                total_space += latest[0].total
+        return total_space
+
     def find_total_available_space(self):
         # this returns the sum of disk-avail stats for all servers that 1)
         # are listed in urls.txt and 2) have responded recently.
@@ -283,6 +305,37 @@ class DiskWatcher(service.MultiService, resource.Resource):
             return _plural(s/MONTH, "month")
         return _plural(s/YEAR, "year")
 
+    def abbreviate_space2(self, s, SI=True):
+        """Render byte count 's' with a k/M/G/T/P prefix and two decimals,
+        e.g. "1.50 kB". SI=True divides by 1000 ("B" suffix); SI=False
+        divides by 1024 ("iB" suffix). s=None yields "unknown"."""
+        if s is None:
+            return "unknown"
+        if SI:
+            U = 1000.0
+            isuffix = "B"
+        else:
+            U = 1024.0
+            isuffix = "iB"
+        def r(count, suffix):
+            return "%.2f %s%s" % (count, suffix, isuffix)
+
+        if s < 1024: # 1000-1023 get emitted as bytes, even in SI mode
+            return r(s, "")
+        if s < U*U:
+            return r(s/U, "k")
+        if s < U*U*U:
+            return r(s/(U*U), "M")
+        if s < U*U*U*U:
+            return r(s/(U*U*U), "G")
+        if s < U*U*U*U*U:
+            return r(s/(U*U*U*U), "T")
+        return r(s/(U*U*U*U*U), "P")
+
+    def abbreviate_space(self, s):  # -> "(<SI form>, <binary form>)" string
+        return "(%s, %s)" % (self.abbreviate_space2(s, True),
+                             self.abbreviate_space2(s, False))
+
     def render(self, req):
         t = req.args.get("t", ["html"])[0]
         ctype = "text/plain"
@@ -290,12 +343,18 @@ class DiskWatcher(service.MultiService, resource.Resource):
         if t == "html":
             data = ""
             for (name, timespan, growth, timeleft) in self.calculate_growth_timeleft():
-                data += "%f bytes per second, %s remaining (over %s)\n" % \
-                        (growth, self.abbreviate_time(timeleft), name)
+                data += "%f bytes per second (%sps), %s remaining (over %s)\n" % \
+                        (growth, self.abbreviate_space2(growth, True),
+                         self.abbreviate_time(timeleft), name)
             used = self.find_total_used_space()
-            data += "total used: %d bytes\n" % used
+            data += "total used: %d bytes %s\n" % (used,
+                                                   self.abbreviate_space(used))
+            total = self.find_total_space()
+            data += "total space: %d bytes %s\n" % (total,
+                                                    self.abbreviate_space(total))
         elif t == "json":
             current = {"rates": self.calculate_growth_timeleft(),
+                       "total": self.find_total_space(),
                        "used": self.find_total_used_space(),
                        "available": self.find_total_available_space(),
                        }
-- 
2.45.2