From 3aba70bbcfb75ee0cbed9fc8f90bd54884cbd2bf Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@allmydata.com>
Date: Tue, 30 Sep 2008 16:34:48 -0700
Subject: [PATCH] misc/spacetime: use async polling so we can add a 60-second
 timeout, add an index to the 'url' Axiom column for 2x speedup

---
 misc/spacetime/diskwatcher.py  |  2 +-
 misc/spacetime/diskwatcher.tac | 12 ++++++++----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/misc/spacetime/diskwatcher.py b/misc/spacetime/diskwatcher.py
index 7de54c9f..d408a150 100644
--- a/misc/spacetime/diskwatcher.py
+++ b/misc/spacetime/diskwatcher.py
@@ -7,7 +7,7 @@ from axiom.item import Item
 from axiom.attributes import text, integer, timestamp
 
 class Sample(Item):
-    url = text()
+    url = text(indexed=True)
     when = timestamp(indexed=True)
     used = integer()
     avail = integer()
diff --git a/misc/spacetime/diskwatcher.tac b/misc/spacetime/diskwatcher.tac
index ee24f9d9..5ebedf34 100644
--- a/misc/spacetime/diskwatcher.tac
+++ b/misc/spacetime/diskwatcher.tac
@@ -105,17 +105,19 @@ class DiskWatcher(service.MultiService, resource.Resource):
 
     def poll(self):
         log.msg("polling..")
-        return self.poll_synchronous()
-        #return self.poll_asynchronous()
+        #return self.poll_synchronous()
+        return self.poll_asynchronous()
 
     def poll_asynchronous(self):
         # this didn't actually seem to work any better than poll_synchronous:
         # logs are more noisy, and I got frequent DNS failures. But with a
-        # lot of servers to query, this is probably the better way to go.
+        # lot of servers to query, this is probably the better way to go. A
+        # significant advantage of this approach is that we can use a
+        # timeout= argument to tolerate hanging servers.
         dl = []
         for url in self.get_urls():
             when = extime.Time()
-            d = client.getPage(url)
+            d = client.getPage(url, timeout=60)
             d.addCallback(self.got_response, when, url)
             dl.append(d)
         d = defer.DeferredList(dl)
@@ -132,6 +134,8 @@ class DiskWatcher(service.MultiService, resource.Resource):
             attempts += 1
             try:
                 when = extime.Time()
+                # if a server accepts the connection and then hangs, this
+                # will block forever
                 data_json = urllib.urlopen(url).read()
                 self.got_response(data_json, when, url)
                 fetched += 1
-- 
2.45.2