From ef002c935a15eb76f797e5707f0e2248c82deabc Mon Sep 17 00:00:00 2001
From: david-sarah <david-sarah@jacaranda.org>
Date: Fri, 20 Nov 2009 21:56:44 -0800
Subject: [PATCH] storage server: detect disk space usage on Windows too (fixes
 #637)

---
 _auto_deps.py                      |  15 ++-
 docs/configuration.txt             |   6 +-
 src/allmydata/storage/server.py    | 149 ++++++++++++++++++-----------
 src/allmydata/test/test_storage.py |  68 ++++++++++---
 4 files changed, 156 insertions(+), 82 deletions(-)

diff --git a/_auto_deps.py b/_auto_deps.py
index 923ee36c..5becd1cf 100644
--- a/_auto_deps.py
+++ b/_auto_deps.py
@@ -37,19 +37,18 @@ if sys.version_info < (2, 5):
     install_requires.append("pysqlite >= 2.0.5")
 
 ## The following block is commented-out because there is not currently a pywin32 package which
-## can be easy_install'ed and also which actually makes "import win32api" succeed.  Users have
-## to manually install pywin32 on Windows before installing Tahoe.
+## can be easy_install'ed and also which actually makes "import win32api" succeed.
+## See http://sourceforge.net/tracker/index.php?func=detail&aid=1799934&group_id=78018&atid=551954
+## Users have to manually install pywin32 on Windows before installing Tahoe.
 ##import platform
 ##if platform.system() == "Windows":
 ##    # Twisted requires pywin32 if it is going to offer process management functionality, or if
 ##    # it is going to offer iocp reactor.  We currently require process management.  It would be
 ##    # better if Twisted would declare that it requires pywin32 if it is going to offer process
-##    # management.  Then the specification and the evolution of Twisted's reliance on pywin32 can
-##    # be confined to the Twisted setup data, and Tahoe can remain blissfully ignorant about such
-##    # things as if a future version of Twisted requires a different version of pywin32, or if a
-##    # future version of Twisted implements process management without using pywin32 at all,
-##    # etc..  That is twisted ticket #3238 -- http://twistedmatrix.com/trac/ticket/3238 .  But
-##    # until Twisted does that, Tahoe needs to be non-ignorant of the following requirement:
+##    # management.  That is twisted ticket #3238 -- http://twistedmatrix.com/trac/ticket/3238 .
+##    # On the other hand, Tahoe also depends on pywin32 for getting free disk space statistics
+##    # (although that is not a hard requirement: if win32api can't be imported then we don't
+##    # rely on having the disk stats).
 ##    install_requires.append('pywin32')
 
 if hasattr(sys, 'frozen'): # for py2exe
diff --git a/docs/configuration.txt b/docs/configuration.txt
index 994f9ac9..1e4b248a 100644
--- a/docs/configuration.txt
+++ b/docs/configuration.txt
@@ -302,8 +302,10 @@ readonly = (boolean, optional)
 reserved_space = (str, optional)
 
  If provided, this value defines how much disk space is reserved: the storage
- server will not accept any share which causes the amount of free space (as
- measured by 'df', or more specifically statvfs(2)) to drop below this value.
+ server will not accept any share which causes the amount of free disk space
+ to drop below this value. (The free space is measured by a call to statvfs(2)
+ on Unix, or GetDiskFreeSpaceEx on Windows, and is the space available to the
+ user account under which the storage server runs.)
 
  This string contains a number, with an optional case-insensitive scale
  suffix like "K" or "M" or "G", and an optional "B" or "iB" suffix. So
diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py
index 2b0f8af6..b9e45575 100644
--- a/src/allmydata/storage/server.py
+++ b/src/allmydata/storage/server.py
@@ -36,6 +36,16 @@ class StorageServer(service.MultiService, Referenceable):
     implements(RIStorageServer, IStatsProducer)
     name = 'storage'
     LeaseCheckerClass = LeaseCheckingCrawler
+    windows = False
+
+    try:
+        import win32api, win32con
+        windows = True
+        # <http://msdn.microsoft.com/en-us/library/ms680621%28VS.85%29.aspx>
+        win32api.SetErrorMode(win32con.SEM_FAILCRITICALERRORS |
+                              win32con.SEM_NOOPENFILEERRORBOX)
+    except ImportError:
+        pass
 
     def __init__(self, storedir, nodeid, reserved_space=0,
                  discard_storage=False, readonly_storage=False,
@@ -70,7 +80,7 @@ class StorageServer(service.MultiService, Referenceable):
 
         if reserved_space:
             if self.get_available_space() is None:
-                log.msg("warning: [storage]reserved_space= is set, but this platform does not support statvfs(2), so this reservation cannot be honored",
+                log.msg("warning: [storage]reserved_space= is set, but this platform does not support an API to get disk statistics (statvfs(2) or GetDiskFreeSpaceEx), so this reservation cannot be honored",
                         umin="0wZ27w", level=log.UNUSUAL)
 
         self.latencies = {"allocate": [], # immutable
@@ -147,22 +157,35 @@ class StorageServer(service.MultiService, Referenceable):
     def _clean_incomplete(self):
         fileutil.rm_dir(self.incomingdir)
 
-    def do_statvfs(self):
-        return os.statvfs(self.storedir)
+    def get_disk_stats(self):
+        """Return disk statistics for the storage disk, in the form of a dict
+        with the following fields.
+          total:            total bytes on disk
+          free_for_root:    bytes actually free on disk
+          free_for_nonroot: bytes free for "a non-privileged user" [Unix] or
+                              the current user [Windows]; might take into
+                              account quotas depending on platform
+          used:             bytes used on disk
+          avail:            free_for_nonroot minus reserved_space, never below 0
+        An AttributeError can occur if the OS has no API to get disk information.
+        An EnvironmentError can occur if the OS call fails."""
+
+        if self.windows:
+            # For Windows systems, where os.statvfs is not available, use GetDiskFreeSpaceEx.
+            # <http://docs.activestate.com/activepython/2.5/pywin32/win32api__GetDiskFreeSpaceEx_meth.html>
+            #
+            # Although the docs say that the argument should be the root directory
+            # of a disk, GetDiskFreeSpaceEx actually accepts any path on that disk
+            # (like its Win32 equivalent).
+
+            (free_for_nonroot, total, free_for_root) = self.win32api.GetDiskFreeSpaceEx(self.storedir)
+        else:
+            # For Unix-like systems.
+            # <http://docs.python.org/library/os.html#os.statvfs>
+            # <http://opengroup.org/onlinepubs/7990989799/xsh/fstatvfs.html>
+            # <http://opengroup.org/onlinepubs/7990989799/xsh/sysstatvfs.h.html>
+            s = os.statvfs(self.storedir)
 
-    def get_stats(self):
-        # remember: RIStatsProvider requires that our return dict
-        # contains numeric values.
-        stats = { 'storage_server.allocated': self.allocated_size(), }
-        stats["storage_server.reserved_space"] = self.reserved_space
-        for category,ld in self.get_latencies().items():
-            for name,v in ld.items():
-                stats['storage_server.latencies.%s.%s' % (category, name)] = v
-        writeable = True
-        if self.readonly_storage:
-            writeable = False
-        try:
-            s = self.do_statvfs()
             # on my mac laptop:
             #  statvfs(2) is a wrapper around statfs(2).
             #    statvfs.f_frsize = statfs.f_bsize :
@@ -173,55 +196,67 @@ class StorageServer(service.MultiService, Referenceable):
             # wrong, and s.f_blocks*s.f_frsize is twice the size of my disk,
             # but s.f_bavail*s.f_frsize is correct
 
-            disk_total = s.f_frsize * s.f_blocks
-            disk_used = s.f_frsize * (s.f_blocks - s.f_bfree)
-            # spacetime predictors should look at the slope of disk_used.
-            disk_free_for_root = s.f_frsize * s.f_bfree
-            disk_free_for_nonroot = s.f_frsize * s.f_bavail
-
-            # include our local policy here: if we stop accepting shares when
-            # the available space drops below 1GB, then include that fact in
-            # disk_avail.
-            disk_avail = disk_free_for_nonroot - self.reserved_space
-            disk_avail = max(disk_avail, 0)
-            if self.readonly_storage:
-                disk_avail = 0
-            if disk_avail == 0:
-                writeable = False
+            total = s.f_frsize * s.f_blocks
+            free_for_root = s.f_frsize * s.f_bfree
+            free_for_nonroot = s.f_frsize * s.f_bavail
+
+        # valid for all platforms:
+        used = total - free_for_root
+        avail = max(free_for_nonroot - self.reserved_space, 0)
+
+        return { 'total': total, 'free_for_root': free_for_root,
+                 'free_for_nonroot': free_for_nonroot,
+                 'used': used, 'avail': avail, }
+
+    def get_stats(self):
+        # remember: RIStatsProvider requires that our return dict
+        # contains numeric values.
+        stats = { 'storage_server.allocated': self.allocated_size(), }
+        stats['storage_server.reserved_space'] = self.reserved_space
+        for category,ld in self.get_latencies().items():
+            for name,v in ld.items():
+                stats['storage_server.latencies.%s.%s' % (category, name)] = v
+
+        try:
+            disk = self.get_disk_stats()
+            writeable = disk['avail'] > 0
 
             # spacetime predictors should use disk_avail / (d(disk_used)/dt)
-            stats["storage_server.disk_total"] = disk_total
-            stats["storage_server.disk_used"] = disk_used
-            stats["storage_server.disk_free_for_root"] = disk_free_for_root
-            stats["storage_server.disk_free_for_nonroot"] = disk_free_for_nonroot
-            stats["storage_server.disk_avail"] = disk_avail
+            stats['storage_server.disk_total'] = disk['total']
+            stats['storage_server.disk_used'] = disk['used']
+            stats['storage_server.disk_free_for_root'] = disk['free_for_root']
+            stats['storage_server.disk_free_for_nonroot'] = disk['free_for_nonroot']
+            stats['storage_server.disk_avail'] = disk['avail']
         except AttributeError:
-            # os.statvfs is available only on unix
-            pass
-        stats["storage_server.accepting_immutable_shares"] = int(writeable)
+            writeable = True
+        except EnvironmentError:
+            log.msg("OS call to get disk statistics failed", level=log.UNUSUAL)
+            writeable = False
+
+        if self.readonly_storage:
+            stats['storage_server.disk_avail'] = 0
+            writeable = False
+
+        stats['storage_server.accepting_immutable_shares'] = int(writeable)
         s = self.bucket_counter.get_state()
         bucket_count = s.get("last-complete-bucket-count")
         if bucket_count:
-            stats["storage_server.total_bucket_count"] = bucket_count
+            stats['storage_server.total_bucket_count'] = bucket_count
         return stats
 
-
-    def stat_disk(self, d):
-        s = os.statvfs(d)
-        # s.f_bavail: available to non-root users
-        disk_avail = s.f_frsize * s.f_bavail
-        return disk_avail
-
     def get_available_space(self):
-        # returns None if it cannot be measured (windows)
+        """Returns available space for share storage in bytes, or None if no
+        API to get this information is available. Returns 0 if read-only or if the OS call fails."""
+
+        if self.readonly_storage:
+            return 0
         try:
-            disk_avail = self.stat_disk(self.storedir)
-            disk_avail -= self.reserved_space
+            return self.get_disk_stats()['avail']
         except AttributeError:
-            disk_avail = None
-        if self.readonly_storage:
-            disk_avail = 0
-        return disk_avail
+            return None
+        except EnvironmentError:
+            log.msg("OS call to get disk statistics failed", level=log.UNUSUAL)
+            return 0
 
     def allocated_size(self):
         space = 0
@@ -232,9 +267,9 @@ class StorageServer(service.MultiService, Referenceable):
     def remote_get_version(self):
         remaining_space = self.get_available_space()
         if remaining_space is None:
-            # we're on a platform that doesn't have 'df', so make a vague
-            # guess.
+            # We're on a platform that has no API to get disk stats.
             remaining_space = 2**64
+
         version = { "http://allmydata.org/tahoe/protocols/storage/v1" :
                     { "maximum-immutable-share-size": remaining_space,
                       "tolerates-immutable-read-overrun": True,
@@ -288,7 +323,7 @@ class StorageServer(service.MultiService, Referenceable):
             sf = ShareFile(fn)
             sf.add_or_renew_lease(lease_info)
 
-        # self.readonly_storage causes remaining_space=0
+        # self.readonly_storage causes remaining_space <= 0
 
         for shnum in sharenums:
             incominghome = os.path.join(self.incomingdir, si_dir, "%d" % shnum)
diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py
index 6ca78ceb..7538b390 100644
--- a/src/allmydata/test/test_storage.py
+++ b/src/allmydata/test/test_storage.py
@@ -228,8 +228,9 @@ class BucketProxy(unittest.TestCase):
                                        0x44, WriteBucketProxy_v2, ReadBucketProxy)
 
 class FakeDiskStorageServer(StorageServer):
-    def stat_disk(self, d):
-        return self.DISKAVAIL
+    DISKAVAIL = 0
+    def get_disk_stats(self):
+        return { 'free_for_nonroot': self.DISKAVAIL, 'avail': max(self.DISKAVAIL - self.reserved_space, 0), }
 
 class Server(unittest.TestCase):
 
@@ -412,7 +413,7 @@ class Server(unittest.TestCase):
     def test_reserved_space(self):
         ss = self.create("test_reserved_space", reserved_space=10000,
                          klass=FakeDiskStorageServer)
-        # the FakeDiskStorageServer doesn't do real statvfs() calls
+        # the FakeDiskStorageServer overrides get_disk_stats, so no real OS calls are made
         ss.DISKAVAIL = 15000
         # 15k available, 10k reserved, leaves 5k for shares
 
@@ -468,6 +469,23 @@ class Server(unittest.TestCase):
         ss.disownServiceParent()
         del ss
 
+    def test_disk_stats(self):
+        # This will spuriously fail if there is zero disk space left (but so will other tests).
+        ss = self.create("test_disk_stats", reserved_space=0)
+
+        disk = ss.get_disk_stats()
+        self.failUnless(disk['total'] > 0, disk['total'])
+        self.failUnless(disk['used'] > 0, disk['used'])
+        self.failUnless(disk['free_for_root'] > 0, disk['free_for_root'])
+        self.failUnless(disk['free_for_nonroot'] > 0, disk['free_for_nonroot'])
+        self.failUnless(disk['avail'] > 0, disk['avail'])
+
+    def test_disk_stats_avail_nonnegative(self):
+        ss = self.create("test_disk_stats_avail_nonnegative", reserved_space=2**64)
+
+        disk = ss.get_disk_stats()
+        self.failUnlessEqual(disk['avail'], 0)
+
     def test_seek(self):
         basedir = self.workdir("test_seek_behavior")
         fileutil.make_dirs(basedir)
@@ -624,12 +642,10 @@ class Server(unittest.TestCase):
         self.failUnlessEqual(writers, {})
 
         stats = ss.get_stats()
-        self.failUnlessEqual(stats["storage_server.accepting_immutable_shares"],
-                             False)
+        self.failUnlessEqual(stats["storage_server.accepting_immutable_shares"], 0)
         if "storage_server.disk_avail" in stats:
-            # windows does not have os.statvfs, so it doesn't give us disk
-            # stats. But if there are stats, readonly_storage means
-            # disk_avail=0
+            # Some platforms may not have an API to get disk stats.
+            # But if there are stats, readonly_storage means disk_avail=0
             self.failUnlessEqual(stats["storage_server.disk_avail"], 0)
 
     def test_discard(self):
@@ -2405,10 +2421,14 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin):
         d = self.render1(page, args={"t": ["json"]})
         return d
 
-class NoStatvfsServer(StorageServer):
-    def do_statvfs(self):
+class NoDiskStatsServer(StorageServer):
+    def get_disk_stats(self):
         raise AttributeError
 
+class BadDiskStatsServer(StorageServer):
+    def get_disk_stats(self):
+        raise OSError
+
 class WebStatus(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin):
 
     def setUp(self):
@@ -2450,12 +2470,12 @@ class WebStatus(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin):
         d = self.render1(page, args={"t": ["json"]})
         return d
 
-    def test_status_no_statvfs(self):
-        # windows has no os.statvfs . Make sure the code handles that even on
-        # unix.
-        basedir = "storage/WebStatus/status_no_statvfs"
+    def test_status_no_disk_stats(self):
+        # Some platforms may have no disk stats API. Make sure the code can handle that
+        # (test runs on all platforms).
+        basedir = "storage/WebStatus/status_no_disk_stats"
         fileutil.make_dirs(basedir)
-        ss = NoStatvfsServer(basedir, "\x00" * 20)
+        ss = NoDiskStatsServer(basedir, "\x00" * 20)
         ss.setServiceParent(self.s)
         w = StorageStatus(ss)
         html = w.renderSynchronously()
@@ -2463,6 +2483,24 @@ class WebStatus(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin):
         s = remove_tags(html)
         self.failUnless("Accepting new shares: Yes" in s, s)
         self.failUnless("Total disk space: ?" in s, s)
+        self.failUnless("Space Available to Tahoe: ?" in s, s)
+        self.failUnless(ss.get_available_space() is None)
+
+    def test_status_bad_disk_stats(self):
+        # If the API to get disk stats exists but a call to it fails, then the status should
+        # show that no shares will be accepted, and get_available_space() should be 0.
+        basedir = "storage/WebStatus/status_bad_disk_stats"
+        fileutil.make_dirs(basedir)
+        ss = BadDiskStatsServer(basedir, "\x00" * 20)
+        ss.setServiceParent(self.s)
+        w = StorageStatus(ss)
+        html = w.renderSynchronously()
+        self.failUnless("<h1>Storage Server Status</h1>" in html, html)
+        s = remove_tags(html)
+        self.failUnless("Accepting new shares: No" in s, s)
+        self.failUnless("Total disk space: ?" in s, s)
+        self.failUnless("Space Available to Tahoe: ?" in s, s)
+        self.failUnless(ss.get_available_space() == 0)
 
     def test_readonly(self):
         basedir = "storage/WebStatus/readonly"
-- 
2.45.2