From ef002c935a15eb76f797e5707f0e2248c82deabc Mon Sep 17 00:00:00 2001 From: david-sarah Date: Fri, 20 Nov 2009 21:56:44 -0800 Subject: [PATCH] storage server: detect disk space usage on Windows too (fixes #637) --- _auto_deps.py | 15 ++- docs/configuration.txt | 6 +- src/allmydata/storage/server.py | 149 ++++++++++++++++++----------- src/allmydata/test/test_storage.py | 68 ++++++++++--- 4 files changed, 156 insertions(+), 82 deletions(-) diff --git a/_auto_deps.py b/_auto_deps.py index 923ee36c..5becd1cf 100644 --- a/_auto_deps.py +++ b/_auto_deps.py @@ -37,19 +37,18 @@ if sys.version_info < (2, 5): install_requires.append("pysqlite >= 2.0.5") ## The following block is commented-out because there is not currently a pywin32 package which -## can be easy_install'ed and also which actually makes "import win32api" succeed. Users have -## to manually install pywin32 on Windows before installing Tahoe. +## can be easy_install'ed and also which actually makes "import win32api" succeed. +## See http://sourceforge.net/tracker/index.php?func=detail&aid=1799934&group_id=78018&atid=551954 +## Users have to manually install pywin32 on Windows before installing Tahoe. ##import platform ##if platform.system() == "Windows": ## # Twisted requires pywin32 if it is going to offer process management functionality, or if ## # it is going to offer iocp reactor. We currently require process management. It would be ## # better if Twisted would declare that it requires pywin32 if it is going to offer process -## # management. Then the specification and the evolution of Twisted's reliance on pywin32 can -## # be confined to the Twisted setup data, and Tahoe can remain blissfully ignorant about such -## # things as if a future version of Twisted requires a different version of pywin32, or if a -## # future version of Twisted implements process management without using pywin32 at all, -## # etc.. That is twisted ticket #3238 -- http://twistedmatrix.com/trac/ticket/3238 . 
But -## # until Twisted does that, Tahoe needs to be non-ignorant of the following requirement: +## # management. That is twisted ticket #3238 -- http://twistedmatrix.com/trac/ticket/3238 . +## # On the other hand, Tahoe also depends on pywin32 for getting free disk space statistics +## # (although that is not a hard requirement: if win32api can't be imported then we don't +## # rely on having the disk stats). ## install_requires.append('pywin32') if hasattr(sys, 'frozen'): # for py2exe diff --git a/docs/configuration.txt b/docs/configuration.txt index 994f9ac9..1e4b248a 100644 --- a/docs/configuration.txt +++ b/docs/configuration.txt @@ -302,8 +302,10 @@ readonly = (boolean, optional) reserved_space = (str, optional) If provided, this value defines how much disk space is reserved: the storage - server will not accept any share which causes the amount of free space (as - measured by 'df', or more specifically statvfs(2)) to drop below this value. + server will not accept any share which causes the amount of free disk space + to drop below this value. (The free space is measured by a call to statvfs(2) + on Unix, or GetDiskFreeSpaceEx on Windows, and is the space available to the + user account under which the storage server runs.) This string contains a number, with an optional case-insensitive scale suffix like "K" or "M" or "G", and an optional "B" or "iB" suffix. 
So diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 2b0f8af6..b9e45575 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -36,6 +36,16 @@ class StorageServer(service.MultiService, Referenceable): implements(RIStorageServer, IStatsProducer) name = 'storage' LeaseCheckerClass = LeaseCheckingCrawler + windows = False + + try: + import win32api, win32con + windows = True + # + win32api.SetErrorMode(win32con.SEM_FAILCRITICALERRORS | + win32con.SEM_NOOPENFILEERRORBOX) + except ImportError: + pass def __init__(self, storedir, nodeid, reserved_space=0, discard_storage=False, readonly_storage=False, @@ -70,7 +80,7 @@ class StorageServer(service.MultiService, Referenceable): if reserved_space: if self.get_available_space() is None: - log.msg("warning: [storage]reserved_space= is set, but this platform does not support statvfs(2), so this reservation cannot be honored", + log.msg("warning: [storage]reserved_space= is set, but this platform does not support an API to get disk statistics (statvfs(2) or GetDiskFreeSpaceEx), so this reservation cannot be honored", umin="0wZ27w", level=log.UNUSUAL) self.latencies = {"allocate": [], # immutable @@ -147,22 +157,35 @@ class StorageServer(service.MultiService, Referenceable): def _clean_incomplete(self): fileutil.rm_dir(self.incomingdir) - def do_statvfs(self): - return os.statvfs(self.storedir) + def get_disk_stats(self): + """Return disk statistics for the storage disk, in the form of a dict + with the following fields. + total: total bytes on disk + free_for_root: bytes actually free on disk + free_for_nonroot: bytes free for "a non-privileged user" [Unix] or + the current user [Windows]; might take into + account quotas depending on platform + used: bytes used on disk + avail: bytes available excluding reserved space + An AttributeError can occur if the OS has no API to get disk information. 
+ An EnvironmentError can occur if the OS call fails.""" + + if self.windows: + # For Windows systems, where os.statvfs is not available, use GetDiskFreeSpaceEx. + # + # + # Although the docs say that the argument should be the root directory + # of a disk, GetDiskFreeSpaceEx actually accepts any path on that disk + # (like its Win32 equivalent). + + (free_for_nonroot, total, free_for_root) = self.win32api.GetDiskFreeSpaceEx(self.storedir) + else: + # For Unix-like systems. + # + # + # + s = os.statvfs(self.storedir) - def get_stats(self): - # remember: RIStatsProvider requires that our return dict - # contains numeric values. - stats = { 'storage_server.allocated': self.allocated_size(), } - stats["storage_server.reserved_space"] = self.reserved_space - for category,ld in self.get_latencies().items(): - for name,v in ld.items(): - stats['storage_server.latencies.%s.%s' % (category, name)] = v - writeable = True - if self.readonly_storage: - writeable = False - try: - s = self.do_statvfs() # on my mac laptop: # statvfs(2) is a wrapper around statfs(2). # statvfs.f_frsize = statfs.f_bsize : @@ -173,55 +196,67 @@ class StorageServer(service.MultiService, Referenceable): # wrong, and s.f_blocks*s.f_frsize is twice the size of my disk, # but s.f_bavail*s.f_frsize is correct - disk_total = s.f_frsize * s.f_blocks - disk_used = s.f_frsize * (s.f_blocks - s.f_bfree) - # spacetime predictors should look at the slope of disk_used. - disk_free_for_root = s.f_frsize * s.f_bfree - disk_free_for_nonroot = s.f_frsize * s.f_bavail - - # include our local policy here: if we stop accepting shares when - # the available space drops below 1GB, then include that fact in - # disk_avail. 
- disk_avail = disk_free_for_nonroot - self.reserved_space - disk_avail = max(disk_avail, 0) - if self.readonly_storage: - disk_avail = 0 - if disk_avail == 0: - writeable = False + total = s.f_frsize * s.f_blocks + free_for_root = s.f_frsize * s.f_bfree + free_for_nonroot = s.f_frsize * s.f_bavail + + # valid for all platforms: + used = total - free_for_root + avail = max(free_for_nonroot - self.reserved_space, 0) + + return { 'total': total, 'free_for_root': free_for_root, + 'free_for_nonroot': free_for_nonroot, + 'used': used, 'avail': avail, } + + def get_stats(self): + # remember: RIStatsProvider requires that our return dict + # contains numeric values. + stats = { 'storage_server.allocated': self.allocated_size(), } + stats['storage_server.reserved_space'] = self.reserved_space + for category,ld in self.get_latencies().items(): + for name,v in ld.items(): + stats['storage_server.latencies.%s.%s' % (category, name)] = v + + try: + disk = self.get_disk_stats() + writeable = disk['avail'] > 0 # spacetime predictors should use disk_avail / (d(disk_used)/dt) - stats["storage_server.disk_total"] = disk_total - stats["storage_server.disk_used"] = disk_used - stats["storage_server.disk_free_for_root"] = disk_free_for_root - stats["storage_server.disk_free_for_nonroot"] = disk_free_for_nonroot - stats["storage_server.disk_avail"] = disk_avail + stats['storage_server.disk_total'] = disk['total'] + stats['storage_server.disk_used'] = disk['used'] + stats['storage_server.disk_free_for_root'] = disk['free_for_root'] + stats['storage_server.disk_free_for_nonroot'] = disk['free_for_nonroot'] + stats['storage_server.disk_avail'] = disk['avail'] except AttributeError: - # os.statvfs is available only on unix - pass - stats["storage_server.accepting_immutable_shares"] = int(writeable) + writeable = True + except EnvironmentError: + log.msg("OS call to get disk statistics failed", level=log.UNUSUAL) + writeable = False + + if self.readonly_storage: + 
stats['storage_server.disk_avail'] = 0 + writeable = False + + stats['storage_server.accepting_immutable_shares'] = int(writeable) s = self.bucket_counter.get_state() bucket_count = s.get("last-complete-bucket-count") if bucket_count: - stats["storage_server.total_bucket_count"] = bucket_count + stats['storage_server.total_bucket_count'] = bucket_count return stats - - def stat_disk(self, d): - s = os.statvfs(d) - # s.f_bavail: available to non-root users - disk_avail = s.f_frsize * s.f_bavail - return disk_avail - def get_available_space(self): - # returns None if it cannot be measured (windows) + """Returns available space for share storage in bytes, or None if no + API to get this information is available.""" + + if self.readonly_storage: + return 0 try: - disk_avail = self.stat_disk(self.storedir) - disk_avail -= self.reserved_space + return self.get_disk_stats()['avail'] except AttributeError: - disk_avail = None - if self.readonly_storage: - disk_avail = 0 - return disk_avail + return None + except EnvironmentError: + log.msg("OS call to get disk statistics failed", level=log.UNUSUAL) + return 0 def allocated_size(self): space = 0 @@ -232,9 +267,9 @@ class StorageServer(service.MultiService, Referenceable): def remote_get_version(self): remaining_space = self.get_available_space() if remaining_space is None: - # we're on a platform that doesn't have 'df', so make a vague - # guess. + # We're on a platform that has no API to get disk stats. 
remaining_space = 2**64 + version = { "http://allmydata.org/tahoe/protocols/storage/v1" : { "maximum-immutable-share-size": remaining_space, "tolerates-immutable-read-overrun": True, @@ -288,7 +323,7 @@ class StorageServer(service.MultiService, Referenceable): sf = ShareFile(fn) sf.add_or_renew_lease(lease_info) - # self.readonly_storage causes remaining_space=0 + # self.readonly_storage causes remaining_space <= 0 for shnum in sharenums: incominghome = os.path.join(self.incomingdir, si_dir, "%d" % shnum) diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 6ca78ceb..7538b390 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -228,8 +228,9 @@ class BucketProxy(unittest.TestCase): 0x44, WriteBucketProxy_v2, ReadBucketProxy) class FakeDiskStorageServer(StorageServer): - def stat_disk(self, d): - return self.DISKAVAIL + DISKAVAIL = 0 + def get_disk_stats(self): + return { 'free_for_nonroot': self.DISKAVAIL, 'avail': max(self.DISKAVAIL - self.reserved_space, 0), } class Server(unittest.TestCase): @@ -412,7 +413,7 @@ class Server(unittest.TestCase): def test_reserved_space(self): ss = self.create("test_reserved_space", reserved_space=10000, klass=FakeDiskStorageServer) - # the FakeDiskStorageServer doesn't do real statvfs() calls + # the FakeDiskStorageServer doesn't do real calls to get_disk_stats ss.DISKAVAIL = 15000 # 15k available, 10k reserved, leaves 5k for shares @@ -468,6 +469,23 @@ class Server(unittest.TestCase): ss.disownServiceParent() del ss + def test_disk_stats(self): + # This will spuriously fail if there is zero disk space left (but so will other tests). 
+ ss = self.create("test_disk_stats", reserved_space=0) + + disk = ss.get_disk_stats() + self.failUnless(disk['total'] > 0, disk['total']) + self.failUnless(disk['used'] > 0, disk['used']) + self.failUnless(disk['free_for_root'] > 0, disk['free_for_root']) + self.failUnless(disk['free_for_nonroot'] > 0, disk['free_for_nonroot']) + self.failUnless(disk['avail'] > 0, disk['avail']) + + def test_disk_stats_avail_nonnegative(self): + ss = self.create("test_disk_stats_avail_nonnegative", reserved_space=2**64) + + disk = ss.get_disk_stats() + self.failUnlessEqual(disk['avail'], 0) + def test_seek(self): basedir = self.workdir("test_seek_behavior") fileutil.make_dirs(basedir) @@ -624,12 +642,10 @@ class Server(unittest.TestCase): self.failUnlessEqual(writers, {}) stats = ss.get_stats() - self.failUnlessEqual(stats["storage_server.accepting_immutable_shares"], - False) + self.failUnlessEqual(stats["storage_server.accepting_immutable_shares"], 0) if "storage_server.disk_avail" in stats: - # windows does not have os.statvfs, so it doesn't give us disk - # stats. But if there are stats, readonly_storage means - # disk_avail=0 + # Some platforms may not have an API to get disk stats. 
+ # But if there are stats, readonly_storage means disk_avail=0 self.failUnlessEqual(stats["storage_server.disk_avail"], 0) def test_discard(self): @@ -2405,10 +2421,14 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin): d = self.render1(page, args={"t": ["json"]}) return d -class NoStatvfsServer(StorageServer): - def do_statvfs(self): +class NoDiskStatsServer(StorageServer): + def get_disk_stats(self): raise AttributeError +class BadDiskStatsServer(StorageServer): + def get_disk_stats(self): + raise OSError + class WebStatus(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin): def setUp(self): @@ -2450,12 +2470,12 @@ class WebStatus(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin): d = self.render1(page, args={"t": ["json"]}) return d - def test_status_no_statvfs(self): - # windows has no os.statvfs . Make sure the code handles that even on - # unix. - basedir = "storage/WebStatus/status_no_statvfs" + def test_status_no_disk_stats(self): + # Some platforms may have no disk stats API. Make sure the code can handle that + # (test runs on all platforms). + basedir = "storage/WebStatus/status_no_disk_stats" fileutil.make_dirs(basedir) - ss = NoStatvfsServer(basedir, "\x00" * 20) + ss = NoDiskStatsServer(basedir, "\x00" * 20) ss.setServiceParent(self.s) w = StorageStatus(ss) html = w.renderSynchronously() @@ -2463,6 +2483,24 @@ class WebStatus(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin): s = remove_tags(html) self.failUnless("Accepting new shares: Yes" in s, s) self.failUnless("Total disk space: ?" in s, s) + self.failUnless("Space Available to Tahoe: ?" in s, s) + self.failUnless(ss.get_available_space() is None) + + def test_status_bad_disk_stats(self): + # If the API to get disk stats exists but a call to it fails, then the status should + # show that no shares will be accepted, and get_available_space() should be 0. 
+ basedir = "storage/WebStatus/status_bad_disk_stats" + fileutil.make_dirs(basedir) + ss = BadDiskStatsServer(basedir, "\x00" * 20) + ss.setServiceParent(self.s) + w = StorageStatus(ss) + html = w.renderSynchronously() + self.failUnless("<h1>Storage Server Status</h1>" in html, html) + s = remove_tags(html) + self.failUnless("Accepting new shares: No" in s, s) + self.failUnless("Total disk space: ?" in s, s) + self.failUnless("Space Available to Tahoe: ?" in s, s) + self.failUnless(ss.get_available_space() == 0) def test_readonly(self): basedir = "storage/WebStatus/readonly" -- 2.45.2