From: Brian Warner
Date: Thu, 17 Jul 2008 01:20:57 +0000 (-0700)
Subject: first pass at deep-checker, no webapi yet, probably big problems with it, only minima...
X-Git-Url: https://git.rkrishnan.org/listings/vdrive/reliability?a=commitdiff_plain;h=9289433ba38dd724834cb6e97bc5907c3e42cd10;p=tahoe-lafs%2Ftahoe-lafs.git

first pass at deep-checker, no webapi yet, probably big problems with it, only minimal tests
---

diff --git a/src/allmydata/dirnode.py b/src/allmydata/dirnode.py
index 2e73cafe..d8d3e93b 100644
--- a/src/allmydata/dirnode.py
+++ b/src/allmydata/dirnode.py
@@ -9,6 +9,7 @@ from allmydata.mutable.node import MutableFileNode
 from allmydata.interfaces import IMutableFileNode, IDirectoryNode,\
      IURI, IFileNode, IMutableFileURI, IVerifierURI, IFilesystemNode, \
      ExistingChildError, ICheckable
+from allmydata.immutable.checker import DeepCheckResults
 from allmydata.util import hashutil, mathutil
 from allmydata.util.hashutil import netstring
 from allmydata.util.limiter import ConcurrencyLimiter
@@ -533,6 +534,66 @@ class NewDirectoryNode:
         d.addCallback(_got_list)
         return d
 
+    def deep_check(self, verify=False, repair=False):
+        """Check me and every object reachable from me.
+
+        Each distinct object (as identified by get_verifier()) is checked
+        once. I return a Deferred that fires with a DeepCheckResults
+        instance after the traversal has finished.
+        """
+        results = DeepCheckResults()
+        # verifiers of every object that has already been scheduled
+        found = set()
+        found.add(self.get_verifier())
+
+        # a single limiter is shared by all check and list operations
+        limiter = ConcurrencyLimiter(10)
+
+        d = self._add_check_from_node(self, results, limiter, verify, repair)
+        d.addCallback(lambda res:
+                      self._add_deepcheck_from_dirnode(self,
+                                                       found, results, limiter,
+                                                       verify, repair))
+        d.addCallback(lambda res: results)
+        return d
+
+    def _add_check_from_node(self, node, results, limiter, verify, repair):
+        """Check one node through the limiter and record the outcome."""
+        d = limiter.add(node.check, verify, repair)
+        d.addCallback(results.add_check)
+        return d
+
+    def _add_deepcheck_from_dirnode(self, node, found, results, limiter,
+                                    verify, repair):
+        """List one directory, check its children, recurse into subdirs.
+
+        'found' is the shared set of verifiers already scheduled. It is
+        updated in place, so an object reachable by several paths (or by
+        a cycle) is only visited once.
+        """
+        d = limiter.add(node.list)
+        def _got_list(children):
+            dl = []
+            for name, (child, metadata) in children.iteritems():
+                verifier = child.get_verifier()
+                if verifier in found:
+                    # avoid loops
+                    continue
+                # remember the child, otherwise the test above never fires
+                found.add(verifier)
+                dl.append(self._add_check_from_node(child,
+                                                    results, limiter,
+                                                    verify, repair))
+                if IDirectoryNode.providedBy(child):
+                    d2 = self._add_deepcheck_from_dirnode(child, found,
+                                                          results, limiter,
+                                                          verify, repair)
+                    dl.append(d2)
+            if dl:
+                return defer.DeferredList(dl)
+        d.addCallback(_got_list)
+        return d
+
 class DeepStats:
     def __init__(self):
         self.stats = {}
diff --git a/src/allmydata/immutable/checker.py b/src/allmydata/immutable/checker.py
index c3269217..33a12931 100644
--- a/src/allmydata/immutable/checker.py
+++ b/src/allmydata/immutable/checker.py
@@ -10,7 +10,8 @@ from zope.interface import implements
 from twisted.internet import defer
 from twisted.python import log
 from allmydata import storage
-from allmydata.interfaces import IVerifierURI, ICheckerResults
+from allmydata.interfaces import IVerifierURI, \
+     ICheckerResults, IDeepCheckResults
 from allmydata.immutable import download
 from allmydata.util import hashutil, base32
 
@@ -44,6 +45,55 @@ class Results:
             s += "Not Healthy!\n"
         return s
 
+class DeepCheckResults:
+    """Accumulate the outcome of a deep-check operation.
+
+    All counters and collections start empty; add_check() and
+    add_repair() update them as individual results arrive.
+    """
+    implements(IDeepCheckResults)
+
+    def __init__(self):
+        self.objects_checked = 0
+        self.objects_healthy = 0
+        self.repairs_attempted = 0
+        self.repairs_successful = 0
+        self.problems = []
+        self.server_problems = {}
+
+    def add_check(self, r):
+        """Record the check results 'r' for one object."""
+        self.objects_checked += 1
+        healthy = r.is_healthy
+        if callable(healthy):
+            # NOTE(review): if is_healthy is a method, testing the bound
+            # method itself would always be true, so call it. Confirm
+            # against ICheckerResults.
+            healthy = healthy()
+        if healthy:
+            self.objects_healthy += 1
+        else:
+            self.problems.append(r)
+
+    def add_repair(self, is_successful):
+        """Record one repair attempt and whether it succeeded."""
+        self.repairs_attempted += 1
+        if is_successful:
+            self.repairs_successful += 1
+
+    def count_objects_checked(self):
+        return self.objects_checked
+    def count_objects_healthy(self):
+        return self.objects_healthy
+    def count_repairs_attempted(self):
+        return self.repairs_attempted
+    def count_repairs_successful(self):
+        return self.repairs_successful
+    def get_server_problems(self):
+        return self.server_problems
+    def get_problems(self):
+        return self.problems
+
 
 class SimpleCHKFileChecker:
     """Return a list of (needed, total, found, sharemap), where sharemap maps
diff
--git a/src/allmydata/immutable/filenode.py b/src/allmydata/immutable/filenode.py
index c3caae73..1ed88982 100644
--- a/src/allmydata/immutable/filenode.py
+++ b/src/allmydata/immutable/filenode.py
@@ -3,7 +3,7 @@ from zope.interface import implements
 from twisted.internet import defer
 from allmydata.interfaces import IFileNode, IFileURI, IURI, ICheckable
 from allmydata import uri
-from allmydata.immutable.checker import Results, \
+from allmydata.immutable.checker import Results, DeepCheckResults, \
      SimpleCHKFileChecker, SimpleCHKFileVerifier
 
 class FileNode:
@@ -52,6 +52,20 @@ class FileNode:
         v = SimpleCHKFileChecker(peer_getter, vcap)
         return v.check()
 
+    def deep_check(self, verify=False, repair=False):
+        """Run check() on this one node and wrap the outcome.
+
+        I return a Deferred that fires with a DeepCheckResults holding
+        the single result of my own check().
+        """
+        def _wrap_in_deep_results(check_results):
+            deep_results = DeepCheckResults()
+            deep_results.add_check(check_results)
+            return deep_results
+        d = self.check(verify, repair)
+        d.addCallback(_wrap_in_deep_results)
+        return d
+
     def download(self, target):
         downloader = self._client.getServiceNamed("downloader")
         return downloader.download(self.uri, target)
diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py
index c03d4d18..d63dd9b2 100644
--- a/src/allmydata/interfaces.py
+++ b/src/allmydata/interfaces.py
@@ -1473,6 +1473,16 @@ class ICheckable(Interface):
         taken.
         """
 
+    def deep_check(verify=False, repair=False):
+        """Check the health of me and of everything reachable from me.
+
+        This is the recursive form of check(), intended for dirnodes. It
+        is safe to call on a filenode too, in which case only that one
+        object is checked.
+
+        I return a Deferred that fires with an IDeepCheckResults object.
+        """
+
 class ICheckerResults(Interface):
     """I contain the detailed results of a check/verify/repair operation.
 
@@ -1506,6 +1516,33 @@ class ICheckerResults(Interface):
     # same nodeid, they will fail as a pair, and overall reliability is
     # decreased.
 
+class IDeepCheckResults(Interface):
+    """I hold the aggregate results of one deep-check operation.
+
+    ICheckable.deep_check() fires its Deferred with an object that
+    provides me.
+    """
+
+    def count_objects_checked():
+        """Return the number of objects that were checked."""
+
+    def count_objects_healthy():
+        """Return the number of objects that were fully healthy."""
+
+    def count_repairs_attempted():
+        """Return the number of repair operations that were attempted."""
+
+    def count_repairs_successful():
+        """Return the number of repair operations that succeeded in
+        bringing the object back up to full health."""
+
+    def get_server_problems():
+        """Return a dict that maps each server nodeid to the number of
+        problems that involved that server."""
+
+    def get_problems():
+        """Return a list of ICheckerResults, one for every object that
+        was not fully healthy."""
 
 
 class IClient(Interface):
diff --git a/src/allmydata/mutable/node.py b/src/allmydata/mutable/node.py
index cd9f9763..38c132a1 100644
--- a/src/allmydata/mutable/node.py
+++ b/src/allmydata/mutable/node.py
@@ -11,6 +11,7 @@ from allmydata.util import hashutil
 from allmydata.util.assertutil import precondition
 from allmydata.uri import WriteableSSKFileURI
 from allmydata.immutable.encode import NotEnoughSharesError
+from allmydata.immutable.checker import DeepCheckResults
 from pycryptopp.publickey import rsa
 from pycryptopp.cipher.aes import AES
 
@@ -240,6 +241,20 @@ class MutableFileNode:
         checker = MutableChecker(self)
         return checker.check(verify, repair)
 
+    def deep_check(self, verify=False, repair=False):
+        """Run check() on this one node and wrap the outcome.
+
+        I return a Deferred that fires with a DeepCheckResults holding
+        the single result of my own check().
+        """
+        def _wrap_in_deep_results(check_results):
+            deep_results = DeepCheckResults()
+            deep_results.add_check(check_results)
+            return deep_results
+        d = self.check(verify, repair)
+        d.addCallback(_wrap_in_deep_results)
+        return d
+
     # allow the use of IDownloadTarget
     def download(self, target):
         # fake it. TODO: make this cleaner.
diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py
index 4aba00c4..044a8485 100644
--- a/src/allmydata/test/test_dirnode.py
+++ b/src/allmydata/test/test_dirnode.py
@@ -117,6 +117,33 @@ class Dirnode(unittest.TestCase, testutil.ShouldFailMixin, testutil.StallMixin):
         d.addCallback(_done)
         return d
 
+    def _test_deepcheck_create(self):
+        """Create an empty root dirnode for the deep-check test.
+
+        The node is also remembered as self._rootnode. I return a
+        Deferred that fires with it.
+        """
+        def _remember_root(rootnode):
+            self._rootnode = rootnode
+            return rootnode
+        d = self.client.create_empty_dirnode()
+        d.addCallback(_remember_root)
+        return d
+
+    def test_deepcheck(self):
+        """Deep-checking an empty directory reports one healthy object."""
+        def _check_results(r):
+            self.failUnlessEqual(r.count_objects_checked(), 1)
+            self.failUnlessEqual(r.count_objects_healthy(), 1)
+            self.failUnlessEqual(r.count_repairs_attempted(), 0)
+            self.failUnlessEqual(r.count_repairs_successful(), 0)
+            self.failUnlessEqual(len(r.get_server_problems()), 0)
+            self.failUnlessEqual(len(r.get_problems()), 0)
+        d = self._test_deepcheck_create()
+        d.addCallback(lambda rootnode: rootnode.deep_check())
+        d.addCallback(_check_results)
+        return d
+
     def test_readonly(self):
         fileuri = make_chk_file_uri(1234)
         filenode = self.client.create_node_from_uri(fileuri)