From: Brian Warner <warner@allmydata.com>
Date: Tue, 16 Oct 2007 19:25:09 +0000 (-0700)
Subject: add an equally-simple file-verifier
X-Git-Tag: allmydata-tahoe-0.7.0~369
X-Git-Url: https://git.rkrishnan.org/components/com_hotproperty/...?a=commitdiff_plain;h=d1f13fd1176bed0c35ae0b1835614514d4757b1a;p=tahoe-lafs%2Ftahoe-lafs.git

add an equally-simple file-verifier
---

diff --git a/src/allmydata/checker.py b/src/allmydata/checker.py
index b964cf19..57565246 100644
--- a/src/allmydata/checker.py
+++ b/src/allmydata/checker.py
@@ -10,7 +10,8 @@ from twisted.internet import defer
 from twisted.application import service
 from twisted.python import log
 from allmydata.interfaces import IVerifierURI
-from allmydata import uri
+from allmydata import uri, download
+from allmydata.util import hashutil, idlib
 
 class SimpleCHKFileChecker:
 
@@ -101,3 +102,123 @@ class Checker(service.MultiService):
         else:
             raise ValueError("I don't know how to check '%s'" % (uri_to_check,))
 
+    def verify(self, uri_to_verify):
+        """Verify the file or directory that uri_to_verify refers to.
+
+        The argument is adapted to IVerifierURI first. If the adapted URI
+        is None this returns an already-fired Deferred holding True; CHK
+        files return the Deferred from SimpleCHKFileVerifier.start(); and
+        dirnodes return whatever SimpleDirnodeChecker.check() returns.
+        Any other kind of URI raises ValueError.
+        """
+        uri_to_verify = IVerifierURI(uri_to_verify)
+        if uri_to_verify is None:
+            return defer.succeed(True)
+        elif isinstance(uri_to_verify, uri.CHKFileVerifierURI):
+            v = SimpleCHKFileVerifier(self.parent, uri_to_verify)
+            return v.start()
+        elif isinstance(uri_to_verify, uri.DirnodeVerifierURI):
+            # for dirnodes, checking and verifying are currently equivalent
+            tub = self.parent.tub
+            c = SimpleDirnodeChecker(tub)
+            return c.check(uri_to_verify)
+        else:
+            raise ValueError("I don't know how to verify '%s'" %
+                             (uri_to_verify,))
+
+class VerifyingOutput:
+    """Download target that hashes the crypttext instead of storing it.
+
+    Every segment given to write_segment() is fed to a whole-file crypttext
+    hasher. Once setup_hashtrees() has supplied a crypttext hash tree, the
+    hash of each segment is also handed to that tree as a leaf.
+    """
+
+    def __init__(self, total_length):
+        self._crypttext_hasher = hashutil.crypttext_hasher()
+        self.length = 0 # bytes of crypttext seen so far
+        self.total_length = total_length
+        self._segment_number = 0 # index of the next segment to arrive
+        self._crypttext_hash_tree = None # filled in by setup_hashtrees()
+        self._opened = False
+
+    def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree):
+        # only the crypttext tree is kept; plaintext_hashtree is unused
+        self._crypttext_hash_tree = crypttext_hashtree
+
+    def write_segment(self, crypttext):
+        self.length += len(crypttext)
+
+        self._crypttext_hasher.update(crypttext)
+        # compare against None explicitly, so that a hash tree object which
+        # happens to be falsy cannot silently disable the per-segment check
+        if self._crypttext_hash_tree is not None:
+            ch = hashutil.crypttext_segment_hasher()
+            ch.update(crypttext)
+            crypttext_leaves = {self._segment_number: ch.digest()}
+            self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves)
+
+        self._segment_number += 1
+
+    def close(self):
+        self.crypttext_hash = self._crypttext_hasher.digest()
+
+    def finish(self):
+        # presumably the value that a successful verification fires with
+        return True
+
+
+class SimpleCHKFileVerifier(download.FileDownloader):
+    # this reconstructs the crypttext, which verifies that at least 'k' of
+    # the shareholders are around and have valid data. It does not check the
+    # remaining shareholders, and it cannot verify the plaintext.
+    check_plaintext_hash = False
+
+    def __init__(self, client, u):
+        # NOTE: FileDownloader.__init__ is not called here; the attributes
+        # below are set by hand (presumably those the inherited methods use)
+        self._client = client
+
+        u = IVerifierURI(u)
+        self._storage_index = u.storage_index
+        self._uri_extension_hash = u.uri_extension_hash
+        self._total_shares = u.total_shares
+        self._size = u.size
+        self._num_needed_shares = u.needed_shares
+
+        self._output = VerifyingOutput(self._size)
+        self._paused = False
+        self._stopped = False
+
+        self.active_buckets = {} # k: shnum, v: bucket
+        self._share_buckets = [] # list of (sharenum, bucket) tuples
+        self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets
+        self._uri_extension_sources = []
+
+        self._uri_extension_data = None
+
+        self._fetch_failures = {"uri_extension": 0,
+                                "plaintext_hashroot": 0,
+                                "plaintext_hashtree": 0,
+                                "crypttext_hashroot": 0,
+                                "crypttext_hashtree": 0,
+                                }
+
+
+    def start(self):
+        """Start verifying; returns the Deferred for the whole chain."""
+        log.msg("starting verification [%s]" %
+                idlib.b2a(self._storage_index)[:6])
+
+        # first step: who should we download from?
+        d = defer.maybeDeferred(self._get_all_shareholders)
+        d.addCallback(self._got_all_shareholders)
+        # now get the uri_extension block from somebody and validate it
+        d.addCallback(self._obtain_uri_extension)
+        d.addCallback(self._got_uri_extension)
+        d.addCallback(self._get_hashtrees)
+        d.addCallback(self._create_validated_buckets)
+        # once we know that, we can download blocks from everybody
+        d.addCallback(self._download_all_segments)
+        d.addCallback(self._done)
+        return d
diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py
index f59cd7e1..fab60167 100644
--- a/src/allmydata/test/test_system.py
+++ b/src/allmydata/test/test_system.py
@@ -295,6 +295,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
         d.addCallback(self._test_control)
         d.addCallback(self._test_cli)
         d.addCallback(self._test_checker)
+        d.addCallback(self._test_verifier)
         return d
     test_vdrive.timeout = 1100
 
@@ -809,3 +810,20 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
         d.addCallback(_done)
         return d
 
+    def _test_verifier(self, res):
+        # verify every entry of the home directory's manifest, using the
+        # checker service of a different client than the vdrive lookup
+        vdrive0 = self.clients[0].getServiceNamed("vdrive")
+        checker1 = self.clients[1].getServiceNamed("checker")
+        d = vdrive0.get_node_at_path("~")
+        d.addCallback(lambda home: home.build_manifest())
+        def _check_all(manifest):
+            dl = [checker1.verify(si) for si in manifest]
+            return deferredutil.DeferredListShouldSucceed(dl)
+        d.addCallback(_check_all)
+        def _done(res):
+            for i in res:
+                self.failUnless(i is True)
+        d.addCallback(_done)
+        return d
+