From: Brian Warner <warner@allmydata.com>
Date: Tue, 16 Oct 2007 19:25:09 +0000 (-0700)
Subject: add an equally-simple file-verifier
X-Git-Tag: allmydata-tahoe-0.7.0~369
X-Git-Url: https://git.rkrishnan.org/components/com_hotproperty/...?a=commitdiff_plain;h=d1f13fd1176bed0c35ae0b1835614514d4757b1a;p=tahoe-lafs%2Ftahoe-lafs.git

add an equally-simple file-verifier
---

diff --git a/src/allmydata/checker.py b/src/allmydata/checker.py
index b964cf19..57565246 100644
--- a/src/allmydata/checker.py
+++ b/src/allmydata/checker.py
@@ -10,7 +10,8 @@ from twisted.internet import defer
 from twisted.application import service
 from twisted.python import log
 from allmydata.interfaces import IVerifierURI
-from allmydata import uri
+from allmydata import uri, download
+from allmydata.util import hashutil, idlib
 
 class SimpleCHKFileChecker:
 
@@ -101,3 +102,100 @@ class Checker(service.MultiService):
         else:
             raise ValueError("I don't know how to check '%s'" % (uri_to_check,))
 
+    def verify(self, uri_to_verify):  # dispatch on verifier-URI type; unknown types raise ValueError
+        uri_to_verify = IVerifierURI(uri_to_verify)  # adapt; a None result is handled just below
+        if uri_to_verify is None:
+            return defer.succeed(True)  # nothing to verify: already-fired Deferred carrying True
+        elif isinstance(uri_to_verify, uri.CHKFileVerifierURI):
+            v = SimpleCHKFileVerifier(self.parent, uri_to_verify)  # self.parent fills the 'client' argument
+            return v.start()  # start() returns the Deferred for its callback chain
+        elif isinstance(uri_to_verify, uri.DirnodeVerifierURI):
+            # for dirnodes, checking and verifying are currently equivalent
+            tub = self.parent.tub
+            c = SimpleDirnodeChecker(tub)
+            return c.check(uri_to_verify)  # presumably a Deferred as well -- confirm in SimpleDirnodeChecker
+        else:  # raised synchronously, not delivered through a Deferred
+            raise ValueError("I don't know how to verify '%s'" %
+                             (uri_to_verify,))
+
+class VerifyingOutput:  # output sink that hashes crypttext segments instead of storing them
+    def __init__(self, total_length):
+        self._crypttext_hasher = hashutil.crypttext_hasher()  # running hash over every segment written
+        self.length = 0  # bytes of crypttext received so far (grown in write_segment)
+        self.total_length = total_length  # stored only; never read inside this class
+        self._segment_number = 0  # index of the segment the next write_segment() call will carry
+        self._crypttext_hash_tree = None  # supplied later through setup_hashtrees()
+        self._opened = False  # never read inside this class
+
+    def setup_hashtrees(self, plaintext_hashtree, crypttext_hashtree):  # plaintext_hashtree is ignored
+        self._crypttext_hash_tree = crypttext_hashtree
+
+    def write_segment(self, crypttext):  # hash one segment; the crypttext itself is not kept
+        self.length += len(crypttext)
+
+        self._crypttext_hasher.update(crypttext)
+        if self._crypttext_hash_tree:  # truthiness test: skipped while the tree is None (or otherwise falsy)
+            ch = hashutil.crypttext_segment_hasher()
+            ch.update(crypttext)
+            crypttext_leaves = {self._segment_number: ch.digest()}  # leaf hash keyed by segment index
+            self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves)  # presumably rejects a mismatching leaf -- confirm
+
+        self._segment_number += 1  # advanced even when no hash tree has been set
+
+    def close(self):  # freeze the whole-file crypttext digest into self.crypttext_hash
+        self.crypttext_hash = self._crypttext_hasher.digest()
+
+    def finish(self):  # unconditionally True; presumably becomes the verify() result -- confirm in FileDownloader
+        return True
+
+
+
+class SimpleCHKFileVerifier(download.FileDownloader):
+    # this reconstructs the crypttext, which verifies that at least 'k' of
+    # the shareholders are around and have valid data. It does not check the
+    # remaining shareholders, and it cannot verify the plaintext.
+    check_plaintext_hash = False  # presumably read by FileDownloader to skip plaintext checks -- confirm
+
+    def __init__(self, client, u):  # NOTE: FileDownloader.__init__ is not called; attributes are set directly
+        self._client = client
+
+        u = IVerifierURI(u)  # adapt; every field below is read from the adapted verifier URI
+        self._storage_index = u.storage_index
+        self._uri_extension_hash = u.uri_extension_hash
+        self._total_shares = u.total_shares
+        self._size = u.size
+        self._num_needed_shares = u.needed_shares
+
+        self._output = VerifyingOutput(self._size)  # sink that only hashes what it is given
+        self._paused = False
+        self._stopped = False
+
+        self.active_buckets = {} # k: shnum, v: bucket
+        self._share_buckets = [] # list of (sharenum, bucket) tuples
+        self._share_vbuckets = {} # k: shnum, v: set of ValidatedBuckets
+        self._uri_extension_sources = []
+
+        self._uri_extension_data = None
+
+        self._fetch_failures = {"uri_extension": 0,  # per-category counters, all starting at zero
+                                "plaintext_hashroot": 0,
+                                "plaintext_hashtree": 0,
+                                "crypttext_hashroot": 0,
+                                "crypttext_hashtree": 0,
+                                }
+
+
+    def start(self):  # build the callback chain and return its Deferred
+        log.msg("starting download [%s]" % idlib.b2a(self._storage_index)[:6])  # NOTE(review): text says "download" in the verifier -- intended?
+
+        # first step: who should we download from?
+        d = defer.maybeDeferred(self._get_all_shareholders)  # the _get_*/_got_*/_done methods are not defined in this class
+        d.addCallback(self._got_all_shareholders)
+        # now get the uri_extension block from somebody and validate it
+        d.addCallback(self._obtain_uri_extension)
+        d.addCallback(self._got_uri_extension)
+        d.addCallback(self._get_hashtrees)
+        d.addCallback(self._create_validated_buckets)
+        # once we know that, we can download blocks from everybody
+        d.addCallback(self._download_all_segments)
+        d.addCallback(self._done)  # presumably FileDownloader._done, which yields the final result -- confirm
+        return d
diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py
index f59cd7e1..fab60167 100644
--- a/src/allmydata/test/test_system.py
+++ b/src/allmydata/test/test_system.py
@@ -295,6 +295,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
         d.addCallback(self._test_control)
         d.addCallback(self._test_cli)
         d.addCallback(self._test_checker)
+        d.addCallback(self._test_verifier)
         return d
     test_vdrive.timeout = 1100
 
@@ -809,3 +810,20 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
         d.addCallback(_done)
         return d
 
+    def _test_verifier(self, res):  # the incoming 'res' from the previous callback is not used
+        vdrive0 = self.clients[0].getServiceNamed("vdrive")  # manifest is built through client 0 ...
+        checker1 = self.clients[1].getServiceNamed("checker")  # ... and verified through client 1
+        d = vdrive0.get_node_at_path("~")
+        d.addCallback(lambda home: home.build_manifest())
+        def _check_all(manifest):  # start one verify() per manifest entry
+            dl = []
+            for si in manifest:
+                dl.append(checker1.verify(si))
+            return deferredutil.DeferredListShouldSucceed(dl)  # presumably fires with the list of results -- confirm
+        d.addCallback(_check_all)
+        def _done(res):  # 'res' here is the value fired by the Deferred returned from _check_all
+            for i in res:
+                self.failUnless(i is True)  # identity check: a merely truthy result fails
+        d.addCallback(_done)
+        return d
+