From: Brian Warner <warner@lothar.com>
Date: Sun, 10 Jan 2010 20:35:52 +0000 (-0800)
Subject: test_repairer: add (commented-out) test_each_byte, to see exactly what the
X-Git-Url: https://git.rkrishnan.org/listings/reliability?a=commitdiff_plain;h=1ed5bbdcb6908f0ac9196db347b9f1733f396730;p=tahoe-lafs%2Ftahoe-lafs.git

test_repairer: add (commented-out) test_each_byte, to see exactly what the
Verifier misses

The results (described in #819) match our expectations: it misses corruption
in unused share fields and in most container fields (which are only visible
to the storage server, not the client). 1265 bytes of a 2753 byte
share (hosting a 56-byte file with an artificially small segment size) are
unused, mostly in the unused tail of the overallocated UEB space (765 bytes),
and the allocated-but-unwritten plaintext_hash_tree (480 bytes).
---

diff --git a/src/allmydata/test/test_repairer.py b/src/allmydata/test/test_repairer.py
index f21191d9..7e928d59 100644
--- a/src/allmydata/test/test_repairer.py
+++ b/src/allmydata/test/test_repairer.py
@@ -303,6 +303,60 @@ class Verifier(GridTestMixin, unittest.TestCase, RepairTestMixin):
     # crypttext-hash-tree but then the ciphertext should show up as invalid.
     # Normally this could only be triggered by a bug in FEC decode.
 
+    def OFF_test_each_byte(self):
+        # This test takes 140s to run on my laptop, and doesn't have any
+        # actual asserts, so it's disabled (by the OFF_ name prefix, rather
+        # than literally commented out). It flips the low bit of each byte of
+        # share 0 in turn, and records which corruptions the Verifier catches
+        # and which it misses. Ticket #819 contains details: there are
+        # several portions of the share that are unused, for which corruption
+        # is not supposed to be caught.
+        #
+        # If the test ran quickly, we could use the share size to compute the
+        # offsets of these unused portions and assert that everything outside
+        # of them was detected. We could then replace the rest of
+        # Verifier.test_* (which takes 16s to run on my laptop) with this one.
+        self.basedir = "repairer/Verifier/each_byte"
+        self.set_up_grid(num_clients=2)
+        d = self.upload_and_stash()
+        def _grab_sh0(res):  # remember share 0's path and its pristine bytes
+            self.sh0_file = [sharefile
+                             for (shnum, serverid, sharefile)
+                             in self.find_shares(self.uri)
+                             if shnum == 0][0]
+            self.sh0_orig = open(self.sh0_file, "rb").read()
+        d.addCallback(_grab_sh0)
+        def _fix_sh0(res):  # write the pristine bytes back over share 0
+            f = open(self.sh0_file, "wb")
+            f.write(self.sh0_orig)
+            f.close()
+        def _corrupt(ign, which):  # flip the low bit of byte number 'which'
+            def _corruptor(s, debug=False):
+                return s[:which] + chr(ord(s[which])^0x01) + s[which+1:]
+            self.corrupt_shares_numbered(self.uri, [0], _corruptor)
+        results = {}  # maps byte offset -> vr.is_healthy() for that corruption
+        def _did_check(vr, i):
+            #print "corrupt %d: healthy=%s" % (i, vr.is_healthy())
+            results[i] = vr.is_healthy()
+        def _start(ign):  # queue corrupt/verify/record/restore for each offset
+            d = defer.succeed(None)
+            for i in range(len(self.sh0_orig)):
+                d.addCallback(_corrupt, i)
+                d.addCallback(lambda ign:
+                              self.c1_filenode.check(Monitor(), verify=True))
+                d.addCallback(_did_check, i)
+                d.addCallback(_fix_sh0)
+            return d
+        d.addCallback(_start)
+        def _show_results(ign):  # write one "offset: healthy" line per byte
+            f = open("test_each_byte_output", "w")
+            for i in sorted(results.keys()):
+                print >>f, "%d: %s" % (i, results[i])
+            f.close()
+            print "Please look in _trial_temp/test_each_byte_output for results"
+        d.addCallback(_show_results)
+        return d
+
 # We'll allow you to pass this test even if you trigger thirty-five times as
 # many block sends and disk writes as would be optimal.
 WRITE_LEEWAY = 35