From: Brian Warner Date: Fri, 8 Jun 2007 04:24:39 +0000 (-0700) Subject: test_encode.py: even more testing of merkle trees, getting fairly comprehensive now X-Git-Tag: allmydata-tahoe-0.3.0~13 X-Git-Url: https://git.rkrishnan.org/architecture.txt?a=commitdiff_plain;h=cabba59fe74c6221e3714dffacbd50b44ca905c1;p=tahoe-lafs%2Ftahoe-lafs.git test_encode.py: even more testing of merkle trees, getting fairly comprehensive now --- diff --git a/src/allmydata/encode.py b/src/allmydata/encode.py index 83126fe5..7879955e 100644 --- a/src/allmydata/encode.py +++ b/src/allmydata/encode.py @@ -122,6 +122,8 @@ class Encoder(object): data['size'] = self.file_size data['segment_size'] = self.segment_size + data['num_segments'] = mathutil.div_ceil(self.file_size, + self.segment_size) data['needed_shares'] = self.required_shares data['total_shares'] = self.num_shares diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py index b39751aa..fd5bfa4d 100644 --- a/src/allmydata/test/test_encode.py +++ b/src/allmydata/test/test_encode.py @@ -1,14 +1,18 @@ -#! /usr/bin/env python from twisted.trial import unittest from twisted.internet import defer from twisted.python.failure import Failure from foolscap import eventual -from allmydata import encode, download -from allmydata.util import bencode +from allmydata import encode, download, hashtree +from allmydata.util import hashutil from allmydata.uri import pack_uri +from allmydata.Crypto.Cipher import AES +import sha from cStringIO import StringIO +def netstring(s): + return "%d:%s," % (len(s), s) + class FakePeer: def __init__(self, mode="good"): self.ss = FakeStorageServer(mode) @@ -44,6 +48,9 @@ class FakeStorageServer: class LostPeerError(Exception): pass +def flip_bit(good): # flips the last bit + return good[:-1] + chr(ord(good[-1]) ^ 0x01) + class FakeBucketWriter: # these are used for both reading and writing def __init__(self, mode="good"): @@ -96,41 +103,38 @@ class FakeBucketWriter: assert not self.closed self.closed = True - def flip_bit(self, good): # flips the last bit - return good[:-1] + chr(ord(good[-1]) ^ 0x01) - def get_block(self, blocknum): assert isinstance(blocknum, (int, long)) if self.mode == "bad block": - return self.flip_bit(self.blocks[blocknum]) + return flip_bit(self.blocks[blocknum]) return self.blocks[blocknum] def get_plaintext_hashes(self): hashes = self.plaintext_hashes[:] if self.mode == "bad plaintext hashroot": - hashes[0] = self.flip_bit(hashes[0]) + hashes[0] = flip_bit(hashes[0]) if self.mode == "bad plaintext hash": - hashes[1] = self.flip_bit(hashes[1]) + hashes[1] = flip_bit(hashes[1]) return hashes def get_crypttext_hashes(self): hashes = self.crypttext_hashes[:] if self.mode == "bad crypttext hashroot": - hashes[0] = self.flip_bit(hashes[0]) + hashes[0] = flip_bit(hashes[0]) if self.mode == "bad crypttext hash": - hashes[1] = self.flip_bit(hashes[1]) + hashes[1] = flip_bit(hashes[1]) return hashes def get_block_hashes(self): if self.mode == "bad blockhash": hashes = self.block_hashes[:] - hashes[1] = self.flip_bit(hashes[1]) + hashes[1] = flip_bit(hashes[1]) return hashes return self.block_hashes def get_share_hashes(self): if self.mode == "bad sharehash": hashes = self.share_hashes[:] - hashes[1] = (hashes[1][0], self.flip_bit(hashes[1][1])) + hashes[1] = (hashes[1][0], flip_bit(hashes[1][1])) return hashes if self.mode == "missing sharehash": # one sneaky attack would be to pretend we don't know our own @@ -141,7 +145,7 @@ class FakeBucketWriter: def get_thingA(self): if self.mode == "bad thingA": - return self.flip_bit(self.thingA) + return flip_bit(self.thingA) return self.thingA @@ -266,12 +270,7 @@ class Roundtrip(unittest.TestCase): d = self.send(k_and_happy_and_n, AVAILABLE_SHARES, max_segment_size, bucket_modes, data) # that fires with (thingA_hash, e, shareholders) - if recover_mode == "recover": - d.addCallback(self.recover, AVAILABLE_SHARES) - elif recover_mode == "thingA": - d.addCallback(self.recover_with_thingA, AVAILABLE_SHARES) - else: - raise RuntimeError, "unknown recover_mode '%s'" % recover_mode + d.addCallback(self.recover, AVAILABLE_SHARES, recover_mode) # that fires with newdata def _downloaded((newdata, fd)): self.failUnless(newdata == data) @@ -301,8 +300,15 @@ class Roundtrip(unittest.TestCase): peer = FakeBucketWriter(mode) shareholders[shnum] = peer e.set_shareholders(shareholders) - e.set_thingA_data({'verifierid': "V" * 20, - 'fileid': "F" * 20, + fileid_hasher = sha.new(netstring("allmydata_fileid_v1")) + fileid_hasher.update(data) + cryptor = AES.new(key=nonkey, mode=AES.MODE_CTR, + counterstart="\x00"*16) + verifierid_hasher = sha.new(netstring("allmydata_verifierid_v1")) + verifierid_hasher.update(cryptor.encrypt(data)) + + e.set_thingA_data({'verifierid': verifierid_hasher.digest(), + 'fileid': fileid_hasher.digest(), }) d = e.start() def _sent(thingA_hash): @@ -310,60 +316,14 @@ class Roundtrip(unittest.TestCase): d.addCallback(_sent) return d - def recover(self, (thingA_hash, e, shareholders), AVAILABLE_SHARES): - URI = pack_uri(storage_index="S" * 20, - key=e.key, - thingA_hash=thingA_hash, - needed_shares=e.required_shares, - total_shares=e.num_shares, - size=e.file_size) - client = None - target = download.Data() - fd = download.FileDownloader(client, URI, target) + def recover(self, (thingA_hash, e, shareholders), AVAILABLE_SHARES, + recover_mode): + key = e.key + if "corrupt_key" in recover_mode: + key = flip_bit(key) - # we manually cycle the FileDownloader through a number of steps that - # would normally be sequenced by a Deferred chain in - # FileDownloader.start(), to give us more control over the process. - # In particular, by bypassing _get_all_shareholders, we skip - # permuted-peerlist selection. - for shnum, bucket in shareholders.items(): - if shnum < AVAILABLE_SHARES and bucket.closed: - fd.add_share_bucket(shnum, bucket) - fd._got_all_shareholders(None) - - # grab a copy of thingA from one of the shareholders - thingA = shareholders[0].thingA - thingA_data = bencode.bdecode(thingA) - NOTthingA = {'codec_name': e._codec.get_encoder_type(), - 'codec_params': e._codec.get_serialized_params(), - 'tail_codec_params': e._tail_codec.get_serialized_params(), - 'verifierid': "V" * 20, - 'fileid': "F" * 20, - #'share_root_hash': roothash, - 'segment_size': e.segment_size, - 'needed_shares': e.required_shares, - 'total_shares': e.num_shares, - } - fd._got_thingA(thingA_data) - # we skip _get_hashtrees here, and the lack of hashtree attributes - # will cause the download.Output object to skip the - # plaintext/crypttext merkle tree checks. We instruct the downloader - # to skip the full-file checks as well. - fd.check_verifierid = False - fd.check_fileid = False - - fd._create_validated_buckets(None) - d = fd._download_all_segments(None) - d.addCallback(fd._done) - def _done(newdata): - return (newdata, fd) - d.addCallback(_done) - return d - - def recover_with_thingA(self, (thingA_hash, e, shareholders), - AVAILABLE_SHARES): URI = pack_uri(storage_index="S" * 20, - key=e.key, + key=key, thingA_hash=thingA_hash, needed_shares=e.required_shares, total_shares=e.num_shares, @@ -382,21 +342,39 @@ class Roundtrip(unittest.TestCase): fd.add_share_bucket(shnum, bucket) fd._got_all_shareholders(None) - # ask shareholders for thingA as usual, validating the responses. - # Arrange for shareholders[0] to be the first, so we can selectively - # corrupt the data it returns. + # Make it possible to obtain thingA from the shareholders. Arrange + # for shareholders[0] to be the first, so we can selectively corrupt + # the data it returns. fd._thingA_sources = shareholders.values() fd._thingA_sources.remove(shareholders[0]) fd._thingA_sources.insert(0, shareholders[0]) - # the thingA block contains plaintext/crypttext hash trees, but does - # not have a fileid or verifierid, so we have to disable those checks - fd.check_verifierid = False - fd.check_fileid = False - d = fd._obtain_thingA(None) + d = defer.succeed(None) + + # have the FileDownloader retrieve a copy of thingA itself + d.addCallback(fd._obtain_thingA) + + if "corrupt_crypttext_hashes" in recover_mode: + # replace everybody's crypttext hash trees with a different one + # (computed over a different file), then modify our thingA to + # reflect the new crypttext hash tree root + def _corrupt_crypttext_hashes(thingA): + assert isinstance(thingA, dict) + assert 'crypttext_root_hash' in thingA + badhash = hashutil.tagged_hash("bogus", "data") + bad_crypttext_hashes = [badhash] * thingA['num_segments'] + badtree = hashtree.HashTree(bad_crypttext_hashes) + for bucket in shareholders.values(): + bucket.crypttext_hashes = list(badtree) + thingA['crypttext_root_hash'] = badtree[0] + return thingA + d.addCallback(_corrupt_crypttext_hashes) + d.addCallback(fd._got_thingA) + # also have the FileDownloader ask for hash trees d.addCallback(fd._get_hashtrees) + d.addCallback(fd._create_validated_buckets) d.addCallback(fd._download_all_segments) d.addCallback(fd._done) @@ -505,12 +483,11 @@ class Roundtrip(unittest.TestCase): expected[where] += 1 self.failUnlessEqual(fd._fetch_failures, expected) - def test_good_thingA(self): - # exercise recover_mode="thingA", just to make sure the test works - modemap = dict([(i, "good") for i in range(1)] + - [(i, "good") for i in range(1, 10)]) - d = self.send_and_recover((4,8,10), bucket_modes=modemap, - recover_mode="thingA") + def test_good(self): + # just to make sure the test harness works when we aren't + # intentionally causing failures + modemap = dict([(i, "good") for i in range(0, 10)]) + d = self.send_and_recover((4,8,10), bucket_modes=modemap) d.addCallback(self.assertFetchFailureIn, None) return d @@ -519,8 +496,7 @@ class Roundtrip(unittest.TestCase): # different server. modemap = dict([(i, "bad thingA") for i in range(1)] + [(i, "good") for i in range(1, 10)]) - d = self.send_and_recover((4,8,10), bucket_modes=modemap, - recover_mode="thingA") + d = self.send_and_recover((4,8,10), bucket_modes=modemap) d.addCallback(self.assertFetchFailureIn, "thingA") return d @@ -529,8 +505,7 @@ class Roundtrip(unittest.TestCase): # to a different server. modemap = dict([(i, "bad plaintext hashroot") for i in range(1)] + [(i, "good") for i in range(1, 10)]) - d = self.send_and_recover((4,8,10), bucket_modes=modemap, - recover_mode="thingA") + d = self.send_and_recover((4,8,10), bucket_modes=modemap) d.addCallback(self.assertFetchFailureIn, "plaintext_hashroot") return d @@ -539,8 +514,7 @@ class Roundtrip(unittest.TestCase): # over to a different server. modemap = dict([(i, "bad crypttext hashroot") for i in range(1)] + [(i, "good") for i in range(1, 10)]) - d = self.send_and_recover((4,8,10), bucket_modes=modemap, - recover_mode="thingA") + d = self.send_and_recover((4,8,10), bucket_modes=modemap) d.addCallback(self.assertFetchFailureIn, "crypttext_hashroot") return d @@ -549,8 +523,7 @@ class Roundtrip(unittest.TestCase): # over to a different server. modemap = dict([(i, "bad plaintext hash") for i in range(1)] + [(i, "good") for i in range(1, 10)]) - d = self.send_and_recover((4,8,10), bucket_modes=modemap, - recover_mode="thingA") + d = self.send_and_recover((4,8,10), bucket_modes=modemap) d.addCallback(self.assertFetchFailureIn, "plaintext_hashtree") return d @@ -559,11 +532,39 @@ class Roundtrip(unittest.TestCase): # over to a different server. modemap = dict([(i, "bad crypttext hash") for i in range(1)] + [(i, "good") for i in range(1, 10)]) - d = self.send_and_recover((4,8,10), bucket_modes=modemap, - recover_mode="thingA") + d = self.send_and_recover((4,8,10), bucket_modes=modemap) d.addCallback(self.assertFetchFailureIn, "crypttext_hashtree") return d + def test_bad_crypttext_hashes_failure(self): + # to test that the crypttext merkle tree is really being applied, we + # sneak into the download process and corrupt two things: we replace + # everybody's crypttext hashtree with a bad version (computed over + # bogus data), and we modify the supposedly-validated thingA block to + # match the new crypttext hashtree root. The download process should + # notice that the crypttext coming out of FEC doesn't match the tree, + # and fail. + + modemap = dict([(i, "good") for i in range(0, 10)]) + d = self.send_and_recover((4,8,10), bucket_modes=modemap, + recover_mode=("corrupt_crypttext_hashes")) + def _done(res): + self.failUnless(isinstance(res, Failure)) + self.failUnless(res.check(hashtree.BadHashError), res) + d.addBoth(_done) + return d + + + def test_bad_plaintext(self): + # faking a decryption failure is easier: just corrupt the key + modemap = dict([(i, "good") for i in range(0, 10)]) + d = self.send_and_recover((4,8,10), bucket_modes=modemap, + recover_mode=("corrupt_key")) + def _done(res): + self.failUnless(isinstance(res, Failure)) + self.failUnless(res.check(hashtree.BadHashError)) + d.addBoth(_done) + return d def test_bad_sharehashes_failure(self): # the first 7 servers have bad block hashes, so the sharehash tree