From: Zooko O'Whielacronx Date: Tue, 14 Oct 2008 23:09:20 +0000 (-0700) Subject: repairer: test all different kinds of corruption that can happen to share files on... X-Git-Url: https://git.rkrishnan.org/pf/content/en/service/simplejson/?a=commitdiff_plain;h=8a6d1e5da601c4452f57f13bf3eeb2fd0560be82;p=tahoe-lafs%2Ftahoe-lafs.git repairer: test all different kinds of corruption that can happen to share files on disk --- diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index 6e38e2d0..2da722be 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -9,9 +9,9 @@ from allmydata.util.assertutil import _assert, precondition """ -Share data is written into a single file. At the start of the file, there is -a series of four-byte big-endian offset values, which indicate where each -section starts. Each offset is measured from the beginning of the file. +Share data is written in a file. At the start of the file, there is a series of four-byte +big-endian offset values, which indicate where each section starts. Each offset is measured from +the beginning of the share data. 0x00: version number (=00 00 00 01) 0x04: segment size diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index 9d6ddd48..86efd1f6 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -34,6 +34,7 @@ class RIStubClient(RemoteInterface): RemoteInterface for the StubClient.""" class RIBucketWriter(RemoteInterface): + """ Objects of this kind live on the server side. """ def write(offset=Offset, data=ShareData): return None @@ -230,6 +231,9 @@ class RIStorageServer(RemoteInterface): return TupleOf(bool, DictOf(int, ReadData)) class IStorageBucketWriter(Interface): + """ + Objects of this kind live on the client side. + """ def put_block(segmentnum=int, data=ShareData): """@param data: For most segments, this data will be 'blocksize' bytes in length. The last segment might be shorter. diff --git a/src/allmydata/storage.py b/src/allmydata/storage.py index f86dc3bf..59d671da 100644 --- a/src/allmydata/storage.py +++ b/src/allmydata/storage.py @@ -353,8 +353,8 @@ class BucketReader(Referenceable): # 9 ?? n*92 extra leases -assert struct.calcsize("L"), 4 -assert struct.calcsize("Q"), 8 +assert struct.calcsize("L"), 4 # The struct module doc says that L's are 4 bytes in size. +assert struct.calcsize("Q"), 8 # The struct module doc says that Q's are 8 bytes in size (at least with big-endian ordering). class MutableShareFile: @@ -1205,6 +1205,3 @@ class StorageServer(service.MultiService, Referenceable): facility="tahoe.storage", level=log.NOISY, parent=lp) self.add_latency("readv", time.time() - start) return datavs - - -# the code before here runs on the storage server, not the client diff --git a/src/allmydata/test/test_immutable_checker.py b/src/allmydata/test/test_immutable_checker.py index 6093d59e..194e17bd 100644 --- a/src/allmydata/test/test_immutable_checker.py +++ b/src/allmydata/test/test_immutable_checker.py @@ -8,6 +8,183 @@ import random, struct TEST_DATA="\x02"*(upload.Uploader.URI_LIT_SIZE_THRESHOLD+1) +def corrupt_field(data, offset, size): + if random.random() < 0.5: + return testutil.flip_one_bit(data, offset, size) + else: + return data[:offset]+testutil.insecurerandstr(size)+data[offset+size:] + +def _corrupt_file_version_number(data): + """ Scramble the file data -- the share file version number have one bit flipped or else + will be changed to a random value.""" + return corrupt_field(data, 0x00, 4) + +def _corrupt_size_of_file_data(data): + """ Scramble the file data -- the field showing the size of the share data within the + file will have one bit flipped or else will be changed to a random value. """ + return corrupt_field(data, 0x04, 4) + +def _corrupt_sharedata_version_number(data): + """ Scramble the file data -- the share data version number will have one bit flipped or + else will be changed to a random value.""" + return corrupt_field(data, 0x0c, 4) + +def _corrupt_segment_size(data): + """ Scramble the file data -- the field showing the size of the segment will have one + bit flipped or else be changed to a random value. """ + sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0] + assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." + if sharevernum == 1: + return corrupt_field(data, 0x0c+0x04, 4) + else: + return corrupt_field(data, 0x0c+0x04, 8) + +def _corrupt_size_of_sharedata(data): + """ Scramble the file data -- the field showing the size of the data within the share + data will have one bit flipped or else will be changed to a random value. """ + sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0] + assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." + if sharevernum == 1: + return corrupt_field(data, 0x0c+0x08, 4) + else: + return corrupt_field(data, 0x0c+0x0c, 8) + +def _corrupt_offset_of_sharedata(data): + """ Scramble the file data -- the field showing the offset of the data within the share + data will have one bit flipped or else be changed to a random value. """ + sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0] + assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." + if sharevernum == 1: + return corrupt_field(data, 0x0c+0x0c, 4) + else: + return corrupt_field(data, 0x0c+0x14, 8) + +def _corrupt_offset_of_ciphertext_hash_tree(data): + """ Scramble the file data -- the field showing the offset of the ciphertext hash tree + within the share data will have one bit flipped or else be changed to a random value. + """ + sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0] + assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." + if sharevernum == 1: + return corrupt_field(data, 0x0c+0x14, 4) + else: + return corrupt_field(data, 0x0c+0x24, 8) + +def _corrupt_offset_of_block_hashes(data): + """ Scramble the file data -- the field showing the offset of the block hash tree within + the share data will have one bit flipped or else will be changed to a random value. """ + sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0] + assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." + if sharevernum == 1: + return corrupt_field(data, 0x0c+0x18, 4) + else: + return corrupt_field(data, 0x0c+0x2c, 8) + +def _corrupt_offset_of_share_hashes(data): + """ Scramble the file data -- the field showing the offset of the share hash tree within + the share data will have one bit flipped or else will be changed to a random value. """ + sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0] + assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." + if sharevernum == 1: + return corrupt_field(data, 0x0c+0x1c, 4) + else: + return corrupt_field(data, 0x0c+0x34, 8) + +def _corrupt_offset_of_uri_extension(data): + """ Scramble the file data -- the field showing the offset of the uri extension will + have one bit flipped or else will be changed to a random value. """ + sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0] + assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." + if sharevernum == 1: + return corrupt_field(data, 0x0c+0x20, 4) + else: + return corrupt_field(data, 0x0c+0x3c, 8) + +def _corrupt_share_data(data): + """ Scramble the file data -- the field containing the share data itself will have one + bit flipped or else will be changed to a random value. """ + sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0] + assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." + if sharevernum == 1: + sharedatasize = struct.unpack(">L", data[0x0c+0x08:0x0c+0x08+4])[0] + + return corrupt_field(data, 0x0c+0x24, sharedatasize) + else: + sharedatasize = struct.unpack(">Q", data[0x0c+0x08:0x0c+0x0c+8])[0] + + return corrupt_field(data, 0x0c+0x44, sharedatasize) + +def _corrupt_crypttext_hash_tree(data): + """ Scramble the file data -- the field containing the crypttext hash tree will have one + bit flipped or else will be changed to a random value. + """ + sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0] + assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." + if sharevernum == 1: + crypttexthashtreeoffset = struct.unpack(">L", data[0x0c+0x14:0x0c+0x14+4])[0] + blockhashesoffset = struct.unpack(">L", data[0x0c+0x18:0x0c+0x18+4])[0] + else: + crypttexthashtreeoffset = struct.unpack(">Q", data[0x0c+0x24:0x0c+0x24+8])[0] + blockhashesoffset = struct.unpack(">Q", data[0x0c+0x2c:0x0c+0x2c+8])[0] + + return corrupt_field(data, crypttexthashtreeoffset, blockhashesoffset-crypttexthashtreeoffset) + +def _corrupt_block_hashes(data): + """ Scramble the file data -- the field containing the block hash tree will have one bit + flipped or else will be changed to a random value. + """ + sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0] + assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." + if sharevernum == 1: + blockhashesoffset = struct.unpack(">L", data[0x0c+0x18:0x0c+0x18+4])[0] + sharehashesoffset = struct.unpack(">L", data[0x0c+0x1c:0x0c+0x1c+4])[0] + else: + blockhashesoffset = struct.unpack(">Q", data[0x0c+0x2c:0x0c+0x2c+8])[0] + sharehashesoffset = struct.unpack(">Q", data[0x0c+0x34:0x0c+0x34+8])[0] + + return corrupt_field(data, blockhashesoffset, sharehashesoffset-blockhashesoffset) + +def _corrupt_share_hashes(data): + """ Scramble the file data -- the field containing the share hash chain will have one + bit flipped or else will be changed to a random value. + """ + sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0] + assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." + if sharevernum == 1: + sharehashesoffset = struct.unpack(">L", data[0x0c+0x1c:0x0c+0x1c+4])[0] + uriextoffset = struct.unpack(">L", data[0x0c+0x20:0x0c+0x20+4])[0] + else: + sharehashesoffset = struct.unpack(">Q", data[0x0c+0x34:0x0c+0x34+8])[0] + uriextoffset = struct.unpack(">Q", data[0x0c+0x3c:0x0c+0x3c+8])[0] + + return corrupt_field(data, sharehashesoffset, uriextoffset-sharehashesoffset) + +def _corrupt_length_of_uri_extension(data): + """ Scramble the file data -- the field showing the length of the uri extension will + have one bit flipped or else will be changed to a random value. """ + sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0] + assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." + if sharevernum == 1: + uriextoffset = struct.unpack(">L", data[0x0c+0x20:0x0c+0x20+4])[0] + return corrupt_field(data, uriextoffset, 4) + else: + uriextoffset = struct.unpack(">Q", data[0x0c+0x3c:0x0c+0x3c+8])[0] + return corrupt_field(data, uriextoffset, 8) + +def _corrupt_uri_extension(data): + """ Scramble the file data -- the field containing the uri extension will have one bit + flipped or else will be changed to a random value. """ + sharevernum = struct.unpack(">l", data[0x0c:0x0c+4])[0] + assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." + if sharevernum == 1: + uriextoffset = struct.unpack(">L", data[0x0c+0x20:0x0c+0x20+4])[0] + uriextlen = struct.unpack(">L", data[0x0c+uriextoffset:0x0c+uriextoffset+4])[0] + else: + uriextoffset = struct.unpack(">Q", data[0x0c+0x3c:0x0c+0x3c+8])[0] + uriextlen = struct.unpack(">Q", data[0x0c+uriextoffset:0x0c+uriextoffset+8])[0] + + return corrupt_field(data, uriextoffset, uriextlen) + class Test(ShareManglingMixin, unittest.TestCase): def setUp(self): # Set self.basedir to a temp dir which has the name of the current test method in its @@ -46,7 +223,7 @@ class Test(ShareManglingMixin, unittest.TestCase): shares = self.find_shares() ks = shares.keys() if sharenum is not None: - k = [ key for key in shares.keys() if key[1] == sharenum][0] + k = [ key for key in shares.keys() if key[1] == sharenum ][0] else: k = random.choice(ks) del shares[k] @@ -54,26 +231,6 @@ class Test(ShareManglingMixin, unittest.TestCase): return unused - def _corrupt_a_share(self, unused=None): - """ Exactly one bit of exactly one share on disk will be flipped (randomly selected from - among the bits of the 'share data' -- the verifiable bits).""" - - shares = self.find_shares() - ks = shares.keys() - k = random.choice(ks) - data = shares[k] - - (version, size, num_leases) = struct.unpack(">LLL", data[:0xc]) - sharedata = data[0xc:0xc+size] - - corruptedsharedata = testutil.flip_one_bit(sharedata) - corrupteddata = data[:0xc]+corruptedsharedata+data[0xc+size:] - shares[k] = corrupteddata - - self.replace_shares(shares, storage_index=self.uri.storage_index) - - return unused - def test_test_code(self): # The following process of stashing the shares, running # replace_shares, and asserting that the new set of shares equals the @@ -84,7 +241,6 @@ class Test(ShareManglingMixin, unittest.TestCase): def _stash_it(res): stash[0] = res return res - d.addCallback(_stash_it) d.addCallback(self.replace_shares, storage_index=self.uri.storage_index) @@ -149,11 +305,20 @@ class Test(ShareManglingMixin, unittest.TestCase): sum_of_allocate_counts += counters.get('storage_server.allocate', 0) return sum_of_allocate_counts + def _corrupt_a_random_share(self, unused, corruptor_func): + """ Exactly one share on disk will be corrupted by corruptor_func. """ + shares = self.find_shares() + ks = shares.keys() + k = random.choice(ks) + + shares[k] = corruptor_func(shares[k]) + + self.replace_shares(shares, storage_index=self.uri.storage_index) + def test_check_without_verify(self): - """ Check says the file is healthy when none of the shares have been - touched. It says that the file is unhealthy when all of them have - been removed. It says that the file is healthy if one bit of one share - has been flipped.""" + """ Check says the file is healthy when none of the shares have been touched. It says + that the file is unhealthy when all of them have been removed. It doesn't use any reads. + """ d = defer.succeed(self.filenode) def _check1(filenode): before_check_reads = self._count_reads() @@ -168,22 +333,8 @@ class Test(ShareManglingMixin, unittest.TestCase): return d2 d.addCallback(_check1) - d.addCallback(self._corrupt_a_share) - def _check2(ignored): - before_check_reads = self._count_reads() - d2 = self.filenode.check(verify=False) - - def _after_check(checkresults): - after_check_reads = self._count_reads() - self.failIf(after_check_reads - before_check_reads > 0, after_check_reads - before_check_reads) - - d2.addCallback(_after_check) - return d2 - d.addCallback(_check2) - return d - d.addCallback(lambda ignore: self.replace_shares({}, storage_index=self.uri.storage_index)) - def _check3(ignored): + def _check2(ignored): before_check_reads = self._count_reads() d2 = self.filenode.check(verify=False) @@ -194,13 +345,14 @@ class Test(ShareManglingMixin, unittest.TestCase): d2.addCallback(_after_check) return d2 - d.addCallback(_check3) + d.addCallback(_check2) return d def test_check_with_verify(self): """ Check says the file is healthy when none of the shares have been touched. It says - that the file is unhealthy if one bit of one share has been flipped.""" + that the file is unhealthy if any field of any share has been corrupted. It doesn't use + more than twice as many reads as it needs. """ # N == 10. 2 is the "efficiency leeway" -- we'll allow you to pass this test even if # you trigger twice as many disk reads and blocks sends as would be optimal. DELTA_READS = 10 * 2 @@ -219,7 +371,13 @@ class Test(ShareManglingMixin, unittest.TestCase): return d2 d.addCallback(_check1) - d.addCallback(self._corrupt_a_share) + d.addCallback(self.find_shares) + stash = [None] + def _stash_it(res): + stash[0] = res + return res + d.addCallback(_stash_it) + def _check2(ignored): before_check_reads = self._count_reads() d2 = self.filenode.check(verify=True) @@ -232,7 +390,32 @@ class Test(ShareManglingMixin, unittest.TestCase): d2.addCallback(_after_check) return d2 - d.addCallback(_check2) + + def _put_it_all_back(ignored): + self.replace_shares(stash[0], storage_index=self.uri.storage_index) + return ignored + + for corruptor_func in ( + _corrupt_file_version_number, + _corrupt_size_of_file_data, + _corrupt_sharedata_version_number, + _corrupt_segment_size, + _corrupt_size_of_sharedata, + _corrupt_offset_of_sharedata, + _corrupt_offset_of_ciphertext_hash_tree, + _corrupt_offset_of_block_hashes, + _corrupt_offset_of_share_hashes, + _corrupt_offset_of_uri_extension, + _corrupt_share_data, + _corrupt_crypttext_hash_tree, + _corrupt_block_hashes, + _corrupt_share_hashes, + _corrupt_length_of_uri_extension, + _corrupt_uri_extension, + ): + d.addCallback(self._corrupt_a_random_share, corruptor_func) + d.addCallback(_check2) + d.addCallback(_put_it_all_back) return d test_check_with_verify.todo = "We haven't implemented a verifier this thorough yet." @@ -309,9 +492,6 @@ class Test(ShareManglingMixin, unittest.TestCase): return d2 d.addCallback(_repair_from_deletion_of_7) - # Now we corrupt a share... - d.addCallback(self._corrupt_a_share) - def _repair_from_corruption(filenode): before_repair_reads = self._count_reads() before_repair_allocates = self._count_allocates() @@ -333,7 +513,29 @@ class Test(ShareManglingMixin, unittest.TestCase): d2.addCallback(_after_repair) return d2 - d.addCallback(_repair_from_corruption) + + for corruptor_func in ( + _corrupt_file_version_number, + _corrupt_size_of_file_data, + _corrupt_sharedata_version_number, + _corrupt_segment_size, + _corrupt_size_of_sharedata, + _corrupt_offset_of_sharedata, + _corrupt_offset_of_ciphertext_hash_tree, + _corrupt_offset_of_block_hashes, + _corrupt_offset_of_share_hashes, + _corrupt_offset_of_uri_extension, + _corrupt_share_data, + _corrupt_crypttext_hash_tree, + _corrupt_block_hashes, + _corrupt_share_hashes, + _corrupt_length_of_uri_extension, + _corrupt_uri_extension, + ): + # Now we corrupt a share... + d.addCallback(self._corrupt_a_random_share, corruptor_func) + # And repair... + d.addCallback(_repair_from_corruption) return d test_repair.todo = "We haven't implemented a repairer yet." diff --git a/src/allmydata/util/testutil.py b/src/allmydata/util/testutil.py index 7ca6f4c3..27e4f069 100644 --- a/src/allmydata/util/testutil.py +++ b/src/allmydata/util/testutil.py @@ -4,6 +4,8 @@ from random import randrange from twisted.internet import reactor, defer, task from twisted.python import failure +def insecurerandstr(n): + return ''.join(map(chr, map(randrange, [0]*n, [256]*n))) def flip_bit(good, which): # flip the low-order bit of good[which] @@ -13,9 +15,12 @@ def flip_bit(good, which): pieces = good[:which], good[which:which+1], good[which+1:] return pieces[0] + chr(ord(pieces[1]) ^ 0x01) + pieces[2] -def flip_one_bit(s): - # flip one random bit of the string s - i = randrange(0, len(s)) +def flip_one_bit(s, offset=0, size=None): + """ flip one random bit of the string s, in a byte greater than or equal to offset and less + than offset+size. """ + if size is None: + size=len(s) + i = randrange(offset, offset+size) result = s[:i] + chr(ord(s[i])^(0x01<