1 # -*- test-case-name: allmydata.test.test_hashtree -*-
3 from twisted.trial import unittest
5 from allmydata.util.hashutil import tagged_hash
6 from allmydata import hashtree
9 def make_tree(numleaves):
10 leaves = ["%d" % i for i in range(numleaves)]
11 leaf_hashes = [tagged_hash("tag", leaf) for leaf in leaves]
12 ht = hashtree.HashTree(leaf_hashes)
15 class Complete(unittest.TestCase):
16 def test_create(self):
17 # try out various sizes, since we pad to a power of two
22 self.failUnlessEqual(len(root), 32)
23 self.failUnlessEqual(ht.get_leaf(0), tagged_hash("tag", "0"))
24 self.failUnlessRaises(IndexError, ht.get_leaf, 8)
25 self.failUnlessEqual(ht.get_leaf_index(0), 7)
26 self.failUnlessRaises(IndexError, ht.parent, 0)
27 self.failUnlessRaises(IndexError, ht.needed_for, -1)
29 def test_needed_hashes(self):
31 self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2]))
32 self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2]))
33 self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2]))
34 self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1]))
35 self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1]))
36 self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1]))
41 (1,1), (3,2), (7,3), (8,3), (4,2), (9,3), (10,3),
42 (2,1), (5,2), (11,3), (12,3), (6,2), (13,3), (14,3),
44 self.failUnlessEqual(list(ht.depth_first()), expected)
47 self.failUnless("\n 0:" in d)
48 self.failUnless("\n 1:" in d)
49 self.failUnless("\n 3:" in d)
50 self.failUnless("\n 7:" in d)
51 self.failUnless("\n 8:" in d)
52 self.failUnless("\n 4:" in d)
54 class Incomplete(unittest.TestCase):
56 def test_create(self):
57 ht = hashtree.IncompleteHashTree(6)
58 ht = hashtree.IncompleteHashTree(9)
59 ht = hashtree.IncompleteHashTree(8)
60 self.failUnlessEqual(ht[0], None)
61 self.failUnlessEqual(ht.get_leaf(0), None)
62 self.failUnlessRaises(IndexError, ht.get_leaf, 8)
63 self.failUnlessEqual(ht.get_leaf_index(0), 7)
65 def test_needed_hashes(self):
66 ht = hashtree.IncompleteHashTree(8)
67 self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2]))
68 self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2]))
69 self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2]))
70 self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1]))
71 self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1]))
72 self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1]))
73 ht = hashtree.IncompleteHashTree(1)
74 self.failUnlessEqual(ht.needed_hashes(0), set([]))
75 ht = hashtree.IncompleteHashTree(6)
76 self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2]))
77 self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2]))
78 self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2]))
79 self.failUnlessEqual(ht.needed_hashes(5), set([11, 6, 1]))
80 self.failUnlessEqual(ht.needed_hashes(5, False), set([11, 6, 1]))
81 self.failUnlessEqual(ht.needed_hashes(5, True), set([12, 11, 6, 1]))
83 def test_depth_of(self):
84 hashtree.IncompleteHashTree(8)
85 self.failUnlessEqual(hashtree.depth_of(0), 0)
87 self.failUnlessEqual(hashtree.depth_of(i), 1, "i=%d"%i)
89 self.failUnlessEqual(hashtree.depth_of(i), 2, "i=%d"%i)
90 for i in [7,8,9,10,11,12,13,14]:
91 self.failUnlessEqual(hashtree.depth_of(i), 3, "i=%d"%i)
94 # IncompleteHashTree.set_hashes() used to take O(N**2). This test is
95 # meant to show that it now takes O(N) or maybe O(N*ln(N)). I wish
96 # there were a good way to assert this (like counting VM operations
97 # or something): the problem was inside list.sort(), so there's no
98 # good way to instrument set_hashes() to count what we care about. On
99 # my laptop, 10k leaves takes 1.1s in this fixed version, and 11.6s
100 # in the old broken version. An 80k-leaf test (corresponding to a
101 # 10GB file with a 128KiB segsize) 10s in the fixed version, and
102 # several hours in the broken version, but 10s on my laptop (plus the
103 # 20s of setup code) probably means 200s on our dapper buildslave,
104 # which is painfully long for a unit test.
105 self.do_test_speed(10000)
107 def do_test_speed(self, SIZE):
108 # on my laptop, SIZE=80k (corresponding to a 10GB file with a 128KiB
110 # 7s to build the (complete) HashTree
111 # 13s to set up the dictionary
112 # 10s to run set_hashes()
114 iht = hashtree.IncompleteHashTree(SIZE)
117 for i in range(SIZE):
118 needed.update(ht.needed_hashes(i, True))
119 all = dict([ (i, ht[i]) for i in needed])
120 iht.set_hashes(hashes=all)
122 def test_check(self):
123 # first create a complete hash tree
125 # then create a corresponding incomplete tree
126 iht = hashtree.IncompleteHashTree(6)
128 # suppose we wanted to validate leaf[0]
129 # leaf[0] is the same as node[7]
130 self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
131 self.failUnlessEqual(iht.needed_hashes(0, True), set([7, 8, 4, 2]))
132 self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
133 iht[0] = ht[0] # set the root
134 self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
135 self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
137 self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
138 self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
141 iht = hashtree.IncompleteHashTree(6)
143 current_hashes = list(iht)
144 # this should fail because there aren't enough hashes known
146 iht.set_hashes(leaves={0: tagged_hash("tag", "0")})
147 except hashtree.NotEnoughHashesError:
150 self.fail("didn't catch not enough hashes")
152 # and the set of hashes stored in the tree should still be the same
153 self.failUnlessEqual(list(iht), current_hashes)
154 # and we should still need the same
155 self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
157 chain = {0: ht[0], 2: ht[2], 4: ht[4], 8: ht[8]}
158 # this should fail because the leaf hash is just plain wrong
160 iht.set_hashes(chain, leaves={0: tagged_hash("bad tag", "0")})
161 except hashtree.BadHashError:
164 self.fail("didn't catch bad hash")
166 # this should fail because we give it conflicting hashes: one as an
167 # internal node, another as a leaf
169 iht.set_hashes(chain, leaves={1: tagged_hash("bad tag", "1")})
170 except hashtree.BadHashError:
173 self.fail("didn't catch bad hash")
175 bad_chain = chain.copy()
176 bad_chain[2] = ht[2] + "BOGUS"
178 # this should fail because the internal hash is wrong
180 iht.set_hashes(bad_chain, leaves={0: tagged_hash("tag", "0")})
181 except hashtree.BadHashError:
184 self.fail("didn't catch bad hash")
186 # this should succeed
188 iht.set_hashes(chain, leaves={0: tagged_hash("tag", "0")})
189 except hashtree.BadHashError, e:
190 self.fail("bad hash: %s" % e)
192 self.failUnlessEqual(ht.get_leaf(0), tagged_hash("tag", "0"))
193 self.failUnlessRaises(IndexError, ht.get_leaf, 8)
195 # this should succeed too
197 iht.set_hashes(leaves={1: tagged_hash("tag", "1")})
198 except hashtree.BadHashError:
199 self.fail("bad hash")
201 # this should fail because we give it hashes that conflict with some
202 # that we added successfully before
204 iht.set_hashes(leaves={1: tagged_hash("bad tag", "1")})
205 except hashtree.BadHashError:
208 self.fail("didn't catch bad hash")
210 # now that leaves 0 and 1 are known, some of the internal nodes are
212 self.failUnlessEqual(iht.needed_hashes(4), set([12, 6]))
213 chain = {6: ht[6], 12: ht[12]}
215 # this should succeed
217 iht.set_hashes(chain, leaves={4: tagged_hash("tag", "4")})
218 except hashtree.BadHashError, e:
219 self.fail("bad hash: %s" % e)