]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/test/test_hashtree.py
Merge pull request #236 from daira/2725.timezone-test.0
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / test / test_hashtree.py
1 # -*- test-case-name: allmydata.test.test_hashtree -*-
2
3 from twisted.trial import unittest
4
5 from allmydata.util.hashutil import tagged_hash
6 from allmydata import hashtree
7
8
9 def make_tree(numleaves):
10     leaves = ["%d" % i for i in range(numleaves)]
11     leaf_hashes = [tagged_hash("tag", leaf) for leaf in leaves]
12     ht = hashtree.HashTree(leaf_hashes)
13     return ht
14
15 class Complete(unittest.TestCase):
16     def test_create(self):
17         # try out various sizes, since we pad to a power of two
18         ht = make_tree(6)
19         ht = make_tree(9)
20         ht = make_tree(8)
21         root = ht[0]
22         self.failUnlessEqual(len(root), 32)
23         self.failUnlessEqual(ht.get_leaf(0), tagged_hash("tag", "0"))
24         self.failUnlessRaises(IndexError, ht.get_leaf, 8)
25         self.failUnlessEqual(ht.get_leaf_index(0), 7)
26         self.failUnlessRaises(IndexError, ht.parent, 0)
27         self.failUnlessRaises(IndexError, ht.needed_for, -1)
28
29     def test_needed_hashes(self):
30         ht = make_tree(8)
31         self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2]))
32         self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2]))
33         self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2]))
34         self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1]))
35         self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1]))
36         self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1]))
37
38     def test_dump(self):
39         ht = make_tree(6)
40         expected = [(0,0),
41                     (1,1), (3,2), (7,3), (8,3), (4,2), (9,3), (10,3),
42                     (2,1), (5,2), (11,3), (12,3), (6,2), (13,3), (14,3),
43                     ]
44         self.failUnlessEqual(list(ht.depth_first()), expected)
45         d = "\n" + ht.dump()
46         #print d
47         self.failUnless("\n  0:" in d)
48         self.failUnless("\n    1:" in d)
49         self.failUnless("\n      3:" in d)
50         self.failUnless("\n        7:" in d)
51         self.failUnless("\n        8:" in d)
52         self.failUnless("\n      4:" in d)
53
54 class Incomplete(unittest.TestCase):
55
56     def test_create(self):
57         ht = hashtree.IncompleteHashTree(6)
58         ht = hashtree.IncompleteHashTree(9)
59         ht = hashtree.IncompleteHashTree(8)
60         self.failUnlessEqual(ht[0], None)
61         self.failUnlessEqual(ht.get_leaf(0), None)
62         self.failUnlessRaises(IndexError, ht.get_leaf, 8)
63         self.failUnlessEqual(ht.get_leaf_index(0), 7)
64
65     def test_needed_hashes(self):
66         ht = hashtree.IncompleteHashTree(8)
67         self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2]))
68         self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2]))
69         self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2]))
70         self.failUnlessEqual(ht.needed_hashes(7), set([13, 5, 1]))
71         self.failUnlessEqual(ht.needed_hashes(7, False), set([13, 5, 1]))
72         self.failUnlessEqual(ht.needed_hashes(7, True), set([14, 13, 5, 1]))
73         ht = hashtree.IncompleteHashTree(1)
74         self.failUnlessEqual(ht.needed_hashes(0), set([]))
75         ht = hashtree.IncompleteHashTree(6)
76         self.failUnlessEqual(ht.needed_hashes(0), set([8, 4, 2]))
77         self.failUnlessEqual(ht.needed_hashes(0, True), set([7, 8, 4, 2]))
78         self.failUnlessEqual(ht.needed_hashes(1), set([7, 4, 2]))
79         self.failUnlessEqual(ht.needed_hashes(5), set([11, 6, 1]))
80         self.failUnlessEqual(ht.needed_hashes(5, False), set([11, 6, 1]))
81         self.failUnlessEqual(ht.needed_hashes(5, True), set([12, 11, 6, 1]))
82
83     def test_depth_of(self):
84         hashtree.IncompleteHashTree(8)
85         self.failUnlessEqual(hashtree.depth_of(0), 0)
86         for i in [1,2]:
87             self.failUnlessEqual(hashtree.depth_of(i), 1, "i=%d"%i)
88         for i in [3,4,5,6]:
89             self.failUnlessEqual(hashtree.depth_of(i), 2, "i=%d"%i)
90         for i in [7,8,9,10,11,12,13,14]:
91             self.failUnlessEqual(hashtree.depth_of(i), 3, "i=%d"%i)
92
93     def test_large(self):
94         # IncompleteHashTree.set_hashes() used to take O(N**2). This test is
95         # meant to show that it now takes O(N) or maybe O(N*ln(N)). I wish
96         # there were a good way to assert this (like counting VM operations
97         # or something): the problem was inside list.sort(), so there's no
98         # good way to instrument set_hashes() to count what we care about. On
99         # my laptop, 10k leaves takes 1.1s in this fixed version, and 11.6s
100         # in the old broken version. An 80k-leaf test (corresponding to a
101         # 10GB file with a 128KiB segsize) 10s in the fixed version, and
102         # several hours in the broken version, but 10s on my laptop (plus the
103         # 20s of setup code) probably means 200s on our dapper buildslave,
104         # which is painfully long for a unit test.
105         self.do_test_speed(10000)
106
107     def do_test_speed(self, SIZE):
108         # on my laptop, SIZE=80k (corresponding to a 10GB file with a 128KiB
109         # segsize) takes:
110         #  7s to build the (complete) HashTree
111         #  13s to set up the dictionary
112         #  10s to run set_hashes()
113         ht = make_tree(SIZE)
114         iht = hashtree.IncompleteHashTree(SIZE)
115
116         needed = set()
117         for i in range(SIZE):
118             needed.update(ht.needed_hashes(i, True))
119         all = dict([ (i, ht[i]) for i in needed])
120         iht.set_hashes(hashes=all)
121
122     def test_check(self):
123         # first create a complete hash tree
124         ht = make_tree(6)
125         # then create a corresponding incomplete tree
126         iht = hashtree.IncompleteHashTree(6)
127
128         # suppose we wanted to validate leaf[0]
129         #  leaf[0] is the same as node[7]
130         self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
131         self.failUnlessEqual(iht.needed_hashes(0, True), set([7, 8, 4, 2]))
132         self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
133         iht[0] = ht[0] # set the root
134         self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
135         self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
136         iht[5] = ht[5]
137         self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
138         self.failUnlessEqual(iht.needed_hashes(1), set([7, 4, 2]))
139
140         # reset
141         iht = hashtree.IncompleteHashTree(6)
142
143         current_hashes = list(iht)
144         # this should fail because there aren't enough hashes known
145         try:
146             iht.set_hashes(leaves={0: tagged_hash("tag", "0")})
147         except hashtree.NotEnoughHashesError:
148             pass
149         else:
150             self.fail("didn't catch not enough hashes")
151
152         # and the set of hashes stored in the tree should still be the same
153         self.failUnlessEqual(list(iht), current_hashes)
154         # and we should still need the same
155         self.failUnlessEqual(iht.needed_hashes(0), set([8, 4, 2]))
156
157         chain = {0: ht[0], 2: ht[2], 4: ht[4], 8: ht[8]}
158         # this should fail because the leaf hash is just plain wrong
159         try:
160             iht.set_hashes(chain, leaves={0: tagged_hash("bad tag", "0")})
161         except hashtree.BadHashError:
162             pass
163         else:
164             self.fail("didn't catch bad hash")
165
166         # this should fail because we give it conflicting hashes: one as an
167         # internal node, another as a leaf
168         try:
169             iht.set_hashes(chain, leaves={1: tagged_hash("bad tag", "1")})
170         except hashtree.BadHashError:
171             pass
172         else:
173             self.fail("didn't catch bad hash")
174
175         bad_chain = chain.copy()
176         bad_chain[2] = ht[2] + "BOGUS"
177
178         # this should fail because the internal hash is wrong
179         try:
180             iht.set_hashes(bad_chain, leaves={0: tagged_hash("tag", "0")})
181         except hashtree.BadHashError:
182             pass
183         else:
184             self.fail("didn't catch bad hash")
185
186         # this should succeed
187         try:
188             iht.set_hashes(chain, leaves={0: tagged_hash("tag", "0")})
189         except hashtree.BadHashError, e:
190             self.fail("bad hash: %s" % e)
191
192         self.failUnlessEqual(ht.get_leaf(0), tagged_hash("tag", "0"))
193         self.failUnlessRaises(IndexError, ht.get_leaf, 8)
194
195         # this should succeed too
196         try:
197             iht.set_hashes(leaves={1: tagged_hash("tag", "1")})
198         except hashtree.BadHashError:
199             self.fail("bad hash")
200
201         # this should fail because we give it hashes that conflict with some
202         # that we added successfully before
203         try:
204             iht.set_hashes(leaves={1: tagged_hash("bad tag", "1")})
205         except hashtree.BadHashError:
206             pass
207         else:
208             self.fail("didn't catch bad hash")
209
210         # now that leaves 0 and 1 are known, some of the internal nodes are
211         # known
212         self.failUnlessEqual(iht.needed_hashes(4), set([12, 6]))
213         chain = {6: ht[6], 12: ht[12]}
214
215         # this should succeed
216         try:
217             iht.set_hashes(chain, leaves={4: tagged_hash("tag", "4")})
218         except hashtree.BadHashError, e:
219             self.fail("bad hash: %s" % e)