]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/util/hashutil.py
use added secret to protect convergent encryption
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / util / hashutil.py
1 from pycryptopp.hash.sha256 import SHA256
2 import os
3
4 # Be very very cautious when modifying this file. Almost any change will
5 # cause a compatibility break, invalidating all outstanding URIs and making
6 # any previously uploaded files become inaccessible. BE CONSERVATIVE AND TEST
7 # AGAINST OLD DATA!
8
# Size, in bytes, shared by several kinds of crypto values: hash outputs
# (from SHA-256d), randomly-generated secrets such as the lease secret, and
# symmetric encryption keys. In the near future we will add DSA private
# keys, and salts of various kinds.
CRYPTO_VAL_SIZE = 32
14
class IntegrityCheckError(Exception):
    """Exception type for integrity-check failures (not raised in this module)."""
17
def netstring(s):
    """Encode the byte string `s` as "<length>:<s>,".

    Asserts that `s` is a `str` (no unicode here).
    """
    assert isinstance(s, str), s # no unicode here
    length = len(s)
    return "%d:%s," % (length, s)
21
class _SHA256d_Hasher:
    """Incremental SHA-256d hasher with optional output truncation.

    SHA-256d, as defined by Ferguson and Schneier, hashes the SHA-256
    output a second time to prevent length-extension attacks.
    """

    def __init__(self, truncate_to=None):
        # truncate_to: number of leading digest bytes to keep; a false
        # value (None, 0) means the full digest is returned.
        self.h = SHA256()
        self.truncate_to = truncate_to
        self._digest = None

    def update(self, data):
        """Feed more bytes into the inner hash."""
        assert isinstance(data, str) # no unicode
        self.h.update(data)

    def digest(self):
        """Return the (possibly truncated) SHA-256d digest.

        The result is computed once and cached. The inner hasher is
        deleted at that point, so update() must not be called afterwards.
        """
        if self._digest is not None:
            return self._digest
        inner = self.h.digest()
        del self.h
        outer = SHA256(inner).digest()
        if self.truncate_to:
            outer = outer[:self.truncate_to]
        self._digest = outer
        return outer
41
42
43
def tagged_hasher(tag, truncate_to=None):
    """Return a new SHA-256d hasher already fed with netstring(tag).

    `truncate_to` is passed through to the hasher and limits the length
    of the final digest.
    """
    h = _SHA256d_Hasher(truncate_to)
    prefix = netstring(tag)
    h.update(prefix)
    return h
48
def tagged_hash(tag, val, truncate_to=None):
    """Return the SHA-256d digest of netstring(tag) followed by `val`.

    Note that `val` is appended raw (not netstring-wrapped). The digest
    is cut to `truncate_to` bytes when that argument is given.
    """
    h = tagged_hasher(tag, truncate_to)
    h.update(val)
    result = h.digest()
    return result
53
def tagged_pair_hash(tag, val1, val2, truncate_to=None):
    """Return the SHA-256d digest of the tag and two values.

    The tag and both values are each netstring-encoded before hashing,
    so the boundary between val1 and val2 is unambiguous.
    """
    hasher = _SHA256d_Hasher(truncate_to)
    for part in (tag, val1, val2):
        hasher.update(netstring(part))
    return hasher.digest()
60
## specific hash tags that we use

# immutable files
STORAGE_INDEX_TAG = "allmydata_immutable_key_to_storage_index_v1"
BLOCK_TAG = "allmydata_encoded_subshare_v1"
UEB_TAG = "allmydata_uri_extension_v1"
PLAINTEXT_TAG = "allmydata_plaintext_v1"
CIPHERTEXT_TAG = "allmydata_crypttext_v1"
CIPHERTEXT_SEGMENT_TAG = "allmydata_crypttext_segment_v1"
PLAINTEXT_SEGMENT_TAG = "allmydata_plaintext_segment_v1"
CONVERGENT_ENCRYPTION_TAG = (
    "allmydata_immutable_content_to_key_with_added_secret_v1+"
)

# renewal / cancel secrets, derived client -> file -> bucket
CLIENT_RENEWAL_TAG = "allmydata_client_renewal_secret_v1"
CLIENT_CANCEL_TAG = "allmydata_client_cancel_secret_v1"
FILE_RENEWAL_TAG = "allmydata_file_renewal_secret_v1"
FILE_CANCEL_TAG = "allmydata_file_cancel_secret_v1"
BUCKET_RENEWAL_TAG = "allmydata_bucket_renewal_secret_v1"
BUCKET_CANCEL_TAG = "allmydata_bucket_cancel_secret_v1"

# mutable files
MUTABLE_WRITEKEY_TAG = "allmydata_mutable_privkey_to_writekey_v1"
MUTABLE_WRITE_ENABLER_MASTER_TAG = (
    "allmydata_mutable_writekey_to_write_enabler_master_v1"
)
MUTABLE_WRITE_ENABLER_TAG = (
    "allmydata_mutable_write_enabler_master_and_nodeid_to_write_enabler_v1"
)
MUTABLE_PUBKEY_TAG = "allmydata_mutable_pubkey_to_fingerprint_v1"
MUTABLE_READKEY_TAG = "allmydata_mutable_writekey_to_readkey_v1"
MUTABLE_DATAKEY_TAG = "allmydata_mutable_readkey_to_datakey_v1"
MUTABLE_STORAGEINDEX_TAG = "allmydata_mutable_readkey_to_storage_index_v1"

# dirnodes
DIRNODE_CHILD_WRITECAP_TAG = (
    "allmydata_mutable_writekey_and_salt_to_dirnode_child_capkey_v1"
)
91
def storage_index_hash(key):
    """Derive the 16-byte storage index from an immutable-file key."""
    # storage index is truncated to 128 bits (16 bytes). We're only hashing a
    # 16-byte value to get it, so there's no point in using a larger value.  We
    # use this same tagged hash to go from encryption key to storage index for
    # random-keyed immutable files and convergent-encryption immutable
    # files. Mutable files use ssk_storage_index_hash().
    storage_index = tagged_hash(STORAGE_INDEX_TAG, key, 16)
    return storage_index
99
def block_hash(data):
    """Return the untruncated tagged hash of `data` under BLOCK_TAG."""
    digest = tagged_hash(BLOCK_TAG, data)
    return digest
def block_hasher():
    """Return a new incremental hasher pre-fed with BLOCK_TAG."""
    hasher = tagged_hasher(BLOCK_TAG)
    return hasher
104
def uri_extension_hash(data):
    """Return the untruncated tagged hash of `data` under UEB_TAG."""
    digest = tagged_hash(UEB_TAG, data)
    return digest
def uri_extension_hasher():
    """Return a new incremental hasher pre-fed with UEB_TAG."""
    hasher = tagged_hasher(UEB_TAG)
    return hasher
109
def plaintext_hash(data):
    """Return the untruncated tagged hash of `data` under PLAINTEXT_TAG."""
    digest = tagged_hash(PLAINTEXT_TAG, data)
    return digest
def plaintext_hasher():
    """Return a new incremental hasher pre-fed with PLAINTEXT_TAG."""
    hasher = tagged_hasher(PLAINTEXT_TAG)
    return hasher
114
def crypttext_hash(data):
    """Return the untruncated tagged hash of `data` under CIPHERTEXT_TAG."""
    digest = tagged_hash(CIPHERTEXT_TAG, data)
    return digest
def crypttext_hasher():
    """Return a new incremental hasher pre-fed with CIPHERTEXT_TAG."""
    hasher = tagged_hasher(CIPHERTEXT_TAG)
    return hasher
119
def crypttext_segment_hash(data):
    """Return the untruncated tagged hash of `data` under CIPHERTEXT_SEGMENT_TAG."""
    digest = tagged_hash(CIPHERTEXT_SEGMENT_TAG, data)
    return digest
def crypttext_segment_hasher():
    """Return a new incremental hasher pre-fed with CIPHERTEXT_SEGMENT_TAG."""
    hasher = tagged_hasher(CIPHERTEXT_SEGMENT_TAG)
    return hasher
124
def plaintext_segment_hash(data):
    """Return the untruncated tagged hash of `data` under PLAINTEXT_SEGMENT_TAG."""
    digest = tagged_hash(PLAINTEXT_SEGMENT_TAG, data)
    return digest
def plaintext_segment_hasher():
    """Return a new incremental hasher pre-fed with PLAINTEXT_SEGMENT_TAG."""
    hasher = tagged_hasher(PLAINTEXT_SEGMENT_TAG)
    return hasher
129
# Length in bytes of the random keys and of the truncated key-style hashes
# produced by the functions below.
KEYLEN = 16
131
def convergence_hash(k, n, segsize, data, convergence):
    """Return the KEYLEN-byte convergent-encryption hash of `data`.

    The hash is tagged with the parameters k, n, segsize and with the
    added `convergence` secret (see convergence_hasher()).
    """
    hasher = convergence_hasher(k, n, segsize, convergence)
    hasher.update(data)
    key = hasher.digest()
    return key
def convergence_hasher(k, n, segsize, convergence):
    """Return an incremental hasher for convergent-encryption keys.

    The tag is CONVERGENT_ENCRYPTION_TAG followed by the netstring of
    the `convergence` secret and the netstring of "k,n,segsize", so a
    different secret or different parameters give a different key for
    the same content. The digest is truncated to KEYLEN bytes.
    """
    assert isinstance(convergence, str)
    params = "%d,%d,%d" % (k, n, segsize)
    param_tag = netstring(params)
    secret_tag = netstring(convergence)
    full_tag = CONVERGENT_ENCRYPTION_TAG + secret_tag + param_tag
    return tagged_hasher(full_tag, KEYLEN)
141
def random_key():
    """Return KEYLEN bytes obtained from os.urandom()."""
    key = os.urandom(KEYLEN)
    return key
144
def my_renewal_secret_hash(my_secret):
    """Derive the client renewal secret from `my_secret`."""
    # NOTE: the secret goes in tagged_hash()'s `tag` position and the
    # constant tag in its `val` position. This looks reversed, but it is
    # kept exactly as-is: the file header warns that almost any change
    # here is a compatibility break.
    return tagged_hash(tag=my_secret, val=CLIENT_RENEWAL_TAG)
def my_cancel_secret_hash(my_secret):
    """Derive the client cancel secret from `my_secret`."""
    # NOTE: the secret goes in tagged_hash()'s `tag` position and the
    # constant tag in its `val` position. This looks reversed, but it is
    # kept exactly as-is: the file header warns that almost any change
    # here is a compatibility break.
    return tagged_hash(tag=my_secret, val=CLIENT_CANCEL_TAG)
149
def file_renewal_secret_hash(client_renewal_secret, storage_index):
    """Derive a per-file renewal secret.

    Hashes the client renewal secret together with the storage index
    under FILE_RENEWAL_TAG.
    """
    secret = tagged_pair_hash(FILE_RENEWAL_TAG,
                              client_renewal_secret, storage_index)
    return secret
153
def file_cancel_secret_hash(client_cancel_secret, storage_index):
    """Derive a per-file cancel secret.

    Hashes the client cancel secret together with the storage index
    under FILE_CANCEL_TAG.
    """
    secret = tagged_pair_hash(FILE_CANCEL_TAG,
                              client_cancel_secret, storage_index)
    return secret
157
def bucket_renewal_secret_hash(file_renewal_secret, peerid):
    """Derive a per-bucket renewal secret.

    Hashes the file renewal secret together with `peerid` under
    BUCKET_RENEWAL_TAG. `peerid` must be the 20-byte binary form.
    """
    assert len(peerid) == 20, "%s: %r" % (len(peerid), peerid) # binary!
    secret = tagged_pair_hash(BUCKET_RENEWAL_TAG, file_renewal_secret, peerid)
    return secret
161
def bucket_cancel_secret_hash(file_cancel_secret, peerid):
    """Derive a per-bucket cancel secret.

    Hashes the file cancel secret together with `peerid` under
    BUCKET_CANCEL_TAG. `peerid` must be the 20-byte binary form.
    """
    assert len(peerid) == 20, "%s: %r" % (len(peerid), peerid) # binary!
    secret = tagged_pair_hash(BUCKET_CANCEL_TAG, file_cancel_secret, peerid)
    return secret
165
166
def _xor(a, b):
    """XOR every character of string `a` with the single character `b`."""
    pieces = []
    for ch in a:
        pieces.append(chr(ord(ch) ^ ord(b)))
    return "".join(pieces)
169
def hmac(tag, data):
    """Return an HMAC-style two-pass SHA-256 digest of `data` keyed by `tag`.

    The inner key is `tag` XORed with 0x36 and the outer key is `tag`
    XORed with 0x5c. The result is
    SHA256(outer_key + SHA256(inner_key + data)).
    """
    # NOTE(review): `tag` is used at its own length and is not padded to
    # the hash block size first, which standard HMAC (RFC 2104) would do.
    # Left unchanged because the file header warns that changes here break
    # compatibility.
    inner_key = _xor(tag, "\x36")
    outer_key = _xor(tag, "\x5c")
    inner_digest = SHA256(inner_key + data).digest()
    return SHA256(outer_key + inner_digest).digest()
176
def mutable_rwcap_key_hash(iv, writekey):
    """Return a KEYLEN-byte key from `iv` and `writekey`.

    Uses the pair hash under DIRNODE_CHILD_WRITECAP_TAG.
    """
    key = tagged_pair_hash(DIRNODE_CHILD_WRITECAP_TAG, iv, writekey, KEYLEN)
    return key
179
def ssk_writekey_hash(privkey):
    """Return the KEYLEN-byte hash of `privkey` under MUTABLE_WRITEKEY_TAG."""
    writekey = tagged_hash(MUTABLE_WRITEKEY_TAG, privkey, KEYLEN)
    return writekey
def ssk_write_enabler_master_hash(writekey):
    """Return the untruncated hash of `writekey` under MUTABLE_WRITE_ENABLER_MASTER_TAG."""
    master = tagged_hash(MUTABLE_WRITE_ENABLER_MASTER_TAG, writekey)
    return master
def ssk_write_enabler_hash(writekey, peerid):
    """Return the write enabler for `writekey` and `peerid`.

    First derives the write-enabler master from `writekey`, then hashes
    it together with `peerid` under MUTABLE_WRITE_ENABLER_TAG. `peerid`
    must be the 20-byte binary form.
    """
    assert len(peerid) == 20, "%s: %r" % (len(peerid), peerid) # binary!
    master = ssk_write_enabler_master_hash(writekey)
    write_enabler = tagged_pair_hash(MUTABLE_WRITE_ENABLER_TAG, master, peerid)
    return write_enabler
188
def ssk_pubkey_fingerprint_hash(pubkey):
    """Return the untruncated hash of `pubkey` under MUTABLE_PUBKEY_TAG."""
    fingerprint = tagged_hash(MUTABLE_PUBKEY_TAG, pubkey)
    return fingerprint
191
def ssk_readkey_hash(writekey):
    """Return the KEYLEN-byte hash of `writekey` under MUTABLE_READKEY_TAG."""
    readkey = tagged_hash(MUTABLE_READKEY_TAG, writekey, KEYLEN)
    return readkey
def ssk_readkey_data_hash(IV, readkey):
    """Return the KEYLEN-byte pair hash of `IV` and `readkey` under MUTABLE_DATAKEY_TAG."""
    datakey = tagged_pair_hash(MUTABLE_DATAKEY_TAG, IV, readkey, KEYLEN)
    return datakey
def ssk_storage_index_hash(readkey):
    """Return the KEYLEN-byte hash of `readkey` under MUTABLE_STORAGEINDEX_TAG."""
    storage_index = tagged_hash(MUTABLE_STORAGEINDEX_TAG, readkey, KEYLEN)
    return storage_index