move almost all hashing to SHA256, consolidate into hashutil.py
author Brian Warner <warner@allmydata.com>
Fri, 8 Jun 2007 04:47:21 +0000 (21:47 -0700)
committer Brian Warner <warner@allmydata.com>
Fri, 8 Jun 2007 04:47:21 +0000 (21:47 -0700)
The only SHA-1 hash that remains is used in the permutation of nodeids,
where we need to decide whether we care more about performance or
long-term security. I suspect we could use a much weaker (and faster)
hash for this purpose. In the long run, we'll be doing thousands of such
hashes for each file uploaded or downloaded (one per known peer).
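
For context, the nodeid permutation mentioned above costs one hash per
known peer per file. A minimal sketch of that cost (the exact inputs to
the hash are an assumption; this diff does not show the permutation code):

    import sha  # the SHA-1 module this commit removes from most other files

    def permute_peers(peerids, storage_index):
        # one digest per known peer: with thousands of peers this becomes
        # thousands of hashes per upload/download, which is why a weaker
        # but faster hash might be acceptable here
        return sorted(peerids,
                      key=lambda peerid: sha.new(peerid + storage_index).digest())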

src/allmydata/download.py
src/allmydata/encode.py
src/allmydata/hashtree.py
src/allmydata/interfaces.py
src/allmydata/test/test_encode.py
src/allmydata/test/test_system.py
src/allmydata/test/test_upload.py
src/allmydata/upload.py
src/allmydata/uri.py
src/allmydata/util/hashutil.py
src/allmydata/workqueue.py

index 79bd2e7211f87472cf6dd070b7c8bd31ef6061ae..1d8729cf229c554fb3ff02751566d9ee6d3f48db 100644 (file)
@@ -1,15 +1,14 @@
 
-import os, random, sha
+import os, random
 from zope.interface import implements
 from twisted.python import log
 from twisted.internet import defer
 from twisted.application import service
 
-from allmydata.util import idlib, mathutil, bencode
+from allmydata.util import idlib, mathutil, bencode, hashutil
 from allmydata.util.assertutil import _assert
 from allmydata import codec, hashtree
 from allmydata.Crypto.Cipher import AES
-from allmydata.Crypto.Hash import SHA256
 from allmydata.uri import unpack_uri
 from allmydata.interfaces import IDownloadTarget, IDownloader
 
@@ -32,8 +31,8 @@ class Output:
         self.downloadable = downloadable
         self._decryptor = AES.new(key=key, mode=AES.MODE_CTR,
                                   counterstart="\x00"*16)
-        self._verifierid_hasher = sha.new(netstring("allmydata_verifierid_v1"))
-        self._fileid_hasher = sha.new(netstring("allmydata_fileid_v1"))
+        self._verifierid_hasher = hashutil.verifierid_hasher()
+        self._fileid_hasher = hashutil.fileid_hasher()
         self.length = 0
         self._segment_number = 0
         self._plaintext_hash_tree = None
@@ -54,7 +53,7 @@ class Output:
         # 2*segment_size.
         self._verifierid_hasher.update(crypttext)
         if self._crypttext_hash_tree:
-            ch = SHA256.new(netstring("allmydata_crypttext_segment_v1"))
+            ch = hashutil.crypttext_segment_hasher()
             ch.update(crypttext)
             crypttext_leaves = {self._segment_number: ch.digest()}
             self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves)
@@ -66,7 +65,7 @@ class Output:
 
         self._fileid_hasher.update(plaintext)
         if self._plaintext_hash_tree:
-            ph = SHA256.new(netstring("allmydata_plaintext_segment_v1"))
+            ph = hashutil.plaintext_segment_hasher()
             ph.update(plaintext)
             plaintext_leaves = {self._segment_number: ph.digest()}
             self._plaintext_hash_tree.set_hashes(leaves=plaintext_leaves)
@@ -140,7 +139,7 @@ class ValidatedBucket:
 
             #log.msg("checking block_hash(shareid=%d, blocknum=%d) len=%d" %
             #        (self.sharenum, blocknum, len(blockdata)))
-            blockhash = hashtree.block_hash(blockdata)
+            blockhash = hashutil.block_hash(blockdata)
             # we always validate the blockhash
             bh = dict(enumerate(blockhashes))
             # replace blockhash root with validated value
@@ -350,7 +349,7 @@ class FileDownloader:
         # comes back, and compare it against the version in our URI. If they
         # don't match, ignore their data and try someone else.
         def _validate(proposal, bucket):
-            h = hashtree.thingA_hash(proposal)
+            h = hashutil.thingA_hash(proposal)
             if h != self._thingA_hash:
                 self._fetch_failures["thingA"] += 1
                 msg = ("The copy of thingA we received from %s was bad" %
@@ -392,7 +391,7 @@ class FileDownloader:
 
         verifierid = d['verifierid']
         assert isinstance(verifierid, str)
-        assert len(verifierid) == 20
+        assert len(verifierid) == 32
         self._verifierid = verifierid
         self._fileid = d['fileid']
         self._roothash = d['share_root_hash']
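
The Output class above shows the hasher pattern this commit standardizes:
one long-lived tagged hasher spans the whole file, while each segment gets
a fresh hasher whose digest becomes a hash-tree leaf. The same pattern in
isolation (a sketch; the names and segment loop are illustrative):

    from allmydata.util import hashutil

    def hash_plaintext_segments(segments):
        fileid_hasher = hashutil.fileid_hasher()   # lives across all segments
        leaves = {}
        for segnum, segment in enumerate(segments):
            fileid_hasher.update(segment)
            # fresh per-segment hasher; its digest is one hash-tree leaf
            ph = hashutil.plaintext_segment_hasher()
            ph.update(segment)
            leaves[segnum] = ph.digest()
        return fileid_hasher.digest(), leaves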
index 7879955e24eebfc6b295c36d42658786b092a816..b905c466c8c32f3dec9634a07f2886b7e78f74f7 100644 (file)
@@ -3,10 +3,9 @@
 from zope.interface import implements
 from twisted.internet import defer
 from twisted.python import log
-from allmydata.hashtree import HashTree, block_hash, thingA_hash
+from allmydata.hashtree import HashTree
 from allmydata.Crypto.Cipher import AES
-from allmydata.Crypto.Hash import SHA256
-from allmydata.util import mathutil, bencode
+from allmydata.util import mathutil, bencode, hashutil
 from allmydata.util.assertutil import _assert
 from allmydata.codec import CRSEncoder
 from allmydata.interfaces import IEncoder
@@ -224,8 +223,8 @@ class Encoder(object):
         # of additional shares which can be substituted if the primary ones
         # are unavailable
 
-        plaintext_hasher = SHA256.new(netstring("allmydata_plaintext_segment_v1"))
-        crypttext_hasher = SHA256.new(netstring("allmydata_crypttext_segment_v1"))
+        plaintext_hasher = hashutil.plaintext_segment_hasher()
+        crypttext_hasher = hashutil.crypttext_segment_hasher()
 
         # memory footprint: we only hold a tiny piece of the plaintext at any
         # given time. We build up a segment's worth of crypttext, then hand
@@ -258,8 +257,8 @@ class Encoder(object):
         codec = self._tail_codec
         input_piece_size = codec.get_block_size()
 
-        plaintext_hasher = SHA256.new(netstring("allmydata_plaintext_segment_v1"))
-        crypttext_hasher = SHA256.new(netstring("allmydata_crypttext_segment_v1"))
+        plaintext_hasher = hashutil.plaintext_segment_hasher()
+        crypttext_hasher = hashutil.crypttext_segment_hasher()
 
         for i in range(self.required_shares):
             input_piece = self.infile.read(input_piece_size)
@@ -297,7 +296,7 @@ class Encoder(object):
             shareid = shareids[i]
             d = self.send_subshare(shareid, segnum, subshare)
             dl.append(d)
-            subshare_hash = block_hash(subshare)
+            subshare_hash = hashutil.block_hash(subshare)
             self.subshare_hashes[shareid].append(subshare_hash)
         dl = self._gather_responses(dl)
         def _logit(res):
@@ -437,7 +436,7 @@ class Encoder(object):
     def send_thingA_to_all_shareholders(self):
         log.msg("%s: sending thingA" % self)
         thingA = bencode.bencode(self.thingA_data)
-        self.thingA_hash = thingA_hash(thingA)
+        self.thingA_hash = hashutil.thingA_hash(thingA)
         dl = []
         for shareid in self.landlords.keys():
             dl.append(self.send_thingA(shareid, thingA))
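
The thingA flow is symmetric with the downloader hunk earlier: the encoder
hashes the bencoded blob once, and the downloader recomputes the same tagged
hash before trusting any peer's copy. A minimal sketch of both sides,
assuming bencode.bdecode is the inverse of bencode.bencode:

    from allmydata.util import bencode, hashutil

    # encoder side: serialize, hash, and record the hash in the URI
    thingA = bencode.bencode({'verifierid': 'V'*32, 'fileid': 'F'*32})
    thingA_hash = hashutil.thingA_hash(thingA)

    # downloader side: recompute the hash before believing the data
    def validate_thingA(proposal, expected_hash):
        if hashutil.thingA_hash(proposal) != expected_hash:
            raise ValueError("the copy of thingA we received was bad")
        return bencode.bdecode(proposal)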
index a3b1c5b418e95fec3100c10f551a7d616cbbf79c..f1a839f6449a78787e1b7a65e10a81d289880c5e 100644 (file)
@@ -446,9 +446,3 @@ class IncompleteHashTree(CompleteBinaryTreeMixin, list):
             for i in added:
                 self[i] = None
             raise
-
-def block_hash(data):
-    return tagged_hash("encoded subshare", data)
-
-def thingA_hash(data):
-    return tagged_hash("thingA", data)
index ce2cbe841e3e9ffc2b2459dd1d129bc2b323eff8..86c95bdb1a8973b13393bf374cd13f5728f18afd 100644 (file)
@@ -11,7 +11,6 @@ Hash = StringConstraint(maxLength=HASH_SIZE,
 Nodeid = StringConstraint(maxLength=20,
                           minLength=20) # binary format 20-byte SHA1 hash
 FURL = StringConstraint(1000)
-Verifierid = StringConstraint(20)
 StorageIndex = StringConstraint(32)
 URI = StringConstraint(300) # kind of arbitrary
 MAX_BUCKETS = 200  # per peer
@@ -121,12 +120,12 @@ RIMutableDirectoryNode_ = Any() # TODO: how can we avoid this?
 class RIMutableDirectoryNode(RemoteInterface):
     def list():
         return ListOf( TupleOf(str, # name, relative to directory
-                               ChoiceOf(RIMutableDirectoryNode_, Verifierid)),
+                               ChoiceOf(RIMutableDirectoryNode_, URI)),
                        maxLength=100,
                        )
 
     def get(name=str):
-        return ChoiceOf(RIMutableDirectoryNode_, Verifierid)
+        return ChoiceOf(RIMutableDirectoryNode_, URI)
 
     def add_directory(name=str):
         return RIMutableDirectoryNode_
index fd5bfa4d77edcc82e7a39c5e00afbccd42d637b1..07f0f058093764f1483f5a8d15c85f013b0d656e 100644 (file)
@@ -7,7 +7,6 @@ from allmydata import encode, download, hashtree
 from allmydata.util import hashutil
 from allmydata.uri import pack_uri
 from allmydata.Crypto.Cipher import AES
-import sha
 from cStringIO import StringIO
 
 def netstring(s):
@@ -300,11 +299,11 @@ class Roundtrip(unittest.TestCase):
             peer = FakeBucketWriter(mode)
             shareholders[shnum] = peer
         e.set_shareholders(shareholders)
-        fileid_hasher = sha.new(netstring("allmydata_fileid_v1"))
+        fileid_hasher = hashutil.fileid_hasher()
         fileid_hasher.update(data)
         cryptor = AES.new(key=nonkey, mode=AES.MODE_CTR,
                           counterstart="\x00"*16)
-        verifierid_hasher = sha.new(netstring("allmydata_verifierid_v1"))
+        verifierid_hasher = hashutil.verifierid_hasher()
         verifierid_hasher.update(cryptor.encrypt(data))
 
         e.set_thingA_data({'verifierid': verifierid_hasher.digest(),
@@ -322,7 +321,7 @@ class Roundtrip(unittest.TestCase):
         if "corrupt_key" in recover_mode:
             key = flip_bit(key)
 
-        URI = pack_uri(storage_index="S" * 20,
+        URI = pack_uri(storage_index="S" * 32,
                        key=key,
                        thingA_hash=thingA_hash,
                        needed_shares=e.required_shares,
index 868b3ad1bc7eeac71f3fc899716284ecaad92a6b..7a97b00ca3b0a180a27c45978a7a94232ab12b6d 100644 (file)
@@ -216,7 +216,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
         # change the storage index, which means we'll be asking about the
         # wrong file, so nobody will have any shares
         d = uri.unpack_uri(gooduri)
-        assert len(d['storage_index']) == 20
+        assert len(d['storage_index']) == 32
         d['storage_index'] = self.flip_bit(d['storage_index'])
         return uri.pack_uri(**d)
 
index 37386cebbbf83593e7236c47755ef30a37ba0ae3..d5b20c1c5f1b72a4a8ea5cc7490740500ce1e0c3 100644 (file)
@@ -26,7 +26,7 @@ class GoodServer(unittest.TestCase):
         self.failUnless(uri.startswith("URI:"))
         d = unpack_uri(uri)
         self.failUnless(isinstance(d['storage_index'], str))
-        self.failUnlessEqual(len(d['storage_index']), 20)
+        self.failUnlessEqual(len(d['storage_index']), 32)
         self.failUnless(isinstance(d['key'], str))
         self.failUnlessEqual(len(d['key']), 16)
 
index 51d40896050f69190b3595eaf34258e7db27864c..b63b1a39102d132b61300d23551b0d16472eecb5 100644 (file)
@@ -4,14 +4,14 @@ from twisted.internet import defer
 from twisted.application import service
 from foolscap import Referenceable
 
-from allmydata.util import idlib
+from allmydata.util import idlib, hashutil
 from allmydata import encode
 from allmydata.uri import pack_uri
 from allmydata.interfaces import IUploadable, IUploader
 from allmydata.Crypto.Cipher import AES
 
 from cStringIO import StringIO
-import collections, random, sha
+import collections, random
 
 class NotEnoughPeersError(Exception):
     pass
@@ -75,10 +75,10 @@ class FileUploader:
 
     def set_id_strings(self, verifierid, fileid):
         assert isinstance(verifierid, str)
-        assert len(verifierid) == 20
+        assert len(verifierid) == 32
         self._verifierid = verifierid
         assert isinstance(fileid, str)
-        assert len(fileid) == 20
+        assert len(fileid) == 32
         self._fileid = fileid
 
     def set_encryption_key(self, key):
@@ -298,8 +298,8 @@ class Uploader(service.MultiService):
 
     def compute_id_strings(self, f):
         # return a list of (fileid, encryptionkey, verifierid)
-        fileid_hasher = sha.new(netstring("allmydata_fileid_v1"))
-        enckey_hasher = sha.new(netstring("allmydata_encryption_key_v1"))
+        fileid_hasher = hashutil.fileid_hasher()
+        enckey_hasher = hashutil.key_hasher()
         f.seek(0)
         BLOCKSIZE = 64*1024
         while True:
@@ -313,7 +313,7 @@ class Uploader(service.MultiService):
 
         # now make a second pass to determine the verifierid. It would be
         # nice to make this involve fewer passes.
-        verifierid_hasher = sha.new(netstring("allmydata_verifierid_v1"))
+        verifierid_hasher = hashutil.verifierid_hasher()
         key = enckey[:16]
         cryptor = AES.new(key=key, mode=AES.MODE_CTR,
                           counterstart="\x00"*16)
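
Taken together, compute_id_strings makes two passes over the file: the
first derives the fileid and the encryption key from the plaintext, and
the second hashes the ciphertext to produce the verifierid, which is why
the comment above wishes for fewer passes. A sketch assembled from the
hunks above (BLOCKSIZE and the 16-byte key truncation follow the visible
code):

    from allmydata.util import hashutil
    from allmydata.Crypto.Cipher import AES

    BLOCKSIZE = 64*1024

    def compute_id_strings(f):
        # pass 1: fileid and encryption key come from the plaintext
        fileid_hasher = hashutil.fileid_hasher()
        enckey_hasher = hashutil.key_hasher()
        f.seek(0)
        while True:
            data = f.read(BLOCKSIZE)
            if not data:
                break
            fileid_hasher.update(data)
            enckey_hasher.update(data)
        fileid = fileid_hasher.digest()
        key = enckey_hasher.digest()[:16]  # AES-128 key from a SHA-256 digest

        # pass 2: the verifierid covers the *ciphertext*, so we must
        # encrypt before we can hash -- hence the second pass
        verifierid_hasher = hashutil.verifierid_hasher()
        cryptor = AES.new(key=key, mode=AES.MODE_CTR,
                          counterstart="\x00"*16)
        f.seek(0)
        while True:
            data = f.read(BLOCKSIZE)
            if not data:
                break
            verifierid_hasher.update(cryptor.encrypt(data))
        return (fileid, key, verifierid_hasher.digest())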
index 9cfce767d7df45b10ab08488b76fbc6176b7f63e..9a602fae0777046692fc76929bb4b3a6e7a8e61e 100644 (file)
@@ -9,7 +9,7 @@ def pack_uri(storage_index, key, thingA_hash,
              needed_shares, total_shares, size):
     # applications should pass keyword parameters into this
     assert isinstance(storage_index, str)
-    assert len(storage_index) == 20 # sha1 hash. TODO: sha256
+    assert len(storage_index) == 32 # sha256 hash
 
     assert isinstance(thingA_hash, str)
     assert len(thingA_hash) == 32 # sha256 hash
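
The practical effect of the 20-to-32 byte change shows up at the URI
layer; a roundtrip sketch with placeholder field values (the share counts
and size are arbitrary):

    from allmydata.uri import pack_uri, unpack_uri

    uri = pack_uri(storage_index="S"*32,  # now SHA-256 sized; "S"*20 fails
                   key="K"*16,
                   thingA_hash="A"*32,
                   needed_shares=25,
                   total_shares=100,
                   size=1234)
    d = unpack_uri(uri)
    assert len(d['storage_index']) == 32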
index ccadccf0f13d94ace6b332c8dc9dd0965611b29e..c85ec238e90316c2ab4ff35ca1ab377fea6912f9 100644 (file)
@@ -16,3 +16,43 @@ def tagged_pair_hash(tag, val1, val2):
     s.update(netstring(val2))
     return s.digest()
 
+# specific hash tags that we use
+
+def tagged_hasher(tag):
+    return SHA256.new(netstring(tag))
+
+def block_hash(data):
+    return tagged_hash("allmydata_encoded_subshare_v1", data)
+def block_hasher():
+    return tagged_hasher("allmydata_encoded_subshare_v1")
+
+def thingA_hash(data):
+    return tagged_hash("thingA", data)
+def thingA_hasher():
+    return tagged_hasher("thingA")
+
+def fileid_hash(data):
+    return tagged_hash("allmydata_fileid_v1", data)
+def fileid_hasher():
+    return tagged_hasher("allmydata_fileid_v1")
+
+def verifierid_hash(data):
+    return tagged_hash("allmydata_verifierid_v1", data)
+def verifierid_hasher():
+    return tagged_hasher("allmydata_verifierid_v1")
+
+def crypttext_segment_hash(data):
+    return tagged_hash("allmydata_crypttext_segment_v1", data)
+def crypttext_segment_hasher():
+    return tagged_hasher("allmydata_crypttext_segment_v1")
+
+def plaintext_segment_hash(data):
+    return tagged_hash("allmydata_plaintext_segment_v1", data)
+def plaintext_segment_hasher():
+    return tagged_hasher("allmydata_plaintext_segment_v1")
+
+def key_hash(data):
+    return tagged_hash("allmydata_encryption_key_v1", data)
+def key_hasher():
+    return tagged_hasher("allmydata_encryption_key_v1")
+
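
All of the new functions lean on the pre-existing tagged-hash helpers, of
which only the tail of tagged_pair_hash is visible above. Reconstructed
from the call sites, they look roughly like this (a sketch, not the exact
file contents):

    from allmydata.Crypto.Hash import SHA256

    def netstring(s):
        # length-prefixed framing, so distinct tags can never collide
        return "%d:%s," % (len(s), s)

    def tagged_hash(tag, val):
        # one-shot: hash the whole value under a tag
        s = SHA256.new(netstring(tag))
        s.update(val)
        return s.digest()

    def tagged_hasher(tag):
        # incremental: callers feed data with .update() and finish with
        # .digest(), as the Output and Encoder classes above do
        return SHA256.new(netstring(tag))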
index 1ad47a9eb80bc3d31b4d0664947153daa238c2f2..4e28e9db7e54a21586c26b72ec894a9cd74f43f2 100644 (file)
@@ -1,10 +1,11 @@
 
-import os, shutil, sha
+import os, shutil
 from zope.interface import implements
 from twisted.internet import defer
 from allmydata.util import bencode
 from allmydata.util.idlib import b2a
 from allmydata.Crypto.Cipher import AES
+from allmydata.Crypto.Hash import SHA256
 from allmydata.filetree.nodemaker import NodeMaker
 from allmydata.filetree.interfaces import INode
 from allmydata.filetree.file import CHKFileNode
@@ -382,9 +383,9 @@ def make_aes_key():
 def make_rsa_key():
     raise NotImplementedError
 def hash_sha(data):
-    return sha.new(data).digest()
+    return SHA256.new(data).digest()
 def hash_sha_to_key(data):
-    return sha.new(data).digest()[:AES_KEY_LENGTH]
+    return SHA256.new(data).digest()[:AES_KEY_LENGTH]
 def aes_encrypt(key, plaintext):
     assert isinstance(key, str)
     assert len(key) == AES_KEY_LENGTH
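
hash_sha_to_key now truncates a 32-byte SHA-256 digest where it previously
truncated a 20-byte SHA-1 digest; assuming AES_KEY_LENGTH is 16
(consistent with the len(key) == 16 assertions elsewhere in this diff),
the derived key keeps the same length:

    from allmydata.Crypto.Hash import SHA256

    AES_KEY_LENGTH = 16  # assumption: matches the 16-byte key checks above

    def hash_sha_to_key(data):
        # 32-byte SHA-256 digest, truncated to an AES-128-sized key
        return SHA256.new(data).digest()[:AES_KEY_LENGTH]

    assert len(hash_sha_to_key("example")) == AES_KEY_LENGTH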