rename fileid/verifierid to plaintext_hash/crypttext_hash

author Brian Warner <warner@lothar.com>

Sun, 10 Jun 2007 03:46:04 +0000 (20:46 -0700)

committer Brian Warner <warner@lothar.com>

Sun, 10 Jun 2007 03:46:04 +0000 (20:46 -0700)
author Brian Warner <warner@lothar.com>
Sun, 10 Jun 2007 03:46:04 +0000 (20:46 -0700)
committer Brian Warner <warner@lothar.com>
Sun, 10 Jun 2007 03:46:04 +0000 (20:46 -0700)
diff --git a/docs/URI-extension.txt b/docs/URI-extension.txt

index 0b4348090110c94c8d00e27f9b90d8b27b9de7ab..e8d47aa827eaab4feab291384c5e76f399cab505 100644 (file)
--- a/docs/URI-extension.txt
+++ b/docs/URI-extension.txt
@@ -24,9 +24,9 @@ on their sizes):
   tail_codec_params   12
  
   share_root_hash     32 (binary) or 52 (base32-encoded) each
- fileid
+ plaintext_hash
   plaintext_root_hash
- verifierid
+ crypttext_hash
   crypttext_root_hash
  
  Some pieces are needed elsewhere (size should be visible without pulling the
@@ -35,9 +35,6 @@ peer selection algorithms need needed_shares to ask a minimal set of peers).
  Some pieces are arguably redundant but are convenient to have present
  (test_encode.py makes use of num_segments).
  
-fileid/verifierid need to be renamed 'plaintext_hash' and 'crypttext_hash'
-respectively.
-
  The rule for this data block is that it should be a constant size for all
  files, regardless of file size. Therefore hash trees (which have a size that
  depends linearly upon the number of segments) are stored elsewhere in the
diff --git a/src/allmydata/download.py b/src/allmydata/download.py

index 498c171f4f1ac545dd0eb0ef24ce625cccf2f111..4a7cab688c08c944a27d62a2f6b2eb4de74374a4 100644 (file)
--- a/src/allmydata/download.py
+++ b/src/allmydata/download.py
@@ -29,8 +29,8 @@ class Output:
          self.downloadable = downloadable
          self._decryptor = AES.new(key=key, mode=AES.MODE_CTR,
                                    counterstart="\x00"*16)
-        self._verifierid_hasher = hashutil.verifierid_hasher()
-        self._fileid_hasher = hashutil.fileid_hasher()
+        self._crypttext_hasher = hashutil.crypttext_hasher()
+        self._plaintext_hasher = hashutil.plaintext_hasher()
          self.length = 0
          self._segment_number = 0
          self._plaintext_hash_tree = None
@@ -49,7 +49,7 @@ class Output:
          # memory footprint: 'crypttext' is the only segment_size usage
          # outstanding. While we decrypt it into 'plaintext', we hit
          # 2*segment_size.
-        self._verifierid_hasher.update(crypttext)
+        self._crypttext_hasher.update(crypttext)
          if self._crypttext_hash_tree:
              ch = hashutil.crypttext_segment_hasher()
              ch.update(crypttext)
@@ -61,7 +61,7 @@ class Output:
  
          # now we're back down to 1*segment_size.
  
-        self._fileid_hasher.update(plaintext)
+        self._plaintext_hasher.update(plaintext)
          if self._plaintext_hash_tree:
              ph = hashutil.plaintext_segment_hasher()
              ph.update(plaintext)
@@ -74,8 +74,8 @@ class Output:
          self.downloadable.write(plaintext)
  
      def close(self):
-        self.verifierid = self._verifierid_hasher.digest()
-        self.fileid = self._fileid_hasher.digest()
+        self.crypttext_hash = self._crypttext_hasher.digest()
+        self.plaintext_hash = self._plaintext_hasher.digest()
          self.downloadable.close()
  
      def finish(self):
@@ -252,8 +252,8 @@ class SegmentDownloader:
          self.parent.bucket_failed(vbucket)
  
  class FileDownloader:
-    check_verifierid = True
-    check_fileid = True
+    check_crypttext_hash = True
+    check_plaintext_hash = True
  
      def __init__(self, client, uri, downloadable):
          self._client = client
@@ -412,11 +412,11 @@ class FileDownloader:
          self._tail_codec = codec.get_decoder_by_name(d['codec_name'])
          self._tail_codec.set_serialized_params(d['tail_codec_params'])
  
-        verifierid = d['verifierid']
-        assert isinstance(verifierid, str)
-        assert len(verifierid) == 32
-        self._verifierid = verifierid
-        self._fileid = d['fileid']
+        crypttext_hash = d['crypttext_hash']
+        assert isinstance(crypttext_hash, str)
+        assert len(crypttext_hash) == 32
+        self._crypttext_hash = crypttext_hash
+        self._plaintext_hash = d['plaintext_hash']
          self._roothash = d['share_root_hash']
  
          self._segment_size = segment_size = d['segment_size']
@@ -576,18 +576,20 @@ class FileDownloader:
  
      def _done(self, res):
          self._output.close()
-        log.msg("computed VERIFIERID: %s" % idlib.b2a(self._output.verifierid))
-        log.msg("computed FILEID: %s" % idlib.b2a(self._output.fileid))
-        if self.check_verifierid:
-            _assert(self._verifierid == self._output.verifierid,
-                    "bad verifierid: computed=%s, expected=%s" %
-                    (idlib.b2a(self._output.verifierid),
-                     idlib.b2a(self._verifierid)))
-        if self.check_fileid:
-            _assert(self._fileid == self._output.fileid,
-                    "bad fileid: computed=%s, expected=%s" %
-                    (idlib.b2a(self._output.fileid),
-                     idlib.b2a(self._fileid)))
+        log.msg("computed CRYPTTEXT_HASH: %s" %
+                idlib.b2a(self._output.crypttext_hash))
+        log.msg("computed PLAINTEXT_HASH: %s" %
+                idlib.b2a(self._output.plaintext_hash))
+        if self.check_crypttext_hash:
+            _assert(self._crypttext_hash == self._output.crypttext_hash,
+                    "bad crypttext_hash: computed=%s, expected=%s" %
+                    (idlib.b2a(self._output.crypttext_hash),
+                     idlib.b2a(self._crypttext_hash)))
+        if self.check_plaintext_hash:
+            _assert(self._plaintext_hash == self._output.plaintext_hash,
+                    "bad plaintext_hash: computed=%s, expected=%s" %
+                    (idlib.b2a(self._output.plaintext_hash),
+                     idlib.b2a(self._plaintext_hash)))
          _assert(self._output.length == self._size,
                  got=self._output.length, expected=self._size)
          return self._output.finish()
diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py

index 4f40fd21bbb9b2c20ee60d372b391798aa14df6a..e0686a0d593ce584426441c5d3072b6a058d095f 100644 (file)
--- a/src/allmydata/interfaces.py
+++ b/src/allmydata/interfaces.py
@@ -205,7 +205,7 @@ class ICodecEncoder(Interface):
          this means it may contain hex digits and hyphens, and nothing else.
          The idea is that the URI contains something like '%s:%s:%s' %
          (encoder.get_encoder_name(), encoder.get_serialized_params(),
-        b2a(verifierid)), and this is enough information to construct a
+        b2a(crypttext_hash)), and this is enough information to construct a
          compatible decoder.
          """
  
diff --git a/src/allmydata/test/test_encode.py b/src/allmydata/test/test_encode.py

index 1b194f01cb972b8056c92f08c6acd53542fa740f..dc7a64d5946befb4817b49b837de1282f81a9981 100644 (file)
--- a/src/allmydata/test/test_encode.py
+++ b/src/allmydata/test/test_encode.py
@@ -33,7 +33,7 @@ class FakeStorageServer:
          d = eventual.fireEventually()
          d.addCallback(lambda res: _call())
          return d
-    def allocate_buckets(self, verifierid, sharenums, shareize, blocksize, canary):
+    def allocate_buckets(self, crypttext_hash, sharenums, shareize, blocksize, canary):
          if self.mode == "full":
              return (set(), {},)
          elif self.mode == "already got them":
@@ -296,15 +296,15 @@ class Roundtrip(unittest.TestCase):
              peer = FakeBucketWriter(mode)
              shareholders[shnum] = peer
          e.set_shareholders(shareholders)
-        fileid_hasher = hashutil.fileid_hasher()
-        fileid_hasher.update(data)
+        plaintext_hasher = hashutil.plaintext_hasher()
+        plaintext_hasher.update(data)
          cryptor = AES.new(key=nonkey, mode=AES.MODE_CTR,
                            counterstart="\x00"*16)
-        verifierid_hasher = hashutil.verifierid_hasher()
-        verifierid_hasher.update(cryptor.encrypt(data))
+        crypttext_hasher = hashutil.crypttext_hasher()
+        crypttext_hasher.update(cryptor.encrypt(data))
  
-        e.set_uri_extension_data({'verifierid': verifierid_hasher.digest(),
-                                  'fileid': fileid_hasher.digest(),
+        e.set_uri_extension_data({'crypttext_hash': crypttext_hasher.digest(),
+                                  'plaintext_hash': plaintext_hasher.digest(),
                                    })
          d = e.start()
          def _sent(uri_extension_hash):
diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py

index 90b7c792b89f154c2e10b30af6f6d77d3305c158..9535541039af3b190d707ea7fa4796a70ef63655 100644 (file)
--- a/src/allmydata/test/test_system.py
+++ b/src/allmydata/test/test_system.py
@@ -226,7 +226,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
      # some of the validation data, so it will fail in the post-download phase
      # when the file's crypttext integrity check fails. Do the same thing for
      # the key, which should cause the download to fail the post-download
-    # plaintext verifierid check.
+    # plaintext_hash check.
  
      def test_vdrive(self):
          self.basedir = "test_system/SystemTest/test_vdrive"
diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py

index cf6017a0b11f3ea9ef4c9a3fdc0eafb7f4829088..f1372d5e91eec863e3941b7f300a8677125b88be 100644 (file)
--- a/src/allmydata/test/test_upload.py
+++ b/src/allmydata/test/test_upload.py
@@ -11,8 +11,9 @@ from test_encode import FakePeer
  class FakeClient:
      def __init__(self, mode="good"):
          self.mode = mode
-    def get_permuted_peers(self, verifierid):
-        return [ ("%20d"%fakeid, "%20d"%fakeid, FakePeer(self.mode),) for fakeid in range(50) ]
+    def get_permuted_peers(self, storage_index):
+        return [ ("%20d"%fakeid, "%20d"%fakeid, FakePeer(self.mode),)
+                 for fakeid in range(50) ]
  
  class GoodServer(unittest.TestCase):
      def setUp(self):
diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py

index 49f93d82148a64c32878ff77800f8523e99b0609..93bc432bc2875fa82feea94b021ba74ab34c1e90 100644 (file)
--- a/src/allmydata/upload.py
+++ b/src/allmydata/upload.py
@@ -22,14 +22,15 @@ class TooFullError(Exception):
      pass
  
  class PeerTracker:
-    def __init__(self, peerid, permutedid, connection, sharesize, blocksize, verifierid):
+    def __init__(self, peerid, permutedid, connection,
+                 sharesize, blocksize, crypttext_hash):
          self.peerid = peerid
          self.permutedid = permutedid
          self.connection = connection # to an RIClient
          self.buckets = {} # k: shareid, v: IRemoteBucketWriter
          self.sharesize = sharesize
          self.blocksize = blocksize
-        self.verifierid = verifierid
+        self.crypttext_hash = crypttext_hash
          self._storageserver = None
  
      def query(self, sharenums):
@@ -42,9 +43,11 @@ class PeerTracker:
      def _got_storageserver(self, storageserver):
          self._storageserver = storageserver
      def _query(self, sharenums):
-        d = self._storageserver.callRemote("allocate_buckets", self.verifierid,
+        d = self._storageserver.callRemote("allocate_buckets",
+                                           self.crypttext_hash,
                                             sharenums, self.sharesize,
-                                           self.blocksize, canary=Referenceable())
+                                           self.blocksize,
+                                           canary=Referenceable())
          d.addCallback(self._got_reply)
          return d
          
@@ -70,13 +73,13 @@ class FileUploader:
          self._size = filehandle.tell()
          filehandle.seek(0)
  
-    def set_id_strings(self, verifierid, fileid):
-        assert isinstance(verifierid, str)
-        assert len(verifierid) == 32
-        self._verifierid = verifierid
-        assert isinstance(fileid, str)
-        assert len(fileid) == 32
-        self._fileid = fileid
+    def set_id_strings(self, crypttext_hash, plaintext_hash):
+        assert isinstance(crypttext_hash, str)
+        assert len(crypttext_hash) == 32
+        self._crypttext_hash = crypttext_hash
+        assert isinstance(plaintext_hash, str)
+        assert len(plaintext_hash) == 32
+        self._plaintext_hash = plaintext_hash
  
      def set_encryption_key(self, key):
          assert isinstance(key, str)
@@ -92,7 +95,7 @@ class FileUploader:
          This method returns a Deferred that will fire with the URI (a
          string)."""
  
-        log.msg("starting upload [%s]" % (idlib.b2a(self._verifierid),))
+        log.msg("starting upload [%s]" % (idlib.b2a(self._crypttext_hash),))
          assert self.needed_shares
  
          # create the encoder, so we can know how large the shares will be
@@ -103,9 +106,11 @@ class FileUploader:
  
          # we are responsible for locating the shareholders. self._encoder is
          # responsible for handling the data and sending out the shares.
-        peers = self._client.get_permuted_peers(self._verifierid)
+        peers = self._client.get_permuted_peers(self._crypttext_hash)
          assert peers
-        trackers = [ PeerTracker(peerid, permutedid, conn, share_size, block_size, self._verifierid)
+        trackers = [ PeerTracker(peerid, permutedid, conn,
+                                 share_size, block_size,
+                                 self._crypttext_hash)
                       for permutedid, peerid, conn in peers ]
          self.usable_peers = set(trackers) # this set shrinks over time
          self.used_peers = set() # while this set grows
@@ -236,13 +241,13 @@ class FileUploader:
          self._encoder.set_shareholders(buckets)
  
          uri_extension_data = {}
-        uri_extension_data['verifierid'] = self._verifierid
-        uri_extension_data['fileid'] = self._fileid
+        uri_extension_data['crypttext_hash'] = self._crypttext_hash
+        uri_extension_data['plaintext_hash'] = self._plaintext_hash
          self._encoder.set_uri_extension_data(uri_extension_data)
          return self._encoder.start()
  
      def _compute_uri(self, uri_extension_hash):
-        return pack_uri(storage_index=self._verifierid,
+        return pack_uri(storage_index=self._crypttext_hash,
                          key=self._encryption_key,
                          uri_extension_hash=uri_extension_hash,
                          needed_shares=self.needed_shares,
@@ -291,8 +296,8 @@ class Uploader(service.MultiService):
      total_shares = 100 # Total number of shares created by encoding.  If everybody has room then this is is how many we will upload.
  
      def compute_id_strings(self, f):
-        # return a list of (fileid, encryptionkey, verifierid)
-        fileid_hasher = hashutil.fileid_hasher()
+        # return a list of (plaintext_hash, encryptionkey, crypttext_hash)
+        plaintext_hasher = hashutil.plaintext_hasher()
          enckey_hasher = hashutil.key_hasher()
          f.seek(0)
          BLOCKSIZE = 64*1024
@@ -300,14 +305,14 @@ class Uploader(service.MultiService):
              data = f.read(BLOCKSIZE)
              if not data:
                  break
-            fileid_hasher.update(data)
+            plaintext_hasher.update(data)
              enckey_hasher.update(data)
-        fileid = fileid_hasher.digest()
+        plaintext_hash = plaintext_hasher.digest()
          enckey = enckey_hasher.digest()
  
-        # now make a second pass to determine the verifierid. It would be
+        # now make a second pass to determine the crypttext_hash. It would be
          # nice to make this involve fewer passes.
-        verifierid_hasher = hashutil.verifierid_hasher()
+        crypttext_hasher = hashutil.crypttext_hasher()
          key = enckey[:16]
          cryptor = AES.new(key=key, mode=AES.MODE_CTR,
                            counterstart="\x00"*16)
@@ -316,13 +321,13 @@ class Uploader(service.MultiService):
              data = f.read(BLOCKSIZE)
              if not data:
                  break
-            verifierid_hasher.update(cryptor.encrypt(data))
-        verifierid = verifierid_hasher.digest()
+            crypttext_hasher.update(cryptor.encrypt(data))
+        crypttext_hash = crypttext_hasher.digest()
  
          # and leave the file pointer at the beginning
          f.seek(0)
  
-        return fileid, key, verifierid
+        return plaintext_hash, key, crypttext_hash
  
      def upload(self, f, options={}):
          # this returns the URI
@@ -333,9 +338,9 @@ class Uploader(service.MultiService):
          u = self.uploader_class(self.parent, options)
          u.set_filehandle(fh)
          u.set_params(self.needed_shares, self.desired_shares, self.total_shares)
-        fileid, key, verifierid = self.compute_id_strings(fh)
+        plaintext_hash, key, crypttext_hash = self.compute_id_strings(fh)
          u.set_encryption_key(key)
-        u.set_id_strings(verifierid, fileid)
+        u.set_id_strings(crypttext_hash, plaintext_hash)
          d = u.start()
          def _done(res):
              f.close_filehandle(fh)
diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py

index 9cd42a21a3d62ebaa9c827d893a44eb8a45cda18..c3d967b9684cab0cb14b5f4c50f59d1a185a07ff 100644 (file)
--- a/src/allmydata/util/hashutil.py
+++ b/src/allmydata/util/hashutil.py
@@ -31,15 +31,15 @@ def uri_extension_hash(data):
  def uri_extension_hasher():
      return tagged_hasher("allmydata_uri_extension_v1")
  
-def fileid_hash(data):
-    return tagged_hash("allmydata_fileid_v1", data)
-def fileid_hasher():
-    return tagged_hasher("allmydata_fileid_v1")
-
-def verifierid_hash(data):
-    return tagged_hash("allmydata_verifierid_v1", data)
-def verifierid_hasher():
-    return tagged_hasher("allmydata_verifierid_v1")
+def plaintext_hash(data):
+    return tagged_hash("allmydata_plaintext_hash_v1", data)
+def plaintext_hasher():
+    return tagged_hasher("allmydata_plaintext_hash_v1")
+
+def crypttext_hash(data):
+    return tagged_hash("allmydata_crypttext_hash_v1", data)
+def crypttext_hasher():
+    return tagged_hasher("allmydata_crypttext_hash_v1")
  
  def crypttext_segment_hash(data):
      return tagged_hash("allmydata_crypttext_segment_v1", data)
author	Brian Warner <warner@lothar.com>
	Sun, 10 Jun 2007 03:46:04 +0000 (20:46 -0700)
committer	Brian Warner <warner@lothar.com>
	Sun, 10 Jun 2007 03:46:04 +0000 (20:46 -0700)
docs/URI-extension.txt		patch | blob | history
src/allmydata/download.py		patch | blob | history
src/allmydata/interfaces.py		patch | blob | history
src/allmydata/test/test_encode.py		patch | blob | history
src/allmydata/test/test_system.py		patch | blob | history
src/allmydata/test/test_upload.py		patch | blob | history
src/allmydata/upload.py		patch | blob | history
src/allmydata/util/hashutil.py		patch | blob | history