Switch from base62 to base32 for storage indices, switch from z-base-32 to RFC 3548...

author Zooko O'Whielacronx <zooko@zooko.com>

Fri, 15 Feb 2008 02:27:47 +0000 (19:27 -0700)

committer Zooko O'Whielacronx <zooko@zooko.com>

Fri, 15 Feb 2008 02:27:47 +0000 (19:27 -0700)
author Zooko O'Whielacronx <zooko@zooko.com>
Fri, 15 Feb 2008 02:27:47 +0000 (19:27 -0700)
committer Zooko O'Whielacronx <zooko@zooko.com>
Fri, 15 Feb 2008 02:27:47 +0000 (19:27 -0700)
diff --git a/src/allmydata/client.py b/src/allmydata/client.py

index 7314cb6f34fe67260c31e61119b8505602c19769..12afba9279e899530b428df26fbc6a6e4a82aeee 100644 (file)
--- a/src/allmydata/client.py
+++ b/src/allmydata/client.py
@@ -15,7 +15,7 @@ from allmydata.checker import Checker
  from allmydata.offloaded import Helper
  from allmydata.control import ControlServer
  from allmydata.introducer import IntroducerClient
-from allmydata.util import hashutil, idlib, testutil
+from allmydata.util import hashutil, base32, testutil
  from allmydata.filenode import FileNode
  from allmydata.dirnode import NewDirectoryNode
  from allmydata.mutable import MutableFileNode
@@ -102,9 +102,9 @@ class Client(node.Node, testutil.PollMixin):
  
      def init_lease_secret(self):
          def make_secret():
-            return idlib.b2a(os.urandom(hashutil.CRYPTO_VAL_SIZE)) + "\n"
+            return base32.b2a(os.urandom(hashutil.CRYPTO_VAL_SIZE)) + "\n"
          secret_s = self.get_or_create_private_config("secret", make_secret)
-        self._lease_secret = idlib.a2b(secret_s)
+        self._lease_secret = base32.a2b(secret_s)
  
      def init_storage(self):
          # should we run a storage server (and publish it for others to use)?
diff --git a/src/allmydata/download.py b/src/allmydata/download.py

index aa54f0f1f0cddae1fb037086bd668c89084516b1..c4c7ea5db07b97fcabd7971ff50e327803f40bcb 100644 (file)
--- a/src/allmydata/download.py
+++ b/src/allmydata/download.py
@@ -6,7 +6,7 @@ from twisted.internet.interfaces import IPushProducer, IConsumer
  from twisted.application import service
  from foolscap.eventual import eventually
  
-from allmydata.util import idlib, mathutil, hashutil, log
+from allmydata.util import base32, mathutil, hashutil, log
  from allmydata.util.assertutil import _assert
  from allmydata import codec, hashtree, storage, uri
  from allmydata.interfaces import IDownloadTarget, IDownloader, IFileURI, \
@@ -70,7 +70,7 @@ class Output:
              crypttext_leaves = {self._segment_number: ch.digest()}
              self.log(format="crypttext leaf hash (%(bytes)sB) [%(segnum)d] is %(hash)s",
                       bytes=len(crypttext),
-                     segnum=self._segment_number, hash=idlib.b2a(ch.digest()),
+                     segnum=self._segment_number, hash=base32.b2a(ch.digest()),
                       level=log.NOISY)
              self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves)
  
@@ -86,7 +86,7 @@ class Output:
              plaintext_leaves = {self._segment_number: ph.digest()}
              self.log(format="plaintext leaf hash (%(bytes)sB) [%(segnum)d] is %(hash)s",
                       bytes=len(plaintext),
-                     segnum=self._segment_number, hash=idlib.b2a(ph.digest()),
+                     segnum=self._segment_number, hash=base32.b2a(ph.digest()),
                       level=log.NOISY)
              self._plaintext_hash_tree.set_hashes(leaves=plaintext_leaves)
  
@@ -180,7 +180,7 @@ class ValidatedBucket:
              #log.msg("checking block_hash(shareid=%d, blocknum=%d) len=%d "
              #        "%r .. %r: %s" %
              #        (self.sharenum, blocknum, len(blockdata),
-            #         blockdata[:50], blockdata[-50:], idlib.b2a(blockhash)))
+            #         blockdata[:50], blockdata[-50:], base32.b2a(blockhash)))
  
              # we always validate the blockhash
              bh = dict(enumerate(blockhashes))
@@ -203,22 +203,22 @@ class ValidatedBucket:
                  received from the remote peer were bad.""")
              log.msg(" have self._share_hash: %s" % bool(self._share_hash))
              log.msg(" block length: %d" % len(blockdata))
-            log.msg(" block hash: %s" % idlib.b2a_or_none(blockhash))
+            log.msg(" block hash: %s" % base32.b2a_or_none(blockhash))
              if len(blockdata) < 100:
                  log.msg(" block data: %r" % (blockdata,))
              else:
                  log.msg(" block data start/end: %r .. %r" %
                          (blockdata[:50], blockdata[-50:]))
-            log.msg(" root hash: %s" % idlib.b2a(self._roothash))
+            log.msg(" root hash: %s" % base32.b2a(self._roothash))
              log.msg(" share hash tree:\n" + self.share_hash_tree.dump())
              log.msg(" block hash tree:\n" + self.block_hash_tree.dump())
              lines = []
              for i,h in sorted(sharehashes):
-                lines.append("%3d: %s" % (i, idlib.b2a_or_none(h)))
+                lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
              log.msg(" sharehashes:\n" + "\n".join(lines) + "\n")
              lines = []
              for i,h in enumerate(blockhashes):
-                lines.append("%3d: %s" % (i, idlib.b2a_or_none(h)))
+                lines.append("%3d: %s" % (i, base32.b2a_or_none(h)))
              log.msg(" blockhashes:\n" + "\n".join(lines) + "\n")
              raise
  
@@ -782,13 +782,13 @@ class FileDownloader:
          if self.check_crypttext_hash:
              _assert(self._crypttext_hash == self._output.crypttext_hash,
                      "bad crypttext_hash: computed=%s, expected=%s" %
-                    (idlib.b2a(self._output.crypttext_hash),
-                     idlib.b2a(self._crypttext_hash)))
+                    (base32.b2a(self._output.crypttext_hash),
+                     base32.b2a(self._crypttext_hash)))
          if self.check_plaintext_hash:
              _assert(self._plaintext_hash == self._output.plaintext_hash,
                      "bad plaintext_hash: computed=%s, expected=%s" %
-                    (idlib.b2a(self._output.plaintext_hash),
-                     idlib.b2a(self._plaintext_hash)))
+                    (base32.b2a(self._output.plaintext_hash),
+                     base32.b2a(self._plaintext_hash)))
          _assert(self._output.length == self._size,
                  got=self._output.length, expected=self._size)
          return self._output.finish()
diff --git a/src/allmydata/encode.py b/src/allmydata/encode.py

index c06b1054fcb1f328e73ca96118b8da7a2fd5f63a..def30526fd10917bbf9343eb583f9619a780e1a9 100644 (file)
--- a/src/allmydata/encode.py
+++ b/src/allmydata/encode.py
@@ -6,7 +6,7 @@ from twisted.internet import defer
  from foolscap import eventual
  from allmydata import storage, uri
  from allmydata.hashtree import HashTree
-from allmydata.util import mathutil, hashutil, idlib, log
+from allmydata.util import mathutil, hashutil, base32, log
  from allmydata.util.assertutil import _assert, precondition
  from allmydata.codec import CRSEncoder
  from allmydata.interfaces import IEncoder, IStorageBucketWriter, \
@@ -435,11 +435,11 @@ class Encoder(object):
              d = self.send_subshare(shareid, segnum, subshare, lognum)
              dl.append(d)
              subshare_hash = hashutil.block_hash(subshare)
-            #from allmydata.util import idlib
+            #from allmydata.util import base32
              #log.msg("creating block (shareid=%d, blocknum=%d) "
              #        "len=%d %r .. %r: %s" %
              #        (shareid, segnum, len(subshare),
-            #         subshare[:50], subshare[-50:], idlib.b2a(subshare_hash)))
+            #         subshare[:50], subshare[-50:], base32.b2a(subshare_hash)))
              self.subshare_hashes[shareid].append(subshare_hash)
  
          dl = self._gather_responses(dl)
@@ -518,7 +518,7 @@ class Encoder(object):
          d.addCallback(_got)
          def _got_hashtree_leaves(leaves):
              self.log("Encoder: got plaintext_hashtree_leaves: %s" %
-                     (",".join([idlib.b2a(h) for h in leaves]),),
+                     (",".join([base32.b2a(h) for h in leaves]),),
                       level=log.NOISY)
              ht = list(HashTree(list(leaves)))
              self.uri_extension_data["plaintext_root_hash"] = ht[0]
@@ -636,7 +636,7 @@ class Encoder(object):
          ed = {}
          for k,v in self.uri_extension_data.items():
              if k.endswith("hash"):
-                ed[k] = idlib.b2a(v)
+                ed[k] = base32.b2a(v)
              else:
                  ed[k] = v
          self.log("uri_extension_data is %s" % (ed,), level=log.NOISY, parent=lp)
diff --git a/src/allmydata/hashtree.py b/src/allmydata/hashtree.py

index f1a839f6449a78787e1b7a65e10a81d289880c5e..c2f6deca50f7cbbd2c5910c6d213cb171f8f9510 100644 (file)
--- a/src/allmydata/hashtree.py
+++ b/src/allmydata/hashtree.py
@@ -48,7 +48,7 @@ or implied.  It probably won't make your computer catch on fire,
  or eat  your children, but it might.  Use at your own risk.
  """
  
-from allmydata.util import idlib
+from allmydata.util import base32
  from allmydata.util.hashutil import tagged_hash, tagged_pair_hash
  
  __version__ = '1.0.0-allmydata'
@@ -153,7 +153,7 @@ class CompleteBinaryTreeMixin:
          lines = []
          for i,depth in self.depth_first():
              lines.append("%s%3d: %s" % ("  "*depth, i,
-                                        idlib.b2a_or_none(self[i])))
+                                        base32.b2a_or_none(self[i])))
          return "\n".join(lines) + "\n"
  
      def get_leaf_index(self, leafnum):
diff --git a/src/allmydata/mutable.py b/src/allmydata/mutable.py

index 4459ee71647c270b40422ed6d5cfdd1459180750..b125f2f0151a68efd83fd784e751758d3b3472c8 100644 (file)
--- a/src/allmydata/mutable.py
+++ b/src/allmydata/mutable.py
@@ -6,7 +6,7 @@ from twisted.internet import defer
  from twisted.python import failure
  from foolscap.eventual import eventually
  from allmydata.interfaces import IMutableFileNode, IMutableFileURI
-from allmydata.util import hashutil, mathutil, idlib, log
+from allmydata.util import base32, hashutil, mathutil, idlib, log
  from allmydata.uri import WriteableSSKFileURI
  from allmydata import hashtree, codec, storage
  from allmydata.encode import NotEnoughPeersError
@@ -404,7 +404,7 @@ class Retrieve:
              # ok, it's a valid verinfo. Add it to the list of validated
              # versions.
              self.log(" found valid version %d-%s from %s-sh%d: %d-%d/%d/%d"
-                     % (seqnum, idlib.b2a(root_hash)[:4],
+                     % (seqnum, base32.b2a(root_hash)[:4],
                          idlib.shortnodeid_b2a(peerid), shnum,
                          k, N, segsize, datalength))
              self._valid_versions[verinfo] = (prefix, DictOfSets())
@@ -562,7 +562,7 @@ class Retrieve:
                  shares_s.append("#%d" % shnum)
          shares_s = ",".join(shares_s)
          self.log("_attempt_decode: version %d-%s, shares: %s" %
-                 (seqnum, idlib.b2a(root_hash)[:4], shares_s))
+                 (seqnum, base32.b2a(root_hash)[:4], shares_s))
  
          # first, validate each share that we haven't validated yet. We use
          # self._valid_shares to remember which ones we've already checked.
@@ -963,7 +963,7 @@ class Publish:
              for oldplace in current_share_peers.get(shnum, []):
                  (peerid, seqnum, R) = oldplace
                  logmsg2.append("%s:#%d:R=%s" % (idlib.shortnodeid_b2a(peerid),
-                                                seqnum, idlib.b2a(R)[:4]))
+                                                seqnum, base32.b2a(R)[:4]))
              logmsg.append("sh%d on (%s)" % (shnum, "/".join(logmsg2)))
          self.log("sharemap: %s" % (", ".join(logmsg)), level=log.NOISY)
          self.log("we are planning to push new seqnum=#%d" % self._new_seqnum,
@@ -1126,7 +1126,7 @@ class Publish:
                                                for i in needed_hashes ] )
          root_hash = share_hash_tree[0]
          assert len(root_hash) == 32
-        self.log("my new root_hash is %s" % idlib.b2a(root_hash))
+        self.log("my new root_hash is %s" % base32.b2a(root_hash))
  
          prefix = pack_prefix(seqnum, root_hash, IV,
                               required_shares, total_shares,
@@ -1257,8 +1257,8 @@ class Publish:
                                   " shnum=%d: I thought they had #%d:R=%s,"
                                   " but testv reported #%d:R=%s" %
                                   (shnum,
-                                  seqnum, idlib.b2a(root_hash)[:4],
-                                  old_seqnum, idlib.b2a(old_root_hash)[:4]),
+                                  seqnum, base32.b2a(root_hash)[:4],
+                                  old_seqnum, base32.b2a(old_root_hash)[:4]),
                                   parent=lp, level=log.WEIRD)
                          surprised = True
          if surprised:
@@ -1268,7 +1268,7 @@ class Publish:
          for shnum, places in dispatch_map.items():
              sent_to = [(idlib.shortnodeid_b2a(peerid),
                          seqnum,
-                        idlib.b2a(root_hash)[:4])
+                        base32.b2a(root_hash)[:4])
                         for (peerid,seqnum,root_hash) in places]
              self.log(" share %d sent to: %s" % (shnum, sent_to),
                       level=log.NOISY)
diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py

index df1d6f333483a9723a3c98f31e71dea22b3d7897..cee325605b89ea2103aa93e100d8e18fa616a86a 100644 (file)
--- a/src/allmydata/scripts/debug.py
+++ b/src/allmydata/scripts/debug.py
@@ -98,7 +98,7 @@ def format_expiration_time(expiration_time):
  
  def dump_mutable_share(config, out, err):
      from allmydata import storage
-    from allmydata.util import idlib
+    from allmydata.util import base32, idlib
      m = storage.MutableShareFile(config['filename'])
      f = open(config['filename'], "rb")
      WE, nodeid = m._read_write_enabler_and_nodeid(f)
@@ -118,7 +118,7 @@ def dump_mutable_share(config, out, err):
      print >>out
      print >>out, "Mutable slot found:"
      print >>out, " share_type: %s" % share_type
-    print >>out, " write_enabler: %s" % idlib.b2a(WE)
+    print >>out, " write_enabler: %s" % base32.b2a(WE)
      print >>out, " WE for nodeid: %s" % idlib.nodeid_b2a(nodeid)
      print >>out, " num_extra_leases: %d" % num_extra_leases
      print >>out, " container_size: %d" % container_size
@@ -130,8 +130,8 @@ def dump_mutable_share(config, out, err):
              print >>out, "  ownerid: %d" % oid
              when = format_expiration_time(et)
              print >>out, "  expires in %s" % when
-            print >>out, "  renew_secret: %s" % idlib.b2a(rs)
-            print >>out, "  cancel_secret: %s" % idlib.b2a(cs)
+            print >>out, "  renew_secret: %s" % base32.b2a(rs)
+            print >>out, "  cancel_secret: %s" % base32.b2a(cs)
              print >>out, "  secrets are for nodeid: %s" % idlib.nodeid_b2a(anid)
      else:
          print >>out, "No leases."
@@ -144,7 +144,7 @@ def dump_mutable_share(config, out, err):
  
  def dump_SDMF_share(offset, length, config, out, err):
      from allmydata import mutable
-    from allmydata.util import idlib
+    from allmydata.util import base32
  
      f = open(config['filename'], "rb")
      f.seek(offset)
@@ -168,8 +168,8 @@ def dump_SDMF_share(offset, length, config, out, err):
  
      print >>out, " SDMF contents:"
      print >>out, "  seqnum: %d" % seqnum
-    print >>out, "  root_hash: %s" % idlib.b2a(root_hash)
-    print >>out, "  IV: %s" % idlib.b2a(IV)
+    print >>out, "  root_hash: %s" % base32.b2a(root_hash)
+    print >>out, "  IV: %s" % base32.b2a(IV)
      print >>out, "  required_shares: %d" % k
      print >>out, "  total_shares: %d" % N
      print >>out, "  segsize: %d" % segsize
@@ -194,7 +194,7 @@ class DumpCapOptions(usage.Options):
  
  def dump_cap(config, out=sys.stdout, err=sys.stderr):
      from allmydata import uri
-    from allmydata.util.idlib import a2b
+    from allmydata.util import base32
      from base64 import b32decode
      import urlparse, urllib
  
@@ -204,11 +204,11 @@ def dump_cap(config, out=sys.stdout, err=sys.stderr):
          nodeid = b32decode(config['nodeid'].upper())
      secret = None
      if config['client-secret']:
-        secret = a2b(config['client-secret'])
+        secret = base32.a2b(config['client-secret'])
      elif config['client-dir']:
          secretfile = os.path.join(config['client-dir'], "private", "secret")
          try:
-            secret = a2b(open(secretfile, "r").read().strip())
+            secret = base32.a2b(open(secretfile, "r").read().strip())
          except EnvironmentError:
              pass
  
@@ -224,34 +224,33 @@ def dump_cap(config, out=sys.stdout, err=sys.stderr):
  
  def _dump_secrets(storage_index, secret, nodeid, out):
      from allmydata.util import hashutil
-    from allmydata.util.idlib import b2a
+    from allmydata.util import base32
  
      if secret:
          crs = hashutil.my_renewal_secret_hash(secret)
-        print >>out, " client renewal secret:", b2a(crs)
+        print >>out, " client renewal secret:", base32.b2a(crs)
          frs = hashutil.file_renewal_secret_hash(crs, storage_index)
-        print >>out, " file renewal secret:", b2a(frs)
+        print >>out, " file renewal secret:", base32.b2a(frs)
          if nodeid:
              renew = hashutil.bucket_renewal_secret_hash(frs, nodeid)
-            print >>out, " lease renewal secret:", b2a(renew)
+            print >>out, " lease renewal secret:", base32.b2a(renew)
          ccs = hashutil.my_cancel_secret_hash(secret)
-        print >>out, " client cancel secret:", b2a(ccs)
+        print >>out, " client cancel secret:", base32.b2a(ccs)
          fcs = hashutil.file_cancel_secret_hash(ccs, storage_index)
-        print >>out, " file cancel secret:", b2a(fcs)
+        print >>out, " file cancel secret:", base32.b2a(fcs)
          if nodeid:
              cancel = hashutil.bucket_cancel_secret_hash(fcs, nodeid)
-            print >>out, " lease cancel secret:", b2a(cancel)
+            print >>out, " lease cancel secret:", base32.b2a(cancel)
  
  def dump_uri_instance(u, nodeid, secret, out, err, show_header=True):
      from allmydata import storage, uri
-    from allmydata.util.idlib import b2a
-    from allmydata.util import hashutil
+    from allmydata.util import base32, hashutil
  
      if isinstance(u, uri.CHKFileURI):
          if show_header:
              print >>out, "CHK File:"
-        print >>out, " key:", b2a(u.key)
-        print >>out, " UEB hash:", b2a(u.uri_extension_hash)
+        print >>out, " key:", base32.b2a(u.key)
+        print >>out, " UEB hash:", base32.b2a(u.uri_extension_hash)
          print >>out, " size:", u.size
          print >>out, " k/N: %d/%d" % (u.needed_shares, u.total_shares)
          print >>out, " storage index:", storage.si_b2a(u.storage_index)
@@ -259,7 +258,7 @@ def dump_uri_instance(u, nodeid, secret, out, err, show_header=True):
      elif isinstance(u, uri.CHKFileVerifierURI):
          if show_header:
              print >>out, "CHK Verifier URI:"
-        print >>out, " UEB hash:", b2a(u.uri_extension_hash)
+        print >>out, " UEB hash:", base32.b2a(u.uri_extension_hash)
          print >>out, " size:", u.size
          print >>out, " k/N: %d/%d" % (u.needed_shares, u.total_shares)
          print >>out, " storage index:", storage.si_b2a(u.storage_index)
@@ -272,28 +271,28 @@ def dump_uri_instance(u, nodeid, secret, out, err, show_header=True):
      elif isinstance(u, uri.WriteableSSKFileURI):
          if show_header:
              print >>out, "SSK Writeable URI:"
-        print >>out, " writekey:", b2a(u.writekey)
-        print >>out, " readkey:", b2a(u.readkey)
+        print >>out, " writekey:", base32.b2a(u.writekey)
+        print >>out, " readkey:", base32.b2a(u.readkey)
          print >>out, " storage index:", storage.si_b2a(u.storage_index)
-        print >>out, " fingerprint:", b2a(u.fingerprint)
+        print >>out, " fingerprint:", base32.b2a(u.fingerprint)
          print >>out
          if nodeid:
              we = hashutil.ssk_write_enabler_hash(u.writekey, nodeid)
-            print >>out, " write_enabler:", b2a(we)
+            print >>out, " write_enabler:", base32.b2a(we)
              print >>out
          _dump_secrets(u.storage_index, secret, nodeid, out)
  
      elif isinstance(u, uri.ReadonlySSKFileURI):
          if show_header:
              print >>out, "SSK Read-only URI:"
-        print >>out, " readkey:", b2a(u.readkey)
+        print >>out, " readkey:", base32.b2a(u.readkey)
          print >>out, " storage index:", storage.si_b2a(u.storage_index)
-        print >>out, " fingerprint:", b2a(u.fingerprint)
+        print >>out, " fingerprint:", base32.b2a(u.fingerprint)
      elif isinstance(u, uri.SSKVerifierURI):
          if show_header:
              print >>out, "SSK Verifier URI:"
          print >>out, " storage index:", storage.si_b2a(u.storage_index)
-        print >>out, " fingerprint:", b2a(u.fingerprint)
+        print >>out, " fingerprint:", base32.b2a(u.fingerprint)
  
      elif isinstance(u, uri.NewDirectoryURI):
          if show_header:
@@ -358,7 +357,7 @@ class CatalogSharesOptions(usage.Options):
  
  def describe_share(abs_sharefile, si_s, shnum_s, now, out, err):
      from allmydata import uri, storage, mutable
-    from allmydata.util import idlib
+    from allmydata.util import base32
      import struct
  
      f = open(abs_sharefile, "rb")
@@ -403,7 +402,7 @@ def describe_share(abs_sharefile, si_s, shnum_s, now, out, err):
  
              print >>out, "SDMF %s %d/%d %d #%d:%s %d %s" % \
                    (si_s, k, N, datalen,
-                   seqnum, idlib.b2a(root_hash),
+                   seqnum, base32.b2a(root_hash),
                     expiration, abs_sharefile)
          else:
              print >>out, "UNKNOWN mutable %s" % (abs_sharefile,)
diff --git a/src/allmydata/storage.py b/src/allmydata/storage.py

index 06db965ec052c31c53a6a5db20a7ac641fa61efd..f2df9ffbb2f6391bb46c3f08f78f322ca5929f87 100644 (file)
--- a/src/allmydata/storage.py
+++ b/src/allmydata/storage.py
@@ -9,7 +9,7 @@ from zope.interface import implements
  from allmydata.interfaces import RIStorageServer, RIBucketWriter, \
       RIBucketReader, IStorageBucketWriter, IStorageBucketReader, HASH_SIZE, \
       BadWriteEnablerError, IStatsProducer
-from allmydata.util import base62, fileutil, idlib, mathutil, log
+from allmydata.util import base32, fileutil, idlib, mathutil, log
  from allmydata.util.assertutil import precondition, _assert
  import allmydata # for __version__
  
@@ -48,10 +48,10 @@ NUM_RE=re.compile("^[0-9]+$")
  #   B+0x48: next lease, or end of record
  
  def si_b2a(storageindex):
-    return base62.b2a(storageindex)
+    return base32.b2a(storageindex)
  
  def si_a2b(ascii_storageindex):
-    return base62.a2b(ascii_storageindex)
+    return base32.a2b(ascii_storageindex)
  
  def storage_index_to_dir(storageindex):
      sia = si_b2a(storageindex)
diff --git a/src/allmydata/test/test_cli.py b/src/allmydata/test/test_cli.py

index 30611731eb6f52329f38b4336aa2cf3f2c156da8..fe1dad44dc29645a328b79d0acd36b613c1450f4 100644 (file)
--- a/src/allmydata/test/test_cli.py
+++ b/src/allmydata/test/test_cli.py
@@ -89,31 +89,31 @@ class CLI(unittest.TestCase):
                             size=size)
          output = self._dump_cap(u.to_string())
          self.failUnless("CHK File:" in output)
-        self.failUnless("key: yyyoryarywdyqnyjbefoadeqbh" in output)
-        self.failUnless("UEB hash: hd7rwri6djiapo6itg5hcxa7ze5im7z9qwcdu8oka6qinahsbiuo" in output)
+        self.failUnless("key: aaaqeayeaudaocajbifqydiob4" in output, output)
+        self.failUnless("UEB hash: 4d5euev6djvynq6vrg34mpy5xi3vl5x7oumdthqky6ovcy4wbvtq" in output, output)
          self.failUnless("size: 1234" in output)
          self.failUnless("k/N: 25/100" in output)
-        self.failUnless("storage index: 2WlXTYP4ahK2VBkx1pckfC" in output, output)
+        self.failUnless("storage index: kmkbjguwmkxej3wejdcvu74zki" in output, output)
  
-        output = self._dump_cap("--client-secret", "p3w849k9whqhw6b9fkf4xjs5xc",
+        output = self._dump_cap("--client-secret", "5s33nk3qpvnj2fw3z4mnm2y6fa",
                                  u.to_string())
-        self.failUnless("client renewal secret: pu3oy5fu4irjsudwhn6c71g87anrxi1kokt4hmxz7qh5p1895zpy" in output)
+        self.failUnless("client renewal secret: jltcy6cppghq6ha3uzcawqr2lvwpzmw4teeqj2if6jd2vfpit6hq" in output, output)
  
          output = self._dump_cap(u.get_verifier().to_string())
          self.failIf("key: " in output)
-        self.failUnless("UEB hash: hd7rwri6djiapo6itg5hcxa7ze5im7z9qwcdu8oka6qinahsbiuo" in output)
+        self.failUnless("UEB hash: 4d5euev6djvynq6vrg34mpy5xi3vl5x7oumdthqky6ovcy4wbvtq" in output, output)
          self.failUnless("size: 1234" in output)
          self.failUnless("k/N: 25/100" in output)
-        self.failUnless("storage index: 2WlXTYP4ahK2VBkx1pckfC" in output, output)
+        self.failUnless("storage index: kmkbjguwmkxej3wejdcvu74zki" in output, output)
  
          prefixed_u = "http://127.0.0.1/uri/%s" % urllib.quote(u.to_string())
          output = self._dump_cap(prefixed_u)
          self.failUnless("CHK File:" in output)
-        self.failUnless("key: yyyoryarywdyqnyjbefoadeqbh" in output)
-        self.failUnless("UEB hash: hd7rwri6djiapo6itg5hcxa7ze5im7z9qwcdu8oka6qinahsbiuo" in output)
+        self.failUnless("key: aaaqeayeaudaocajbifqydiob4" in output, output)
+        self.failUnless("UEB hash: 4d5euev6djvynq6vrg34mpy5xi3vl5x7oumdthqky6ovcy4wbvtq" in output, output)
          self.failUnless("size: 1234" in output)
          self.failUnless("k/N: 25/100" in output)
-        self.failUnless("storage index: 2WlXTYP4ahK2VBkx1pckfC" in output, output)
+        self.failUnless("storage index: kmkbjguwmkxej3wejdcvu74zki" in output, output)
  
      def test_dump_cap_lit(self):
          u = uri.LiteralFileURI("this is some data")
@@ -128,22 +128,22 @@ class CLI(unittest.TestCase):
  
          output = self._dump_cap(u.to_string())
          self.failUnless("SSK Writeable URI:" in output)
-        self.failUnless("writekey: yryonyebyryonyebyryonyebyr" in output)
-        self.failUnless("readkey: zhgqsyrkuywo3rha41b1d7xrar" in output)
-        self.failUnless("storage index: 4GWqxTUinIqKqWj770lRIA" in output, output)
-        self.failUnless("fingerprint: 959x79z6959x79z6959x79z6959x79z6959x79z6959x79z6959y" in output)
+        self.failUnless("writekey: aeaqcaibaeaqcaibaeaqcaibae" in output, output)
+        self.failUnless("readkey: x4gowaektauqze4y2sbsd5peye" in output, output)
+        self.failUnless("storage index: rqx7xnpexjxuqprto6pezagdxi" in output, output)
+        self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output)
  
-        output = self._dump_cap("--client-secret", "p3w849k9whqhw6b9fkf4xjs5xc",
+        output = self._dump_cap("--client-secret", "tylkpgr364eav3ipsnq57yyafu",
                                  u.to_string())
-        self.failUnless("file renewal secret: xy9p89q9pkitqn4ycwu5tpt9yia7s9izsqudnb4q5jdc3rawgcny" in output)
+        self.failUnless("file renewal secret: cs54qwurfjmeduruapo46kqwexpcvav5oemczblonglj6xmoyvkq" in output, output)
  
          fileutil.make_dirs("cli/test_dump_cap/private")
          f = open("cli/test_dump_cap/private/secret", "w")
-        f.write("p3w849k9whqhw6b9fkf4xjs5xc\n")
+        f.write("y6c7q34mjbt5kkf6hb3utuoj7u\n")
          f.close()
          output = self._dump_cap("--client-dir", "cli/test_dump_cap",
                                  u.to_string())
-        self.failUnless("file renewal secret: xy9p89q9pkitqn4ycwu5tpt9yia7s9izsqudnb4q5jdc3rawgcny" in output)
+        self.failUnless("file renewal secret: 4jkip4ie2zgmbhcni6g4vmsivwuakpbw7hwnmdancsc6fkrv27kq" in output, output)
  
          output = self._dump_cap("--client-dir", "cli/test_dump_cap_BOGUS",
                                  u.to_string())
@@ -151,28 +151,28 @@ class CLI(unittest.TestCase):
  
          output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j",
                                  u.to_string())
-        self.failUnless("write_enabler: rqk9q6w46dim5ybshqk9kotkyhqcdqmp1z6498xniuz5kkjs1w7o" in output)
+        self.failUnless("write_enabler: eok7o6u26dvl3abw4ok7kqrka4omdolnsx627hpcvtx3kkjwsu5q" in output, output)
          self.failIf("file renewal secret:" in output)
  
          output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j",
-                                "--client-secret", "p3w849k9whqhw6b9fkf4xjs5xc",
+                                "--client-secret", "6orzlv22ggdhphjpmsixcbwufq",
                                  u.to_string())
-        self.failUnless("write_enabler: rqk9q6w46dim5ybshqk9kotkyhqcdqmp1z6498xniuz5kkjs1w7o" in output)
-        self.failUnless("file renewal secret: xy9p89q9pkitqn4ycwu5tpt9yia7s9izsqudnb4q5jdc3rawgcny" in output)
-        self.failUnless("lease renewal secret: r3fsw67mfji3c9mtsisqdumc1pz3gquzdrh4cpu63h8du4uuedgo" in output)
+        self.failUnless("write_enabler: eok7o6u26dvl3abw4ok7kqrka4omdolnsx627hpcvtx3kkjwsu5q" in output, output)
+        self.failUnless("file renewal secret: aabhsp6kfsxb57jzdan4dnyzcd3m2prx34jd4z5nj5t5a7guf5fq" in output, output)
+        self.failUnless("lease renewal secret: bajcslergse474ga775msalmxxapgwr27lngeja4u7ef5j7yh4bq" in output, output)
  
          u = u.get_readonly()
          output = self._dump_cap(u.to_string())
          self.failUnless("SSK Read-only URI:" in output)
-        self.failUnless("readkey: zhgqsyrkuywo3rha41b1d7xrar" in output)
-        self.failUnless("storage index: 4GWqxTUinIqKqWj770lRIA" in output, output)
-        self.failUnless("fingerprint: 959x79z6959x79z6959x79z6959x79z6959x79z6959x79z6959y" in output)
+        self.failUnless("readkey: x4gowaektauqze4y2sbsd5peye" in output, output)
+        self.failUnless("storage index: rqx7xnpexjxuqprto6pezagdxi" in output, output)
+        self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output)
  
          u = u.get_verifier()
          output = self._dump_cap(u.to_string())
          self.failUnless("SSK Verifier URI:" in output)
-        self.failUnless("storage index: 4GWqxTUinIqKqWj770lRIA" in output, output)
-        self.failUnless("fingerprint: 959x79z6959x79z6959x79z6959x79z6959x79z6959x79z6959y" in output)
+        self.failUnless("storage index: rqx7xnpexjxuqprto6pezagdxi" in output, output)
+        self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output)
  
      def test_dump_cap_directory(self):
          writekey = "\x01" * 16
@@ -182,37 +182,37 @@ class CLI(unittest.TestCase):
  
          output = self._dump_cap(u.to_string())
          self.failUnless("Directory Writeable URI:" in output)
-        self.failUnless("writekey: yryonyebyryonyebyryonyebyr" in output)
-        self.failUnless("readkey: zhgqsyrkuywo3rha41b1d7xrar" in output)
-        self.failUnless("storage index: 4GWqxTUinIqKqWj770lRIA" in output, output)
-        self.failUnless("fingerprint: 959x79z6959x79z6959x79z6959x79z6959x79z6959x79z6959y" in output)
+        self.failUnless("writekey: aeaqcaibaeaqcaibaeaqcaibae" in output, output)
+        self.failUnless("readkey: x4gowaektauqze4y2sbsd5peye" in output, output)
+        self.failUnless("storage index: rqx7xnpexjxuqprto6pezagdxi" in output, output)
+        self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output)
  
-        output = self._dump_cap("--client-secret", "p3w849k9whqhw6b9fkf4xjs5xc",
+        output = self._dump_cap("--client-secret", "a3nyfbnkorp377jhguslgc2dqi",
                                  u.to_string())
-        self.failUnless("file renewal secret: xy9p89q9pkitqn4ycwu5tpt9yia7s9izsqudnb4q5jdc3rawgcny" in output)
+        self.failUnless("file renewal secret: zwmq2azrd7lfcmhkrhpgjsxeb2vfpixgvrczbo2asqzdfbmiemwq" in output, output)
  
          output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j",
                                  u.to_string())
-        self.failUnless("write_enabler: rqk9q6w46dim5ybshqk9kotkyhqcdqmp1z6498xniuz5kkjs1w7o" in output)
+        self.failUnless("write_enabler: eok7o6u26dvl3abw4ok7kqrka4omdolnsx627hpcvtx3kkjwsu5q" in output, output)
          self.failIf("file renewal secret:" in output)
  
          output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j",
-                                "--client-secret", "p3w849k9whqhw6b9fkf4xjs5xc",
+                                "--client-secret", "rzaq5to2xm6e5otctpdvzw6bfa",
                                  u.to_string())
-        self.failUnless("write_enabler: rqk9q6w46dim5ybshqk9kotkyhqcdqmp1z6498xniuz5kkjs1w7o" in output)
-        self.failUnless("file renewal secret: xy9p89q9pkitqn4ycwu5tpt9yia7s9izsqudnb4q5jdc3rawgcny" in output)
-        self.failUnless("lease renewal secret: r3fsw67mfji3c9mtsisqdumc1pz3gquzdrh4cpu63h8du4uuedgo" in output)
+        self.failUnless("write_enabler: eok7o6u26dvl3abw4ok7kqrka4omdolnsx627hpcvtx3kkjwsu5q" in output, output)
+        self.failUnless("file renewal secret: wdmu6rwefvmp2venbb4xz5u3273oybmuu553mi7uic37gfu6bacq" in output, output)
+        self.failUnless("lease renewal secret: tlvwfudyfeqyss5kybt6ya72foedqxdovumlbt6ok7u5pyrf2mfq" in output, output)
  
          u = u.get_readonly()
          output = self._dump_cap(u.to_string())
          self.failUnless("Directory Read-only URI:" in output)
-        self.failUnless("readkey: zhgqsyrkuywo3rha41b1d7xrar" in output)
-        self.failUnless("storage index: 4GWqxTUinIqKqWj770lRIA" in output, output)
-        self.failUnless("fingerprint: 959x79z6959x79z6959x79z6959x79z6959x79z6959x79z6959y" in output)
+        self.failUnless("readkey: x4gowaektauqze4y2sbsd5peye" in output, output)
+        self.failUnless("storage index: rqx7xnpexjxuqprto6pezagdxi" in output, output)
+        self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output)
  
          u = u.get_verifier()
          output = self._dump_cap(u.to_string())
          self.failUnless("Directory Verifier URI:" in output)
-        self.failUnless("storage index: 4GWqxTUinIqKqWj770lRIA" in output, output)
-        self.failUnless("fingerprint: 959x79z6959x79z6959x79z6959x79z6959x79z6959x79z6959y" in output)
+        self.failUnless("storage index: rqx7xnpexjxuqprto6pezagdxi" in output, output)
+        self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output)
  
diff --git a/src/allmydata/test/test_client.py b/src/allmydata/test/test_client.py

index 6f2e2f5bf5883e759b24426f5224f16a882b8eb6..1a263f93c4c06da607a92f587b205b3723de705d 100644 (file)
--- a/src/allmydata/test/test_client.py
+++ b/src/allmydata/test/test_client.py
@@ -7,7 +7,7 @@ from twisted.python import log
  
  import allmydata
  from allmydata import client, introducer
-from allmydata.util import idlib
+from allmydata.util import base32
  from foolscap.eventual import flushEventualQueue
  
  class FakeIntroducerClient(introducer.IntroducerClient):
@@ -42,9 +42,9 @@ class Basic(unittest.TestCase):
          secret_fname = os.path.join(basedir, "private", "secret")
          self.failUnless(os.path.exists(secret_fname), secret_fname)
          renew_secret = c.get_renewal_secret()
-        self.failUnless(idlib.b2a(renew_secret))
+        self.failUnless(base32.b2a(renew_secret))
          cancel_secret = c.get_cancel_secret()
-        self.failUnless(idlib.b2a(cancel_secret))
+        self.failUnless(base32.b2a(cancel_secret))
  
      def test_sizelimit_1(self):
          basedir = "client.Basic.test_sizelimit_1"
diff --git a/src/allmydata/test/test_uri.py b/src/allmydata/test/test_uri.py

index 7ccd32785e1744b7a31310b28a1e98e9e8633730..6b5a8c6a50dd271e959f71d4c26e34ebc5d7fa33 100644 (file)
--- a/src/allmydata/test/test_uri.py
+++ b/src/allmydata/test/test_uri.py
@@ -44,7 +44,7 @@ class Literal(unittest.TestCase):
  class Compare(unittest.TestCase):
      def test_compare(self):
          lit1 = uri.LiteralFileURI("some data")
-        fileURI = 'URI:CHK:f3mf6az85wpcai8ma4qayfmxuc:nnw518w5hu3t5oohwtp7ah9n81z9rfg6c1ywk33ia3m64o67nsgo:3:10:345834'
+        fileURI = 'URI:CHK:f5ahxa25t4qkktywz6teyfvcx4:opuioq7tj2y6idzfp6cazehtmgs5fdcebcz3cygrxyydvcozrmeq:3:10:345834'
          chk1 = uri.CHKFileURI.init_from_string(fileURI)
          chk2 = uri.CHKFileURI.init_from_string(fileURI)
          self.failIfEqual(lit1, chk1)
@@ -167,13 +167,13 @@ class Invalid(unittest.TestCase):
  
  class Constraint(unittest.TestCase):
      def test_constraint(self):
-       good="http://127.0.0.1:8123/uri/URI%3ADIR2%3Aqo8ayna47cpw3rx3kho3mu7q4h%3Abk9qbgx76gh6eyj5ps8p6buz8fffw1ofc37e9w9d6ncsfpuz7icy/"
+       good="http://127.0.0.1:8123/uri/URI%3ADIR2%3Agh3l5rbvnv2333mrfvalmjfr4i%3Alz6l7u3z3b7g37s4zkdmfpx5ly4ib4m6thrpbusi6ys62qtc6mma/"
         uri.NewDirectoryURI.init_from_human_encoding(good)
         self.failUnlessRaises(AssertionError, uri.NewDirectoryURI.init_from_string, good)
         bad = good + '==='
         self.failUnlessRaises(AssertionError, uri.NewDirectoryURI.init_from_human_encoding, bad)
         self.failUnlessRaises(AssertionError, uri.NewDirectoryURI.init_from_string, bad)
-       fileURI = 'URI:CHK:f3mf6az85wpcai8ma4qayfmxuc:nnw518w5hu3t5oohwtp7ah9n81z9rfg6c1ywk33ia3m64o67nsgo:3:10:345834'
+       fileURI = 'URI:CHK:gh3l5rbvnv2333mrfvalmjfr4i:lz6l7u3z3b7g37s4zkdmfpx5ly4ib4m6thrpbusi6ys62qtc6mma:3:10:345834'
         uri.CHKFileURI.init_from_string(fileURI)
  
  class Mutable(unittest.TestCase):
diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py

index 46fa94c3cef9d61074daf59d455b01efb09adc44..52e7c0bd975fc032dafc84f7e535b7b69e5d18d5 100644 (file)
--- a/src/allmydata/test/test_util.py
+++ b/src/allmydata/test/test_util.py
@@ -6,19 +6,21 @@ from twisted.trial import unittest
  from twisted.internet import defer
  from twisted.python import failure
  
-from allmydata.util import bencode, idlib, humanreadable, mathutil, hashutil
+from allmydata.util import base32, bencode, idlib, humanreadable, mathutil, hashutil
  from allmydata.util import assertutil, fileutil, testutil, deferredutil
  
  
-class IDLib(unittest.TestCase):
+class Base32(unittest.TestCase):
      def test_b2a(self):
-        self.failUnlessEqual(idlib.b2a("\x12\x34"), "ne4y")
+        self.failUnlessEqual(base32.b2a("\x12\x34"), "ci2a")
      def test_b2a_or_none(self):
-        self.failUnlessEqual(idlib.b2a_or_none(None), None)
-        self.failUnlessEqual(idlib.b2a_or_none("\x12\x34"), "ne4y")
+        self.failUnlessEqual(base32.b2a_or_none(None), None)
+        self.failUnlessEqual(base32.b2a_or_none("\x12\x34"), "ci2a")
      def test_a2b(self):
-        self.failUnlessEqual(idlib.a2b("ne4y"), "\x12\x34")
-        self.failUnlessRaises(AssertionError, idlib.a2b, "b0gus")
+        self.failUnlessEqual(base32.a2b("ci2a"), "\x12\x34")
+        self.failUnlessRaises(AssertionError, base32.a2b, "b0gus")
+
+class IDLib(unittest.TestCase):
      def test_nodeid_b2a(self):
          self.failUnlessEqual(idlib.nodeid_b2a("\x00"*20), "a"*32)
  
diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py

index 7ff7b3ef226439c0d945b8e25a19d61b18fa6113..8d7a633923387a859832273ff9521ae3f12a3150 100644 (file)
--- a/src/allmydata/upload.py
+++ b/src/allmydata/upload.py
@@ -13,7 +13,7 @@ from allmydata.util.hashutil import file_renewal_secret_hash, \
       bucket_cancel_secret_hash, plaintext_hasher, \
       storage_index_hash, plaintext_segment_hasher, content_hash_key_hasher
  from allmydata import encode, storage, hashtree, uri
-from allmydata.util import idlib, mathutil
+from allmydata.util import base32, idlib, mathutil
  from allmydata.util.assertutil import precondition
  from allmydata.interfaces import IUploadable, IUploader, IUploadResults, \
       IEncryptedUploadable, RIEncryptedUploadable, IUploadStatus
@@ -448,7 +448,7 @@ class EncryptAnUploadable:
                           level=log.NOISY)
                  self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s",
                           segnum=len(self._plaintext_segment_hashes)-1,
-                         hash=idlib.b2a(p.digest()),
+                         hash=base32.b2a(p.digest()),
                           level=log.NOISY)
  
              offset += this_segment
@@ -547,7 +547,7 @@ class EncryptAnUploadable:
                       level=log.NOISY)
              self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s",
                       segnum=len(self._plaintext_segment_hashes)-1,
-                     hash=idlib.b2a(p.digest()),
+                     hash=base32.b2a(p.digest()),
                       level=log.NOISY)
          assert len(self._plaintext_segment_hashes) == num_segments
          return defer.succeed(tuple(self._plaintext_segment_hashes[first:last]))
diff --git a/src/allmydata/uri.py b/src/allmydata/uri.py

index acd31f43510d585d7209f5175f345026a4b405b2..770defd64ad0b0c5a21028a550a3149df314cd1f 100644 (file)
--- a/src/allmydata/uri.py
+++ b/src/allmydata/uri.py
@@ -3,7 +3,7 @@ import re, urllib
  from zope.interface import implements
  from twisted.python.components import registerAdapter
  from allmydata import storage
-from allmydata.util import base62, idlib, hashutil
+from allmydata.util import base32, hashutil
  from allmydata.interfaces import IURI, IDirnodeURI, IFileURI, IVerifierURI, \
       IMutableFileURI, INewDirectoryURI, IReadonlyNewDirectoryURI
  
@@ -11,9 +11,8 @@ from allmydata.interfaces import IURI, IDirnodeURI, IFileURI, IVerifierURI, \
  # enough information to retrieve and validate the contents. It shall be
  # expressed in a limited character set (namely [TODO]).
  
-ZBASE32STR_128bits = '(%s{25}%s)' % (idlib.ZBASE32CHAR, idlib.ZBASE32CHAR_3bits)
-ZBASE32STR_256bits = '(%s{51}%s)' % (idlib.ZBASE32CHAR, idlib.ZBASE32CHAR_1bits)
-ZBASE62STR_128bits = '(%s{22})' % (base62.ZBASE62CHAR)
+BASE32STR_128bits = '(%s{25}%s)' % (base32.BASE32CHAR, base32.BASE32CHAR_3bits)
+BASE32STR_256bits = '(%s{51}%s)' % (base32.BASE32CHAR, base32.BASE32CHAR_1bits)
  
  SEP='(?::|%3A)'
  NUMBER='([0-9]+)'
@@ -38,11 +37,11 @@ class _BaseURI:
  class CHKFileURI(_BaseURI):
      implements(IURI, IFileURI)
  
-    STRING_RE=re.compile('^URI:CHK:'+ZBASE32STR_128bits+':'+
-                         ZBASE32STR_256bits+':'+NUMBER+':'+NUMBER+':'+NUMBER+
+    STRING_RE=re.compile('^URI:CHK:'+BASE32STR_128bits+':'+
+                         BASE32STR_256bits+':'+NUMBER+':'+NUMBER+':'+NUMBER+
                           '$')
      HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'CHK'+SEP+
-                     ZBASE32STR_128bits+SEP+ZBASE32STR_256bits+SEP+NUMBER+
+                     BASE32STR_128bits+SEP+BASE32STR_256bits+SEP+NUMBER+
                       SEP+NUMBER+SEP+NUMBER+'$')
  
      def __init__(self, key, uri_extension_hash, needed_shares, total_shares,
@@ -61,14 +60,14 @@ class CHKFileURI(_BaseURI):
      def init_from_human_encoding(cls, uri):
          mo = cls.HUMAN_RE.search(uri)
          assert mo, uri
-        return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2)),
+        return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)),
                     int(mo.group(3)), int(mo.group(4)), int(mo.group(5)))
  
      @classmethod
      def init_from_string(cls, uri):
          mo = cls.STRING_RE.search(uri)
          assert mo, uri
-        return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2)),
+        return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)),
                     int(mo.group(3)), int(mo.group(4)), int(mo.group(5)))
  
      def to_string(self):
@@ -77,8 +76,8 @@ class CHKFileURI(_BaseURI):
          assert isinstance(self.size, (int,long))
  
          return ('URI:CHK:%s:%s:%d:%d:%d' %
-                (idlib.b2a(self.key),
-                 idlib.b2a(self.uri_extension_hash),
+                (base32.b2a(self.key),
+                 base32.b2a(self.uri_extension_hash),
                   self.needed_shares,
                   self.total_shares,
                   self.size))
@@ -103,10 +102,10 @@ class CHKFileURI(_BaseURI):
  class CHKFileVerifierURI(_BaseURI):
      implements(IVerifierURI)
  
-    STRING_RE=re.compile('^URI:CHK-Verifier:'+ZBASE62STR_128bits+':'+
-                         ZBASE32STR_256bits+':'+NUMBER+':'+NUMBER+':'+NUMBER)
+    STRING_RE=re.compile('^URI:CHK-Verifier:'+BASE32STR_128bits+':'+
+                         BASE32STR_256bits+':'+NUMBER+':'+NUMBER+':'+NUMBER)
      HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'CHK-Verifier'+SEP+
-                        ZBASE62STR_128bits+SEP+ZBASE32STR_256bits+SEP+NUMBER+
+                        BASE32STR_128bits+SEP+BASE32STR_256bits+SEP+NUMBER+
                          SEP+NUMBER+SEP+NUMBER)
  
      def __init__(self, storage_index, uri_extension_hash,
@@ -122,14 +121,14 @@ class CHKFileVerifierURI(_BaseURI):
      def init_from_human_encoding(cls, uri):
          mo = cls.HUMAN_RE.search(uri)
          assert mo, uri
-        return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2)),
+        return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)),
                     int(mo.group(3)), int(mo.group(4)), int(mo.group(5)))
  
      @classmethod
      def init_from_string(cls, uri):
          mo = cls.STRING_RE.search(uri)
          assert mo, (uri, cls, cls.STRING_RE)
-        return cls(storage.si_a2b(mo.group(1)), idlib.a2b(mo.group(2)),
+        return cls(storage.si_a2b(mo.group(1)), base32.a2b(mo.group(2)),
                     int(mo.group(3)), int(mo.group(4)), int(mo.group(5)))
  
      def to_string(self):
@@ -139,7 +138,7 @@ class CHKFileVerifierURI(_BaseURI):
  
          return ('URI:CHK-Verifier:%s:%s:%d:%d:%d' %
                  (storage.si_b2a(self.storage_index),
-                 idlib.b2a(self.uri_extension_hash),
+                 base32.b2a(self.uri_extension_hash),
                   self.needed_shares,
                   self.total_shares,
                   self.size))
@@ -148,8 +147,8 @@ class CHKFileVerifierURI(_BaseURI):
  class LiteralFileURI(_BaseURI):
      implements(IURI, IFileURI)
  
-    STRING_RE=re.compile('^URI:LIT:'+idlib.ZBASE32STR_anybytes+'$')
-    HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'LIT'+SEP+idlib.ZBASE32STR_anybytes+'$')
+    STRING_RE=re.compile('^URI:LIT:'+base32.BASE32STR_anybytes+'$')
+    HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'LIT'+SEP+base32.BASE32STR_anybytes+'$')
  
      def __init__(self, data=None):
          if data is not None:
@@ -159,16 +158,16 @@ class LiteralFileURI(_BaseURI):
      def init_from_human_encoding(cls, uri):
          mo = cls.HUMAN_RE.search(uri)
          assert mo, uri
-        return cls(idlib.a2b(mo.group(1)))
+        return cls(base32.a2b(mo.group(1)))
  
      @classmethod
      def init_from_string(cls, uri):
          mo = cls.STRING_RE.search(uri)
          assert mo, uri
-        return cls(idlib.a2b(mo.group(1)))
+        return cls(base32.a2b(mo.group(1)))
  
      def to_string(self):
-        return 'URI:LIT:%s' % idlib.b2a(self.data)
+        return 'URI:LIT:%s' % base32.b2a(self.data)
  
      def is_readonly(self):
          return True
@@ -188,10 +187,10 @@ class WriteableSSKFileURI(_BaseURI):
      implements(IURI, IMutableFileURI)
  
      BASE_STRING='URI:SSK:'
-    STRING_RE=re.compile('^'+BASE_STRING+ZBASE32STR_128bits+':'+
-                         ZBASE32STR_256bits+'$')
+    STRING_RE=re.compile('^'+BASE_STRING+BASE32STR_128bits+':'+
+                         BASE32STR_256bits+'$')
      HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'SSK'+SEP+
-                        ZBASE32STR_128bits+SEP+ZBASE32STR_256bits+'$')
+                        BASE32STR_128bits+SEP+BASE32STR_256bits+'$')
  
      def __init__(self, writekey, fingerprint):
          self.writekey = writekey
@@ -204,25 +203,25 @@ class WriteableSSKFileURI(_BaseURI):
      def init_from_human_encoding(cls, uri):
          mo = cls.HUMAN_RE.search(uri)
          assert mo, uri
-        return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2)))
+        return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)))
  
      @classmethod
      def init_from_string(cls, uri):
          mo = cls.STRING_RE.search(uri)
          assert mo, (uri, cls)
-        return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2)))
+        return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)))
  
      def to_string(self):
          assert isinstance(self.writekey, str)
          assert isinstance(self.fingerprint, str)
-        return 'URI:SSK:%s:%s' % (idlib.b2a(self.writekey),
-                                  idlib.b2a(self.fingerprint))
+        return 'URI:SSK:%s:%s' % (base32.b2a(self.writekey),
+                                  base32.b2a(self.fingerprint))
  
      def __repr__(self):
          return "<%s %s>" % (self.__class__.__name__, self.abbrev())
  
      def abbrev(self):
-        return idlib.b2a(self.writekey[:5])
+        return base32.b2a(self.writekey[:5])
  
      def is_readonly(self):
          return False
@@ -237,8 +236,8 @@ class ReadonlySSKFileURI(_BaseURI):
      implements(IURI, IMutableFileURI)
  
      BASE_STRING='URI:SSK-RO:'
-    STRING_RE=re.compile('^URI:SSK-RO:'+ZBASE32STR_128bits+':'+ZBASE32STR_256bits+'$')
-    HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'SSK-RO'+SEP+ZBASE32STR_128bits+SEP+ZBASE32STR_256bits+'$')
+    STRING_RE=re.compile('^URI:SSK-RO:'+BASE32STR_128bits+':'+BASE32STR_256bits+'$')
+    HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'SSK-RO'+SEP+BASE32STR_128bits+SEP+BASE32STR_256bits+'$')
  
      def __init__(self, readkey, fingerprint):
          self.readkey = readkey
@@ -250,25 +249,25 @@ class ReadonlySSKFileURI(_BaseURI):
      def init_from_human_encoding(cls, uri):
          mo = cls.HUMAN_RE.search(uri)
          assert mo, uri
-        return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2)))
+        return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)))
  
      @classmethod
      def init_from_string(cls, uri):
          mo = cls.STRING_RE.search(uri)
          assert mo, uri
-        return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2)))
+        return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)))
  
      def to_string(self):
          assert isinstance(self.readkey, str)
          assert isinstance(self.fingerprint, str)
-        return 'URI:SSK-RO:%s:%s' % (idlib.b2a(self.readkey),
-                                     idlib.b2a(self.fingerprint))
+        return 'URI:SSK-RO:%s:%s' % (base32.b2a(self.readkey),
+                                     base32.b2a(self.fingerprint))
  
      def __repr__(self):
          return "<%s %s>" % (self.__class__.__name__, self.abbrev())
  
      def abbrev(self):
-        return idlib.b2a(self.readkey[:5])
+        return base32.b2a(self.readkey[:5])
  
      def is_readonly(self):
          return True
@@ -283,8 +282,8 @@ class SSKVerifierURI(_BaseURI):
      implements(IVerifierURI)
  
      BASE_STRING='URI:SSK-Verifier:'
-    STRING_RE=re.compile('^'+BASE_STRING+ZBASE62STR_128bits+':'+ZBASE32STR_256bits+'$')
-    HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'SSK-RO'+SEP+ZBASE62STR_128bits+SEP+ZBASE32STR_256bits+'$')
+    STRING_RE=re.compile('^'+BASE_STRING+BASE32STR_128bits+':'+BASE32STR_256bits+'$')
+    HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'SSK-Verifier'+SEP+BASE32STR_128bits+SEP+BASE32STR_256bits+'$')
  
      def __init__(self, storage_index, fingerprint):
          assert len(storage_index) == 16
@@ -295,19 +294,19 @@ class SSKVerifierURI(_BaseURI):
      def init_from_human_encoding(cls, uri):
          mo = cls.HUMAN_RE.search(uri)
          assert mo, uri
-        return cls(storage.si_a2b(mo.group(1)), idlib.a2b(mo.group(2)))
+        return cls(storage.si_a2b(mo.group(1)), base32.a2b(mo.group(2)))
  
      @classmethod
      def init_from_string(cls, uri):
          mo = cls.STRING_RE.search(uri)
          assert mo, (uri, cls)
-        return cls(storage.si_a2b(mo.group(1)), idlib.a2b(mo.group(2)))
+        return cls(storage.si_a2b(mo.group(1)), base32.a2b(mo.group(2)))
  
      def to_string(self):
          assert isinstance(self.storage_index, str)
          assert isinstance(self.fingerprint, str)
          return 'URI:SSK-Verifier:%s:%s' % (storage.si_b2a(self.storage_index),
-                                           idlib.b2a(self.fingerprint))
+                                           base32.b2a(self.fingerprint))
  
  class _NewDirectoryBaseURI(_BaseURI):
      implements(IURI, IDirnodeURI)
@@ -513,6 +512,6 @@ def unpack_extension_readable(data):
      unpacked["UEB_hash"] = hashutil.uri_extension_hash(data)
      for k in sorted(unpacked.keys()):
          if 'hash' in k:
-            unpacked[k] = idlib.b2a(unpacked[k])
+            unpacked[k] = base32.b2a(unpacked[k])
      return unpacked
  
diff --git a/src/allmydata/util/base32.py b/src/allmydata/util/base32.py

new file mode 100644 (file)

index 0000000..983ff19
--- /dev/null
+++ b/src/allmydata/util/base32.py
@@ -0,0 +1,261 @@
+# from the Python Standard Library
+import string
+
+from assertutil import precondition
+
+z_base_32_alphabet = "ybndrfg8ejkmcpqxot1uwisza345h769" # Zooko's choice, rationale in "DESIGN" doc
+rfc3548_alphabet = "abcdefghijklmnopqrstuvwxyz234567" # RFC3548 standard used by Gnutella, Content-Addressable Web, THEX, Bitzi, Web-Calculus...
+chars = rfc3548_alphabet
+
+vals = ''.join(map(chr, range(32)))
+c2vtranstable = string.maketrans(chars, vals)
+v2ctranstable = string.maketrans(vals, chars)
+identitytranstable = string.maketrans(chars, chars)
+
+def _get_trailing_chars_without_lsbs(N, d):
+    """
+    @return: a list of chars that can legitimately appear in the last place when the least significant N bits are ignored.
+    """
+    s = []
+    if N < 4:
+        s.extend(_get_trailing_chars_without_lsbs(N+1, d=d))
+    i = 0
+    while i < len(chars):
+        if not d.has_key(i):
+            d[i] = None
+            s.append(chars[i])
+        i = i + 2**N
+    return s
+
+def get_trailing_chars_without_lsbs(N):
+    precondition((N >= 0) and (N < 5), "N is required to be >= 0 and < 5.", N=N)
+    if N == 0:
+        return chars  # no bits are ignored, so every character of the alphabet is legitimate
+    d = {}  # records which alphabet indexes the recursive helper has already emitted
+    return ''.join(_get_trailing_chars_without_lsbs(N, d=d))
+
+BASE32CHAR = '['+get_trailing_chars_without_lsbs(0)+']'
+BASE32CHAR_4bits = '['+get_trailing_chars_without_lsbs(1)+']'
+BASE32CHAR_3bits = '['+get_trailing_chars_without_lsbs(2)+']'
+BASE32CHAR_2bits = '['+get_trailing_chars_without_lsbs(3)+']'
+BASE32CHAR_1bits = '['+get_trailing_chars_without_lsbs(4)+']'
+BASE32STR_1byte = BASE32CHAR+BASE32CHAR_3bits
+BASE32STR_2bytes = BASE32CHAR+'{3}'+BASE32CHAR_1bits
+BASE32STR_3bytes = BASE32CHAR+'{4}'+BASE32CHAR_4bits
+BASE32STR_4bytes = BASE32CHAR+'{6}'+BASE32CHAR_2bits
+BASE32STR_anybytes = '((?:%s{8})*' % (BASE32CHAR,) + "(?:|%s|%s|%s|%s))" % (BASE32STR_1byte, BASE32STR_2bytes, BASE32STR_3bytes, BASE32STR_4bytes)
+
+def b2a(os):
+    """
+    @param os the data to be encoded (a string)
+
+    @return the contents of os in base-32 encoded form
+    """
+    return b2a_l(os, len(os)*8)
+
+def b2a_or_none(os):
+    if os is not None:
+        return b2a(os)
+
+def b2a_l(os, lengthinbits):
+    """
+    @param os the data to be encoded (a string)
+    @param lengthinbits the number of bits of data in os to be encoded
+
+    b2a_l() will generate a base-32 encoded string big enough to encode lengthinbits bits.  So for
+    example if os is 2 bytes long and lengthinbits is 15, then b2a_l() will generate a 3-character-
+    long base-32 encoded string (since 3 quintets is sufficient to encode 15 bits).  If os is
+    2 bytes long and lengthinbits is 16, then b2a_l() will generate a 4-character string.
+    Note that b2a_l() does not mask off unused least-significant bits, so for example if os is
+    2 bytes long and lengthinbits is 15, then you must ensure that the unused least-significant bit
+    of os is a zero bit or you will get the wrong result.  This precondition is tested by assertions
+    if assertions are enabled.
+
+    Warning: if you generate a base-32 encoded string with b2a_l(), and then someone else tries to
+    decode it by calling a2b() instead of  a2b_l(), then they will (probably) get a different
+    string than the one you encoded!  So only use b2a_l() when you are sure that the encoding and
+    decoding sides know exactly which lengthinbits to use.  If you do not have a way for the
+    encoder and the decoder to agree upon the lengthinbits, then it is best to use b2a() and
+    a2b().  The only drawback to using b2a() over b2a_l() is that when you have a number of
+    bits to encode that is not a multiple of 8, b2a() can sometimes generate a base-32 encoded
+    string that is one or two characters longer than necessary.
+
+    @return the contents of os in base-32 encoded form
+    """
+    precondition(isinstance(lengthinbits, (int, long,)), "lengthinbits is required to be an integer.", lengthinbits=lengthinbits)
+    precondition((lengthinbits+7)/8 == len(os), "lengthinbits is required to specify a number of bits storable in exactly len(os) octets.", lengthinbits=lengthinbits, lenos=len(os))
+
+    os = map(ord, os)
+
+    numquintets = (lengthinbits+4)/5
+    numoctetsofdata = (lengthinbits+7)/8
+    # print "numoctetsofdata: %s, len(os): %s, lengthinbits: %s, numquintets: %s" % (numoctetsofdata, len(os), lengthinbits, numquintets,)
+    # strip trailing octets that won't be used
+    del os[numoctetsofdata:]
+    # zero out any unused bits in the final octet
+    if lengthinbits % 8 != 0:
+        os[-1] = os[-1] >> (8-(lengthinbits % 8))
+        os[-1] = os[-1] << (8-(lengthinbits % 8))
+    # append zero octets for padding if needed
+    numoctetsneeded = (numquintets*5+7)/8 + 1
+    os.extend([0]*(numoctetsneeded-len(os)))
+
+    quintets = []
+    cutoff = 256
+    num = os[0]
+    i = 0
+    while len(quintets) < numquintets:
+        i = i + 1
+        assert len(os) > i, "len(os): %s, i: %s, len(quintets): %s, numquintets: %s, lengthinbits: %s, numoctetsofdata: %s, numoctetsneeded: %s, os: %s" % (len(os), i, len(quintets), numquintets, lengthinbits, numoctetsofdata, numoctetsneeded, os,)
+        num = num * 256
+        num = num + os[i]
+        if cutoff == 1:
+            cutoff = 256
+            continue
+        cutoff = cutoff * 8
+        quintet = num / cutoff
+        quintets.append(quintet)
+        num = num - (quintet * cutoff)
+
+        cutoff = cutoff / 32
+        quintet = num / cutoff
+        quintets.append(quintet)
+        num = num - (quintet * cutoff)
+
+    if len(quintets) > numquintets:
+        assert len(quintets) == (numquintets+1), "len(quintets): %s, numquintets: %s, quintets: %s" % (len(quintets), numquintets, quintets,)
+        quintets = quintets[:numquintets]
+    res = string.translate(string.join(map(chr, quintets), ''), v2ctranstable)
+    assert could_be_base32_encoded_l(res, lengthinbits), "lengthinbits: %s, res: %s" % (lengthinbits, res,)
+    return res
+
+# b2a() uses the minimal number of quintets sufficient to encode the binary
+# input.  It just so happens that the relation is like this (everything is
+# modulo 40 bits).
+# num_qs = NUM_OS_TO_NUM_QS[num_os]
+NUM_OS_TO_NUM_QS=(0, 2, 4, 5, 7,)
+
+# num_os = NUM_QS_TO_NUM_OS[num_qs], but if not NUM_QS_LEGIT[num_qs] then
+# there is *no* number of octets which would have resulted in this number of
+# quintets, so either the encoded string has been mangled (truncated) or else
+# you were supposed to decode it with a2b_l() (which means you were supposed
+# to know the actual length of the encoded data).
+
+NUM_QS_TO_NUM_OS=(0, 1, 1, 2, 2, 3, 3, 4)
+NUM_QS_LEGIT=(1, 0, 1, 0, 1, 1, 0, 1,)
+NUM_QS_TO_NUM_BITS=tuple(map(lambda x: x*8, NUM_QS_TO_NUM_OS))
+
+# A fast way to determine whether a given string *could* be base-32 encoded data, assuming that the
+# original data had 8K bits for a positive integer K.
+# The boolean value of s8[len(s)%8][ord(s[-1])], where s is the possibly base-32 encoded string
+# tells whether the final character is reasonable.
+def add_check_array(cs, sfmap):
+    checka=[0] * 256
+    for c in cs:
+        checka[ord(c)] = 1
+    sfmap.append(tuple(checka))
+
+def init_s8():
+    s8 = []
+    add_check_array(chars, s8)
+    for lenmod8 in (1, 2, 3, 4, 5, 6, 7,):
+        if NUM_QS_LEGIT[lenmod8]:
+            add_check_array(get_trailing_chars_without_lsbs(4-(NUM_QS_TO_NUM_BITS[lenmod8]%5)), s8)
+        else:
+            add_check_array('', s8)
+    return tuple(s8)
+s8 = init_s8()
+
+# A somewhat fast way to determine whether a given string *could* be base-32 encoded data, given a
+# lengthinbits.
+# The boolean value of s5[lengthinbits%5][ord(s[-1])], where s is the possibly base-32 encoded
+# string tells whether the final character is reasonable.
+def init_s5():
+    s5 = []
+    add_check_array(get_trailing_chars_without_lsbs(0), s5)
+    for lenmod5 in [1,2,3,4]:
+        add_check_array(get_trailing_chars_without_lsbs(5-lenmod5), s5)
+    return tuple(s5)
+s5 = init_s5()
+
+def could_be_base32_encoded(s, s8=s8, tr=string.translate, identitytranstable=identitytranstable, chars=chars):
+    if s == '':
+        return True
+    return s8[len(s)%8][ord(s[-1])] and not tr(s, identitytranstable, chars)
+
+def could_be_base32_encoded_l(s, lengthinbits, s5=s5, tr=string.translate, identitytranstable=identitytranstable, chars=chars):
+    if s == '':
+        return True
+    assert lengthinbits%5 < len(s5), lengthinbits
+    assert ord(s[-1]) < len(s5[lengthinbits%5]), s  # the final char must index into the 256-entry check array
+    return (((lengthinbits+4)/5) == len(s)) and s5[lengthinbits%5][ord(s[-1])] and not tr(s, identitytranstable, chars)
+
+def num_octets_that_encode_to_this_many_quintets(numqs):
+    # Here is a computation that conveniently expresses this:
+    return (numqs*5+3)/8
+
+def a2b(cs):
+    """
+    @param cs the base-32 encoded data (a string)
+    """
+    precondition(could_be_base32_encoded(cs), "cs is required to be possibly base32 encoded data.", cs=cs)
+
+    return a2b_l(cs, num_octets_that_encode_to_this_many_quintets(len(cs))*8)
+
+def a2b_l(cs, lengthinbits):
+    """
+    @param lengthinbits the number of bits of data encoded into cs
+
+    a2b_l() will return a result big enough to hold lengthinbits bits.  So for example if cs is
+    4 characters long (encoding at least 15 and up to 20 bits) and lengthinbits is 16, then a2b_l()
+    will return a string of length 2 (since 2 bytes is sufficient to store 16 bits).  If cs is 4
+    characters long and lengthinbits is 20, then a2b_l() will return a string of length 3 (since
+    3 bytes is sufficient to store 20 bits).  Note that a2b_l() expects the unused least-
+    significant bits to be zero, so for example if cs is 4 characters long and lengthinbits is 17,
+    then you must ensure that all three of the unused least-significant bits of cs are zero bits.
+    This is checked by the precondition() calls at the start and end of this function.
+    (Generally you just require the encoder to ensure this consistency property between the least
+    significant zero bits and value of lengthinbits, and reject strings that have a length-in-bits
+    which isn't a multiple of 8 and yet don't have trailing zero bits, as improperly encoded.)
+
+    Please see the warning in the docstring of b2a_l() regarding the use of b2a() versus b2a_l().
+
+    @return the data encoded in cs
+    """
+    precondition(could_be_base32_encoded_l(cs, lengthinbits), "cs is required to be possibly base32 encoded data.", cs=cs, lengthinbits=lengthinbits)
+    if cs == '':
+        return ''
+
+    qs = map(ord, string.translate(cs, c2vtranstable))
+
+    numoctets = (lengthinbits+7)/8
+    numquintetsofdata = (lengthinbits+4)/5
+    # strip trailing quintets that won't be used
+    del qs[numquintetsofdata:]
+    # zero out any unused bits in the final quintet
+    if lengthinbits % 5 != 0:
+        qs[-1] = qs[-1] >> (5-(lengthinbits % 5))
+        qs[-1] = qs[-1] << (5-(lengthinbits % 5))
+    # append zero quintets for padding if needed
+    numquintetsneeded = (numoctets*8+4)/5
+    qs.extend([0]*(numquintetsneeded-len(qs)))
+
+    octets = []
+    pos = 2048
+    num = qs[0] * pos
+    readybits = 5
+    i = 1
+    while len(octets) < numoctets:
+        while pos > 256:
+            pos = pos / 32
+            num = num + (qs[i] * pos)
+            i = i + 1
+        octet = num / 256
+        octets.append(octet)
+        num = num - (octet * 256)
+        num = num * 256
+        pos = pos * 256
+    assert len(octets) == numoctets, "len(octets): %s, numoctets: %s, octets: %s" % (len(octets), numoctets, octets,)
+    res = ''.join(map(chr, octets))
+    precondition(b2a_l(res, lengthinbits) == cs, "cs is required to be the canonical base-32 encoding of some data.", b2a(res), res=res, cs=cs)
+    return res
diff --git a/src/allmydata/util/base62.py b/src/allmydata/util/base62.py

index 473cadcdccf84f1ffe7eeaf607504881f0dfc0b1..5a24c8c66d2a058678be077fd45899dc3ca9b91d 100644 (file)
--- a/src/allmydata/util/base62.py
+++ b/src/allmydata/util/base62.py
@@ -13,7 +13,7 @@ from allmydata.util.mathutil import log_ceil, log_floor
  
  chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
  
-ZBASE62CHAR = '[' + chars + ']'
+BASE62CHAR = '[' + chars + ']'
  
  vals = ''.join([chr(i) for i in range(62)])
  c2vtranstable = string.maketrans(chars, vals)
diff --git a/src/allmydata/util/idlib.py b/src/allmydata/util/idlib.py

index 202f1c272ed379b76e7ea824b61a28a58febbc57..5e44b9d8251c6e1ca695f77c269f6bd7eb962168 100644 (file)
--- a/src/allmydata/util/idlib.py
+++ b/src/allmydata/util/idlib.py
@@ -1,264 +1,4 @@
-# from the Python Standard Library
-import string
  
-from assertutil import precondition
-
-z_base_32_alphabet = "ybndrfg8ejkmcpqxot1uwisza345h769" # Zooko's choice, rationale in "DESIGN" doc
-rfc3548_alphabet = "abcdefghijklmnopqrstuvwxyz234567" # RFC3548 standard used by Gnutella, Content-Addressable Web, THEX, Bitzi, Web-Calculus...
-chars = z_base_32_alphabet
-
-vals = ''.join(map(chr, range(32)))
-c2vtranstable = string.maketrans(chars, vals)
-v2ctranstable = string.maketrans(vals, chars)
-identitytranstable = string.maketrans(chars, chars)
-
-def _get_trailing_chars_without_lsbs(N, d):
-    """
-    @return: a list of chars that can legitimately appear in the last place when the least significant N bits are ignored.
-    """
-    s = []
-    if N < 4:
-        s.extend(_get_trailing_chars_without_lsbs(N+1, d=d))
-    i = 0
-    while i < len(chars):
-        if not d.has_key(i):
-            d[i] = None
-            s.append(chars[i])
-        i = i + 2**N
-    return s
-
-def get_trailing_chars_without_lsbs(N):
-    precondition((N >= 0) and (N < 5), "N is required to be > 0 and < len(chars).", N=N)
-    if N == 0:
-        return chars
-    d = {}
-    return ''.join(_get_trailing_chars_without_lsbs(N, d=d))
-
-ZBASE32CHAR = '['+get_trailing_chars_without_lsbs(0)+']'
-ZBASE32CHAR_4bits = '['+get_trailing_chars_without_lsbs(1)+']'
-ZBASE32CHAR_3bits = '['+get_trailing_chars_without_lsbs(2)+']'
-ZBASE32CHAR_2bits = '['+get_trailing_chars_without_lsbs(3)+']'
-ZBASE32CHAR_1bits = '['+get_trailing_chars_without_lsbs(4)+']'
-ZBASE32STR_1byte = ZBASE32CHAR+ZBASE32CHAR_3bits
-ZBASE32STR_2bytes = ZBASE32CHAR+'{3}'+ZBASE32CHAR_1bits
-ZBASE32STR_3bytes = ZBASE32CHAR+'{4}'+ZBASE32CHAR_4bits
-ZBASE32STR_4bytes = ZBASE32CHAR+'{6}'+ZBASE32CHAR_2bits
-ZBASE32STR_anybytes = '((?:%s{8})*' % (ZBASE32CHAR,) + "(?:|%s|%s|%s|%s))" % (ZBASE32STR_1byte, ZBASE32STR_2bytes, ZBASE32STR_3bytes, ZBASE32STR_4bytes)
-
-def b2a(os):
-    """
-    @param os the data to be encoded (a string)
-
-    @return the contents of os in base-32 encoded form
-    """
-    return b2a_l(os, len(os)*8)
-
-def b2a_or_none(os):
-    if os is not None:
-        return b2a(os)
-
-def b2a_l(os, lengthinbits):
-    """
-    @param os the data to be encoded (a string)
-    @param lengthinbits the number of bits of data in os to be encoded
-
-    b2a_l() will generate a base-32 encoded string big enough to encode lengthinbits bits.  So for
-    example if os is 2 bytes long and lengthinbits is 15, then b2a_l() will generate a 3-character-
-    long base-32 encoded string (since 3 quintets is sufficient to encode 15 bits).  If os is
-    2 bytes long and lengthinbits is 16 (or None), then b2a_l() will generate a 4-character string.
-    Note that b2a_l() does not mask off unused least-significant bits, so for example if os is
-    2 bytes long and lengthinbits is 15, then you must ensure that the unused least-significant bit
-    of os is a zero bit or you will get the wrong result.  This precondition is tested by assertions
-    if assertions are enabled.
-
-    Warning: if you generate a base-32 encoded string with b2a_l(), and then someone else tries to
-    decode it by calling a2b() instead of  a2b_l(), then they will (probably) get a different
-    string than the one you encoded!  So only use b2a_l() when you are sure that the encoding and
-    decoding sides know exactly which lengthinbits to use.  If you do not have a way for the
-    encoder and the decoder to agree upon the lengthinbits, then it is best to use b2a() and
-    a2b().  The only drawback to using b2a() over b2a_l() is that when you have a number of
-    bits to encode that is not a multiple of 8, b2a() can sometimes generate a base-32 encoded
-    string that is one or two characters longer than necessary.
-
-    @return the contents of os in base-32 encoded form
-    """
-    precondition(isinstance(lengthinbits, (int, long,)), "lengthinbits is required to be an integer.", lengthinbits=lengthinbits)
-    precondition((lengthinbits+7)/8 == len(os), "lengthinbits is required to specify a number of bits storable in exactly len(os) octets.", lengthinbits=lengthinbits, lenos=len(os))
-
-    os = map(ord, os)
-
-    numquintets = (lengthinbits+4)/5
-    numoctetsofdata = (lengthinbits+7)/8
-    # print "numoctetsofdata: %s, len(os): %s, lengthinbits: %s, numquintets: %s" % (numoctetsofdata, len(os), lengthinbits, numquintets,)
-    # strip trailing octets that won't be used
-    del os[numoctetsofdata:]
-    # zero out any unused bits in the final octet
-    if lengthinbits % 8 != 0:
-        os[-1] = os[-1] >> (8-(lengthinbits % 8))
-        os[-1] = os[-1] << (8-(lengthinbits % 8))
-    # append zero octets for padding if needed
-    numoctetsneeded = (numquintets*5+7)/8 + 1
-    os.extend([0]*(numoctetsneeded-len(os)))
-
-    quintets = []
-    cutoff = 256
-    num = os[0]
-    i = 0
-    while len(quintets) < numquintets:
-        i = i + 1
-        assert len(os) > i, "len(os): %s, i: %s, len(quintets): %s, numquintets: %s, lengthinbits: %s, numoctetsofdata: %s, numoctetsneeded: %s, os: %s" % (len(os), i, len(quintets), numquintets, lengthinbits, numoctetsofdata, numoctetsneeded, os,)
-        num = num * 256
-        num = num + os[i]
-        if cutoff == 1:
-            cutoff = 256
-            continue
-        cutoff = cutoff * 8
-        quintet = num / cutoff
-        quintets.append(quintet)
-        num = num - (quintet * cutoff)
-
-        cutoff = cutoff / 32
-        quintet = num / cutoff
-        quintets.append(quintet)
-        num = num - (quintet * cutoff)
-
-    if len(quintets) > numquintets:
-        assert len(quintets) == (numquintets+1), "len(quintets): %s, numquintets: %s, quintets: %s" % (len(quintets), numquintets, quintets,)
-        quintets = quintets[:numquintets]
-    res = string.translate(string.join(map(chr, quintets), ''), v2ctranstable)
-    assert could_be_base32_encoded_l(res, lengthinbits), "lengthinbits: %s, res: %s" % (lengthinbits, res,)
-    return res
-
-# b2a() uses the minimal number of quintets sufficient to encode the binary
-# input.  It just so happens that the relation is like this (everything is
-# modulo 40 bits).
-# num_qs = NUM_OS_TO_NUM_QS[num_os]
-NUM_OS_TO_NUM_QS=(0, 2, 4, 5, 7,)
-
-# num_os = NUM_QS_TO_NUM_OS[num_qs], but if not NUM_QS_LEGIT[num_qs] then
-# there is *no* number of octets which would have resulted in this number of
-# quintets, so either the encoded string has been mangled (truncated) or else
-# you were supposed to decode it with a2b_l() (which means you were supposed
-# to know the actual length of the encoded data).
-
-NUM_QS_TO_NUM_OS=(0, 1, 1, 2, 2, 3, 3, 4)
-NUM_QS_LEGIT=(1, 0, 1, 0, 1, 1, 0, 1,)
-NUM_QS_TO_NUM_BITS=tuple(map(lambda x: x*8, NUM_QS_TO_NUM_OS))
-
-# A fast way to determine whether a given string *could* be base-32 encoded data, assuming that the
-# original data had 8K bits for a positive integer K.
-# The boolean value of s8[len(s)%8][ord(s[-1])], where s is the possibly base-32 encoded string
-# tells whether the final character is reasonable.
-def add_check_array(cs, sfmap):
-    checka=[0] * 256
-    for c in cs:
-        checka[ord(c)] = 1
-    sfmap.append(tuple(checka))
-
-def init_s8():
-    s8 = []
-    add_check_array(chars, s8)
-    for lenmod8 in (1, 2, 3, 4, 5, 6, 7,):
-        if NUM_QS_LEGIT[lenmod8]:
-            add_check_array(get_trailing_chars_without_lsbs(4-(NUM_QS_TO_NUM_BITS[lenmod8]%5)), s8)
-        else:
-            add_check_array('', s8)
-    return tuple(s8)
-s8 = init_s8()
-
-# A somewhat fast way to determine whether a given string *could* be base-32 encoded data, given a
-# lengthinbits.
-# The boolean value of s5[lengthinbits%5][ord(s[-1])], where s is the possibly base-32 encoded
-# string tells whether the final character is reasonable.
-def init_s5():
-    s5 = []
-    add_check_array(get_trailing_chars_without_lsbs(0), s5)
-    for lenmod5 in [1,2,3,4]:
-        add_check_array(get_trailing_chars_without_lsbs(5-lenmod5), s5)
-    return tuple(s5)
-s5 = init_s5()
-
-def could_be_base32_encoded(s, s8=s8, tr=string.translate, identitytranstable=identitytranstable, chars=chars):
-    if s == '':
-        return True
-    return s8[len(s)%8][ord(s[-1])] and not tr(s, identitytranstable, chars)
-
-def could_be_base32_encoded_l(s, lengthinbits, s5=s5, tr=string.translate, identitytranstable=identitytranstable, chars=chars):
-    if s == '':
-        return True
-    assert lengthinbits%5 < len(s5), lengthinbits
-    assert ord(s[-1]) < s5[lengthinbits%5]
-    return (((lengthinbits+4)/5) == len(s)) and s5[lengthinbits%5][ord(s[-1])] and not string.translate(s, identitytranstable, chars)
-
-def num_octets_that_encode_to_this_many_quintets(numqs):
-    # Here is a computation that conveniently expresses this:
-    return (numqs*5+3)/8
-
-def a2b(cs):
-    """
-    @param cs the base-32 encoded data (a string)
-    """
-    precondition(could_be_base32_encoded(cs), "cs is required to be possibly base32 encoded data.", cs=cs)
-
-    return a2b_l(cs, num_octets_that_encode_to_this_many_quintets(len(cs))*8)
-
-def a2b_l(cs, lengthinbits):
-    """
-    @param lengthinbits the number of bits of data in encoded into cs
-
-    a2b_l() will return a result big enough to hold lengthinbits bits.  So for example if cs is
-    4 characters long (encoding at least 15 and up to 20 bits) and lengthinbits is 16, then a2b_l()
-    will return a string of length 2 (since 2 bytes is sufficient to store 16 bits).  If cs is 4
-    characters long and lengthinbits is 20, then a2b_l() will return a string of length 3 (since
-    3 bytes is sufficient to store 20 bits).  Note that b2a_l() does not mask off unused least-
-    significant bits, so for example if cs is 4 characters long and lengthinbits is 17, then you
-    must ensure that all three of the unused least-significant bits of cs are zero bits or you will
-    get the wrong result.  This precondition is tested by assertions if assertions are enabled.
-    (Generally you just require the encoder to ensure this consistency property between the least
-    significant zero bits and value of lengthinbits, and reject strings that have a length-in-bits
-    which isn't a multiple of 8 and yet don't have trailing zero bits, as improperly encoded.)
-
-    Please see the warning in the docstring of b2a_l() regarding the use of b2a() versus b2a_l().
-
-    @return the data encoded in cs
-    """
-    precondition(could_be_base32_encoded_l(cs, lengthinbits), "cs is required to be possibly base32 encoded data.", cs=cs, lengthinbits=lengthinbits)
-    if cs == '':
-        return ''
-
-    qs = map(ord, string.translate(cs, c2vtranstable))
-
-    numoctets = (lengthinbits+7)/8
-    numquintetsofdata = (lengthinbits+4)/5
-    # strip trailing quintets that won't be used
-    del qs[numquintetsofdata:]
-    # zero out any unused bits in the final quintet
-    if lengthinbits % 5 != 0:
-        qs[-1] = qs[-1] >> (5-(lengthinbits % 5))
-        qs[-1] = qs[-1] << (5-(lengthinbits % 5))
-    # append zero quintets for padding if needed
-    numquintetsneeded = (numoctets*8+4)/5
-    qs.extend([0]*(numquintetsneeded-len(qs)))
-
-    octets = []
-    pos = 2048
-    num = qs[0] * pos
-    readybits = 5
-    i = 1
-    while len(octets) < numoctets:
-        while pos > 256:
-            pos = pos / 32
-            num = num + (qs[i] * pos)
-            i = i + 1
-        octet = num / 256
-        octets.append(octet)
-        num = num - (octet * 256)
-        num = num * 256
-        pos = pos * 256
-    assert len(octets) == numoctets, "len(octets): %s, numoctets: %s, octets: %s" % (len(octets), numoctets, octets,)
-    res = ''.join(map(chr, octets))
-    precondition(b2a_l(res, lengthinbits) == cs, "cs is required to be the canonical base-32 encoding of some data.", b2a(res), res=res, cs=cs)
-    return res
  
  from foolscap import base32
  def nodeid_b2a(nodeid):
diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py

index fa374e904a8a4730761fec104bf4bbd847f30315..aa0b58937f99e66ab22f102014e3e39d3b3f0f27 100644 (file)
--- a/src/allmydata/webish.py
+++ b/src/allmydata/webish.py
@@ -6,7 +6,7 @@ from twisted.internet import defer, address
  from twisted.internet.interfaces import IConsumer
  from nevow import inevow, rend, loaders, appserver, url, tags as T
  from nevow.static import File as nevow_File # TODO: merge with static.File?
-from allmydata.util import fileutil, idlib, observer, log
+from allmydata.util import base32, fileutil, idlib, observer, log
  import simplejson
  from allmydata.interfaces import IDownloadTarget, IDirectoryNode, IFileNode, \
       IMutableFileNode
@@ -1567,7 +1567,7 @@ class Status(rend.Page):
  
      def _render_common(self, ctx, data):
          s = data
-        si_s = idlib.b2a_or_none(s.get_storage_index())
+        si_s = base32.b2a_or_none(s.get_storage_index())
          if si_s is None:
              si_s = "(None)"
          ctx.fillSlots("si", si_s)
author	Zooko O'Whielacronx <zooko@zooko.com>
	Fri, 15 Feb 2008 02:27:47 +0000 (19:27 -0700)
committer	Zooko O'Whielacronx <zooko@zooko.com>
	Fri, 15 Feb 2008 02:27:47 +0000 (19:27 -0700)
src/allmydata/client.py		patch \| blob \| history
src/allmydata/download.py		patch \| blob \| history
src/allmydata/encode.py		patch \| blob \| history
src/allmydata/hashtree.py		patch \| blob \| history
src/allmydata/mutable.py		patch \| blob \| history
src/allmydata/scripts/debug.py		patch \| blob \| history
src/allmydata/storage.py		patch \| blob \| history
src/allmydata/test/test_cli.py		patch \| blob \| history
src/allmydata/test/test_client.py		patch \| blob \| history
src/allmydata/test/test_uri.py		patch \| blob \| history
src/allmydata/test/test_util.py		patch \| blob \| history
src/allmydata/upload.py		patch \| blob \| history
src/allmydata/uri.py		patch \| blob \| history
src/allmydata/util/base32.py	[new file with mode: 0644]	patch \| blob
src/allmydata/util/base62.py		patch \| blob \| history
src/allmydata/util/idlib.py		patch \| blob \| history
src/allmydata/webish.py		patch \| blob \| history