From: Zooko O'Whielacronx Date: Fri, 15 Feb 2008 02:27:47 +0000 (-0700) Subject: switch from base62 to base32 for storage indices, switch from z-base-32 to rfc 3548... X-Git-Url: https://git.rkrishnan.org/pf/content/something?a=commitdiff_plain;h=7c6de95bc62265390d312fae9755664e0450cda9;p=tahoe-lafs%2Ftahoe-lafs.git switch from base62 to base32 for storage indices, switch from z-base-32 to rfc 3548 base-32 for everything, separate out base32 encoding from idlib --- diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 7314cb6f..12afba92 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -15,7 +15,7 @@ from allmydata.checker import Checker from allmydata.offloaded import Helper from allmydata.control import ControlServer from allmydata.introducer import IntroducerClient -from allmydata.util import hashutil, idlib, testutil +from allmydata.util import hashutil, base32, testutil from allmydata.filenode import FileNode from allmydata.dirnode import NewDirectoryNode from allmydata.mutable import MutableFileNode @@ -102,9 +102,9 @@ class Client(node.Node, testutil.PollMixin): def init_lease_secret(self): def make_secret(): - return idlib.b2a(os.urandom(hashutil.CRYPTO_VAL_SIZE)) + "\n" + return base32.b2a(os.urandom(hashutil.CRYPTO_VAL_SIZE)) + "\n" secret_s = self.get_or_create_private_config("secret", make_secret) - self._lease_secret = idlib.a2b(secret_s) + self._lease_secret = base32.a2b(secret_s) def init_storage(self): # should we run a storage server (and publish it for others to use)? diff --git a/src/allmydata/download.py b/src/allmydata/download.py index aa54f0f1..c4c7ea5d 100644 --- a/src/allmydata/download.py +++ b/src/allmydata/download.py @@ -6,7 +6,7 @@ from twisted.internet.interfaces import IPushProducer, IConsumer from twisted.application import service from foolscap.eventual import eventually -from allmydata.util import idlib, mathutil, hashutil, log +from allmydata.util import base32, mathutil, hashutil, log from allmydata.util.assertutil import _assert from allmydata import codec, hashtree, storage, uri from allmydata.interfaces import IDownloadTarget, IDownloader, IFileURI, \ @@ -70,7 +70,7 @@ class Output: crypttext_leaves = {self._segment_number: ch.digest()} self.log(format="crypttext leaf hash (%(bytes)sB) [%(segnum)d] is %(hash)s", bytes=len(crypttext), - segnum=self._segment_number, hash=idlib.b2a(ch.digest()), + segnum=self._segment_number, hash=base32.b2a(ch.digest()), level=log.NOISY) self._crypttext_hash_tree.set_hashes(leaves=crypttext_leaves) @@ -86,7 +86,7 @@ class Output: plaintext_leaves = {self._segment_number: ph.digest()} self.log(format="plaintext leaf hash (%(bytes)sB) [%(segnum)d] is %(hash)s", bytes=len(plaintext), - segnum=self._segment_number, hash=idlib.b2a(ph.digest()), + segnum=self._segment_number, hash=base32.b2a(ph.digest()), level=log.NOISY) self._plaintext_hash_tree.set_hashes(leaves=plaintext_leaves) @@ -180,7 +180,7 @@ class ValidatedBucket: #log.msg("checking block_hash(shareid=%d, blocknum=%d) len=%d " # "%r .. %r: %s" % # (self.sharenum, blocknum, len(blockdata), - # blockdata[:50], blockdata[-50:], idlib.b2a(blockhash))) + # blockdata[:50], blockdata[-50:], base32.b2a(blockhash))) # we always validate the blockhash bh = dict(enumerate(blockhashes)) @@ -203,22 +203,22 @@ class ValidatedBucket: received from the remote peer were bad.""") log.msg(" have self._share_hash: %s" % bool(self._share_hash)) log.msg(" block length: %d" % len(blockdata)) - log.msg(" block hash: %s" % idlib.b2a_or_none(blockhash)) + log.msg(" block hash: %s" % base32.b2a_or_none(blockhash)) if len(blockdata) < 100: log.msg(" block data: %r" % (blockdata,)) else: log.msg(" block data start/end: %r .. %r" % (blockdata[:50], blockdata[-50:])) - log.msg(" root hash: %s" % idlib.b2a(self._roothash)) + log.msg(" root hash: %s" % base32.b2a(self._roothash)) log.msg(" share hash tree:\n" + self.share_hash_tree.dump()) log.msg(" block hash tree:\n" + self.block_hash_tree.dump()) lines = [] for i,h in sorted(sharehashes): - lines.append("%3d: %s" % (i, idlib.b2a_or_none(h))) + lines.append("%3d: %s" % (i, base32.b2a_or_none(h))) log.msg(" sharehashes:\n" + "\n".join(lines) + "\n") lines = [] for i,h in enumerate(blockhashes): - lines.append("%3d: %s" % (i, idlib.b2a_or_none(h))) + lines.append("%3d: %s" % (i, base32.b2a_or_none(h))) log.msg(" blockhashes:\n" + "\n".join(lines) + "\n") raise @@ -782,13 +782,13 @@ class FileDownloader: if self.check_crypttext_hash: _assert(self._crypttext_hash == self._output.crypttext_hash, "bad crypttext_hash: computed=%s, expected=%s" % - (idlib.b2a(self._output.crypttext_hash), - idlib.b2a(self._crypttext_hash))) + (base32.b2a(self._output.crypttext_hash), + base32.b2a(self._crypttext_hash))) if self.check_plaintext_hash: _assert(self._plaintext_hash == self._output.plaintext_hash, "bad plaintext_hash: computed=%s, expected=%s" % - (idlib.b2a(self._output.plaintext_hash), - idlib.b2a(self._plaintext_hash))) + (base32.b2a(self._output.plaintext_hash), + base32.b2a(self._plaintext_hash))) _assert(self._output.length == self._size, got=self._output.length, expected=self._size) return self._output.finish() diff --git a/src/allmydata/encode.py b/src/allmydata/encode.py index c06b1054..def30526 100644 --- a/src/allmydata/encode.py +++ b/src/allmydata/encode.py @@ -6,7 +6,7 @@ from twisted.internet import defer from foolscap import eventual from allmydata import storage, uri from allmydata.hashtree import HashTree -from allmydata.util import mathutil, hashutil, idlib, log +from allmydata.util import mathutil, hashutil, base32, log from allmydata.util.assertutil import _assert, precondition from allmydata.codec import CRSEncoder from allmydata.interfaces import IEncoder, IStorageBucketWriter, \ @@ -435,11 +435,11 @@ class Encoder(object): d = self.send_subshare(shareid, segnum, subshare, lognum) dl.append(d) subshare_hash = hashutil.block_hash(subshare) - #from allmydata.util import idlib + #from allmydata.util import base32 #log.msg("creating block (shareid=%d, blocknum=%d) " # "len=%d %r .. %r: %s" % # (shareid, segnum, len(subshare), - # subshare[:50], subshare[-50:], idlib.b2a(subshare_hash))) + # subshare[:50], subshare[-50:], base32.b2a(subshare_hash))) self.subshare_hashes[shareid].append(subshare_hash) dl = self._gather_responses(dl) @@ -518,7 +518,7 @@ class Encoder(object): d.addCallback(_got) def _got_hashtree_leaves(leaves): self.log("Encoder: got plaintext_hashtree_leaves: %s" % - (",".join([idlib.b2a(h) for h in leaves]),), + (",".join([base32.b2a(h) for h in leaves]),), level=log.NOISY) ht = list(HashTree(list(leaves))) self.uri_extension_data["plaintext_root_hash"] = ht[0] @@ -636,7 +636,7 @@ class Encoder(object): ed = {} for k,v in self.uri_extension_data.items(): if k.endswith("hash"): - ed[k] = idlib.b2a(v) + ed[k] = base32.b2a(v) else: ed[k] = v self.log("uri_extension_data is %s" % (ed,), level=log.NOISY, parent=lp) diff --git a/src/allmydata/hashtree.py b/src/allmydata/hashtree.py index f1a839f6..c2f6deca 100644 --- a/src/allmydata/hashtree.py +++ b/src/allmydata/hashtree.py @@ -48,7 +48,7 @@ or implied. It probably won't make your computer catch on fire, or eat your children, but it might. Use at your own risk. """ -from allmydata.util import idlib +from allmydata.util import base32 from allmydata.util.hashutil import tagged_hash, tagged_pair_hash __version__ = '1.0.0-allmydata' @@ -153,7 +153,7 @@ class CompleteBinaryTreeMixin: lines = [] for i,depth in self.depth_first(): lines.append("%s%3d: %s" % (" "*depth, i, - idlib.b2a_or_none(self[i]))) + base32.b2a_or_none(self[i]))) return "\n".join(lines) + "\n" def get_leaf_index(self, leafnum): diff --git a/src/allmydata/mutable.py b/src/allmydata/mutable.py index 4459ee71..b125f2f0 100644 --- a/src/allmydata/mutable.py +++ b/src/allmydata/mutable.py @@ -6,7 +6,7 @@ from twisted.internet import defer from twisted.python import failure from foolscap.eventual import eventually from allmydata.interfaces import IMutableFileNode, IMutableFileURI -from allmydata.util import hashutil, mathutil, idlib, log +from allmydata.util import base32, hashutil, mathutil, idlib, log from allmydata.uri import WriteableSSKFileURI from allmydata import hashtree, codec, storage from allmydata.encode import NotEnoughPeersError @@ -404,7 +404,7 @@ class Retrieve: # ok, it's a valid verinfo. Add it to the list of validated # versions. self.log(" found valid version %d-%s from %s-sh%d: %d-%d/%d/%d" - % (seqnum, idlib.b2a(root_hash)[:4], + % (seqnum, base32.b2a(root_hash)[:4], idlib.shortnodeid_b2a(peerid), shnum, k, N, segsize, datalength)) self._valid_versions[verinfo] = (prefix, DictOfSets()) @@ -562,7 +562,7 @@ class Retrieve: shares_s.append("#%d" % shnum) shares_s = ",".join(shares_s) self.log("_attempt_decode: version %d-%s, shares: %s" % - (seqnum, idlib.b2a(root_hash)[:4], shares_s)) + (seqnum, base32.b2a(root_hash)[:4], shares_s)) # first, validate each share that we haven't validated yet. We use # self._valid_shares to remember which ones we've already checked. @@ -963,7 +963,7 @@ class Publish: for oldplace in current_share_peers.get(shnum, []): (peerid, seqnum, R) = oldplace logmsg2.append("%s:#%d:R=%s" % (idlib.shortnodeid_b2a(peerid), - seqnum, idlib.b2a(R)[:4])) + seqnum, base32.b2a(R)[:4])) logmsg.append("sh%d on (%s)" % (shnum, "/".join(logmsg2))) self.log("sharemap: %s" % (", ".join(logmsg)), level=log.NOISY) self.log("we are planning to push new seqnum=#%d" % self._new_seqnum, @@ -1126,7 +1126,7 @@ class Publish: for i in needed_hashes ] ) root_hash = share_hash_tree[0] assert len(root_hash) == 32 - self.log("my new root_hash is %s" % idlib.b2a(root_hash)) + self.log("my new root_hash is %s" % base32.b2a(root_hash)) prefix = pack_prefix(seqnum, root_hash, IV, required_shares, total_shares, @@ -1257,8 +1257,8 @@ class Publish: " shnum=%d: I thought they had #%d:R=%s," " but testv reported #%d:R=%s" % (shnum, - seqnum, idlib.b2a(root_hash)[:4], - old_seqnum, idlib.b2a(old_root_hash)[:4]), + seqnum, base32.b2a(root_hash)[:4], + old_seqnum, base32.b2a(old_root_hash)[:4]), parent=lp, level=log.WEIRD) surprised = True if surprised: @@ -1268,7 +1268,7 @@ class Publish: for shnum, places in dispatch_map.items(): sent_to = [(idlib.shortnodeid_b2a(peerid), seqnum, - idlib.b2a(root_hash)[:4]) + base32.b2a(root_hash)[:4]) for (peerid,seqnum,root_hash) in places] self.log(" share %d sent to: %s" % (shnum, sent_to), level=log.NOISY) diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index df1d6f33..cee32560 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -98,7 +98,7 @@ def format_expiration_time(expiration_time): def dump_mutable_share(config, out, err): from allmydata import storage - from allmydata.util import idlib + from allmydata.util import base32, idlib m = storage.MutableShareFile(config['filename']) f = open(config['filename'], "rb") WE, nodeid = m._read_write_enabler_and_nodeid(f) @@ -118,7 +118,7 @@ def dump_mutable_share(config, out, err): print >>out print >>out, "Mutable slot found:" print >>out, " share_type: %s" % share_type - print >>out, " write_enabler: %s" % idlib.b2a(WE) + print >>out, " write_enabler: %s" % base32.b2a(WE) print >>out, " WE for nodeid: %s" % idlib.nodeid_b2a(nodeid) print >>out, " num_extra_leases: %d" % num_extra_leases print >>out, " container_size: %d" % container_size @@ -130,8 +130,8 @@ def dump_mutable_share(config, out, err): print >>out, " ownerid: %d" % oid when = format_expiration_time(et) print >>out, " expires in %s" % when - print >>out, " renew_secret: %s" % idlib.b2a(rs) - print >>out, " cancel_secret: %s" % idlib.b2a(cs) + print >>out, " renew_secret: %s" % base32.b2a(rs) + print >>out, " cancel_secret: %s" % base32.b2a(cs) print >>out, " secrets are for nodeid: %s" % idlib.nodeid_b2a(anid) else: print >>out, "No leases." @@ -144,7 +144,7 @@ def dump_mutable_share(config, out, err): def dump_SDMF_share(offset, length, config, out, err): from allmydata import mutable - from allmydata.util import idlib + from allmydata.util import base32 f = open(config['filename'], "rb") f.seek(offset) @@ -168,8 +168,8 @@ def dump_SDMF_share(offset, length, config, out, err): print >>out, " SDMF contents:" print >>out, " seqnum: %d" % seqnum - print >>out, " root_hash: %s" % idlib.b2a(root_hash) - print >>out, " IV: %s" % idlib.b2a(IV) + print >>out, " root_hash: %s" % base32.b2a(root_hash) + print >>out, " IV: %s" % base32.b2a(IV) print >>out, " required_shares: %d" % k print >>out, " total_shares: %d" % N print >>out, " segsize: %d" % segsize @@ -194,7 +194,7 @@ class DumpCapOptions(usage.Options): def dump_cap(config, out=sys.stdout, err=sys.stderr): from allmydata import uri - from allmydata.util.idlib import a2b + from allmydata.util import base32 from base64 import b32decode import urlparse, urllib @@ -204,11 +204,11 @@ def dump_cap(config, out=sys.stdout, err=sys.stderr): nodeid = b32decode(config['nodeid'].upper()) secret = None if config['client-secret']: - secret = a2b(config['client-secret']) + secret = base32.a2b(config['client-secret']) elif config['client-dir']: secretfile = os.path.join(config['client-dir'], "private", "secret") try: - secret = a2b(open(secretfile, "r").read().strip()) + secret = base32.a2b(open(secretfile, "r").read().strip()) except EnvironmentError: pass @@ -224,34 +224,33 @@ def dump_cap(config, out=sys.stdout, err=sys.stderr): def _dump_secrets(storage_index, secret, nodeid, out): from allmydata.util import hashutil - from allmydata.util.idlib import b2a + from allmydata.util import base32 if secret: crs = hashutil.my_renewal_secret_hash(secret) - print >>out, " client renewal secret:", b2a(crs) + print >>out, " client renewal secret:", base32.b2a(crs) frs = hashutil.file_renewal_secret_hash(crs, storage_index) - print >>out, " file renewal secret:", b2a(frs) + print >>out, " file renewal secret:", base32.b2a(frs) if nodeid: renew = hashutil.bucket_renewal_secret_hash(frs, nodeid) - print >>out, " lease renewal secret:", b2a(renew) + print >>out, " lease renewal secret:", base32.b2a(renew) ccs = hashutil.my_cancel_secret_hash(secret) - print >>out, " client cancel secret:", b2a(ccs) + print >>out, " client cancel secret:", base32.b2a(ccs) fcs = hashutil.file_cancel_secret_hash(ccs, storage_index) - print >>out, " file cancel secret:", b2a(fcs) + print >>out, " file cancel secret:", base32.b2a(fcs) if nodeid: cancel = hashutil.bucket_cancel_secret_hash(fcs, nodeid) - print >>out, " lease cancel secret:", b2a(cancel) + print >>out, " lease cancel secret:", base32.b2a(cancel) def dump_uri_instance(u, nodeid, secret, out, err, show_header=True): from allmydata import storage, uri - from allmydata.util.idlib import b2a - from allmydata.util import hashutil + from allmydata.util import base32, hashutil if isinstance(u, uri.CHKFileURI): if show_header: print >>out, "CHK File:" - print >>out, " key:", b2a(u.key) - print >>out, " UEB hash:", b2a(u.uri_extension_hash) + print >>out, " key:", base32.b2a(u.key) + print >>out, " UEB hash:", base32.b2a(u.uri_extension_hash) print >>out, " size:", u.size print >>out, " k/N: %d/%d" % (u.needed_shares, u.total_shares) print >>out, " storage index:", storage.si_b2a(u.storage_index) @@ -259,7 +258,7 @@ def dump_uri_instance(u, nodeid, secret, out, err, show_header=True): elif isinstance(u, uri.CHKFileVerifierURI): if show_header: print >>out, "CHK Verifier URI:" - print >>out, " UEB hash:", b2a(u.uri_extension_hash) + print >>out, " UEB hash:", base32.b2a(u.uri_extension_hash) print >>out, " size:", u.size print >>out, " k/N: %d/%d" % (u.needed_shares, u.total_shares) print >>out, " storage index:", storage.si_b2a(u.storage_index) @@ -272,28 +271,28 @@ def dump_uri_instance(u, nodeid, secret, out, err, show_header=True): elif isinstance(u, uri.WriteableSSKFileURI): if show_header: print >>out, "SSK Writeable URI:" - print >>out, " writekey:", b2a(u.writekey) - print >>out, " readkey:", b2a(u.readkey) + print >>out, " writekey:", base32.b2a(u.writekey) + print >>out, " readkey:", base32.b2a(u.readkey) print >>out, " storage index:", storage.si_b2a(u.storage_index) - print >>out, " fingerprint:", b2a(u.fingerprint) + print >>out, " fingerprint:", base32.b2a(u.fingerprint) print >>out if nodeid: we = hashutil.ssk_write_enabler_hash(u.writekey, nodeid) - print >>out, " write_enabler:", b2a(we) + print >>out, " write_enabler:", base32.b2a(we) print >>out _dump_secrets(u.storage_index, secret, nodeid, out) elif isinstance(u, uri.ReadonlySSKFileURI): if show_header: print >>out, "SSK Read-only URI:" - print >>out, " readkey:", b2a(u.readkey) + print >>out, " readkey:", base32.b2a(u.readkey) print >>out, " storage index:", storage.si_b2a(u.storage_index) - print >>out, " fingerprint:", b2a(u.fingerprint) + print >>out, " fingerprint:", base32.b2a(u.fingerprint) elif isinstance(u, uri.SSKVerifierURI): if show_header: print >>out, "SSK Verifier URI:" print >>out, " storage index:", storage.si_b2a(u.storage_index) - print >>out, " fingerprint:", b2a(u.fingerprint) + print >>out, " fingerprint:", base32.b2a(u.fingerprint) elif isinstance(u, uri.NewDirectoryURI): if show_header: @@ -358,7 +357,7 @@ class CatalogSharesOptions(usage.Options): def describe_share(abs_sharefile, si_s, shnum_s, now, out, err): from allmydata import uri, storage, mutable - from allmydata.util import idlib + from allmydata.util import base32 import struct f = open(abs_sharefile, "rb") @@ -403,7 +402,7 @@ def describe_share(abs_sharefile, si_s, shnum_s, now, out, err): print >>out, "SDMF %s %d/%d %d #%d:%s %d %s" % \ (si_s, k, N, datalen, - seqnum, idlib.b2a(root_hash), + seqnum, base32.b2a(root_hash), expiration, abs_sharefile) else: print >>out, "UNKNOWN mutable %s" % (abs_sharefile,) diff --git a/src/allmydata/storage.py b/src/allmydata/storage.py index 06db965e..f2df9ffb 100644 --- a/src/allmydata/storage.py +++ b/src/allmydata/storage.py @@ -9,7 +9,7 @@ from zope.interface import implements from allmydata.interfaces import RIStorageServer, RIBucketWriter, \ RIBucketReader, IStorageBucketWriter, IStorageBucketReader, HASH_SIZE, \ BadWriteEnablerError, IStatsProducer -from allmydata.util import base62, fileutil, idlib, mathutil, log +from allmydata.util import base32, fileutil, idlib, mathutil, log from allmydata.util.assertutil import precondition, _assert import allmydata # for __version__ @@ -48,10 +48,10 @@ NUM_RE=re.compile("^[0-9]+$") # B+0x48: next lease, or end of record def si_b2a(storageindex): - return base62.b2a(storageindex) + return base32.b2a(storageindex) def si_a2b(ascii_storageindex): - return base62.a2b(ascii_storageindex) + return base32.a2b(ascii_storageindex) def storage_index_to_dir(storageindex): sia = si_b2a(storageindex) diff --git a/src/allmydata/test/test_cli.py b/src/allmydata/test/test_cli.py index 30611731..fe1dad44 100644 --- a/src/allmydata/test/test_cli.py +++ b/src/allmydata/test/test_cli.py @@ -89,31 +89,31 @@ class CLI(unittest.TestCase): size=size) output = self._dump_cap(u.to_string()) self.failUnless("CHK File:" in output) - self.failUnless("key: yyyoryarywdyqnyjbefoadeqbh" in output) - self.failUnless("UEB hash: hd7rwri6djiapo6itg5hcxa7ze5im7z9qwcdu8oka6qinahsbiuo" in output) + self.failUnless("key: aaaqeayeaudaocajbifqydiob4" in output, output) + self.failUnless("UEB hash: 4d5euev6djvynq6vrg34mpy5xi3vl5x7oumdthqky6ovcy4wbvtq" in output, output) self.failUnless("size: 1234" in output) self.failUnless("k/N: 25/100" in output) - self.failUnless("storage index: 2WlXTYP4ahK2VBkx1pckfC" in output, output) + self.failUnless("storage index: kmkbjguwmkxej3wejdcvu74zki" in output, output) - output = self._dump_cap("--client-secret", "p3w849k9whqhw6b9fkf4xjs5xc", + output = self._dump_cap("--client-secret", "5s33nk3qpvnj2fw3z4mnm2y6fa", u.to_string()) - self.failUnless("client renewal secret: pu3oy5fu4irjsudwhn6c71g87anrxi1kokt4hmxz7qh5p1895zpy" in output) + self.failUnless("client renewal secret: jltcy6cppghq6ha3uzcawqr2lvwpzmw4teeqj2if6jd2vfpit6hq" in output, output) output = self._dump_cap(u.get_verifier().to_string()) self.failIf("key: " in output) - self.failUnless("UEB hash: hd7rwri6djiapo6itg5hcxa7ze5im7z9qwcdu8oka6qinahsbiuo" in output) + self.failUnless("UEB hash: 4d5euev6djvynq6vrg34mpy5xi3vl5x7oumdthqky6ovcy4wbvtq" in output, output) self.failUnless("size: 1234" in output) self.failUnless("k/N: 25/100" in output) - self.failUnless("storage index: 2WlXTYP4ahK2VBkx1pckfC" in output, output) + self.failUnless("storage index: kmkbjguwmkxej3wejdcvu74zki" in output, output) prefixed_u = "http://127.0.0.1/uri/%s" % urllib.quote(u.to_string()) output = self._dump_cap(prefixed_u) self.failUnless("CHK File:" in output) - self.failUnless("key: yyyoryarywdyqnyjbefoadeqbh" in output) - self.failUnless("UEB hash: hd7rwri6djiapo6itg5hcxa7ze5im7z9qwcdu8oka6qinahsbiuo" in output) + self.failUnless("key: aaaqeayeaudaocajbifqydiob4" in output, output) + self.failUnless("UEB hash: 4d5euev6djvynq6vrg34mpy5xi3vl5x7oumdthqky6ovcy4wbvtq" in output, output) self.failUnless("size: 1234" in output) self.failUnless("k/N: 25/100" in output) - self.failUnless("storage index: 2WlXTYP4ahK2VBkx1pckfC" in output, output) + self.failUnless("storage index: kmkbjguwmkxej3wejdcvu74zki" in output, output) def test_dump_cap_lit(self): u = uri.LiteralFileURI("this is some data") @@ -128,22 +128,22 @@ class CLI(unittest.TestCase): output = self._dump_cap(u.to_string()) self.failUnless("SSK Writeable URI:" in output) - self.failUnless("writekey: yryonyebyryonyebyryonyebyr" in output) - self.failUnless("readkey: zhgqsyrkuywo3rha41b1d7xrar" in output) - self.failUnless("storage index: 4GWqxTUinIqKqWj770lRIA" in output, output) - self.failUnless("fingerprint: 959x79z6959x79z6959x79z6959x79z6959x79z6959x79z6959y" in output) + self.failUnless("writekey: aeaqcaibaeaqcaibaeaqcaibae" in output, output) + self.failUnless("readkey: x4gowaektauqze4y2sbsd5peye" in output, output) + self.failUnless("storage index: rqx7xnpexjxuqprto6pezagdxi" in output, output) + self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) - output = self._dump_cap("--client-secret", "p3w849k9whqhw6b9fkf4xjs5xc", + output = self._dump_cap("--client-secret", "tylkpgr364eav3ipsnq57yyafu", u.to_string()) - self.failUnless("file renewal secret: xy9p89q9pkitqn4ycwu5tpt9yia7s9izsqudnb4q5jdc3rawgcny" in output) + self.failUnless("file renewal secret: cs54qwurfjmeduruapo46kqwexpcvav5oemczblonglj6xmoyvkq" in output, output) fileutil.make_dirs("cli/test_dump_cap/private") f = open("cli/test_dump_cap/private/secret", "w") - f.write("p3w849k9whqhw6b9fkf4xjs5xc\n") + f.write("y6c7q34mjbt5kkf6hb3utuoj7u\n") f.close() output = self._dump_cap("--client-dir", "cli/test_dump_cap", u.to_string()) - self.failUnless("file renewal secret: xy9p89q9pkitqn4ycwu5tpt9yia7s9izsqudnb4q5jdc3rawgcny" in output) + self.failUnless("file renewal secret: 4jkip4ie2zgmbhcni6g4vmsivwuakpbw7hwnmdancsc6fkrv27kq" in output, output) output = self._dump_cap("--client-dir", "cli/test_dump_cap_BOGUS", u.to_string()) @@ -151,28 +151,28 @@ class CLI(unittest.TestCase): output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j", u.to_string()) - self.failUnless("write_enabler: rqk9q6w46dim5ybshqk9kotkyhqcdqmp1z6498xniuz5kkjs1w7o" in output) + self.failUnless("write_enabler: eok7o6u26dvl3abw4ok7kqrka4omdolnsx627hpcvtx3kkjwsu5q" in output, output) self.failIf("file renewal secret:" in output) output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j", - "--client-secret", "p3w849k9whqhw6b9fkf4xjs5xc", + "--client-secret", "6orzlv22ggdhphjpmsixcbwufq", u.to_string()) - self.failUnless("write_enabler: rqk9q6w46dim5ybshqk9kotkyhqcdqmp1z6498xniuz5kkjs1w7o" in output) - self.failUnless("file renewal secret: xy9p89q9pkitqn4ycwu5tpt9yia7s9izsqudnb4q5jdc3rawgcny" in output) - self.failUnless("lease renewal secret: r3fsw67mfji3c9mtsisqdumc1pz3gquzdrh4cpu63h8du4uuedgo" in output) + self.failUnless("write_enabler: eok7o6u26dvl3abw4ok7kqrka4omdolnsx627hpcvtx3kkjwsu5q" in output, output) + self.failUnless("file renewal secret: aabhsp6kfsxb57jzdan4dnyzcd3m2prx34jd4z5nj5t5a7guf5fq" in output, output) + self.failUnless("lease renewal secret: bajcslergse474ga775msalmxxapgwr27lngeja4u7ef5j7yh4bq" in output, output) u = u.get_readonly() output = self._dump_cap(u.to_string()) self.failUnless("SSK Read-only URI:" in output) - self.failUnless("readkey: zhgqsyrkuywo3rha41b1d7xrar" in output) - self.failUnless("storage index: 4GWqxTUinIqKqWj770lRIA" in output, output) - self.failUnless("fingerprint: 959x79z6959x79z6959x79z6959x79z6959x79z6959x79z6959y" in output) + self.failUnless("readkey: x4gowaektauqze4y2sbsd5peye" in output, output) + self.failUnless("storage index: rqx7xnpexjxuqprto6pezagdxi" in output, output) + self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output) u = u.get_verifier() output = self._dump_cap(u.to_string()) self.failUnless("SSK Verifier URI:" in output) - self.failUnless("storage index: 4GWqxTUinIqKqWj770lRIA" in output, output) - self.failUnless("fingerprint: 959x79z6959x79z6959x79z6959x79z6959x79z6959x79z6959y" in output) + self.failUnless("storage index: rqx7xnpexjxuqprto6pezagdxi" in output, output) + self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output) def test_dump_cap_directory(self): writekey = "\x01" * 16 @@ -182,37 +182,37 @@ class CLI(unittest.TestCase): output = self._dump_cap(u.to_string()) self.failUnless("Directory Writeable URI:" in output) - self.failUnless("writekey: yryonyebyryonyebyryonyebyr" in output) - self.failUnless("readkey: zhgqsyrkuywo3rha41b1d7xrar" in output) - self.failUnless("storage index: 4GWqxTUinIqKqWj770lRIA" in output, output) - self.failUnless("fingerprint: 959x79z6959x79z6959x79z6959x79z6959x79z6959x79z6959y" in output) + self.failUnless("writekey: aeaqcaibaeaqcaibaeaqcaibae" in output, output) + self.failUnless("readkey: x4gowaektauqze4y2sbsd5peye" in output, output) + self.failUnless("storage index: rqx7xnpexjxuqprto6pezagdxi" in output, output) + self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) - output = self._dump_cap("--client-secret", "p3w849k9whqhw6b9fkf4xjs5xc", + output = self._dump_cap("--client-secret", "a3nyfbnkorp377jhguslgc2dqi", u.to_string()) - self.failUnless("file renewal secret: xy9p89q9pkitqn4ycwu5tpt9yia7s9izsqudnb4q5jdc3rawgcny" in output) + self.failUnless("file renewal secret: zwmq2azrd7lfcmhkrhpgjsxeb2vfpixgvrczbo2asqzdfbmiemwq" in output, output) output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j", u.to_string()) - self.failUnless("write_enabler: rqk9q6w46dim5ybshqk9kotkyhqcdqmp1z6498xniuz5kkjs1w7o" in output) + self.failUnless("write_enabler: eok7o6u26dvl3abw4ok7kqrka4omdolnsx627hpcvtx3kkjwsu5q" in output, output) self.failIf("file renewal secret:" in output) output = self._dump_cap("--nodeid", "tqc35esocrvejvg4mablt6aowg6tl43j", - "--client-secret", "p3w849k9whqhw6b9fkf4xjs5xc", + "--client-secret", "rzaq5to2xm6e5otctpdvzw6bfa", u.to_string()) - self.failUnless("write_enabler: rqk9q6w46dim5ybshqk9kotkyhqcdqmp1z6498xniuz5kkjs1w7o" in output) - self.failUnless("file renewal secret: xy9p89q9pkitqn4ycwu5tpt9yia7s9izsqudnb4q5jdc3rawgcny" in output) - self.failUnless("lease renewal secret: r3fsw67mfji3c9mtsisqdumc1pz3gquzdrh4cpu63h8du4uuedgo" in output) + self.failUnless("write_enabler: eok7o6u26dvl3abw4ok7kqrka4omdolnsx627hpcvtx3kkjwsu5q" in output, output) + self.failUnless("file renewal secret: wdmu6rwefvmp2venbb4xz5u3273oybmuu553mi7uic37gfu6bacq" in output, output) + self.failUnless("lease renewal secret: tlvwfudyfeqyss5kybt6ya72foedqxdovumlbt6ok7u5pyrf2mfq" in output, output) u = u.get_readonly() output = self._dump_cap(u.to_string()) self.failUnless("Directory Read-only URI:" in output) - self.failUnless("readkey: zhgqsyrkuywo3rha41b1d7xrar" in output) - self.failUnless("storage index: 4GWqxTUinIqKqWj770lRIA" in output, output) - self.failUnless("fingerprint: 959x79z6959x79z6959x79z6959x79z6959x79z6959x79z6959y" in output) + self.failUnless("readkey: x4gowaektauqze4y2sbsd5peye" in output, output) + self.failUnless("storage index: rqx7xnpexjxuqprto6pezagdxi" in output, output) + self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output) u = u.get_verifier() output = self._dump_cap(u.to_string()) self.failUnless("Directory Verifier URI:" in output) - self.failUnless("storage index: 4GWqxTUinIqKqWj770lRIA" in output, output) - self.failUnless("fingerprint: 959x79z6959x79z6959x79z6959x79z6959x79z6959x79z6959y" in output) + self.failUnless("storage index: rqx7xnpexjxuqprto6pezagdxi" in output, output) + self.failUnless("fingerprint: 737p57x6737p57x6737p57x6737p57x6737p57x6737p57x6737a" in output, output) diff --git a/src/allmydata/test/test_client.py b/src/allmydata/test/test_client.py index 6f2e2f5b..1a263f93 100644 --- a/src/allmydata/test/test_client.py +++ b/src/allmydata/test/test_client.py @@ -7,7 +7,7 @@ from twisted.python import log import allmydata from allmydata import client, introducer -from allmydata.util import idlib +from allmydata.util import base32 from foolscap.eventual import flushEventualQueue class FakeIntroducerClient(introducer.IntroducerClient): @@ -42,9 +42,9 @@ class Basic(unittest.TestCase): secret_fname = os.path.join(basedir, "private", "secret") self.failUnless(os.path.exists(secret_fname), secret_fname) renew_secret = c.get_renewal_secret() - self.failUnless(idlib.b2a(renew_secret)) + self.failUnless(base32.b2a(renew_secret)) cancel_secret = c.get_cancel_secret() - self.failUnless(idlib.b2a(cancel_secret)) + self.failUnless(base32.b2a(cancel_secret)) def test_sizelimit_1(self): basedir = "client.Basic.test_sizelimit_1" diff --git a/src/allmydata/test/test_uri.py b/src/allmydata/test/test_uri.py index 7ccd3278..6b5a8c6a 100644 --- a/src/allmydata/test/test_uri.py +++ b/src/allmydata/test/test_uri.py @@ -44,7 +44,7 @@ class Literal(unittest.TestCase): class Compare(unittest.TestCase): def test_compare(self): lit1 = uri.LiteralFileURI("some data") - fileURI = 'URI:CHK:f3mf6az85wpcai8ma4qayfmxuc:nnw518w5hu3t5oohwtp7ah9n81z9rfg6c1ywk33ia3m64o67nsgo:3:10:345834' + fileURI = 'URI:CHK:f5ahxa25t4qkktywz6teyfvcx4:opuioq7tj2y6idzfp6cazehtmgs5fdcebcz3cygrxyydvcozrmeq:3:10:345834' chk1 = uri.CHKFileURI.init_from_string(fileURI) chk2 = uri.CHKFileURI.init_from_string(fileURI) self.failIfEqual(lit1, chk1) @@ -167,13 +167,13 @@ class Invalid(unittest.TestCase): class Constraint(unittest.TestCase): def test_constraint(self): - good="http://127.0.0.1:8123/uri/URI%3ADIR2%3Aqo8ayna47cpw3rx3kho3mu7q4h%3Abk9qbgx76gh6eyj5ps8p6buz8fffw1ofc37e9w9d6ncsfpuz7icy/" + good="http://127.0.0.1:8123/uri/URI%3ADIR2%3Agh3l5rbvnv2333mrfvalmjfr4i%3Alz6l7u3z3b7g37s4zkdmfpx5ly4ib4m6thrpbusi6ys62qtc6mma/" uri.NewDirectoryURI.init_from_human_encoding(good) self.failUnlessRaises(AssertionError, uri.NewDirectoryURI.init_from_string, good) bad = good + '===' self.failUnlessRaises(AssertionError, uri.NewDirectoryURI.init_from_human_encoding, bad) self.failUnlessRaises(AssertionError, uri.NewDirectoryURI.init_from_string, bad) - fileURI = 'URI:CHK:f3mf6az85wpcai8ma4qayfmxuc:nnw518w5hu3t5oohwtp7ah9n81z9rfg6c1ywk33ia3m64o67nsgo:3:10:345834' + fileURI = 'URI:CHK:gh3l5rbvnv2333mrfvalmjfr4i:lz6l7u3z3b7g37s4zkdmfpx5ly4ib4m6thrpbusi6ys62qtc6mma:3:10:345834' uri.CHKFileURI.init_from_string(fileURI) class Mutable(unittest.TestCase): diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py index 46fa94c3..52e7c0bd 100644 --- a/src/allmydata/test/test_util.py +++ b/src/allmydata/test/test_util.py @@ -6,19 +6,21 @@ from twisted.trial import unittest from twisted.internet import defer from twisted.python import failure -from allmydata.util import bencode, idlib, humanreadable, mathutil, hashutil +from allmydata.util import base32, bencode, idlib, humanreadable, mathutil, hashutil from allmydata.util import assertutil, fileutil, testutil, deferredutil -class IDLib(unittest.TestCase): +class Base32(unittest.TestCase): def test_b2a(self): - self.failUnlessEqual(idlib.b2a("\x12\x34"), "ne4y") + self.failUnlessEqual(base32.b2a("\x12\x34"), "ci2a") def test_b2a_or_none(self): - self.failUnlessEqual(idlib.b2a_or_none(None), None) - self.failUnlessEqual(idlib.b2a_or_none("\x12\x34"), "ne4y") + self.failUnlessEqual(base32.b2a_or_none(None), None) + self.failUnlessEqual(base32.b2a_or_none("\x12\x34"), "ci2a") def test_a2b(self): - self.failUnlessEqual(idlib.a2b("ne4y"), "\x12\x34") - self.failUnlessRaises(AssertionError, idlib.a2b, "b0gus") + self.failUnlessEqual(base32.a2b("ci2a"), "\x12\x34") + self.failUnlessRaises(AssertionError, base32.a2b, "b0gus") + +class IDLib(unittest.TestCase): def test_nodeid_b2a(self): self.failUnlessEqual(idlib.nodeid_b2a("\x00"*20), "a"*32) diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py index 7ff7b3ef..8d7a6339 100644 --- a/src/allmydata/upload.py +++ b/src/allmydata/upload.py @@ -13,7 +13,7 @@ from allmydata.util.hashutil import file_renewal_secret_hash, \ bucket_cancel_secret_hash, plaintext_hasher, \ storage_index_hash, plaintext_segment_hasher, content_hash_key_hasher from allmydata import encode, storage, hashtree, uri -from allmydata.util import idlib, mathutil +from allmydata.util import base32, idlib, mathutil from allmydata.util.assertutil import precondition from allmydata.interfaces import IUploadable, IUploader, IUploadResults, \ IEncryptedUploadable, RIEncryptedUploadable, IUploadStatus @@ -448,7 +448,7 @@ class EncryptAnUploadable: level=log.NOISY) self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s", segnum=len(self._plaintext_segment_hashes)-1, - hash=idlib.b2a(p.digest()), + hash=base32.b2a(p.digest()), level=log.NOISY) offset += this_segment @@ -547,7 +547,7 @@ class EncryptAnUploadable: level=log.NOISY) self.log(format="plaintext leaf hash [%(segnum)d] is %(hash)s", segnum=len(self._plaintext_segment_hashes)-1, - hash=idlib.b2a(p.digest()), + hash=base32.b2a(p.digest()), level=log.NOISY) assert len(self._plaintext_segment_hashes) == num_segments return defer.succeed(tuple(self._plaintext_segment_hashes[first:last])) diff --git a/src/allmydata/uri.py b/src/allmydata/uri.py index acd31f43..770defd6 100644 --- a/src/allmydata/uri.py +++ b/src/allmydata/uri.py @@ -3,7 +3,7 @@ import re, urllib from zope.interface import implements from twisted.python.components import registerAdapter from allmydata import storage -from allmydata.util import base62, idlib, hashutil +from allmydata.util import base32, base32, hashutil from allmydata.interfaces import IURI, IDirnodeURI, IFileURI, IVerifierURI, \ IMutableFileURI, INewDirectoryURI, IReadonlyNewDirectoryURI @@ -11,9 +11,8 @@ from allmydata.interfaces import IURI, IDirnodeURI, IFileURI, IVerifierURI, \ # enough information to retrieve and validate the contents. It shall be # expressed in a limited character set (namely [TODO]). -ZBASE32STR_128bits = '(%s{25}%s)' % (idlib.ZBASE32CHAR, idlib.ZBASE32CHAR_3bits) -ZBASE32STR_256bits = '(%s{51}%s)' % (idlib.ZBASE32CHAR, idlib.ZBASE32CHAR_1bits) -ZBASE62STR_128bits = '(%s{22})' % (base62.ZBASE62CHAR) +BASE32STR_128bits = '(%s{25}%s)' % (base32.BASE32CHAR, base32.BASE32CHAR_3bits) +BASE32STR_256bits = '(%s{51}%s)' % (base32.BASE32CHAR, base32.BASE32CHAR_1bits) SEP='(?::|%3A)' NUMBER='([0-9]+)' @@ -38,11 +37,11 @@ class _BaseURI: class CHKFileURI(_BaseURI): implements(IURI, IFileURI) - STRING_RE=re.compile('^URI:CHK:'+ZBASE32STR_128bits+':'+ - ZBASE32STR_256bits+':'+NUMBER+':'+NUMBER+':'+NUMBER+ + STRING_RE=re.compile('^URI:CHK:'+BASE32STR_128bits+':'+ + BASE32STR_256bits+':'+NUMBER+':'+NUMBER+':'+NUMBER+ '$') HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'CHK'+SEP+ - ZBASE32STR_128bits+SEP+ZBASE32STR_256bits+SEP+NUMBER+ + BASE32STR_128bits+SEP+BASE32STR_256bits+SEP+NUMBER+ SEP+NUMBER+SEP+NUMBER+'$') def __init__(self, key, uri_extension_hash, needed_shares, total_shares, @@ -61,14 +60,14 @@ class CHKFileURI(_BaseURI): def init_from_human_encoding(cls, uri): mo = cls.HUMAN_RE.search(uri) assert mo, uri - return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2)), + return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)), int(mo.group(3)), int(mo.group(4)), int(mo.group(5))) @classmethod def init_from_string(cls, uri): mo = cls.STRING_RE.search(uri) assert mo, uri - return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2)), + return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)), int(mo.group(3)), int(mo.group(4)), int(mo.group(5))) def to_string(self): @@ -77,8 +76,8 @@ class CHKFileURI(_BaseURI): assert isinstance(self.size, (int,long)) return ('URI:CHK:%s:%s:%d:%d:%d' % - (idlib.b2a(self.key), - idlib.b2a(self.uri_extension_hash), + (base32.b2a(self.key), + base32.b2a(self.uri_extension_hash), self.needed_shares, self.total_shares, self.size)) @@ -103,10 +102,10 @@ class CHKFileURI(_BaseURI): class CHKFileVerifierURI(_BaseURI): implements(IVerifierURI) - STRING_RE=re.compile('^URI:CHK-Verifier:'+ZBASE62STR_128bits+':'+ - ZBASE32STR_256bits+':'+NUMBER+':'+NUMBER+':'+NUMBER) + STRING_RE=re.compile('^URI:CHK-Verifier:'+BASE32STR_128bits+':'+ + BASE32STR_256bits+':'+NUMBER+':'+NUMBER+':'+NUMBER) HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'CHK-Verifier'+SEP+ - ZBASE62STR_128bits+SEP+ZBASE32STR_256bits+SEP+NUMBER+ + BASE32STR_128bits+SEP+BASE32STR_256bits+SEP+NUMBER+ SEP+NUMBER+SEP+NUMBER) def __init__(self, storage_index, uri_extension_hash, @@ -122,14 +121,14 @@ class CHKFileVerifierURI(_BaseURI): def init_from_human_encoding(cls, uri): mo = cls.HUMAN_RE.search(uri) assert mo, uri - return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2)), + return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)), int(mo.group(3)), int(mo.group(4)), int(mo.group(5))) @classmethod def init_from_string(cls, uri): mo = cls.STRING_RE.search(uri) assert mo, (uri, cls, cls.STRING_RE) - return cls(storage.si_a2b(mo.group(1)), idlib.a2b(mo.group(2)), + return cls(storage.si_a2b(mo.group(1)), base32.a2b(mo.group(2)), int(mo.group(3)), int(mo.group(4)), int(mo.group(5))) def to_string(self): @@ -139,7 +138,7 @@ class CHKFileVerifierURI(_BaseURI): return ('URI:CHK-Verifier:%s:%s:%d:%d:%d' % (storage.si_b2a(self.storage_index), - idlib.b2a(self.uri_extension_hash), + base32.b2a(self.uri_extension_hash), self.needed_shares, self.total_shares, self.size)) @@ -148,8 +147,8 @@ class CHKFileVerifierURI(_BaseURI): class LiteralFileURI(_BaseURI): implements(IURI, IFileURI) - STRING_RE=re.compile('^URI:LIT:'+idlib.ZBASE32STR_anybytes+'$') - HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'LIT'+SEP+idlib.ZBASE32STR_anybytes+'$') + STRING_RE=re.compile('^URI:LIT:'+base32.BASE32STR_anybytes+'$') + HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'LIT'+SEP+base32.BASE32STR_anybytes+'$') def __init__(self, data=None): if data is not None: @@ -159,16 +158,16 @@ class LiteralFileURI(_BaseURI): def init_from_human_encoding(cls, uri): mo = cls.HUMAN_RE.search(uri) assert mo, uri - return cls(idlib.a2b(mo.group(1))) + return cls(base32.a2b(mo.group(1))) @classmethod def init_from_string(cls, uri): mo = cls.STRING_RE.search(uri) assert mo, uri - return cls(idlib.a2b(mo.group(1))) + return cls(base32.a2b(mo.group(1))) def to_string(self): - return 'URI:LIT:%s' % idlib.b2a(self.data) + return 'URI:LIT:%s' % base32.b2a(self.data) def is_readonly(self): return True @@ -188,10 +187,10 @@ class WriteableSSKFileURI(_BaseURI): implements(IURI, IMutableFileURI) BASE_STRING='URI:SSK:' - STRING_RE=re.compile('^'+BASE_STRING+ZBASE32STR_128bits+':'+ - ZBASE32STR_256bits+'$') + STRING_RE=re.compile('^'+BASE_STRING+BASE32STR_128bits+':'+ + BASE32STR_256bits+'$') HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'SSK'+SEP+ - ZBASE32STR_128bits+SEP+ZBASE32STR_256bits+'$') + BASE32STR_128bits+SEP+BASE32STR_256bits+'$') def __init__(self, writekey, fingerprint): self.writekey = writekey @@ -204,25 +203,25 @@ class WriteableSSKFileURI(_BaseURI): def init_from_human_encoding(cls, uri): mo = cls.HUMAN_RE.search(uri) assert mo, uri - return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2))) + return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2))) @classmethod def init_from_string(cls, uri): mo = cls.STRING_RE.search(uri) assert mo, (uri, cls) - return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2))) + return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2))) def to_string(self): assert isinstance(self.writekey, str) assert isinstance(self.fingerprint, str) - return 'URI:SSK:%s:%s' % (idlib.b2a(self.writekey), - idlib.b2a(self.fingerprint)) + return 'URI:SSK:%s:%s' % (base32.b2a(self.writekey), + base32.b2a(self.fingerprint)) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, self.abbrev()) def abbrev(self): - return idlib.b2a(self.writekey[:5]) + return base32.b2a(self.writekey[:5]) def is_readonly(self): return False @@ -237,8 +236,8 @@ class ReadonlySSKFileURI(_BaseURI): implements(IURI, IMutableFileURI) BASE_STRING='URI:SSK-RO:' - STRING_RE=re.compile('^URI:SSK-RO:'+ZBASE32STR_128bits+':'+ZBASE32STR_256bits+'$') - HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'SSK-RO'+SEP+ZBASE32STR_128bits+SEP+ZBASE32STR_256bits+'$') + STRING_RE=re.compile('^URI:SSK-RO:'+BASE32STR_128bits+':'+BASE32STR_256bits+'$') + HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'SSK-RO'+SEP+BASE32STR_128bits+SEP+BASE32STR_256bits+'$') def __init__(self, readkey, fingerprint): self.readkey = readkey @@ -250,25 +249,25 @@ class ReadonlySSKFileURI(_BaseURI): def init_from_human_encoding(cls, uri): mo = cls.HUMAN_RE.search(uri) assert mo, uri - return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2))) + return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2))) @classmethod def init_from_string(cls, uri): mo = cls.STRING_RE.search(uri) assert mo, uri - return cls(idlib.a2b(mo.group(1)), idlib.a2b(mo.group(2))) + return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2))) def to_string(self): assert isinstance(self.readkey, str) assert isinstance(self.fingerprint, str) - return 'URI:SSK-RO:%s:%s' % (idlib.b2a(self.readkey), - idlib.b2a(self.fingerprint)) + return 'URI:SSK-RO:%s:%s' % (base32.b2a(self.readkey), + base32.b2a(self.fingerprint)) def __repr__(self): return "<%s %s>" % (self.__class__.__name__, self.abbrev()) def abbrev(self): - return idlib.b2a(self.readkey[:5]) + return base32.b2a(self.readkey[:5]) def is_readonly(self): return True @@ -283,8 +282,8 @@ class SSKVerifierURI(_BaseURI): implements(IVerifierURI) BASE_STRING='URI:SSK-Verifier:' - STRING_RE=re.compile('^'+BASE_STRING+ZBASE62STR_128bits+':'+ZBASE32STR_256bits+'$') - HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'SSK-RO'+SEP+ZBASE62STR_128bits+SEP+ZBASE32STR_256bits+'$') + STRING_RE=re.compile('^'+BASE_STRING+BASE32STR_128bits+':'+BASE32STR_256bits+'$') + HUMAN_RE=re.compile('^'+OPTIONALHTTPLEAD+'URI'+SEP+'SSK-RO'+SEP+BASE32STR_128bits+SEP+BASE32STR_256bits+'$') def __init__(self, storage_index, fingerprint): assert len(storage_index) == 16 @@ -295,19 +294,19 @@ class SSKVerifierURI(_BaseURI): def init_from_human_encoding(cls, uri): mo = cls.HUMAN_RE.search(uri) assert mo, uri - return cls(storage.si_a2b(mo.group(1)), idlib.a2b(mo.group(2))) + return cls(storage.si_a2b(mo.group(1)), base32.a2b(mo.group(2))) @classmethod def init_from_string(cls, uri): mo = cls.STRING_RE.search(uri) assert mo, (uri, cls) - return cls(storage.si_a2b(mo.group(1)), idlib.a2b(mo.group(2))) + return cls(storage.si_a2b(mo.group(1)), base32.a2b(mo.group(2))) def to_string(self): assert isinstance(self.storage_index, str) assert isinstance(self.fingerprint, str) return 'URI:SSK-Verifier:%s:%s' % (storage.si_b2a(self.storage_index), - idlib.b2a(self.fingerprint)) + base32.b2a(self.fingerprint)) class _NewDirectoryBaseURI(_BaseURI): implements(IURI, IDirnodeURI) @@ -513,6 +512,6 @@ def unpack_extension_readable(data): unpacked["UEB_hash"] = hashutil.uri_extension_hash(data) for k in sorted(unpacked.keys()): if 'hash' in k: - unpacked[k] = idlib.b2a(unpacked[k]) + unpacked[k] = base32.b2a(unpacked[k]) return unpacked diff --git a/src/allmydata/util/base32.py b/src/allmydata/util/base32.py new file mode 100644 index 00000000..983ff194 --- /dev/null +++ b/src/allmydata/util/base32.py @@ -0,0 +1,261 @@ +# from the Python Standard Library +import string + +from assertutil import precondition + +z_base_32_alphabet = "ybndrfg8ejkmcpqxot1uwisza345h769" # Zooko's choice, rationale in "DESIGN" doc +rfc3548_alphabet = "abcdefghijklmnopqrstuvwxyz234567" # RFC3548 standard used by Gnutella, Content-Addressable Web, THEX, Bitzi, Web-Calculus... +chars = rfc3548_alphabet + +vals = ''.join(map(chr, range(32))) +c2vtranstable = string.maketrans(chars, vals) +v2ctranstable = string.maketrans(vals, chars) +identitytranstable = string.maketrans(chars, chars) + +def _get_trailing_chars_without_lsbs(N, d): + """ + @return: a list of chars that can legitimately appear in the last place when the least significant N bits are ignored. + """ + s = [] + if N < 4: + s.extend(_get_trailing_chars_without_lsbs(N+1, d=d)) + i = 0 + while i < len(chars): + if not d.has_key(i): + d[i] = None + s.append(chars[i]) + i = i + 2**N + return s + +def get_trailing_chars_without_lsbs(N): + precondition((N >= 0) and (N < 5), "N is required to be > 0 and < len(chars).", N=N) + if N == 0: + return chars + d = {} + return ''.join(_get_trailing_chars_without_lsbs(N, d=d)) + +BASE32CHAR = '['+get_trailing_chars_without_lsbs(0)+']' +BASE32CHAR_4bits = '['+get_trailing_chars_without_lsbs(1)+']' +BASE32CHAR_3bits = '['+get_trailing_chars_without_lsbs(2)+']' +BASE32CHAR_2bits = '['+get_trailing_chars_without_lsbs(3)+']' +BASE32CHAR_1bits = '['+get_trailing_chars_without_lsbs(4)+']' +BASE32STR_1byte = BASE32CHAR+BASE32CHAR_3bits +BASE32STR_2bytes = BASE32CHAR+'{3}'+BASE32CHAR_1bits +BASE32STR_3bytes = BASE32CHAR+'{4}'+BASE32CHAR_4bits +BASE32STR_4bytes = BASE32CHAR+'{6}'+BASE32CHAR_2bits +BASE32STR_anybytes = '((?:%s{8})*' % (BASE32CHAR,) + "(?:|%s|%s|%s|%s))" % (BASE32STR_1byte, BASE32STR_2bytes, BASE32STR_3bytes, BASE32STR_4bytes) + +def b2a(os): + """ + @param os the data to be encoded (a string) + + @return the contents of os in base-32 encoded form + """ + return b2a_l(os, len(os)*8) + +def b2a_or_none(os): + if os is not None: + return b2a(os) + +def b2a_l(os, lengthinbits): + """ + @param os the data to be encoded (a string) + @param lengthinbits the number of bits of data in os to be encoded + + b2a_l() will generate a base-32 encoded string big enough to encode lengthinbits bits. So for + example if os is 2 bytes long and lengthinbits is 15, then b2a_l() will generate a 3-character- + long base-32 encoded string (since 3 quintets is sufficient to encode 15 bits). If os is + 2 bytes long and lengthinbits is 16 (or None), then b2a_l() will generate a 4-character string. + Note that b2a_l() does not mask off unused least-significant bits, so for example if os is + 2 bytes long and lengthinbits is 15, then you must ensure that the unused least-significant bit + of os is a zero bit or you will get the wrong result. This precondition is tested by assertions + if assertions are enabled. + + Warning: if you generate a base-32 encoded string with b2a_l(), and then someone else tries to + decode it by calling a2b() instead of a2b_l(), then they will (probably) get a different + string than the one you encoded! So only use b2a_l() when you are sure that the encoding and + decoding sides know exactly which lengthinbits to use. If you do not have a way for the + encoder and the decoder to agree upon the lengthinbits, then it is best to use b2a() and + a2b(). The only drawback to using b2a() over b2a_l() is that when you have a number of + bits to encode that is not a multiple of 8, b2a() can sometimes generate a base-32 encoded + string that is one or two characters longer than necessary. + + @return the contents of os in base-32 encoded form + """ + precondition(isinstance(lengthinbits, (int, long,)), "lengthinbits is required to be an integer.", lengthinbits=lengthinbits) + precondition((lengthinbits+7)/8 == len(os), "lengthinbits is required to specify a number of bits storable in exactly len(os) octets.", lengthinbits=lengthinbits, lenos=len(os)) + + os = map(ord, os) + + numquintets = (lengthinbits+4)/5 + numoctetsofdata = (lengthinbits+7)/8 + # print "numoctetsofdata: %s, len(os): %s, lengthinbits: %s, numquintets: %s" % (numoctetsofdata, len(os), lengthinbits, numquintets,) + # strip trailing octets that won't be used + del os[numoctetsofdata:] + # zero out any unused bits in the final octet + if lengthinbits % 8 != 0: + os[-1] = os[-1] >> (8-(lengthinbits % 8)) + os[-1] = os[-1] << (8-(lengthinbits % 8)) + # append zero octets for padding if needed + numoctetsneeded = (numquintets*5+7)/8 + 1 + os.extend([0]*(numoctetsneeded-len(os))) + + quintets = [] + cutoff = 256 + num = os[0] + i = 0 + while len(quintets) < numquintets: + i = i + 1 + assert len(os) > i, "len(os): %s, i: %s, len(quintets): %s, numquintets: %s, lengthinbits: %s, numoctetsofdata: %s, numoctetsneeded: %s, os: %s" % (len(os), i, len(quintets), numquintets, lengthinbits, numoctetsofdata, numoctetsneeded, os,) + num = num * 256 + num = num + os[i] + if cutoff == 1: + cutoff = 256 + continue + cutoff = cutoff * 8 + quintet = num / cutoff + quintets.append(quintet) + num = num - (quintet * cutoff) + + cutoff = cutoff / 32 + quintet = num / cutoff + quintets.append(quintet) + num = num - (quintet * cutoff) + + if len(quintets) > numquintets: + assert len(quintets) == (numquintets+1), "len(quintets): %s, numquintets: %s, quintets: %s" % (len(quintets), numquintets, quintets,) + quintets = quintets[:numquintets] + res = string.translate(string.join(map(chr, quintets), ''), v2ctranstable) + assert could_be_base32_encoded_l(res, lengthinbits), "lengthinbits: %s, res: %s" % (lengthinbits, res,) + return res + +# b2a() uses the minimal number of quintets sufficient to encode the binary +# input. It just so happens that the relation is like this (everything is +# modulo 40 bits). +# num_qs = NUM_OS_TO_NUM_QS[num_os] +NUM_OS_TO_NUM_QS=(0, 2, 4, 5, 7,) + +# num_os = NUM_QS_TO_NUM_OS[num_qs], but if not NUM_QS_LEGIT[num_qs] then +# there is *no* number of octets which would have resulted in this number of +# quintets, so either the encoded string has been mangled (truncated) or else +# you were supposed to decode it with a2b_l() (which means you were supposed +# to know the actual length of the encoded data). + +NUM_QS_TO_NUM_OS=(0, 1, 1, 2, 2, 3, 3, 4) +NUM_QS_LEGIT=(1, 0, 1, 0, 1, 1, 0, 1,) +NUM_QS_TO_NUM_BITS=tuple(map(lambda x: x*8, NUM_QS_TO_NUM_OS)) + +# A fast way to determine whether a given string *could* be base-32 encoded data, assuming that the +# original data had 8K bits for a positive integer K. +# The boolean value of s8[len(s)%8][ord(s[-1])], where s is the possibly base-32 encoded string +# tells whether the final character is reasonable. +def add_check_array(cs, sfmap): + checka=[0] * 256 + for c in cs: + checka[ord(c)] = 1 + sfmap.append(tuple(checka)) + +def init_s8(): + s8 = [] + add_check_array(chars, s8) + for lenmod8 in (1, 2, 3, 4, 5, 6, 7,): + if NUM_QS_LEGIT[lenmod8]: + add_check_array(get_trailing_chars_without_lsbs(4-(NUM_QS_TO_NUM_BITS[lenmod8]%5)), s8) + else: + add_check_array('', s8) + return tuple(s8) +s8 = init_s8() + +# A somewhat fast way to determine whether a given string *could* be base-32 encoded data, given a +# lengthinbits. +# The boolean value of s5[lengthinbits%5][ord(s[-1])], where s is the possibly base-32 encoded +# string tells whether the final character is reasonable. +def init_s5(): + s5 = [] + add_check_array(get_trailing_chars_without_lsbs(0), s5) + for lenmod5 in [1,2,3,4]: + add_check_array(get_trailing_chars_without_lsbs(5-lenmod5), s5) + return tuple(s5) +s5 = init_s5() + +def could_be_base32_encoded(s, s8=s8, tr=string.translate, identitytranstable=identitytranstable, chars=chars): + if s == '': + return True + return s8[len(s)%8][ord(s[-1])] and not tr(s, identitytranstable, chars) + +def could_be_base32_encoded_l(s, lengthinbits, s5=s5, tr=string.translate, identitytranstable=identitytranstable, chars=chars): + if s == '': + return True + assert lengthinbits%5 < len(s5), lengthinbits + assert ord(s[-1]) < s5[lengthinbits%5] + return (((lengthinbits+4)/5) == len(s)) and s5[lengthinbits%5][ord(s[-1])] and not string.translate(s, identitytranstable, chars) + +def num_octets_that_encode_to_this_many_quintets(numqs): + # Here is a computation that conveniently expresses this: + return (numqs*5+3)/8 + +def a2b(cs): + """ + @param cs the base-32 encoded data (a string) + """ + precondition(could_be_base32_encoded(cs), "cs is required to be possibly base32 encoded data.", cs=cs) + + return a2b_l(cs, num_octets_that_encode_to_this_many_quintets(len(cs))*8) + +def a2b_l(cs, lengthinbits): + """ + @param lengthinbits the number of bits of data in encoded into cs + + a2b_l() will return a result big enough to hold lengthinbits bits. So for example if cs is + 4 characters long (encoding at least 15 and up to 20 bits) and lengthinbits is 16, then a2b_l() + will return a string of length 2 (since 2 bytes is sufficient to store 16 bits). If cs is 4 + characters long and lengthinbits is 20, then a2b_l() will return a string of length 3 (since + 3 bytes is sufficient to store 20 bits). Note that b2a_l() does not mask off unused least- + significant bits, so for example if cs is 4 characters long and lengthinbits is 17, then you + must ensure that all three of the unused least-significant bits of cs are zero bits or you will + get the wrong result. This precondition is tested by assertions if assertions are enabled. + (Generally you just require the encoder to ensure this consistency property between the least + significant zero bits and value of lengthinbits, and reject strings that have a length-in-bits + which isn't a multiple of 8 and yet don't have trailing zero bits, as improperly encoded.) + + Please see the warning in the docstring of b2a_l() regarding the use of b2a() versus b2a_l(). + + @return the data encoded in cs + """ + precondition(could_be_base32_encoded_l(cs, lengthinbits), "cs is required to be possibly base32 encoded data.", cs=cs, lengthinbits=lengthinbits) + if cs == '': + return '' + + qs = map(ord, string.translate(cs, c2vtranstable)) + + numoctets = (lengthinbits+7)/8 + numquintetsofdata = (lengthinbits+4)/5 + # strip trailing quintets that won't be used + del qs[numquintetsofdata:] + # zero out any unused bits in the final quintet + if lengthinbits % 5 != 0: + qs[-1] = qs[-1] >> (5-(lengthinbits % 5)) + qs[-1] = qs[-1] << (5-(lengthinbits % 5)) + # append zero quintets for padding if needed + numquintetsneeded = (numoctets*8+4)/5 + qs.extend([0]*(numquintetsneeded-len(qs))) + + octets = [] + pos = 2048 + num = qs[0] * pos + readybits = 5 + i = 1 + while len(octets) < numoctets: + while pos > 256: + pos = pos / 32 + num = num + (qs[i] * pos) + i = i + 1 + octet = num / 256 + octets.append(octet) + num = num - (octet * 256) + num = num * 256 + pos = pos * 256 + assert len(octets) == numoctets, "len(octets): %s, numoctets: %s, octets: %s" % (len(octets), numoctets, octets,) + res = ''.join(map(chr, octets)) + precondition(b2a_l(res, lengthinbits) == cs, "cs is required to be the canonical base-32 encoding of some data.", b2a(res), res=res, cs=cs) + return res diff --git a/src/allmydata/util/base62.py b/src/allmydata/util/base62.py index 473cadcd..5a24c8c6 100644 --- a/src/allmydata/util/base62.py +++ b/src/allmydata/util/base62.py @@ -13,7 +13,7 @@ from allmydata.util.mathutil import log_ceil, log_floor chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" -ZBASE62CHAR = '[' + chars + ']' +BASE62CHAR = '[' + chars + ']' vals = ''.join([chr(i) for i in range(62)]) c2vtranstable = string.maketrans(chars, vals) diff --git a/src/allmydata/util/idlib.py b/src/allmydata/util/idlib.py index 202f1c27..5e44b9d8 100644 --- a/src/allmydata/util/idlib.py +++ b/src/allmydata/util/idlib.py @@ -1,264 +1,4 @@ -# from the Python Standard Library -import string -from assertutil import precondition - -z_base_32_alphabet = "ybndrfg8ejkmcpqxot1uwisza345h769" # Zooko's choice, rationale in "DESIGN" doc -rfc3548_alphabet = "abcdefghijklmnopqrstuvwxyz234567" # RFC3548 standard used by Gnutella, Content-Addressable Web, THEX, Bitzi, Web-Calculus... -chars = z_base_32_alphabet - -vals = ''.join(map(chr, range(32))) -c2vtranstable = string.maketrans(chars, vals) -v2ctranstable = string.maketrans(vals, chars) -identitytranstable = string.maketrans(chars, chars) - -def _get_trailing_chars_without_lsbs(N, d): - """ - @return: a list of chars that can legitimately appear in the last place when the least significant N bits are ignored. - """ - s = [] - if N < 4: - s.extend(_get_trailing_chars_without_lsbs(N+1, d=d)) - i = 0 - while i < len(chars): - if not d.has_key(i): - d[i] = None - s.append(chars[i]) - i = i + 2**N - return s - -def get_trailing_chars_without_lsbs(N): - precondition((N >= 0) and (N < 5), "N is required to be > 0 and < len(chars).", N=N) - if N == 0: - return chars - d = {} - return ''.join(_get_trailing_chars_without_lsbs(N, d=d)) - -ZBASE32CHAR = '['+get_trailing_chars_without_lsbs(0)+']' -ZBASE32CHAR_4bits = '['+get_trailing_chars_without_lsbs(1)+']' -ZBASE32CHAR_3bits = '['+get_trailing_chars_without_lsbs(2)+']' -ZBASE32CHAR_2bits = '['+get_trailing_chars_without_lsbs(3)+']' -ZBASE32CHAR_1bits = '['+get_trailing_chars_without_lsbs(4)+']' -ZBASE32STR_1byte = ZBASE32CHAR+ZBASE32CHAR_3bits -ZBASE32STR_2bytes = ZBASE32CHAR+'{3}'+ZBASE32CHAR_1bits -ZBASE32STR_3bytes = ZBASE32CHAR+'{4}'+ZBASE32CHAR_4bits -ZBASE32STR_4bytes = ZBASE32CHAR+'{6}'+ZBASE32CHAR_2bits -ZBASE32STR_anybytes = '((?:%s{8})*' % (ZBASE32CHAR,) + "(?:|%s|%s|%s|%s))" % (ZBASE32STR_1byte, ZBASE32STR_2bytes, ZBASE32STR_3bytes, ZBASE32STR_4bytes) - -def b2a(os): - """ - @param os the data to be encoded (a string) - - @return the contents of os in base-32 encoded form - """ - return b2a_l(os, len(os)*8) - -def b2a_or_none(os): - if os is not None: - return b2a(os) - -def b2a_l(os, lengthinbits): - """ - @param os the data to be encoded (a string) - @param lengthinbits the number of bits of data in os to be encoded - - b2a_l() will generate a base-32 encoded string big enough to encode lengthinbits bits. So for - example if os is 2 bytes long and lengthinbits is 15, then b2a_l() will generate a 3-character- - long base-32 encoded string (since 3 quintets is sufficient to encode 15 bits). If os is - 2 bytes long and lengthinbits is 16 (or None), then b2a_l() will generate a 4-character string. - Note that b2a_l() does not mask off unused least-significant bits, so for example if os is - 2 bytes long and lengthinbits is 15, then you must ensure that the unused least-significant bit - of os is a zero bit or you will get the wrong result. This precondition is tested by assertions - if assertions are enabled. - - Warning: if you generate a base-32 encoded string with b2a_l(), and then someone else tries to - decode it by calling a2b() instead of a2b_l(), then they will (probably) get a different - string than the one you encoded! So only use b2a_l() when you are sure that the encoding and - decoding sides know exactly which lengthinbits to use. If you do not have a way for the - encoder and the decoder to agree upon the lengthinbits, then it is best to use b2a() and - a2b(). The only drawback to using b2a() over b2a_l() is that when you have a number of - bits to encode that is not a multiple of 8, b2a() can sometimes generate a base-32 encoded - string that is one or two characters longer than necessary. - - @return the contents of os in base-32 encoded form - """ - precondition(isinstance(lengthinbits, (int, long,)), "lengthinbits is required to be an integer.", lengthinbits=lengthinbits) - precondition((lengthinbits+7)/8 == len(os), "lengthinbits is required to specify a number of bits storable in exactly len(os) octets.", lengthinbits=lengthinbits, lenos=len(os)) - - os = map(ord, os) - - numquintets = (lengthinbits+4)/5 - numoctetsofdata = (lengthinbits+7)/8 - # print "numoctetsofdata: %s, len(os): %s, lengthinbits: %s, numquintets: %s" % (numoctetsofdata, len(os), lengthinbits, numquintets,) - # strip trailing octets that won't be used - del os[numoctetsofdata:] - # zero out any unused bits in the final octet - if lengthinbits % 8 != 0: - os[-1] = os[-1] >> (8-(lengthinbits % 8)) - os[-1] = os[-1] << (8-(lengthinbits % 8)) - # append zero octets for padding if needed - numoctetsneeded = (numquintets*5+7)/8 + 1 - os.extend([0]*(numoctetsneeded-len(os))) - - quintets = [] - cutoff = 256 - num = os[0] - i = 0 - while len(quintets) < numquintets: - i = i + 1 - assert len(os) > i, "len(os): %s, i: %s, len(quintets): %s, numquintets: %s, lengthinbits: %s, numoctetsofdata: %s, numoctetsneeded: %s, os: %s" % (len(os), i, len(quintets), numquintets, lengthinbits, numoctetsofdata, numoctetsneeded, os,) - num = num * 256 - num = num + os[i] - if cutoff == 1: - cutoff = 256 - continue - cutoff = cutoff * 8 - quintet = num / cutoff - quintets.append(quintet) - num = num - (quintet * cutoff) - - cutoff = cutoff / 32 - quintet = num / cutoff - quintets.append(quintet) - num = num - (quintet * cutoff) - - if len(quintets) > numquintets: - assert len(quintets) == (numquintets+1), "len(quintets): %s, numquintets: %s, quintets: %s" % (len(quintets), numquintets, quintets,) - quintets = quintets[:numquintets] - res = string.translate(string.join(map(chr, quintets), ''), v2ctranstable) - assert could_be_base32_encoded_l(res, lengthinbits), "lengthinbits: %s, res: %s" % (lengthinbits, res,) - return res - -# b2a() uses the minimal number of quintets sufficient to encode the binary -# input. It just so happens that the relation is like this (everything is -# modulo 40 bits). -# num_qs = NUM_OS_TO_NUM_QS[num_os] -NUM_OS_TO_NUM_QS=(0, 2, 4, 5, 7,) - -# num_os = NUM_QS_TO_NUM_OS[num_qs], but if not NUM_QS_LEGIT[num_qs] then -# there is *no* number of octets which would have resulted in this number of -# quintets, so either the encoded string has been mangled (truncated) or else -# you were supposed to decode it with a2b_l() (which means you were supposed -# to know the actual length of the encoded data). - -NUM_QS_TO_NUM_OS=(0, 1, 1, 2, 2, 3, 3, 4) -NUM_QS_LEGIT=(1, 0, 1, 0, 1, 1, 0, 1,) -NUM_QS_TO_NUM_BITS=tuple(map(lambda x: x*8, NUM_QS_TO_NUM_OS)) - -# A fast way to determine whether a given string *could* be base-32 encoded data, assuming that the -# original data had 8K bits for a positive integer K. -# The boolean value of s8[len(s)%8][ord(s[-1])], where s is the possibly base-32 encoded string -# tells whether the final character is reasonable. -def add_check_array(cs, sfmap): - checka=[0] * 256 - for c in cs: - checka[ord(c)] = 1 - sfmap.append(tuple(checka)) - -def init_s8(): - s8 = [] - add_check_array(chars, s8) - for lenmod8 in (1, 2, 3, 4, 5, 6, 7,): - if NUM_QS_LEGIT[lenmod8]: - add_check_array(get_trailing_chars_without_lsbs(4-(NUM_QS_TO_NUM_BITS[lenmod8]%5)), s8) - else: - add_check_array('', s8) - return tuple(s8) -s8 = init_s8() - -# A somewhat fast way to determine whether a given string *could* be base-32 encoded data, given a -# lengthinbits. -# The boolean value of s5[lengthinbits%5][ord(s[-1])], where s is the possibly base-32 encoded -# string tells whether the final character is reasonable. -def init_s5(): - s5 = [] - add_check_array(get_trailing_chars_without_lsbs(0), s5) - for lenmod5 in [1,2,3,4]: - add_check_array(get_trailing_chars_without_lsbs(5-lenmod5), s5) - return tuple(s5) -s5 = init_s5() - -def could_be_base32_encoded(s, s8=s8, tr=string.translate, identitytranstable=identitytranstable, chars=chars): - if s == '': - return True - return s8[len(s)%8][ord(s[-1])] and not tr(s, identitytranstable, chars) - -def could_be_base32_encoded_l(s, lengthinbits, s5=s5, tr=string.translate, identitytranstable=identitytranstable, chars=chars): - if s == '': - return True - assert lengthinbits%5 < len(s5), lengthinbits - assert ord(s[-1]) < s5[lengthinbits%5] - return (((lengthinbits+4)/5) == len(s)) and s5[lengthinbits%5][ord(s[-1])] and not string.translate(s, identitytranstable, chars) - -def num_octets_that_encode_to_this_many_quintets(numqs): - # Here is a computation that conveniently expresses this: - return (numqs*5+3)/8 - -def a2b(cs): - """ - @param cs the base-32 encoded data (a string) - """ - precondition(could_be_base32_encoded(cs), "cs is required to be possibly base32 encoded data.", cs=cs) - - return a2b_l(cs, num_octets_that_encode_to_this_many_quintets(len(cs))*8) - -def a2b_l(cs, lengthinbits): - """ - @param lengthinbits the number of bits of data in encoded into cs - - a2b_l() will return a result big enough to hold lengthinbits bits. So for example if cs is - 4 characters long (encoding at least 15 and up to 20 bits) and lengthinbits is 16, then a2b_l() - will return a string of length 2 (since 2 bytes is sufficient to store 16 bits). If cs is 4 - characters long and lengthinbits is 20, then a2b_l() will return a string of length 3 (since - 3 bytes is sufficient to store 20 bits). Note that b2a_l() does not mask off unused least- - significant bits, so for example if cs is 4 characters long and lengthinbits is 17, then you - must ensure that all three of the unused least-significant bits of cs are zero bits or you will - get the wrong result. This precondition is tested by assertions if assertions are enabled. - (Generally you just require the encoder to ensure this consistency property between the least - significant zero bits and value of lengthinbits, and reject strings that have a length-in-bits - which isn't a multiple of 8 and yet don't have trailing zero bits, as improperly encoded.) - - Please see the warning in the docstring of b2a_l() regarding the use of b2a() versus b2a_l(). - - @return the data encoded in cs - """ - precondition(could_be_base32_encoded_l(cs, lengthinbits), "cs is required to be possibly base32 encoded data.", cs=cs, lengthinbits=lengthinbits) - if cs == '': - return '' - - qs = map(ord, string.translate(cs, c2vtranstable)) - - numoctets = (lengthinbits+7)/8 - numquintetsofdata = (lengthinbits+4)/5 - # strip trailing quintets that won't be used - del qs[numquintetsofdata:] - # zero out any unused bits in the final quintet - if lengthinbits % 5 != 0: - qs[-1] = qs[-1] >> (5-(lengthinbits % 5)) - qs[-1] = qs[-1] << (5-(lengthinbits % 5)) - # append zero quintets for padding if needed - numquintetsneeded = (numoctets*8+4)/5 - qs.extend([0]*(numquintetsneeded-len(qs))) - - octets = [] - pos = 2048 - num = qs[0] * pos - readybits = 5 - i = 1 - while len(octets) < numoctets: - while pos > 256: - pos = pos / 32 - num = num + (qs[i] * pos) - i = i + 1 - octet = num / 256 - octets.append(octet) - num = num - (octet * 256) - num = num * 256 - pos = pos * 256 - assert len(octets) == numoctets, "len(octets): %s, numoctets: %s, octets: %s" % (len(octets), numoctets, octets,) - res = ''.join(map(chr, octets)) - precondition(b2a_l(res, lengthinbits) == cs, "cs is required to be the canonical base-32 encoding of some data.", b2a(res), res=res, cs=cs) - return res from foolscap import base32 def nodeid_b2a(nodeid): diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py index fa374e90..aa0b5893 100644 --- a/src/allmydata/webish.py +++ b/src/allmydata/webish.py @@ -6,7 +6,7 @@ from twisted.internet import defer, address from twisted.internet.interfaces import IConsumer from nevow import inevow, rend, loaders, appserver, url, tags as T from nevow.static import File as nevow_File # TODO: merge with static.File? -from allmydata.util import fileutil, idlib, observer, log +from allmydata.util import base32, fileutil, idlib, observer, log import simplejson from allmydata.interfaces import IDownloadTarget, IDirectoryNode, IFileNode, \ IMutableFileNode @@ -1567,7 +1567,7 @@ class Status(rend.Page): def _render_common(self, ctx, data): s = data - si_s = idlib.b2a_or_none(s.get_storage_index()) + si_s = base32.b2a_or_none(s.get_storage_index()) if si_s is None: si_s = "(None)" ctx.fillSlots("si", si_s)