From 4101bcf2188d1f610c249b9b194ff69a12f22ff0 Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@allmydata.com>
Date: Tue, 16 Jan 2007 21:29:59 -0700
Subject: [PATCH] update URI format, include codec name

---
 src/allmydata/codec.py            | 13 +++++++++----
 src/allmydata/download.py         | 16 ++++++----------
 src/allmydata/interfaces.py       | 11 ++++++-----
 src/allmydata/test/test_upload.py |  5 +++--
 src/allmydata/upload.py           |  4 +++-
 src/allmydata/uri.py              | 25 +++++++++++++++++++++++++
 6 files changed, 52 insertions(+), 22 deletions(-)
 create mode 100644 src/allmydata/uri.py

diff --git a/src/allmydata/codec.py b/src/allmydata/codec.py
index e330fe2e..ac610834 100644
--- a/src/allmydata/codec.py
+++ b/src/allmydata/codec.py
@@ -12,7 +12,7 @@ def netstring(s):
 
 class ReplicatingEncoder(object):
     implements(ICodecEncoder)
-    ENCODER_TYPE = 0
+    ENCODER_TYPE = "rep"
 
     def set_params(self, data_size, required_shares, max_shares):
         self.data_size = data_size
@@ -96,7 +96,7 @@ class Decoder(object):
 
 class PyRSEncoder(object):
     implements(ICodecEncoder)
-    ENCODER_TYPE = 1
+    ENCODER_TYPE = "pyrs"
 
     # we will break the data into vectors in which each element is a single
     # byte (i.e. a single number from 0 to 255), and the length of the vector
@@ -138,7 +138,7 @@ class PyRSEncoder(object):
         return self.ENCODER_TYPE
 
     def get_serialized_params(self):
-        return "%d:%d:%d" % (self.data_size, self.required_shares,
+        return "%d-%d-%d" % (self.data_size, self.required_shares,
                              self.max_shares)
 
     def get_share_size(self):
@@ -179,7 +179,7 @@ class PyRSDecoder(object):
     implements(ICodecDecoder)
 
     def set_serialized_params(self, params):
-        pieces = params.split(":")
+        pieces = params.split("-")
         self.data_size = int(pieces[0])
         self.required_shares = int(pieces[1])
         self.max_shares = int(pieces[2])
@@ -234,3 +234,8 @@ all_encoders = {
     ReplicatingEncoder.ENCODER_TYPE: (ReplicatingEncoder, ReplicatingDecoder),
     PyRSEncoder.ENCODER_TYPE: (PyRSEncoder, PyRSDecoder),
     }
+
+def get_decoder_by_name(name):
+    """Return a new decoder for codec 'name'; KeyError if it is unknown."""
+    # all_encoders maps codec name -> (encoder class, decoder class)
+    return all_encoders[name][1]()
diff --git a/src/allmydata/download.py b/src/allmydata/download.py
index 2b5fca76..94ccf50f 100644
--- a/src/allmydata/download.py
+++ b/src/allmydata/download.py
@@ -8,6 +8,7 @@ from twisted.application import service
 from allmydata.util import idlib, bencode
 from allmydata.util.deferredutil import DeferredListShouldSucceed
 from allmydata import codec
+from allmydata.uri import unpack_uri
 
 class NotEnoughPeersError(Exception):
     pass
@@ -16,20 +17,17 @@ class HaveAllPeersError(Exception):
     # we use this to jump out of the loop
     pass
 
-def unpack_uri(uri):
-    assert uri.startswith("URI:")
-    return bencode.bdecode(uri[4:])
-
 class FileDownloader:
     debug = False
 
-    def __init__(self, peer, verifierid, encoding_params):
+    def __init__(self, peer, uri):
         self._peer = peer
+        (codec_name, codec_params, verifierid) = unpack_uri(uri)
         assert isinstance(verifierid, str)
         assert len(verifierid) == 20
         self._verifierid = verifierid
-        self._decoder = codec.ReplicatingDecoder()
-        self._decoder.set_serialized_params(encoding_params)
+        self._decoder = codec.get_decoder_by_name(codec_name)
+        self._decoder.set_serialized_params(codec_params)
         self.needed_shares = self._decoder.get_required_shares()
 
     def set_download_target(self, target):
@@ -236,14 +234,12 @@ class Downloader(service.MultiService):
     debug = False
 
     def download(self, uri, t):
-        (verifierid, params) = unpack_uri(uri)
         assert self.parent
         assert self.running
-        assert isinstance(verifierid, str)
         t = IDownloadTarget(t)
         assert t.write
         assert t.close
-        dl = FileDownloader(self.parent, verifierid, params)
+        dl = FileDownloader(self.parent, uri)
         dl.set_download_target(t)
         if self.debug:
             dl.debug = True
diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py
index 246e3d7f..893d2a13 100644
--- a/src/allmydata/interfaces.py
+++ b/src/allmydata/interfaces.py
@@ -83,7 +83,7 @@ class ICodecEncoder(Interface):
         """
 
     def get_encoder_type():
-        """Return an integer that describes the type of this encoder.
+        """Return a short string that describes the type of this encoder.
 
         There must be a global table of encoder classes. This method returns
         an index into this table; the value at this index is an encoder
@@ -100,10 +100,11 @@ class ICodecEncoder(Interface):
 
         This string is intended to be embedded in the URI, so there are
         several restrictions on its contents. At the moment I'm thinking that
-        this means it may contain hex digits and colons, and nothing else.
-        The idea is that the URI contains '%d:%s.' %
-        (encoder.get_encoder_type(), encoder.get_serialized_params()), and
-        this is enough information to construct a compatible decoder.
+        this means it may contain hex digits and hyphens, and nothing else.
+        The idea is that the URI contains something like '%s:%s:%s' %
+        (encoder.get_encoder_type(), encoder.get_serialized_params(),
+        b2a(verifierid)), and this is enough information to construct a
+        compatible decoder.
         """
 
     def get_share_size():
diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py
index 82cd5a15..1e7bb9a0 100644
--- a/src/allmydata/test/test_upload.py
+++ b/src/allmydata/test/test_upload.py
@@ -4,6 +4,7 @@ from twisted.internet import defer
 from cStringIO import StringIO
 
 from allmydata import upload, download
+from allmydata.uri import unpack_uri
 
 class StringBucketProxy:
     # This is for unit tests: make a StringIO look like a RIBucketWriter.
@@ -226,10 +227,10 @@ class Uploader(unittest.TestCase):
     def _check(self, uri):
         self.failUnless(isinstance(uri, str))
         self.failUnless(uri.startswith("URI:"))
-        verifierid, params = download.unpack_uri(uri)
+        codec_name, codec_params, verifierid = unpack_uri(uri)
         self.failUnless(isinstance(verifierid, str))
         self.failUnlessEqual(len(verifierid), 20)
-        self.failUnless(isinstance(params, str))
+        self.failUnless(isinstance(codec_params, str))
         peers = self.node.peers
         self.failUnlessEqual(peers[0].allocated_size,
                              len(peers[0].data))
diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py
index 844eaab6..29938e09 100644
--- a/src/allmydata/upload.py
+++ b/src/allmydata/upload.py
@@ -9,6 +9,7 @@ from allmydata.util import idlib, bencode
 from allmydata.util.idlib import peerid_to_short_string as shortid
 from allmydata.util.deferredutil import DeferredListShouldSucceed
 from allmydata import codec
+from allmydata.uri import pack_uri
 
 from cStringIO import StringIO
 import sha
@@ -67,6 +68,7 @@ class FileUploader:
         total_shares = self.max_shares
         needed_shares = self.min_shares
         self._encoder = codec.ReplicatingEncoder()
+        self._codec_name = self._encoder.get_encoder_type()
         self._encoder.set_params(self._size, needed_shares, total_shares)
         self._share_size = self._encoder.get_share_size()
 
@@ -93,7 +95,7 @@ class FileUploader:
         return d
 
     def _compute_uri(self, params):
-        return "URI:%s" % bencode.bencode((self._verifierid, params))
+        return pack_uri(self._codec_name, params, self._verifierid)
 
     def _build_not_enough_peers_error(self):
         yes = ",".join([shortid(p) for p in self.peers_who_said_yes])
diff --git a/src/allmydata/uri.py b/src/allmydata/uri.py
new file mode 100644
index 00000000..5ddaebd6
--- /dev/null
+++ b/src/allmydata/uri.py
@@ -0,0 +1,25 @@
+
+from allmydata.util import bencode, idlib
+
+# the URI shall be an ascii representation of the file. It shall contain
+# enough information to retrieve and validate the contents. It shall be
+# expressed in a limited character set (namely [TODO]).
+
+def pack_uri(codec_name, codec_params, verifierid):
+    """Return 'URI:<codec_name>:<codec_params>:<idlib.b2a(verifierid)>'."""
+    for field in (codec_name, codec_params):
+        assert isinstance(field, str)
+        assert ":" not in field # ":" is the URI field separator
+    assert len(codec_name) < 10
+    assert isinstance(verifierid, str) and len(verifierid) == 20 # sha1 hash
+    encoded_id = idlib.b2a(verifierid)
+    return "URI:%s:%s:%s" % (codec_name, codec_params, encoded_id)
+
+
+def unpack_uri(uri):
+    """Split a URI into (codec_name, codec_params, verifierid)."""
+    assert uri.startswith("URI:")
+    pieces = uri.split(":")
+    if len(pieces) != 4: # "URI" header plus the three fields
+        raise ValueError("malformed URI, expected 4 fields: %r" % (uri,))
+    return pieces[1], pieces[2], idlib.a2b(pieces[3])
-- 
2.45.2