From: Brian Warner <warner@lothar.com>
Date: Sun, 8 Jul 2007 07:17:11 +0000 (-0700)
Subject: web: use real JSON instead of the fake stubs
X-Git-Url: https://git.rkrishnan.org/?a=commitdiff_plain;h=72fc8c5cb875fab76036c3dce9f3b2645309be52;p=tahoe-lafs%2Ftahoe-lafs.git

web: use real JSON instead of the fake stubs
Also include the encoder portion of Bob Ippolito's simplejson-1.7.1 as
allmydata.util.json_encoder . simplejson is distributed under a more liberal
license than Tahoe (looks to be modified BSD), so redistributing it should be ok.
---

diff --git a/docs/webapi.txt b/docs/webapi.txt
index a325321d..060f2674 100644
--- a/docs/webapi.txt
+++ b/docs/webapi.txt
@@ -91,6 +91,11 @@ for files and directories which do not yet exist.
   between files and directories. Programatic clients are expected to use this
   query before actually downloading the file's contents.
 
+  The JSON data is as follows:
+
+   [ 'filenode', { 'mutable': bool, 'uri': file_uri, 'size': bytes } ]
+
+
  GET FILEURL?localfile=$FILENAME
 
   This instructs the client to download the given file and write its contents
@@ -146,6 +151,20 @@ for files and directories which do not yet exist.
   the information from this query to display filesystem navigation choices to
   a human user.
 
+  The JSON data is as follows:
+
+    [ 'dirnode', { 'mutable': bool, 'uri': uri, 'children': children } ]
+
+   where 'children' is a dictionary in which the keys are child names
+   and the values depend upon whether the child is a file or a directory:
+
+     'foo.txt': [ 'filenode', { 'mutable': bool, 'uri': uri, 'size': bytes } ]
+     'subdir':  [ 'dirnode', { 'mutable': bool, 'uri': uri } ]
+
+   Note that each value is the same as the JSON representation of the
+   corresponding FILEURL or DIRURL (except that dirnodes do not recurse).
+
+
  GET DIRURL?t=uri
  GET DIRURL?t=readonly-uri
 
diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py
index d7de57b8..7652ca8b 100644
--- a/src/allmydata/test/test_web.py
+++ b/src/allmydata/test/test_web.py
@@ -227,11 +227,37 @@ class Web(unittest.TestCase):
     def failUnlessIsBarDotTxt(self, res):
         self.failUnlessEqual(res, self.BAR_CONTENTS)
 
+    def worlds_cheapest_json_decoder(self, json):
+        # don't write tests that use 'true' or 'false' as filenames
+        json = re.sub('false', 'False', json)
+        json = re.sub('true', 'True', json)
+        json = re.sub(r'\\/', '/', json)
+        return eval(json)
+
+    def failUnlessIsBarJSON(self, res):
+        data = self.worlds_cheapest_json_decoder(res)
+        self.failUnless(isinstance(data, list))
+        self.failUnlessEqual(data[0], "filenode")
+        self.failUnless(isinstance(data[1], dict))
+        self.failUnlessEqual(data[1]["mutable"], False)
+        self.failUnlessEqual(data[1]["size"], 123)
+        self.failUnlessEqual(data[1]["uri"], self._bar_txt_uri)
+
     def failUnlessIsFooJSON(self, res):
-        self.failUnless("JSONny stuff here" in res)
-        self.failUnless("name=bar.txt, child_uri=%s" % self._bar_txt_uri
-                        in res)
-        self.failUnless("name=blockingfile" in res)
+        data = self.worlds_cheapest_json_decoder(res)
+        self.failUnless(isinstance(data, list))
+        self.failUnlessEqual(data[0], "dirnode")
+        self.failUnless(isinstance(data[1], dict))
+        self.failUnlessEqual(data[1]["mutable"], True)
+        self.failUnlessEqual(data[1]["uri"], self._foo_uri)
+        kidnames = sorted(data[1]["children"].keys())
+        self.failUnlessEqual(kidnames,
+                             ["bar.txt", "blockingfile", "empty", "sub"])
+        kids = data[1]["children"]
+        self.failUnlessEqual(kids["sub"][0], "dirnode")
+        self.failUnlessEqual(kids["bar.txt"][0], "filenode")
+        self.failUnlessEqual(kids["bar.txt"][1]["size"], 123)
+        self.failUnlessEqual(kids["bar.txt"][1]["uri"], self._bar_txt_uri)
 
     def GET(self, urlpath, followRedirect=False):
         url = self.webish_url + urlpath
@@ -370,10 +396,7 @@ class Web(unittest.TestCase):
         # instead. This may make it tricky to emulate the S3 interface
         # completely.
         d = self.GET("/vdrive/global/foo/bar.txt?t=json")
-        def _got(json):
-            # TODO
-            self.failUnless("JSON" in json, json)
-        d.addCallback(_got)
+        d.addCallback(self.failUnlessIsBarJSON)
         return d
 
     def test_GET_FILEURL_json_missing(self): # YES
diff --git a/src/allmydata/util/json_encoder.py b/src/allmydata/util/json_encoder.py
new file mode 100644
index 00000000..d261f94b
--- /dev/null
+++ b/src/allmydata/util/json_encoder.py
@@ -0,0 +1,395 @@
+"""
+Implementation of JSONEncoder
+"""
+
+# this is simplejson/encoder.py, from Bob Ippolito's simplejson-1.7.1
+#  (http://undefined.org/python/#simplejson)
+# which is distributed under the MIT license, reproduced here:
+#
+# Copyright (c) 2006 Bob Ippolito
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+# of the Software, and to permit persons to whom the Software is furnished to do
+# so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# we extract just the encoder here because that's all we need.
+
+import re
+_speedups = None
+
+ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')  # 0x00-0x1f: all of ESCAPE_DCT's controls
+ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
+ESCAPE_DCT = {
+    # '/' is escaped to prevent a </script> attack (ESCAPE_ASCII path only)
+    '/': '\\/',
+    '\\': '\\\\',
+    '"': '\\"',
+    '\b': '\\b',
+    '\f': '\\f',
+    '\n': '\\n',
+    '\r': '\\r',
+    '\t': '\\t',
+}
+for i in range(0x20):
+    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
+
+# assume this produces an infinity on all machines (probably not guaranteed)
+INFINITY = float('1e66666')
+
+def floatstr(o, allow_nan=True):
+    """Return the JSON text for float ``o``.  NaN and +/-Infinity are found
+    with portable comparisons (not by inspecting a platform-specific repr)
+    and raise ValueError unless ``allow_nan`` is true."""
+    if o != o:
+        text = 'NaN'
+    elif o == INFINITY:
+        text = 'Infinity'
+    elif o == -INFINITY:
+        text = '-Infinity'
+    else:
+        return repr(o)  # repr() round-trips; str() keeps only 12 digits
+
+    if not allow_nan:
+        raise ValueError("Out of range float values are not JSON compliant: %r"
+            % (o,))
+
+    return text
+
+
+def encode_basestring(s):
+    """
+    Return a JSON representation of a Python string
+    """
+    def replace(match):
+        return ESCAPE_DCT[match.group(0)]
+    return '"' + ESCAPE.sub(replace, s) + '"'
+
+def encode_basestring_ascii(s):
+    def replace(match):
+        s = match.group(0)
+        try:
+            return ESCAPE_DCT[s]
+        except KeyError:
+            n = ord(s)
+            if n < 0x10000:
+                return '\\u%04x' % (n,)
+            else:
+                # surrogate pair
+                n -= 0x10000
+                s1 = 0xd800 | ((n >> 10) & 0x3ff)
+                s2 = 0xdc00 | (n & 0x3ff)
+                return '\\u%04x\\u%04x' % (s1, s2)
+    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
+        
+try:
+    encode_basestring_ascii = _speedups.encode_basestring_ascii
+    _need_utf8 = True
+except AttributeError:
+    _need_utf8 = False
+
+class JSONEncoder(object):
+    """
+    Extensible JSON <http://json.org> encoder for Python data structures.
+
+    Supports the following objects and types by default:
+    
+    +-------------------+---------------+
+    | Python            | JSON          |
+    +===================+===============+
+    | dict              | object        |
+    +-------------------+---------------+
+    | list, tuple       | array         |
+    +-------------------+---------------+
+    | str, unicode      | string        |
+    +-------------------+---------------+
+    | int, long, float  | number        |
+    +-------------------+---------------+
+    | True              | true          |
+    +-------------------+---------------+
+    | False             | false         |
+    +-------------------+---------------+
+    | None              | null          |
+    +-------------------+---------------+
+
+    To extend this to recognize other objects, subclass and implement a
+    ``.default()`` method with another method that returns a serializable
+    object for ``o`` if possible, otherwise it should call the superclass
+    implementation (to raise ``TypeError``).
+    """
+    __all__ = ['__init__', 'default', 'encode', 'iterencode']
+    item_separator = ', '
+    key_separator = ': '
+    def __init__(self, skipkeys=False, ensure_ascii=True,
+            check_circular=True, allow_nan=True, sort_keys=False,
+            indent=None, separators=None, encoding='utf-8'):
+        """
+        Constructor for JSONEncoder, with sensible defaults.
+
+        If skipkeys is False, then it is a TypeError to attempt
+        encoding of keys that are not str, int, long, float or None.  If
+        skipkeys is True, such items are simply skipped.
+
+        If ensure_ascii is True, the output is guaranteed to be str
+        objects with all incoming unicode characters escaped.  If
+        ensure_ascii is false, the output will be unicode object.
+
+        If check_circular is True, then lists, dicts, and custom encoded
+        objects will be checked for circular references during encoding to
+        prevent an infinite recursion (which would cause an OverflowError).
+        Otherwise, no such check takes place.
+
+        If allow_nan is True, then NaN, Infinity, and -Infinity will be
+        encoded as such.  This behavior is not JSON specification compliant,
+        but is consistent with most JavaScript based encoders and decoders.
+        Otherwise, it will be a ValueError to encode such floats.
+
+        If sort_keys is True, then the output of dictionaries will be
+        sorted by key; this is useful for regression tests to ensure
+        that JSON serializations can be compared on a day-to-day basis.
+
+        If indent is a non-negative integer, then JSON array
+        elements and object members will be pretty-printed with that
+        indent level.  An indent level of 0 will only insert newlines.
+        None is the most compact representation.
+
+        If specified, separators should be a (item_separator, key_separator)
+        tuple. The default is (', ', ': '). To get the most compact JSON
+        representation you should specify (',', ':') to eliminate whitespace.
+
+        If encoding is not None, then all input strings will be
+        transformed into unicode using that encoding prior to JSON-encoding. 
+        The default is UTF-8.
+        """
+
+        self.skipkeys = skipkeys
+        self.ensure_ascii = ensure_ascii
+        self.check_circular = check_circular
+        self.allow_nan = allow_nan
+        self.sort_keys = sort_keys
+        self.indent = indent
+        self.current_indent_level = 0
+        if separators is not None:
+            self.item_separator, self.key_separator = separators
+        self.encoding = encoding
+
+    def _newline_indent(self):
+        return '\n' + (' ' * (self.indent * self.current_indent_level))
+
+    def _iterencode_list(self, lst, markers=None):
+        if not lst:
+            yield '[]'
+            return
+        if markers is not None:
+            markerid = id(lst)
+            if markerid in markers:
+                raise ValueError("Circular reference detected")
+            markers[markerid] = lst
+        yield '['
+        if self.indent is not None:
+            self.current_indent_level += 1
+            newline_indent = self._newline_indent()
+            separator = self.item_separator + newline_indent
+            yield newline_indent
+        else:
+            newline_indent = None
+            separator = self.item_separator
+        first = True
+        for value in lst:
+            if first:
+                first = False
+            else:
+                yield separator
+            for chunk in self._iterencode(value, markers):
+                yield chunk
+        if newline_indent is not None:
+            self.current_indent_level -= 1
+            yield self._newline_indent()
+        yield ']'
+        if markers is not None:
+            del markers[markerid]
+
+    def _iterencode_dict(self, dct, markers=None):
+        if not dct:
+            yield '{}'
+            return
+        if markers is not None:
+            markerid = id(dct)
+            if markerid in markers:
+                raise ValueError("Circular reference detected")
+            markers[markerid] = dct
+        yield '{'
+        key_separator = self.key_separator
+        if self.indent is not None:
+            self.current_indent_level += 1
+            newline_indent = self._newline_indent()
+            item_separator = self.item_separator + newline_indent
+            yield newline_indent
+        else:
+            newline_indent = None
+            item_separator = self.item_separator
+        first = True
+        if self.ensure_ascii:
+            encoder = encode_basestring_ascii
+        else:
+            encoder = encode_basestring
+        allow_nan = self.allow_nan
+        if self.sort_keys:
+            keys = dct.keys()
+            keys.sort()
+            items = [(k, dct[k]) for k in keys]
+        else:
+            items = dct.iteritems()
+        _encoding = self.encoding
+        _do_decode = (_encoding is not None
+            and not (_need_utf8 and _encoding == 'utf-8'))
+        for key, value in items:
+            if isinstance(key, str):
+                if _do_decode:
+                    key = key.decode(_encoding)
+            elif isinstance(key, basestring):
+                pass
+            # JavaScript is weakly typed, so non-string keys are allowed too.
+            # bool subclasses int, so True/False must be tested before ints.
+            elif isinstance(key, float):
+                key = floatstr(key, allow_nan)
+            elif key is True:
+                key = 'true'
+            elif key is False:
+                key = 'false'
+            elif key is None:
+                key = 'null'
+            elif isinstance(key, (int, long)):
+                key = str(key)
+            elif self.skipkeys:
+                continue
+            else:
+                raise TypeError("key %r is not a string" % (key,))
+            if first:
+                first = False
+            else:
+                yield item_separator
+            yield encoder(key)
+            yield key_separator
+            for chunk in self._iterencode(value, markers):
+                yield chunk
+        if newline_indent is not None:
+            self.current_indent_level -= 1
+            yield self._newline_indent()
+        yield '}'
+        if markers is not None:
+            del markers[markerid]
+
+    def _iterencode(self, o, markers=None):
+        if isinstance(o, basestring):
+            if self.ensure_ascii:
+                encoder = encode_basestring_ascii
+            else:
+                encoder = encode_basestring
+            _encoding = self.encoding
+            if (_encoding is not None and isinstance(o, str)
+                    and not (_need_utf8 and _encoding == 'utf-8')):
+                o = o.decode(_encoding)
+            yield encoder(o)
+        elif o is None:
+            yield 'null'
+        elif o is True:
+            yield 'true'
+        elif o is False:
+            yield 'false'
+        elif isinstance(o, (int, long)):
+            yield str(o)
+        elif isinstance(o, float):
+            yield floatstr(o, self.allow_nan)
+        elif isinstance(o, (list, tuple)):
+            for chunk in self._iterencode_list(o, markers):
+                yield chunk
+        elif isinstance(o, dict):
+            for chunk in self._iterencode_dict(o, markers):
+                yield chunk
+        else:
+            if markers is not None:
+                markerid = id(o)
+                if markerid in markers:
+                    raise ValueError("Circular reference detected")
+                markers[markerid] = o
+            for chunk in self._iterencode_default(o, markers):
+                yield chunk
+            if markers is not None:
+                del markers[markerid]
+
+    def _iterencode_default(self, o, markers=None):
+        newobj = self.default(o)
+        return self._iterencode(newobj, markers)
+
+    def default(self, o):
+        """
+        Implement this method in a subclass such that it returns
+        a serializable object for ``o``, or calls the base implementation
+        (to raise a ``TypeError``).
+
+        For example, to support arbitrary iterators, you could
+        implement default like this::
+            
+            def default(self, o):
+                try:
+                    iterable = iter(o)
+                except TypeError:
+                    pass
+                else:
+                    return list(iterable)
+                return JSONEncoder.default(self, o)
+        """
+        raise TypeError("%r is not JSON serializable" % (o,))
+
+    def encode(self, o):
+        """
+        Return a JSON string representation of a Python data structure.
+
+        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
+        '{"foo": ["bar", "baz"]}'
+        """
+        # Fast path for bare strings; it must honour ensure_ascii the same
+        # way _iterencode does, so both entry points give the same output.
+        if isinstance(o, basestring):
+            _encoding = self.encoding
+            if (isinstance(o, str) and _encoding is not None
+                    and not (_encoding == 'utf-8' and _need_utf8)):
+                o = o.decode(_encoding)
+            if self.ensure_ascii:
+                return encode_basestring_ascii(o)
+            return encode_basestring(o)
+        # Build the list first: ''.join() reports generator errors poorly.
+        chunks = list(self.iterencode(o))
+        return ''.join(chunks)
+
+    def iterencode(self, o):
+        """
+        Encode the given object and yield each string
+        representation as available.
+        
+        For example::
+            
+            for chunk in JSONEncoder().iterencode(bigobject):
+                mysocket.write(chunk)
+        """
+        if self.check_circular:
+            markers = {}
+        else:
+            markers = None
+        return self._iterencode(o, markers)
+
+__all__ = ['JSONEncoder']
diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py
index 1d6a8d21..187c05d1 100644
--- a/src/allmydata/webish.py
+++ b/src/allmydata/webish.py
@@ -7,6 +7,7 @@ from twisted.internet import defer
 from nevow import inevow, rend, loaders, appserver, url, tags as T
 from nevow.static import File as nevow_File # TODO: merge with static.File?
 from allmydata.util import idlib, fileutil
+from allmydata.util.json_encoder import JSONEncoder
 from allmydata.uri import unpack_uri, is_dirnode_uri
 from allmydata.interfaces import IDownloadTarget, IDirectoryNode, IFileNode
 from allmydata import upload, download
@@ -289,6 +290,7 @@ class LocalFileDownloader(resource.Resource):
         d.addCallback(_done)
         return server.NOT_DONE_YET
 
+
 class FileJSONMetadata(rend.Page):
     def __init__(self, filenode):
         self._filenode = filenode
@@ -301,10 +303,12 @@ class FileJSONMetadata(rend.Page):
     def renderNode(self, filenode):
         file_uri = filenode.get_uri()
         pieces = unpack_uri(file_uri)
-        data = "filenode\n"
-        data += "JSONny stuff here\n"
-        data += "uri=%s, size=%s" % (file_uri, pieces['size'])
-        return data
+        data = ("filenode",
+                {'mutable': False,
+                 'uri': file_uri,
+                 'size': pieces['size'],
+                 })
+        return JSONEncoder().encode(data)
 
 class FileURI(FileJSONMetadata):
     def renderNode(self, filenode):
@@ -383,18 +387,33 @@ class DirectoryJSONMetadata(rend.Page):
         return self.renderNode(self._dirnode)
 
     def renderNode(self, node):
-        data = "dirnode\n"
-        data += "JSONny stuff here\n"
         d = node.list()
-        def _got(children, data):
+        def _got(children):
+            kids = {}
             for name, childnode in children.iteritems():
-                data += "name=%s, child_uri=%s" % (name, childnode.get_uri())
-            return data
-        d.addCallback(_got, data)
-        def _done(data):
-            data += "done\n"
-            return data
-        d.addCallback(_done)
+                if IFileNode.providedBy(childnode):
+                    kiduri = childnode.get_uri()
+                    pieces = unpack_uri(kiduri)
+                    kiddata = ("filenode",
+                               {'mutable': False,
+                                'uri': kiduri,
+                                'size': pieces['size'],
+                                })
+                else:
+                    assert IDirectoryNode.providedBy(childnode)
+                    kiduri = childnode.get_uri()
+                    kiddata = ("dirnode",
+                               {'mutable': childnode.is_mutable(),
+                                'uri': kiduri,
+                                })
+                kids[name] = kiddata
+            contents = { 'children': kids,
+                         'mutable': node.is_mutable(),
+                         'uri': node.get_uri(),
+                         }
+            data = ("dirnode", contents)
+            return JSONEncoder().encode(data)
+        d.addCallback(_got)
         return d
 
 class DirectoryURI(DirectoryJSONMetadata):