From: Brian Warner Date: Sun, 8 Jul 2007 07:17:11 +0000 (-0700) Subject: web: use real JSON instead of the fake stubs X-Git-Url: https://git.rkrishnan.org/?a=commitdiff_plain;h=72fc8c5cb875fab76036c3dce9f3b2645309be52;p=tahoe-lafs%2Ftahoe-lafs.git web: use real JSON instead of the fake stubs Also include the encoder portion of Bob Ippolito's simplejson-1.7.1 as allmydata.util.json_encoder . simplejson is distributed under a more liberal license than Tahoe (looks to be modified BSD), so redistributing it should be ok. --- diff --git a/docs/webapi.txt b/docs/webapi.txt index a325321d..060f2674 100644 --- a/docs/webapi.txt +++ b/docs/webapi.txt @@ -91,6 +91,11 @@ for files and directories which do not yet exist. between files and directories. Programatic clients are expected to use this query before actually downloading the file's contents. + The JSON data is as follows: + + [ 'filenode', { 'mutable': bool, 'uri': file_uri, 'size': bytes } ] + + GET FILEURL?localfile=$FILENAME This instructs the client to download the given file and write its contents @@ -146,6 +151,20 @@ for files and directories which do not yet exist. the information from this query to display filesystem navigation choices to a human user. + The JSON data is as follows: + + [ 'dirnode', { 'mutable': bool, 'uri': uri, 'children': children } ] + + where 'children' is a dictionary in which the keys are child names + and the values depend upon whether the child is a file or a directory: + + 'foo.txt': [ 'filenode', { 'mutable': bool, 'uri': uri, 'size': bytes } ] + 'subdir': [ 'dirnode', { 'mutable': bool, 'uri': uri } ] + + note that the value is the same as the JSON representation of the + corresponding FILEURL or DIRURL (except that dirnodes do not recurse). + + GET DIRURL?t=uri GET DIRURL?t=readonly-uri diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index d7de57b8..7652ca8b 100644 --- a/src/allmydata/test/test_web.py +++ b/src/allmydata/test/test_web.py @@ -227,11 +227,37 @@ class Web(unittest.TestCase): def failUnlessIsBarDotTxt(self, res): self.failUnlessEqual(res, self.BAR_CONTENTS) + def worlds_cheapest_json_decoder(self, json): + # don't write tests that use 'true' or 'false' as filenames + json = re.sub('false', 'False', json) + json = re.sub('true', 'True', json) + json = re.sub(r'\\/', '/', json) + return eval(json) + + def failUnlessIsBarJSON(self, res): + data = self.worlds_cheapest_json_decoder(res) + self.failUnless(isinstance(data, list)) + self.failUnlessEqual(data[0], "filenode") + self.failUnless(isinstance(data[1], dict)) + self.failUnlessEqual(data[1]["mutable"], False) + self.failUnlessEqual(data[1]["size"], 123) + self.failUnlessEqual(data[1]["uri"], self._bar_txt_uri) + def failUnlessIsFooJSON(self, res): - self.failUnless("JSONny stuff here" in res) - self.failUnless("name=bar.txt, child_uri=%s" % self._bar_txt_uri - in res) - self.failUnless("name=blockingfile" in res) + data = self.worlds_cheapest_json_decoder(res) + self.failUnless(isinstance(data, list)) + self.failUnlessEqual(data[0], "dirnode") + self.failUnless(isinstance(data[1], dict)) + self.failUnlessEqual(data[1]["mutable"], True) + self.failUnlessEqual(data[1]["uri"], self._foo_uri) + kidnames = sorted(data[1]["children"].keys()) + self.failUnlessEqual(kidnames, + ["bar.txt", "blockingfile", "empty", "sub"]) + kids = data[1]["children"] + self.failUnlessEqual(kids["sub"][0], "dirnode") + self.failUnlessEqual(kids["bar.txt"][0], "filenode") + self.failUnlessEqual(kids["bar.txt"][1]["size"], 123) + self.failUnlessEqual(kids["bar.txt"][1]["uri"], self._bar_txt_uri) def GET(self, urlpath, followRedirect=False): url = self.webish_url + urlpath @@ -370,10 +396,7 @@ class Web(unittest.TestCase): # instead. This may make it tricky to emulate the S3 interface # completely. d = self.GET("/vdrive/global/foo/bar.txt?t=json") - def _got(json): - # TODO - self.failUnless("JSON" in json, json) - d.addCallback(_got) + d.addCallback(self.failUnlessIsBarJSON) return d def test_GET_FILEURL_json_missing(self): # YES diff --git a/src/allmydata/util/json_encoder.py b/src/allmydata/util/json_encoder.py new file mode 100644 index 00000000..d261f94b --- /dev/null +++ b/src/allmydata/util/json_encoder.py @@ -0,0 +1,395 @@ +""" +Implementation of JSONEncoder +""" + +# this is simplejson/encoder.py, from Bob Ippolito's simplejson-1.7.1 +# (http://undefined.org/python/#simplejson) +# which is distributed under the BSD license as copied here: +# +# Copyright (c) 2006 Bob Ippolito +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# of the Software, and to permit persons to whom the Software is furnished to do +# so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# we extract just the encoder here because that's all we need. + +import re +_speedups = None + +ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]') +ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])') +ESCAPE_DCT = { + # escape all forward slashes to prevent attack + '/': '\\/', + '\\': '\\\\', + '"': '\\"', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', +} +for i in range(0x20): + ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) + +# assume this produces an infinity on all machines (probably not guaranteed) +INFINITY = float('1e66666') + +def floatstr(o, allow_nan=True): + # Check for specials. Note that this type of test is processor- and/or + # platform-specific, so do tests which don't depend on the internals. + + if o != o: + text = 'NaN' + elif o == INFINITY: + text = 'Infinity' + elif o == -INFINITY: + text = '-Infinity' + else: + return str(o) + + if not allow_nan: + raise ValueError("Out of range float values are not JSON compliant: %r" + % (o,)) + + return text + + +def encode_basestring(s): + """ + Return a JSON representation of a Python string + """ + def replace(match): + return ESCAPE_DCT[match.group(0)] + return '"' + ESCAPE.sub(replace, s) + '"' + +def encode_basestring_ascii(s): + def replace(match): + s = match.group(0) + try: + return ESCAPE_DCT[s] + except KeyError: + n = ord(s) + if n < 0x10000: + return '\\u%04x' % (n,) + else: + # surrogate pair + n -= 0x10000 + s1 = 0xd800 | ((n >> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + +try: + encode_basestring_ascii = _speedups.encode_basestring_ascii + _need_utf8 = True +except AttributeError: + _need_utf8 = False + +class JSONEncoder(object): + """ + Extensible JSON encoder for Python data structures. + + Supports the following objects and types by default: + + +-------------------+---------------+ + | Python | JSON | + +===================+===============+ + | dict | object | + +-------------------+---------------+ + | list, tuple | array | + +-------------------+---------------+ + | str, unicode | string | + +-------------------+---------------+ + | int, long, float | number | + +-------------------+---------------+ + | True | true | + +-------------------+---------------+ + | False | false | + +-------------------+---------------+ + | None | null | + +-------------------+---------------+ + + To extend this to recognize other objects, subclass and implement a + ``.default()`` method with another method that returns a serializable + object for ``o`` if possible, otherwise it should call the superclass + implementation (to raise ``TypeError``). + """ + __all__ = ['__init__', 'default', 'encode', 'iterencode'] + item_separator = ', ' + key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8'): + """ + Constructor for JSONEncoder, with sensible defaults. + + If skipkeys is False, then it is a TypeError to attempt + encoding of keys that are not str, int, long, float or None. If + skipkeys is True, such items are simply skipped. + + If ensure_ascii is True, the output is guaranteed to be str + objects with all incoming unicode characters escaped. If + ensure_ascii is false, the output will be unicode object. + + If check_circular is True, then lists, dicts, and custom encoded + objects will be checked for circular references during encoding to + prevent an infinite recursion (which would cause an OverflowError). + Otherwise, no such check takes place. + + If allow_nan is True, then NaN, Infinity, and -Infinity will be + encoded as such. This behavior is not JSON specification compliant, + but is consistent with most JavaScript based encoders and decoders. + Otherwise, it will be a ValueError to encode such floats. + + If sort_keys is True, then the output of dictionaries will be + sorted by key; this is useful for regression tests to ensure + that JSON serializations can be compared on a day-to-day basis. + + If indent is a non-negative integer, then JSON array + elements and object members will be pretty-printed with that + indent level. An indent level of 0 will only insert newlines. + None is the most compact representation. + + If specified, separators should be a (item_separator, key_separator) + tuple. The default is (', ', ': '). To get the most compact JSON + representation you should specify (',', ':') to eliminate whitespace. + + If encoding is not None, then all input strings will be + transformed into unicode using that encoding prior to JSON-encoding. + The default is UTF-8. + """ + + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + self.sort_keys = sort_keys + self.indent = indent + self.current_indent_level = 0 + if separators is not None: + self.item_separator, self.key_separator = separators + self.encoding = encoding + + def _newline_indent(self): + return '\n' + (' ' * (self.indent * self.current_indent_level)) + + def _iterencode_list(self, lst, markers=None): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + yield '[' + if self.indent is not None: + self.current_indent_level += 1 + newline_indent = self._newline_indent() + separator = self.item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + separator = self.item_separator + first = True + for value in lst: + if first: + first = False + else: + yield separator + for chunk in self._iterencode(value, markers): + yield chunk + if newline_indent is not None: + self.current_indent_level -= 1 + yield self._newline_indent() + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(self, dct, markers=None): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + key_separator = self.key_separator + if self.indent is not None: + self.current_indent_level += 1 + newline_indent = self._newline_indent() + item_separator = self.item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = self.item_separator + first = True + if self.ensure_ascii: + encoder = encode_basestring_ascii + else: + encoder = encode_basestring + allow_nan = self.allow_nan + if self.sort_keys: + keys = dct.keys() + keys.sort() + items = [(k, dct[k]) for k in keys] + else: + items = dct.iteritems() + _encoding = self.encoding + _do_decode = (_encoding is not None + and not (_need_utf8 and _encoding == 'utf-8')) + for key, value in items: + if isinstance(key, str): + if _do_decode: + key = key.decode(_encoding) + elif isinstance(key, basestring): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. + elif isinstance(key, float): + key = floatstr(key, allow_nan) + elif isinstance(key, (int, long)): + key = str(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif self.skipkeys: + continue + else: + raise TypeError("key %r is not a string" % (key,)) + if first: + first = False + else: + yield item_separator + yield encoder(key) + yield key_separator + for chunk in self._iterencode(value, markers): + yield chunk + if newline_indent is not None: + self.current_indent_level -= 1 + yield self._newline_indent() + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(self, o, markers=None): + if isinstance(o, basestring): + if self.ensure_ascii: + encoder = encode_basestring_ascii + else: + encoder = encode_basestring + _encoding = self.encoding + if (_encoding is not None and isinstance(o, str) + and not (_need_utf8 and _encoding == 'utf-8')): + o = o.decode(_encoding) + yield encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, (int, long)): + yield str(o) + elif isinstance(o, float): + yield floatstr(o, self.allow_nan) + elif isinstance(o, (list, tuple)): + for chunk in self._iterencode_list(o, markers): + yield chunk + elif isinstance(o, dict): + for chunk in self._iterencode_dict(o, markers): + yield chunk + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + for chunk in self._iterencode_default(o, markers): + yield chunk + if markers is not None: + del markers[markerid] + + def _iterencode_default(self, o, markers=None): + newobj = self.default(o) + return self._iterencode(newobj, markers) + + def default(self, o): + """ + Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). + + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + """ + raise TypeError("%r is not JSON serializable" % (o,)) + + def encode(self, o): + """ + Return a JSON string representation of a Python data structure. + + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo":["bar", "baz"]}' + """ + # This is for extremely simple cases and benchmarks... + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8' and _need_utf8)): + o = o.decode(_encoding) + return encode_basestring_ascii(o) + # This doesn't pass the iterator directly to ''.join() because it + # sucks at reporting exceptions. It's going to do this internally + # anyway because it uses PySequence_Fast or similar. + chunks = list(self.iterencode(o)) + return ''.join(chunks) + + def iterencode(self, o): + """ + Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + """ + if self.check_circular: + markers = {} + else: + markers = None + return self._iterencode(o, markers) + +__all__ = ['JSONEncoder'] diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py index 1d6a8d21..187c05d1 100644 --- a/src/allmydata/webish.py +++ b/src/allmydata/webish.py @@ -7,6 +7,7 @@ from twisted.internet import defer from nevow import inevow, rend, loaders, appserver, url, tags as T from nevow.static import File as nevow_File # TODO: merge with static.File? from allmydata.util import idlib, fileutil +from allmydata.util.json_encoder import JSONEncoder from allmydata.uri import unpack_uri, is_dirnode_uri from allmydata.interfaces import IDownloadTarget, IDirectoryNode, IFileNode from allmydata import upload, download @@ -289,6 +290,7 @@ class LocalFileDownloader(resource.Resource): d.addCallback(_done) return server.NOT_DONE_YET + class FileJSONMetadata(rend.Page): def __init__(self, filenode): self._filenode = filenode @@ -301,10 +303,12 @@ class FileJSONMetadata(rend.Page): def renderNode(self, filenode): file_uri = filenode.get_uri() pieces = unpack_uri(file_uri) - data = "filenode\n" - data += "JSONny stuff here\n" - data += "uri=%s, size=%s" % (file_uri, pieces['size']) - return data + data = ("filenode", + {'mutable': False, + 'uri': file_uri, + 'size': pieces['size'], + }) + return JSONEncoder().encode(data) class FileURI(FileJSONMetadata): def renderNode(self, filenode): @@ -383,18 +387,33 @@ class DirectoryJSONMetadata(rend.Page): return self.renderNode(self._dirnode) def renderNode(self, node): - data = "dirnode\n" - data += "JSONny stuff here\n" d = node.list() - def _got(children, data): + def _got(children): + kids = {} for name, childnode in children.iteritems(): - data += "name=%s, child_uri=%s" % (name, childnode.get_uri()) - return data - d.addCallback(_got, data) - def _done(data): - data += "done\n" - return data - d.addCallback(_done) + if IFileNode.providedBy(childnode): + kiduri = childnode.get_uri() + pieces = unpack_uri(kiduri) + kiddata = ("filenode", + {'mutable': False, + 'uri': kiduri, + 'size': pieces['size'], + }) + else: + assert IDirectoryNode.providedBy(childnode) + kiduri = childnode.get_uri() + kiddata = ("dirnode", + {'mutable': childnode.is_mutable(), + 'uri': kiduri, + }) + kids[name] = kiddata + contents = { 'children': kids, + 'mutable': node.is_mutable(), + 'uri': node.get_uri(), + } + data = ("dirnode", contents) + return JSONEncoder().encode(data) + d.addCallback(_got) return d class DirectoryURI(DirectoryJSONMetadata):