From: Brian Warner Date: Tue, 10 Jul 2007 20:12:40 +0000 (-0700) Subject: import the decoder portion of simplejson-1.7.1 too X-Git-Url: https://git.rkrishnan.org/specifications/index.php?a=commitdiff_plain;h=ad038497db952fa6914286950e98f82f93933853;p=tahoe-lafs%2Ftahoe-lafs.git import the decoder portion of simplejson-1.7.1 too --- diff --git a/src/allmydata/util/json_decoder.py b/src/allmydata/util/json_decoder.py new file mode 100644 index 00000000..03b135fa --- /dev/null +++ b/src/allmydata/util/json_decoder.py @@ -0,0 +1,300 @@ +""" +Implementation of JSONDecoder +""" + +# this is simplejson/decoder.py, from Bob Ippolito's simplejson-1.7.1 +# (http://undefined.org/python/#simplejson) +# which is distributed under the BSD license as copied here: +# +# Copyright (c) 2006 Bob Ippolito +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# of the Software, and to permit persons to whom the Software is furnished to do +# so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +# we extract just the decoder here because that's all we need. + +import re + +from simplejson.scanner import Scanner, pattern + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + import struct + import sys + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + +def errmsg(msg, doc, pos, end=None): + lineno, colno = linecol(doc, pos) + if end is None: + return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + return '%s: line %d column %d - line %d column %d (char %d - %d)' % ( + msg, lineno, colno, endlineno, endcolno, pos, end) + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, + 'true': True, + 'false': False, + 'null': None, +} + +def JSONConstant(match, context, c=_CONSTANTS): + return c[match.group(0)], None +pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant) + +def JSONNumber(match, context): + match = JSONNumber.regex.match(match.string, *match.span()) + integer, frac, exp = match.groups() + if frac or exp: + res = float(integer + (frac or '') + (exp or '')) + else: + res = int(integer) + return res, None +pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) + +STRINGCHUNK = re.compile(r'(.*?)(["\\])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match): + if encoding is None: + encoding = DEFAULT_ENCODING + chunks = [] + _append = chunks.append + begin = end - 1 + while 1: + chunk = _m(s, end) + if chunk is None: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + end = chunk.end() + content, terminator = chunk.groups() + if content: + if not isinstance(content, unicode): + content = unicode(content, encoding) + _append(content) + if terminator == '"': + break + try: + esc = s[end] + except IndexError: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + if esc != 'u': + try: + m = _b[esc] + except KeyError: + raise ValueError( + errmsg("Invalid \\escape: %r" % (esc,), s, end)) + end += 1 + else: + esc = s[end + 1:end + 5] + try: + m = unichr(int(esc, 16)) + if len(esc) != 4 or not esc.isalnum(): + raise ValueError + except ValueError: + raise ValueError(errmsg("Invalid \\uXXXX escape", s, end)) + end += 5 + _append(m) + return u''.join(chunks), end + +def JSONString(match, context): + encoding = getattr(context, 'encoding', None) + return scanstring(match.string, match.end(), encoding) +pattern(r'"')(JSONString) + +WHITESPACE = re.compile(r'\s*', FLAGS) + +def JSONObject(match, context, _w=WHITESPACE.match): + pairs = {} + s = match.string + end = _w(s, match.end()).end() + nextchar = s[end:end + 1] + # trivial empty object + if nextchar == '}': + return pairs, end + 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end)) + end += 1 + encoding = getattr(context, 'encoding', None) + iterscan = JSONScanner.iterscan + while True: + key, end = scanstring(s, end, encoding) + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + end = _w(s, end + 1).end() + try: + value, end = iterscan(s, idx=end, context=context).next() + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + pairs[key] = value + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == '}': + break + if nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end - 1)) + object_hook = getattr(context, 'object_hook', None) + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end +pattern(r'{')(JSONObject) + +def JSONArray(match, context, _w=WHITESPACE.match): + values = [] + s = match.string + end = _w(s, match.end()).end() + # look-ahead for trivial empty array + nextchar = s[end:end + 1] + if nextchar == ']': + return values, end + 1 + iterscan = JSONScanner.iterscan + while True: + try: + value, end = iterscan(s, idx=end, context=context).next() + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + values.append(value) + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + if nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end)) + end = _w(s, end).end() + return values, end +pattern(r'\[')(JSONArray) + +ANYTHING = [ + JSONObject, + JSONArray, + JSONString, + JSONConstant, + JSONNumber, +] + +JSONScanner = Scanner(ANYTHING) + +class JSONDecoder(object): + """ + Simple JSON decoder + + Performs the following translations in decoding: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. + """ + + _scanner = Scanner(ANYTHING) + __all__ = ['__init__', 'decode', 'raw_decode'] + + def __init__(self, encoding=None, object_hook=None): + """ + ``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result + of every JSON object decoded and its return value will be used in + place of the given ``dict``. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + """ + self.encoding = encoding + self.object_hook = object_hook + + def decode(self, s, _w=WHITESPACE.match): + """ + Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) + """ + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() + if end != len(s): + raise ValueError(errmsg("Extra data", s, end, len(s))) + return obj + + def raw_decode(self, s, **kw): + """ + Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning + with a JSON document) and return a 2-tuple of the Python + representation and the index in ``s`` where the document ended. + + This can be used to decode a JSON document from a string that may + have extraneous data at the end. + """ + kw.setdefault('context', self) + try: + obj, end = self._scanner.iterscan(s, **kw).next() + except StopIteration: + raise ValueError("No JSON object could be decoded") + return obj, end + +__all__ = ['JSONDecoder']