0001"""
0002Implementation of JSONEncoder
0003"""
0004import re
0005
# Characters that must be escaped inside a JSON string when non-ASCII
# output is allowed: every control character U+0000..U+001F, the
# backslash and the double quote.  The range ends at \x1f (not \x19) so
# that it agrees with the table filled in for range(0x20) below.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped for pure-ASCII output: backslash, double
# quote, forward slash, and anything outside the printable range ' '..'~'.
ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
# Replacement text for each character that has a fixed escape sequence.
ESCAPE_DCT = {
    '/': '\\/',
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Control characters without a short escape fall back to \u00XX;
# setdefault() keeps the short forms registered above.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))


# A literal too large for a float evaluates to positive infinity.
INFINITY = float('1e66666')
0024
def floatstr(o, allow_nan=True):
    """
    Return the JSON text for the float ``o``.

    Finite values are rendered with ``repr()`` so that parsing the text
    yields the same float again.  NaN and the two infinities are rendered
    as the JavaScript literals ``NaN``, ``Infinity`` and ``-Infinity``.

    Raises ``ValueError`` for NaN or an infinity when ``allow_nan`` is
    false, because those literals are not part of the JSON grammar.
    """
    # NaN is the only float value that does not compare equal to itself.
    if o != o:
        text = 'NaN'
    elif o == INFINITY:
        text = 'Infinity'
    elif o == -INFINITY:
        text = '-Infinity'
    else:
        # str() on Python 2 keeps only 12 significant digits, which
        # silently loses precision; repr() round-trips.
        return repr(o)

    if not allow_nan:
        raise ValueError("Out of range float values are not JSON compliant: %r"
            % (o,))

    return text
0043
0044
def encode_basestring(s):
    """
    Return the string ``s`` as a double-quoted JSON string literal.

    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT``; all other characters are copied through unchanged.
    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return ''.join(['"', escaped, '"'])
0052
def encode_basestring_ascii(s):
    """
    Return the string ``s`` as an ASCII-only, double-quoted JSON literal.

    Characters matched by ``ESCAPE_ASCII`` are replaced by their entry in
    ``ESCAPE_DCT`` when one exists, otherwise by a ``\\uXXXX`` escape.
    Code points above U+FFFF are written as a UTF-16 surrogate pair,
    because a JSON ``\\u`` escape carries exactly four hex digits.
    """
    def replace(match):
        char = match.group(0)
        try:
            return ESCAPE_DCT[char]
        except KeyError:
            code = ord(char)
            if code < 0x10000:
                return '\\u%04x' % (code,)
            # Split a supplementary-plane code point into its high and
            # low surrogates.
            code -= 0x10000
            high = 0xd800 | ((code >> 10) & 0x3ff)
            low = 0xdc00 | (code & 0x3ff)
            return '\\u%04x\\u%04x' % (high, low)
    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
0061
0062
class JSONEncoder(object):
    """
    Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).
    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    # Class-level defaults; __init__ overrides them per instance when the
    # ``separators`` argument is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8'):
        """
        Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding
        (a ValueError is raised when one is found).  Otherwise, no such
        check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # Nesting depth of the container currently being written; only
        # changed while pretty-printing (indent is not None).
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        self.encoding = encoding

    def _newline_indent(self):
        """Return a newline followed by the indentation for the current depth."""
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """
        Yield the JSON text of the list or tuple ``lst`` chunk by chunk.

        ``markers`` maps ``id()`` of every container currently being
        encoded to the container itself; when it is not None a container
        that is met again while still open raises ``ValueError``.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        indenting = self.indent is not None
        if indenting:
            self.current_indent_level += 1
        try:
            if indenting:
                newline_indent = self._newline_indent()
                separator = self.item_separator + newline_indent
                yield newline_indent
            else:
                separator = self.item_separator
            first = True
            for value in lst:
                if first:
                    first = False
                else:
                    yield separator
                for chunk in self._iterencode(value, markers):
                    yield chunk
        finally:
            # Restore the depth even when encoding aborts part-way (an
            # unserializable item, a circular reference, a closed
            # generator); otherwise later output of this encoder would be
            # indented from a stale level.
            if indenting:
                self.current_indent_level -= 1
        if indenting:
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """
        Yield the JSON text of the dict ``dct`` chunk by chunk.

        Keys are converted to strings: byte strings are decoded with
        ``self.encoding``, floats go through ``floatstr``, ``True``,
        ``False`` and ``None`` become ``true``, ``false`` and ``null``,
        and integers are rendered with ``str()``.  Any other key raises
        ``TypeError`` unless ``self.skipkeys`` is set, in which case the
        item is dropped.  ``markers`` is used as in ``_iterencode_list``.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        indenting = self.indent is not None
        if indenting:
            self.current_indent_level += 1
        try:
            if indenting:
                newline_indent = self._newline_indent()
                item_separator = self.item_separator + newline_indent
                yield newline_indent
            else:
                item_separator = self.item_separator
            first = True
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            allow_nan = self.allow_nan
            if self.sort_keys:
                items = [(k, dct[k]) for k in sorted(dct.keys())]
            else:
                items = dct.iteritems()
            for key, value in items:
                if self.encoding is not None and isinstance(key, str):
                    key = key.decode(self.encoding)
                elif isinstance(key, basestring):
                    pass
                elif isinstance(key, float):
                    key = floatstr(key, allow_nan)
                # bool is a subclass of int, so the singleton checks must
                # come before the integer check; otherwise True/False
                # would be written as "True"/"False".
                elif key is True:
                    key = 'true'
                elif key is False:
                    key = 'false'
                elif key is None:
                    key = 'null'
                elif isinstance(key, (int, long)):
                    key = str(key)
                elif self.skipkeys:
                    continue
                else:
                    raise TypeError("key %r is not a string" % (key,))
                if first:
                    first = False
                else:
                    yield item_separator
                yield encoder(key)
                yield key_separator
                for chunk in self._iterencode(value, markers):
                    yield chunk
        finally:
            # See _iterencode_list: never leave the depth incremented.
            if indenting:
                self.current_indent_level -= 1
        if indenting:
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """
        Yield the JSON text of ``o`` chunk by chunk, dispatching on type.

        Objects of a type not handled here are passed to ``default()``
        (through ``_iterencode_default``) and the result is encoded in
        their place.
        """
        if isinstance(o, basestring):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            if self.encoding and isinstance(o, str):
                o = o.decode(self.encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        # The bool singletons are tested before the integer branch because
        # bool is a subclass of int.
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            # Track custom objects as well, so that a default() that keeps
            # returning something leading back to ``o`` is reported as a
            # circular reference.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """Convert ``o`` with ``default()`` and return an iterator over its JSON text."""
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """
        Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)
        """
        raise TypeError("%r is not JSON serializable" % (o,))

    def encode(self, o):
        """
        Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """
        Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
        """
        # A fresh marker table per call; None switches the circular
        # reference check off.
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)
0339
# Names exported by ``from <module> import *``.
__all__ = ['JSONEncoder']