pax_global_header00006660000000000000000000000064126552436760014532gustar00rootroot0000000000000052 comment=415e4b0a45651e11da3ee36f444d7175135eaf29 ijson-2.3/000077500000000000000000000000001265524367600125205ustar00rootroot00000000000000ijson-2.3/.gitignore000066400000000000000000000000321265524367600145030ustar00rootroot00000000000000*.pyc .tox ijson.egg-info ijson-2.3/.travis.yml000066400000000000000000000002611265524367600146300ustar00rootroot00000000000000language: python python: - "2.7" - "3.4" before_install: - sudo apt-get update -qq - sudo apt-get install -y libyajl1 install: - pip install . script: python tests.py ijson-2.3/LICENSE.txt000066400000000000000000000027201265524367600143440ustar00rootroot00000000000000Copyright (c) 2010, Ivan Sagalaev All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name "ijson" nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ijson-2.3/MANIFEST.in000066400000000000000000000000461265524367600142560ustar00rootroot00000000000000include LICENSE.txt include README.rstijson-2.3/README.rst000066400000000000000000000057331265524367600142170ustar00rootroot00000000000000.. image:: https://travis-ci.org/isagalaev/ijson.svg?branch=master :target: https://travis-ci.org/isagalaev/ijson ===== ijson ===== Ijson is an iterative JSON parser with a standard Python iterator interface. Usage ===== All usage example will be using a JSON document describing geographical objects:: { "earth": { "europe": [ {"name": "Paris", "type": "city", "info": { ... }}, {"name": "Thames", "type": "river", "info": { ... }}, // ... ], "america": [ {"name": "Texas", "type": "state", "info": { ... }}, // ... ] } } Most common usage is having ijson yield native Python objects out of a JSON stream located under a prefix. Here's how to process all European cities:: import ijson f = urlopen('http://.../') objects = ijson.items(f, 'earth.europe.item') cities = (o for o in objects if o['type'] == 'city') for city in cities: do_something_with(city) Sometimes when dealing with a particularly large JSON payload it may worth to not even construct individual Python objects and react on individual events immediately producing some result:: import ijson parser = ijson.parse(urlopen('http://.../')) stream.write('') for prefix, event, value in parser: if (prefix, event) == ('earth', 'map_key'): stream.write('<%s>' % value) continent = value elif prefix.endswith('.name'): stream.write('' % value) elif (prefix, event) == ('earth.%s' % continent, 'end_map'): stream.write('' % continent) stream.write('') Backends ======== Ijson provides several implementations of the actual parsing in the form of backends located in ijson/backends: - ``yajl2_cffi``: wrapper around `YAJL `_ 2.x using CFFI, this is the fastest. - ``yajl2``: wrapper around YAJL 2.x using ctypes, for when you can't use CFFI for some reason. - ``yajl``: deprecated YAJL 1.x + ctypes wrapper, for even older systems. - ``python``: pure Python parser, good to use with PyPy You can import a specific backend and use it in the same way as the top level library:: import ijson.backends.yajl2_cffi as ijson for item in ijson.items(...): # ... Importing the top level library as ``import ijson`` uses the pure Python backend. Acknowledgements ================ Python parser in ijson is relatively simple thanks to `Douglas Crockford `_ who invented a strict, easy to parse syntax. The `YAJL `_ library by `Lloyd Hilaiel `_ is the most popular and efficient way to parse JSON in an iterative fashion. Ijson was inspired by `yajl-py `_ wrapper by `Hatem Nassrat `_. Though ijson borrows almost nothing from the actual yajl-py code it was used as an example of integration with yajl using ctypes. ijson-2.3/ijson/000077500000000000000000000000001265524367600136425ustar00rootroot00000000000000ijson-2.3/ijson/__init__.py000066400000000000000000000012561265524367600157570ustar00rootroot00000000000000''' Iterative JSON parser. Main API: - ``ijson.parse``: iterator returning parsing events with the object tree context, see ``ijson.common.parse`` for docs. - ``ijson.items``: iterator returning Python objects found under a specified prefix, see ``ijson.common.items`` for docs. Top-level ``ijson`` module exposes method from the pure Python backend. There's also two other backends using the C library yajl in ``ijson.backends`` that have the same API and are faster under CPython. ''' from ijson.common import JSONError, IncompleteJSONError, ObjectBuilder import ijson.backends.python as backend basic_parse = backend.basic_parse parse = backend.parse items = backend.items ijson-2.3/ijson/backends/000077500000000000000000000000001265524367600154145ustar00rootroot00000000000000ijson-2.3/ijson/backends/__init__.py000066400000000000000000000025721265524367600175330ustar00rootroot00000000000000class YAJLImportError(ImportError): pass def require_version(version, required): ''' Asserts that the major component of 'version' is equal to 'required'. Raises YAJLImportError otherwise. ''' major, rest = divmod(version, 10000) minor, micro = divmod(rest, 100) if major != required: raise YAJLImportError('YAJL version %s.x required, found %s.%s.%s' % (required, major, minor, micro)) def find_yajl_ctypes(required): ''' Finds and loads yajl shared object of the required major version (1, 2, ...) using ctypes. ''' # Importing ``ctypes`` should be in scope of this function to prevent failure # of `backends`` package load in a runtime where ``ctypes`` is not available. # Example of such environment is Google App Engine (GAE). from ctypes import util, cdll so_name = util.find_library('yajl') if so_name is None: raise YAJLImportError('YAJL shared object not found.') yajl = cdll.LoadLibrary(so_name) require_version(yajl.yajl_version(), required) return yajl def find_yajl_cffi(ffi, required): ''' Finds and loads yajl shared object of the required major version (1, 2, ...) using cffi. ''' try: yajl = ffi.dlopen('yajl') except OSError: raise YAJLImportError('Unable to load YAJL.') require_version(yajl.yajl_version(), required) return yajl ijson-2.3/ijson/backends/python.py000066400000000000000000000134731265524367600173170ustar00rootroot00000000000000''' Pure-python parsing backend. ''' from __future__ import unicode_literals import decimal import re from codecs import getreader from ijson import common from ijson.compat import chr, bytetype BUFSIZE = 16 * 1024 LEXEME_RE = re.compile(r'[a-z0-9eE\.\+-]+|\S') class UnexpectedSymbol(common.JSONError): def __init__(self, symbol, pos): super(UnexpectedSymbol, self).__init__( 'Unexpected symbol %r at %d' % (symbol, pos) ) def Lexer(f, buf_size=BUFSIZE): if type(f.read(0)) == bytetype: f = getreader('utf-8')(f) buf = f.read(buf_size) pos = 0 discarded = 0 while True: match = LEXEME_RE.search(buf, pos) if match: lexeme = match.group() if lexeme == '"': pos = match.start() start = pos + 1 while True: try: end = buf.index('"', start) escpos = end - 1 while buf[escpos] == '\\': escpos -= 1 if (end - escpos) % 2 == 0: start = end + 1 else: break except ValueError: data = f.read(buf_size) if not data: raise common.IncompleteJSONError('Incomplete string lexeme') buf += data yield discarded + pos, buf[pos:end + 1] pos = end + 1 else: while match.end() == len(buf): data = f.read(buf_size) if not data: break buf += data match = LEXEME_RE.search(buf, pos) lexeme = match.group() yield discarded + match.start(), lexeme pos = match.end() else: data = f.read(buf_size) if not data: break discarded += len(buf) buf = data pos = 0 def unescape(s): start = 0 result = '' while start < len(s): pos = s.find('\\', start) if pos == -1: if start == 0: return s result += s[start:] break result += s[start:pos] pos += 1 esc = s[pos] if esc == 'u': result += chr(int(s[pos + 1:pos + 5], 16)) pos += 4 elif esc == 'b': result += '\b' elif esc == 'f': result += '\f' elif esc == 'n': result += '\n' elif esc == 'r': result += '\r' elif esc == 't': result += '\t' else: result += esc start = pos + 1 return result def parse_value(lexer, symbol=None, pos=0): try: if symbol is None: pos, symbol = next(lexer) if symbol == 'null': yield ('null', None) elif symbol == 'true': yield ('boolean', True) elif symbol == 'false': yield ('boolean', False) elif symbol == '[': for event in parse_array(lexer): yield event elif symbol == '{': for event in parse_object(lexer): yield event elif symbol[0] == '"': yield ('string', unescape(symbol[1:-1])) else: try: yield ('number', common.number(symbol)) except decimal.InvalidOperation: raise UnexpectedSymbol(symbol, pos) except StopIteration: raise common.IncompleteJSONError('Incomplete JSON data') def parse_array(lexer): yield ('start_array', None) try: pos, symbol = next(lexer) if symbol != ']': while True: for event in parse_value(lexer, symbol, pos): yield event pos, symbol = next(lexer) if symbol == ']': break if symbol != ',': raise UnexpectedSymbol(symbol, pos) pos, symbol = next(lexer) yield ('end_array', None) except StopIteration: raise common.IncompleteJSONError('Incomplete JSON data') def parse_object(lexer): yield ('start_map', None) try: pos, symbol = next(lexer) if symbol != '}': while True: if symbol[0] != '"': raise UnexpectedSymbol(symbol, pos) yield ('map_key', unescape(symbol[1:-1])) pos, symbol = next(lexer) if symbol != ':': raise UnexpectedSymbol(symbol, pos) for event in parse_value(lexer, None, pos): yield event pos, symbol = next(lexer) if symbol == '}': break if symbol != ',': raise UnexpectedSymbol(symbol, pos) pos, symbol = next(lexer) yield ('end_map', None) except StopIteration: raise common.IncompleteJSONError('Incomplete JSON data') def basic_parse(file, buf_size=BUFSIZE): ''' Iterator yielding unprefixed events. Parameters: - file: a readable file-like object with JSON input ''' lexer = iter(Lexer(file, buf_size)) for value in parse_value(lexer): yield value try: next(lexer) except StopIteration: pass else: raise common.JSONError('Additional data') def parse(file, buf_size=BUFSIZE): ''' Backend-specific wrapper for ijson.common.parse. ''' return common.parse(basic_parse(file, buf_size=buf_size)) def items(file, prefix): ''' Backend-specific wrapper for ijson.common.items. ''' return common.items(parse(file), prefix) ijson-2.3/ijson/backends/yajl.py000066400000000000000000000074041265524367600167320ustar00rootroot00000000000000''' Wrapper for YAJL C library version 1.x. ''' from ctypes import Structure, c_uint, c_ubyte, c_int, c_long, c_double, c_char, \ c_void_p, c_char_p, CFUNCTYPE, POINTER, byref, string_at, cast from ijson import common, backends from ijson.compat import b2s yajl = backends.find_yajl_ctypes(1) yajl.yajl_alloc.restype = POINTER(c_char) yajl.yajl_get_error.restype = POINTER(c_char) C_EMPTY = CFUNCTYPE(c_int, c_void_p) C_INT = CFUNCTYPE(c_int, c_void_p, c_int) C_LONG = CFUNCTYPE(c_int, c_void_p, c_long) C_DOUBLE = CFUNCTYPE(c_int, c_void_p, c_double) C_STR = CFUNCTYPE(c_int, c_void_p, POINTER(c_ubyte), c_uint) _callback_data = [ # Mapping of JSON parser events to callback C types and value converters. # Used to define the Callbacks structure and actual callback functions # inside the parse function. ('null', C_EMPTY, lambda: None), ('boolean', C_INT, lambda v: bool(v)), # "integer" and "double" aren't actually yielded by yajl since "number" # takes precedence if defined ('integer', C_LONG, lambda v, l: int(string_at(v, l))), ('double', C_DOUBLE, lambda v, l: float(string_at(v, l))), ('number', C_STR, lambda v, l: common.number(b2s(string_at(v, l)))), ('string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')), ('start_map', C_EMPTY, lambda: None), ('map_key', C_STR, lambda v, l: b2s(string_at(v, l))), ('end_map', C_EMPTY, lambda: None), ('start_array', C_EMPTY, lambda: None), ('end_array', C_EMPTY, lambda: None), ] class Callbacks(Structure): _fields_ = [(name, type) for name, type, func in _callback_data] class Config(Structure): _fields_ = [ ("allowComments", c_uint), ("checkUTF8", c_uint) ] YAJL_OK = 0 YAJL_CANCELLED = 1 YAJL_INSUFFICIENT_DATA = 2 YAJL_ERROR = 3 def basic_parse(f, allow_comments=False, check_utf8=False, buf_size=64 * 1024): ''' Iterator yielding unprefixed events. Parameters: - f: a readable file-like object with JSON input - allow_comments: tells parser to allow comments in JSON input - check_utf8: if True, parser will cause an error if input is invalid utf-8 - buf_size: a size of an input buffer ''' events = [] def callback(event, func_type, func): def c_callback(context, *args): events.append((event, func(*args))) return 1 return func_type(c_callback) callbacks = Callbacks(*[callback(*data) for data in _callback_data]) config = Config(allow_comments, check_utf8) handle = yajl.yajl_alloc(byref(callbacks), byref(config), None, None) try: while True: buffer = f.read(buf_size) if buffer: result = yajl.yajl_parse(handle, buffer, len(buffer)) else: result = yajl.yajl_parse_complete(handle) if result == YAJL_ERROR: perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer)) error = cast(perror, c_char_p).value yajl.yajl_free_error(handle, perror) exception = common.IncompleteJSONError if result == YAJL_INSUFFICIENT_DATA else common.JSONError raise common.JSONError(error) if not buffer and not events: if result == YAJL_INSUFFICIENT_DATA: raise common.IncompleteJSONError('Incomplete JSON data') break for event in events: yield event events = [] finally: yajl.yajl_free(handle) def parse(file, **kwargs): ''' Backend-specific wrapper for ijson.common.parse. ''' return common.parse(basic_parse(file, **kwargs)) def items(file, prefix): ''' Backend-specific wrapper for ijson.common.items. ''' return common.items(parse(file), prefix) ijson-2.3/ijson/backends/yajl2.py000066400000000000000000000073341265524367600170160ustar00rootroot00000000000000''' Wrapper for YAJL C library version 2.x. ''' from ctypes import Structure, c_uint, c_ubyte, c_int, c_long, c_double, c_char, \ c_void_p, c_char_p, CFUNCTYPE, POINTER, byref, string_at, cast from ijson import common, backends from ijson.compat import b2s yajl = backends.find_yajl_ctypes(2) yajl.yajl_alloc.restype = POINTER(c_char) yajl.yajl_get_error.restype = POINTER(c_char) C_EMPTY = CFUNCTYPE(c_int, c_void_p) C_INT = CFUNCTYPE(c_int, c_void_p, c_int) C_LONG = CFUNCTYPE(c_int, c_void_p, c_long) C_DOUBLE = CFUNCTYPE(c_int, c_void_p, c_double) C_STR = CFUNCTYPE(c_int, c_void_p, POINTER(c_ubyte), c_uint) _callback_data = [ # Mapping of JSON parser events to callback C types and value converters. # Used to define the Callbacks structure and actual callback functions # inside the parse function. ('null', C_EMPTY, lambda: None), ('boolean', C_INT, lambda v: bool(v)), # "integer" and "double" aren't actually yielded by yajl since "number" # takes precedence if defined ('integer', C_LONG, lambda v, l: int(string_at(v, l))), ('double', C_DOUBLE, lambda v, l: float(string_at(v, l))), ('number', C_STR, lambda v, l: common.number(b2s(string_at(v, l)))), ('string', C_STR, lambda v, l: string_at(v, l).decode('utf-8')), ('start_map', C_EMPTY, lambda: None), ('map_key', C_STR, lambda v, l: b2s(string_at(v, l))), ('end_map', C_EMPTY, lambda: None), ('start_array', C_EMPTY, lambda: None), ('end_array', C_EMPTY, lambda: None), ] class Callbacks(Structure): _fields_ = [(name, type) for name, type, func in _callback_data] YAJL_OK = 0 YAJL_CANCELLED = 1 YAJL_INSUFFICIENT_DATA = 2 YAJL_ERROR = 3 # constants defined in yajl_parse.h YAJL_ALLOW_COMMENTS = 1 YAJL_MULTIPLE_VALUES = 8 def basic_parse(f, allow_comments=False, buf_size=64 * 1024, multiple_values=False): ''' Iterator yielding unprefixed events. Parameters: - f: a readable file-like object with JSON input - allow_comments: tells parser to allow comments in JSON input - buf_size: a size of an input buffer - multiple_values: allows the parser to parse multiple JSON objects ''' events = [] def callback(event, func_type, func): def c_callback(context, *args): events.append((event, func(*args))) return 1 return func_type(c_callback) callbacks = Callbacks(*[callback(*data) for data in _callback_data]) handle = yajl.yajl_alloc(byref(callbacks), None, None) if allow_comments: yajl.yajl_config(handle, YAJL_ALLOW_COMMENTS, 1) if multiple_values: yajl.yajl_config(handle, YAJL_MULTIPLE_VALUES, 1) try: while True: buffer = f.read(buf_size) if buffer: result = yajl.yajl_parse(handle, buffer, len(buffer)) else: result = yajl.yajl_complete_parse(handle) if result != YAJL_OK: perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer)) error = cast(perror, c_char_p).value yajl.yajl_free_error(handle, perror) exception = common.IncompleteJSONError if result == YAJL_INSUFFICIENT_DATA else common.JSONError raise exception(error.decode('utf-8')) if not buffer and not events: break for event in events: yield event events = [] finally: yajl.yajl_free(handle) def parse(file, **kwargs): ''' Backend-specific wrapper for ijson.common.parse. ''' return common.parse(basic_parse(file, **kwargs)) def items(file, prefix): ''' Backend-specific wrapper for ijson.common.items. ''' return common.items(parse(file), prefix) ijson-2.3/ijson/backends/yajl2_cffi.py000066400000000000000000000147201265524367600200020ustar00rootroot00000000000000''' CFFI-Wrapper for YAJL C library version 2.x. ''' from cffi import FFI import functools import sys from ijson import common, backends from ijson.compat import b2s ffi = FFI() ffi.cdef(""" typedef void * (*yajl_malloc_func)(void *ctx, size_t sz); typedef void (*yajl_free_func)(void *ctx, void * ptr); typedef void * (*yajl_realloc_func)(void *ctx, void * ptr, size_t sz); typedef struct { yajl_malloc_func malloc; yajl_realloc_func realloc; yajl_free_func free; void * ctx; } yajl_alloc_funcs; typedef struct yajl_handle_t * yajl_handle; typedef enum { yajl_status_ok, yajl_status_client_canceled, yajl_status_error } yajl_status; typedef enum { yajl_allow_comments = 0x01, yajl_dont_validate_strings = 0x02, yajl_allow_trailing_garbage = 0x04, yajl_allow_multiple_values = 0x08, yajl_allow_partial_values = 0x10 } yajl_option; typedef struct { int (* yajl_null)(void * ctx); int (* yajl_boolean)(void * ctx, int boolVal); int (* yajl_integer)(void * ctx, long long integerVal); int (* yajl_double)(void * ctx, double doubleVal); int (* yajl_number)(void * ctx, const char * numberVal, size_t numberLen); int (* yajl_string)(void * ctx, const unsigned char * stringVal, size_t stringLen); int (* yajl_start_map)(void * ctx); int (* yajl_map_key)(void * ctx, const unsigned char * key, size_t stringLen); int (* yajl_end_map)(void * ctx); int (* yajl_start_array)(void * ctx); int (* yajl_end_array)(void * ctx); } yajl_callbacks; int yajl_version(void); yajl_handle yajl_alloc(const yajl_callbacks *callbacks, yajl_alloc_funcs *afs, void *ctx); int yajl_config(yajl_handle h, yajl_option opt, ...); yajl_status yajl_parse(yajl_handle hand, const unsigned char *jsonText, size_t jsonTextLength); yajl_status yajl_complete_parse(yajl_handle hand); unsigned char* yajl_get_error(yajl_handle hand, int verbose, const unsigned char *jsonText, size_t jsonTextLength); void yajl_free_error(yajl_handle hand, unsigned char * str); void yajl_free(yajl_handle handle); """) yajl = backends.find_yajl_cffi(ffi, 2) YAJL_OK = 0 YAJL_CANCELLED = 1 YAJL_INSUFFICIENT_DATA = 2 YAJL_ERROR = 3 # constants defined in yajl_parse.h YAJL_ALLOW_COMMENTS = 1 YAJL_MULTIPLE_VALUES = 8 def append_event_to_ctx(event): def wrapper(func): @functools.wraps(func) def wrapped(ctx, *args, **kwargs): value = func(*args, **kwargs) ctx = ffi.from_handle(ctx) ctx.append((event, value)) return 1 return wrapped return wrapper @ffi.callback('int(void *ctx)') @append_event_to_ctx('null') def null(): return None @ffi.callback('int(void *ctx, int val)') @append_event_to_ctx('boolean') def boolean(val): return bool(val) @ffi.callback('int(void *ctx, long long integerVal)') @append_event_to_ctx('integer') def integer(val): return int(val) @ffi.callback('int(void *ctx, double doubleVal)') @append_event_to_ctx('double') def double(val): return float(val) @ffi.callback('int(void *ctx, const char *numberVal, size_t numberLen)') @append_event_to_ctx('number') def number(val, length): return common.number(b2s(ffi.string(val, maxlen=length))) @ffi.callback('int(void *ctx, const unsigned char *stringVal, size_t stringLen)') @append_event_to_ctx('string') def string(val, length): return ffi.string(val, maxlen=length).decode('utf-8') @ffi.callback('int(void *ctx)') @append_event_to_ctx('start_map') def start_map(): return None @ffi.callback('int(void *ctx, const unsigned char *key, size_t stringLen)') @append_event_to_ctx('map_key') def map_key(key, length): return b2s(ffi.string(key, maxlen=length)) @ffi.callback('int(void *ctx)') @append_event_to_ctx('end_map') def end_map(): return None @ffi.callback('int(void *ctx)') @append_event_to_ctx('start_array') def start_array(): return None @ffi.callback('int(void *ctx)') @append_event_to_ctx('end_array') def end_array(): return None _callback_data = ( # For more information about callbacks, # take a look at the ctypes backend null, boolean, integer, double, number, string, start_map, map_key, end_map, start_array, end_array ) _asd = list() def yajl_init(scope, events, allow_comments=False, multiple_values=False): scope.ctx = ffi.new_handle(events) scope.callbacks = ffi.new('yajl_callbacks*', _callback_data) handle = yajl.yajl_alloc(scope.callbacks, ffi.NULL, scope.ctx) if allow_comments: yajl.yajl_config(handle, YAJL_ALLOW_COMMENTS, 1) if multiple_values: yajl.yajl_config(handle, YAJL_MULTIPLE_VALUES, 1) return handle def yajl_parse(handle, buffer): if buffer: result = yajl.yajl_parse(handle, buffer, len(buffer)) else: result = yajl.yajl_complete_parse(handle) if result != YAJL_OK: perror = yajl.yajl_get_error(handle, 1, buffer, len(buffer)) error = ffi.string(perror) yajl.yajl_free_error(handle, perror) exception = common.IncompleteJSONError if result == YAJL_INSUFFICIENT_DATA else common.JSONError raise exception(error) class Container(object): pass def basic_parse(f, buf_size=64*1024, **config): ''' Iterator yielding unprefixed events. Parameters: - f: a readable file-like object with JSON input - allow_comments: tells parser to allow comments in JSON input - buf_size: a size of an input buffer - multiple_values: allows the parser to parse multiple JSON objects ''' # the scope objects makes sure the C objects allocated in _yajl.init # are kept alive until this function is done scope = Container() events = [] handle = yajl_init(scope, events, **config) try: while True: buffer = f.read(buf_size) # this calls the callbacks which will # fill the events list yajl_parse(handle, buffer) if not buffer and not events: break for event in events: yield event # clear all events, but don't replace the # the events list instance del events[:] finally: yajl.yajl_free(handle) def parse(file, **kwargs): ''' Backend-specific wrapper for ijson.common.parse. ''' return common.parse(basic_parse(file, **kwargs)) def items(file, prefix): ''' Backend-specific wrapper for ijson.common.items. ''' return common.items(parse(file), prefix) ijson-2.3/ijson/common.py000066400000000000000000000107371265524367600155140ustar00rootroot00000000000000''' Backend independent higher level interfaces, common exceptions. ''' import decimal class JSONError(Exception): ''' Base exception for all parsing errors. ''' pass class IncompleteJSONError(JSONError): ''' Raised when the parser can't read expected data from a stream. ''' pass def parse(basic_events): ''' An iterator returning parsing events with the information about their location with the JSON object tree. Events are tuples ``(prefix, type, value)``. Available types and values are: ('null', None) ('boolean', ) ('number', ) ('string', ) ('map_key', ) ('start_map', None) ('end_map', None) ('start_array', None) ('end_array', None) Prefixes represent the path to the nested elements from the root of the JSON document. For example, given this document:: { "array": [1, 2], "map": { "key": "value" } } the parser would yield events: ('', 'start_map', None) ('', 'map_key', 'array') ('array', 'start_array', None) ('array.item', 'number', 1) ('array.item', 'number', 2) ('array', 'end_array', None) ('', 'map_key', 'map') ('map', 'start_map', None) ('map', 'map_key', 'key') ('map.key', 'string', u'value') ('map', 'end_map', None) ('', 'end_map', None) ''' path = [] for event, value in basic_events: if event == 'map_key': prefix = '.'.join(path[:-1]) path[-1] = value elif event == 'start_map': prefix = '.'.join(path) path.append(None) elif event == 'end_map': path.pop() prefix = '.'.join(path) elif event == 'start_array': prefix = '.'.join(path) path.append('item') elif event == 'end_array': path.pop() prefix = '.'.join(path) else: # any scalar value prefix = '.'.join(path) yield prefix, event, value class ObjectBuilder(object): ''' Incrementally builds an object from JSON parser events. Events are passed into the `event` function that accepts two parameters: event type and value. The object being built is available at any time from the `value` attribute. Example:: from StringIO import StringIO from ijson.parse import basic_parse from ijson.utils import ObjectBuilder builder = ObjectBuilder() f = StringIO('{"key": "value"}) for event, value in basic_parse(f): builder.event(event, value) print builder.value ''' def __init__(self): def initial_set(value): self.value = value self.containers = [initial_set] def event(self, event, value): if event == 'map_key': self.key = value elif event == 'start_map': map = {} self.containers[-1](map) def setter(value): map[self.key] = value self.containers.append(setter) elif event == 'start_array': array = [] self.containers[-1](array) self.containers.append(array.append) elif event == 'end_array' or event == 'end_map': self.containers.pop() else: self.containers[-1](value) def items(prefixed_events, prefix): ''' An iterator returning native Python objects constructed from the events under a given prefix. ''' prefixed_events = iter(prefixed_events) try: while True: current, event, value = next(prefixed_events) if current == prefix: if event in ('start_map', 'start_array'): builder = ObjectBuilder() end_event = event.replace('start', 'end') while (current, event) != (prefix, end_event): builder.event(event, value) current, event, value = next(prefixed_events) yield builder.value else: yield value except StopIteration: pass def number(str_value): ''' Converts string with a numeric value into an int or a Decimal. Used in different backends for consistent number representation. ''' number = decimal.Decimal(str_value) int_number = int(number) if int_number == number: number = int_number return number ijson-2.3/ijson/compat.py000066400000000000000000000004011265524367600154720ustar00rootroot00000000000000''' Python2/Python3 compatibility utilities. ''' import sys IS_PY2 = sys.version_info[0] < 3 if IS_PY2: b2s = lambda s: s chr = unichr bytetype = str else: def b2s(b): return b.decode('utf-8') chr = chr bytetype = bytes ijson-2.3/ijson/utils.py000066400000000000000000000036041265524367600153570ustar00rootroot00000000000000# -*- coding:utf-8 -*- from functools import wraps def coroutine(func): ''' Wraps a generator which intended to be used as a pure coroutine by .send()ing it values. The only thing that the wrapper does is calling .next() for the first time which is required by Python generator protocol. ''' @wraps(func) def wrapper(*args, **kwargs): g = func(*args, **kwargs) next(g) return g return wrapper @coroutine def foreach(coroutine_func): ''' Dispatches each JSON array item to a handler coroutine. A coroutine is created anew for each item by calling `coroutine_func` callable. The resulting coroutine should accept value in the form of tuple of values generated by rich JSON parser: (prefix, event, value). First event received by foreach should be a "start_array" event. ''' g = None base, event, value = yield if event != 'start_array': raise Exception('foreach requires "start_array" as the first event, got %s' % repr((base, event, value))) START_EVENTS = set(['start_map', 'start_array', 'null', 'boolean', 'number', 'string']) itemprefix = base + '.item' if base else 'item' while True: prefix, event, value = yield if prefix == itemprefix and event in START_EVENTS: g = coroutine_func() if (prefix, event) != (base, 'end_array'): g.send((prefix, event, value)) @coroutine def dispatcher(targets): ''' Dispatches JSON parser events into several handlers depending on event prefixes. Accepts a list of tuples (base_prefix, coroutine). A coroutine then receives all the events with prefixes starting with its base_prefix. ''' while True: prefix, event, value = yield for base, target in targets: if prefix.startswith(base): target.send((prefix, event, value)) break ijson-2.3/setup.py000066400000000000000000000013061265524367600142320ustar00rootroot00000000000000from setuptools import setup, find_packages setup( name = 'ijson', version = '2.3', author = 'Ivan Sagalaev', author_email = 'maniac@softwaremaniacs.org', url = 'https://github.com/isagalaev/ijson', license = 'BSD', description = 'Iterative JSON parser with a standard Python iterator interface', long_description = open('README.rst').read(), classifiers = [ 'Development Status :: 5 - Production/Stable', 'License :: OSI Approved :: BSD License', 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 3', 'Topic :: Software Development :: Libraries :: Python Modules', ], packages = find_packages(), ) ijson-2.3/tests.py000066400000000000000000000203011265524367600142300ustar00rootroot00000000000000# -*- coding:utf-8 -*- from __future__ import unicode_literals import unittest from io import BytesIO, StringIO from decimal import Decimal import threading from importlib import import_module from ijson import common from ijson.backends.python import basic_parse, Lexer from ijson.compat import IS_PY2 JSON = b''' { "docs": [ { "null": null, "boolean": false, "true": true, "integer": 0, "double": 0.5, "exponent": 1.0e+2, "long": 10000000000, "string": "\\u0441\\u0442\\u0440\\u043e\\u043a\\u0430 - \xd1\x82\xd0\xb5\xd1\x81\xd1\x82" }, { "meta": [[1], {}] }, { "meta": {"key": "value"} }, { "meta": null } ] } ''' JSON_EVENTS = [ ('start_map', None), ('map_key', 'docs'), ('start_array', None), ('start_map', None), ('map_key', 'null'), ('null', None), ('map_key', 'boolean'), ('boolean', False), ('map_key', 'true'), ('boolean', True), ('map_key', 'integer'), ('number', 0), ('map_key', 'double'), ('number', Decimal('0.5')), ('map_key', 'exponent'), ('number', 100), ('map_key', 'long'), ('number', 10000000000), ('map_key', 'string'), ('string', 'строка - тест'), ('end_map', None), ('start_map', None), ('map_key', 'meta'), ('start_array', None), ('start_array', None), ('number', 1), ('end_array', None), ('start_map', None), ('end_map', None), ('end_array', None), ('end_map', None), ('start_map', None), ('map_key', 'meta'), ('start_map', None), ('map_key', 'key'), ('string', 'value'), ('end_map', None), ('end_map', None), ('start_map', None), ('map_key', 'meta'), ('null', None), ('end_map', None), ('end_array', None), ('end_map', None), ] SCALAR_JSON = b'0' INVALID_JSONS = [ b'["key", "value",]', # trailing comma b'["key" "value"]', # no comma b'{"key": "value",}', # trailing comma b'{"key": "value" "key"}', # no comma b'{"key" "value"}', # no colon b'invalid', # unknown lexeme b'[1, 2] dangling junk' # dangling junk ] YAJL1_PASSING_INVALID = INVALID_JSONS[6] INCOMPLETE_JSONS = [ b'', b'"test', b'[', b'[1', b'[1,', b'{', b'{"key"', b'{"key":', b'{"key": "value"', b'{"key": "value",', ] STRINGS_JSON = br''' { "str1": "", "str2": "\"", "str3": "\\", "str4": "\\\\", "special\t": "\b\f\n\r\t" } ''' INT_NUMBERS_JSON = b'[1, 1.0, 1E2]' class Parse(object): ''' Base class for parsing tests that is used to create test cases for each available backends. ''' def test_basic_parse(self): events = list(self.backend.basic_parse(BytesIO(JSON))) self.assertEqual(events, JSON_EVENTS) def test_basic_parse_threaded(self): thread = threading.Thread(target=self.test_basic_parse) thread.start() thread.join() def test_scalar(self): events = list(self.backend.basic_parse(BytesIO(SCALAR_JSON))) self.assertEqual(events, [('number', 0)]) def test_strings(self): events = list(self.backend.basic_parse(BytesIO(STRINGS_JSON))) strings = [value for event, value in events if event == 'string'] self.assertEqual(strings, ['', '"', '\\', '\\\\', '\b\f\n\r\t']) self.assertTrue(('map_key', 'special\t') in events) def test_int_numbers(self): events = list(self.backend.basic_parse(BytesIO(INT_NUMBERS_JSON))) numbers = [value for event, value in events if event == 'number'] self.assertTrue(all(type(n) is int for n in numbers)) def test_invalid(self): for json in INVALID_JSONS: # Yajl1 doesn't complain about additional data after the end # of a parsed object. Skipping this test. if self.__class__.__name__ == 'YajlParse' and json == YAJL1_PASSING_INVALID: continue with self.assertRaises(common.JSONError) as cm: list(self.backend.basic_parse(BytesIO(json))) def test_incomplete(self): for json in INCOMPLETE_JSONS: with self.assertRaises(common.IncompleteJSONError): list(self.backend.basic_parse(BytesIO(json))) def test_utf8_split(self): buf_size = JSON.index(b'\xd1') + 1 try: events = list(self.backend.basic_parse(BytesIO(JSON), buf_size=buf_size)) except UnicodeDecodeError: self.fail('UnicodeDecodeError raised') def test_lazy(self): # shouldn't fail since iterator is not exhausted self.backend.basic_parse(BytesIO(INVALID_JSONS[0])) self.assertTrue(True) def test_boundary_lexeme(self): buf_size = JSON.index(b'false') + 1 events = list(self.backend.basic_parse(BytesIO(JSON), buf_size=buf_size)) self.assertEqual(events, JSON_EVENTS) def test_boundary_whitespace(self): buf_size = JSON.index(b' ') + 1 events = list(self.backend.basic_parse(BytesIO(JSON), buf_size=buf_size)) self.assertEqual(events, JSON_EVENTS) def test_api(self): self.assertTrue(list(self.backend.items(BytesIO(JSON), ''))) self.assertTrue(list(self.backend.parse(BytesIO(JSON)))) # Generating real TestCase classes for each importable backend for name in ['python', 'yajl', 'yajl2', 'yajl2_cffi']: try: classname = '%sParse' % ''.join(p.capitalize() for p in name.split('_')) if IS_PY2: classname = classname.encode('ascii') locals()[classname] = type( classname, (unittest.TestCase, Parse), {'backend': import_module('ijson.backends.%s' % name)}, ) except ImportError: pass class Common(unittest.TestCase): ''' Backend independent tests. They all use basic_parse imported explicitly from the python backend to generate parsing events. ''' def test_object_builder(self): builder = common.ObjectBuilder() for event, value in basic_parse(BytesIO(JSON)): builder.event(event, value) self.assertEqual(builder.value, { 'docs': [ { 'string': 'строка - тест', 'null': None, 'boolean': False, 'true': True, 'integer': 0, 'double': Decimal('0.5'), 'exponent': 100, 'long': 10000000000, }, { 'meta': [[1], {}], }, { 'meta': {'key': 'value'}, }, { 'meta': None, }, ], }) def test_scalar_builder(self): builder = common.ObjectBuilder() for event, value in basic_parse(BytesIO(SCALAR_JSON)): builder.event(event, value) self.assertEqual(builder.value, 0) def test_parse(self): events = common.parse(basic_parse(BytesIO(JSON))) events = [value for prefix, event, value in events if prefix == 'docs.item.meta.item.item' ] self.assertEqual(events, [1]) def test_items(self): events = basic_parse(BytesIO(JSON)) meta = list(common.items(common.parse(events), 'docs.item.meta')) self.assertEqual(meta, [ [[1], {}], {'key': 'value'}, None, ]) class Stream(unittest.TestCase): def test_bytes(self): l = Lexer(BytesIO(JSON)) self.assertEqual(next(l)[1], '{') def test_string(self): l = Lexer(StringIO(JSON.decode('utf-8'))) self.assertEqual(next(l)[1], '{') if __name__ == '__main__': unittest.main() ijson-2.3/tox.ini000066400000000000000000000001071265524367600140310ustar00rootroot00000000000000[tox] envlist = py27, py34 [testenv] commands = {envpython} tests.py