json_tricks-3.11.0/0002750000175000017500000000000013211041626014023 5ustar markmark00000000000000json_tricks-3.11.0/json_tricks/0002750000175000017500000000000013211041626016353 5ustar markmark00000000000000json_tricks-3.11.0/json_tricks/nonp.py0000640000175000017500000002337513211011205017676 0ustar markmark00000000000000 from gzip import GzipFile from io import BytesIO from json import loads as json_loads from os import fsync from sys import exc_info, version from .utils import NoNumpyException # keep 'unused' imports from .comment import strip_comment_line_with_symbol, strip_comments # keep 'unused' imports from .encoders import TricksEncoder, json_date_time_encode, \ class_instance_encode, ClassInstanceEncoder, \ json_complex_encode, json_set_encode, numeric_types_encode, numpy_encode, \ nonumpy_encode, NoNumpyEncoder, \ nopandas_encode, pandas_encode, noenum_instance_encode, \ enum_instance_encode # keep 'unused' imports from .decoders import DuplicateJsonKeyException, TricksPairHook, \ json_date_time_hook, ClassInstanceHook, \ json_complex_hook, json_set_hook, numeric_types_hook, json_numpy_obj_hook, \ json_nonumpy_obj_hook, \ nopandas_hook, pandas_hook, EnumInstanceHook, \ noenum_hook # keep 'unused' imports from json import JSONEncoder is_py3 = (version[:2] == '3.') str_type = str if is_py3 else (basestring, unicode,) ENCODING = 'UTF-8' _cih_instance = ClassInstanceHook() _eih_instance = EnumInstanceHook() DEFAULT_ENCODERS = [json_date_time_encode, json_complex_encode, json_set_encode, numeric_types_encode, class_instance_encode, ] DEFAULT_HOOKS = [json_date_time_hook, json_complex_hook, json_set_hook, numeric_types_hook, _cih_instance, ] try: import enum except ImportError: DEFAULT_ENCODERS = [noenum_instance_encode,] + DEFAULT_ENCODERS DEFAULT_HOOKS = [noenum_hook,] + DEFAULT_HOOKS else: DEFAULT_ENCODERS = [enum_instance_encode,] + DEFAULT_ENCODERS DEFAULT_HOOKS = [_eih_instance,] + DEFAULT_HOOKS try: import numpy except ImportError: DEFAULT_ENCODERS = 
[nonumpy_encode,] + DEFAULT_ENCODERS DEFAULT_HOOKS = [json_nonumpy_obj_hook,] + DEFAULT_HOOKS else: # numpy encode needs to be before complex DEFAULT_ENCODERS = [numpy_encode,] + DEFAULT_ENCODERS DEFAULT_HOOKS = [json_numpy_obj_hook,] + DEFAULT_HOOKS try: import pandas except ImportError: DEFAULT_ENCODERS = [nopandas_encode,] + DEFAULT_ENCODERS DEFAULT_HOOKS = [nopandas_hook,] + DEFAULT_HOOKS else: DEFAULT_ENCODERS = [pandas_encode,] + DEFAULT_ENCODERS DEFAULT_HOOKS = [pandas_hook,] + DEFAULT_HOOKS DEFAULT_NONP_ENCODERS = [nonumpy_encode,] + DEFAULT_ENCODERS # DEPRECATED DEFAULT_NONP_HOOKS = [json_nonumpy_obj_hook,] + DEFAULT_HOOKS # DEPRECATED def dumps(obj, sort_keys=None, cls=TricksEncoder, obj_encoders=DEFAULT_ENCODERS, extra_obj_encoders=(), primitives=False, compression=None, allow_nan=False, conv_str_byte=False, **jsonkwargs): """ Convert a nested data structure to a json string. :param obj: The Python object to convert. :param sort_keys: Keep this False if you want order to be preserved. :param cls: The json encoder class to use, defaults to NoNumpyEncoder which gives a warning for numpy arrays. :param obj_encoders: Iterable of encoders to use to convert arbitrary objects into json-able promitives. :param extra_obj_encoders: Like `obj_encoders` but on top of them: use this to add encoders without replacing defaults. Since v3.5 these happen before default encoders. :param allow_nan: Allow NaN and Infinity values, which is a (useful) violation of the JSON standard (default False). :param conv_str_byte: Try to automatically convert between strings and bytes (assuming utf-8) (default False). :return: The string containing the json-encoded version of obj. Other arguments are passed on to `cls`. Note that `sort_keys` should be false if you want to preserve order. 
""" if not hasattr(extra_obj_encoders, '__iter__'): raise TypeError('`extra_obj_encoders` should be a tuple in `json_tricks.dump(s)`') encoders = tuple(extra_obj_encoders) + tuple(obj_encoders) txt = cls(sort_keys=sort_keys, obj_encoders=encoders, allow_nan=allow_nan, primitives=primitives, **jsonkwargs).encode(obj) if not is_py3 and isinstance(txt, str): txt = unicode(txt, ENCODING) if not compression: return txt if compression is True: compression = 5 txt = txt.encode(ENCODING) sh = BytesIO() with GzipFile(mode='wb', fileobj=sh, compresslevel=compression) as zh: zh.write(txt) gzstring = sh.getvalue() return gzstring def dump(obj, fp, sort_keys=None, cls=TricksEncoder, obj_encoders=DEFAULT_ENCODERS, extra_obj_encoders=(), primitives=False, compression=None, force_flush=False, allow_nan=False, conv_str_byte=False, **jsonkwargs): """ Convert a nested data structure to a json string. :param fp: File handle or path to write to. :param compression: The gzip compression level, or None for no compression. :param force_flush: If True, flush the file handle used, when possibly also in the operating system (default False). The other arguments are identical to `dumps`. 
""" txt = dumps(obj, sort_keys=sort_keys, cls=cls, obj_encoders=obj_encoders, extra_obj_encoders=extra_obj_encoders, primitives=primitives, compression=compression, allow_nan=allow_nan, conv_str_byte=conv_str_byte, **jsonkwargs) if isinstance(fp, str_type): fh = open(fp, 'wb+') else: fh = fp if conv_str_byte: try: fh.write(b'') except TypeError: pass # if not isinstance(txt, str_type): # # Cannot write bytes, so must be in text mode, but we didn't get a text # if not compression: # txt = txt.decode(ENCODING) else: try: fh.write(u'') except TypeError: if isinstance(txt, str_type): txt = txt.encode(ENCODING) try: if 'b' not in getattr(fh, 'mode', 'b?') and not isinstance(txt, str_type) and compression: raise IOError('If compression is enabled, the file must be opened in binary mode.') try: fh.write(txt) except TypeError as err: err.args = (err.args[0] + '. A possible reason is that the file is not opened in binary mode; ' 'be sure to set file mode to something like "wb".',) raise finally: if force_flush: fh.flush() try: if fh.fileno() is not None: fsync(fh.fileno()) except (ValueError,): pass if isinstance(fp, str_type): fh.close() return txt def loads(string, preserve_order=True, ignore_comments=True, decompression=None, obj_pairs_hooks=DEFAULT_HOOKS, extra_obj_pairs_hooks=(), cls_lookup_map=None, allow_duplicates=True, conv_str_byte=False, **jsonkwargs): """ Convert a nested data structure to a json string. :param string: The string containing a json encoded data structure. :param decode_cls_instances: True to attempt to decode class instances (requires the environment to be similar the the encoding one). :param preserve_order: Whether to preserve order by using OrderedDicts or not. :param ignore_comments: Remove comments (starting with # or //). :param decompression: True to use gzip decompression, False to use raw data, None to automatically determine (default). Assumes utf-8 encoding! :param obj_pairs_hooks: A list of dictionary hooks to apply. 
:param extra_obj_pairs_hooks: Like `obj_pairs_hooks` but on top of them: use this to add hooks without replacing defaults. Since v3.5 these happen before default hooks. :param cls_lookup_map: If set to a dict, for example ``globals()``, then classes encoded from __main__ are looked up this dict. :param allow_duplicates: If set to False, an error will be raised when loading a json-map that contains duplicate keys. :param parse_float: A function to parse strings to integers (e.g. Decimal). There is also `parse_int`. :param conv_str_byte: Try to automatically convert between strings and bytes (assuming utf-8) (default False). :return: The string containing the json-encoded version of obj. Other arguments are passed on to json_func. """ if not hasattr(extra_obj_pairs_hooks, '__iter__'): raise TypeError('`extra_obj_pairs_hooks` should be a tuple in `json_tricks.load(s)`') if decompression is None: decompression = isinstance(string, bytes) and string[:2] == b'\x1f\x8b' if decompression: with GzipFile(fileobj=BytesIO(string), mode='rb') as zh: string = zh.read() string = string.decode(ENCODING) if not isinstance(string, str_type): if conv_str_byte: string = string.decode(ENCODING) else: raise TypeError(('Cannot automatically encode object of type "{0:}" in `json_tricks.load(s)` since ' 'the encoding is not known. 
You should instead encode the bytes to a string and pass that ' 'string to `load(s)`, for example bytevar.encode("utf-8") if utf-8 is the encoding.').format(type(string))) if ignore_comments: string = strip_comments(string) obj_pairs_hooks = tuple(obj_pairs_hooks) _cih_instance.cls_lookup_map = cls_lookup_map or {} _eih_instance.cls_lookup_map = cls_lookup_map or {} hooks = tuple(extra_obj_pairs_hooks) + obj_pairs_hooks hook = TricksPairHook(ordered=preserve_order, obj_pairs_hooks=hooks, allow_duplicates=allow_duplicates) return json_loads(string, object_pairs_hook=hook, **jsonkwargs) def load(fp, preserve_order=True, ignore_comments=True, decompression=None, obj_pairs_hooks=DEFAULT_HOOKS, extra_obj_pairs_hooks=(), cls_lookup_map=None, allow_duplicates=True, conv_str_byte=False, **jsonkwargs): """ Convert a nested data structure to a json string. :param fp: File handle or path to load from. The other arguments are identical to loads. """ try: if isinstance(fp, str_type): with open(fp, 'rb') as fh: string = fh.read() else: string = fp.read() except UnicodeDecodeError as err: # todo: not covered in tests, is it relevant? raise Exception('There was a problem decoding the file content. A possible reason is that the file is not ' + 'opened in binary mode; be sure to set file mode to something like "rb".').with_traceback(exc_info()[2]) return loads(string, preserve_order=preserve_order, ignore_comments=ignore_comments, decompression=decompression, obj_pairs_hooks=obj_pairs_hooks, extra_obj_pairs_hooks=extra_obj_pairs_hooks, cls_lookup_map=cls_lookup_map, allow_duplicates=allow_duplicates, conv_str_byte=conv_str_byte, **jsonkwargs) json_tricks-3.11.0/json_tricks/np_utils.py0000640000175000017500000000053313210325740020563 0ustar markmark00000000000000 """ This file exists for backward compatibility reasons. """ from .utils import hashodict, get_scalar_repr, encode_scalars_inplace from .nonp import NoNumpyException from . 
import np # try: # from numpy import generic, complex64, complex128 # except ImportError: # raise NoNumpyException('Could not load numpy, maybe it is not installed?') json_tricks-3.11.0/json_tricks/np.py0000640000175000017500000000225313211002052017331 0ustar markmark00000000000000 """ This file exists for backward compatibility reasons. """ from logging import warning from .nonp import NoNumpyException, DEFAULT_ENCODERS, DEFAULT_HOOKS, dumps, dump, loads, load # keep 'unused' imports from .utils import hashodict, NoPandasException from .comment import strip_comment_line_with_symbol, strip_comments # keep 'unused' imports from .encoders import TricksEncoder, json_date_time_encode, class_instance_encode, ClassInstanceEncoder, \ numpy_encode, NumpyEncoder # keep 'unused' imports from .decoders import DuplicateJsonKeyException, TricksPairHook, json_date_time_hook, ClassInstanceHook, \ json_complex_hook, json_set_hook, json_numpy_obj_hook # keep 'unused' imports try: import numpy except ImportError: raise NoNumpyException('Could not load numpy, maybe it is not installed? If you do not want to use numpy encoding ' 'or decoding, you can import the functions from json_tricks.nonp instead, which do not need numpy.') warning('`json_tricks.np` is deprecated, you can import directly from `json_tricks`') DEFAULT_NP_ENCODERS = [numpy_encode,] + DEFAULT_ENCODERS # DEPRECATED DEFAULT_NP_HOOKS = [json_numpy_obj_hook,] + DEFAULT_HOOKS # DEPRECATED json_tricks-3.11.0/json_tricks/utils.py0000640000175000017500000001234613211037715020076 0ustar markmark00000000000000 from collections import OrderedDict from functools import partial from importlib import import_module from logging import warning, warn from sys import version_info class hashodict(OrderedDict): """ This dictionary is hashable. It should NOT be mutated, or all kinds of weird bugs may appear. This is not enforced though, it's only used for encoding. 
""" def __hash__(self): return hash(frozenset(self.items())) try: from inspect import signature except ImportError: try: from inspect import getfullargspec except ImportError: from inspect import getargspec def get_arg_names(callable): if type(callable) == partial and version_info[0] == 2: if not hasattr(get_arg_names, '__warned_partial_argspec'): get_arg_names.__warned_partial_argspec = True warn("'functools.partial' and 'inspect.getargspec' are not compatible in this Python version; " "ignoring the 'partial' wrapper when inspecting arguments of {}, which can lead to problems".format(callable)) return set(getargspec(callable.func).args) argspec = getargspec(callable) return set(argspec.args) else: #todo: this is not covered in test case (py 3+ uses `signature`, py2 `getfullargspec`); consider removing it def get_arg_names(callable): argspec = getfullargspec(callable) return set(argspec.args) | set(argspec.kwonlyargs) else: def get_arg_names(callable): sig = signature(callable) return set(sig.parameters.keys()) def call_with_optional_kwargs(callable, *args, **optional_kwargs): accepted_kwargs = get_arg_names(callable) use_kwargs = {} for key, val in optional_kwargs.items(): if key in accepted_kwargs: use_kwargs[key] = val return callable(*args, **use_kwargs) class NoNumpyException(Exception): """ Trying to use numpy features, but numpy cannot be found. """ class NoPandasException(Exception): """ Trying to use pandas features, but pandas cannot be found. """ class NoEnumException(Exception): """ Trying to use enum features, but enum cannot be found. 
""" class ClassInstanceHookBase(object): def __init__(self, cls_lookup_map=None): self.cls_lookup_map = cls_lookup_map or {} def get_cls_from_instance_type(self, mod, name): if mod is None: try: Cls = getattr((__import__('__main__')), name) except (ImportError, AttributeError) as err: if name not in self.cls_lookup_map: raise ImportError(('class {0:s} seems to have been exported from the main file, which means ' 'it has no module/import path set; you need to provide cls_lookup_map which maps names ' 'to classes').format(name)) Cls = self.cls_lookup_map[name] else: imp_err = None try: module = import_module('{0:}'.format(mod, name)) except ImportError as err: imp_err = ('encountered import error "{0:}" while importing "{1:}" to decode a json file; perhaps ' 'it was encoded in a different environment where {1:}.{2:} was available').format(err, mod, name) else: if not hasattr(module, name): imp_err = 'imported "{0:}" but could find "{1:}" inside while decoding a json file (found {2:}'.format( module, name, ', '.join(attr for attr in dir(module) if not attr.startswith('_'))) Cls = getattr(module, name) if imp_err: if 'name' in self.cls_lookup_map: Cls = self.cls_lookup_map[name] else: raise ImportError(imp_err) return Cls def get_scalar_repr(npscalar): return hashodict(( ('__ndarray__', npscalar.item()), ('dtype', str(npscalar.dtype)), ('shape', ()), )) def encode_scalars_inplace(obj): """ Searches a data structure of lists, tuples and dicts for numpy scalars and replaces them by their dictionary representation, which can be loaded by json-tricks. This happens in-place (the object is changed, use a copy). 
""" from numpy import generic, complex64, complex128 if isinstance(obj, (generic, complex64, complex128)): return get_scalar_repr(obj) if isinstance(obj, dict): for key, val in tuple(obj.items()): obj[key] = encode_scalars_inplace(val) return obj if isinstance(obj, list): for k, val in enumerate(obj): obj[k] = encode_scalars_inplace(val) return obj if isinstance(obj, (tuple, set)): return type(obj)(encode_scalars_inplace(val) for val in obj) return obj def encode_intenums_inplace(obj): """ Searches a data structure of lists, tuples and dicts for IntEnum and replaces them by their dictionary representation, which can be loaded by json-tricks. This happens in-place (the object is changed, use a copy). """ from enum import IntEnum from json_tricks import encoders if isinstance(obj, IntEnum): return encoders.enum_instance_encode(obj) if isinstance(obj, dict): for key, val in obj.items(): obj[key] = encode_intenums_inplace(val) return obj if isinstance(obj, list): for index, val in enumerate(obj): obj[index] = encode_intenums_inplace(val) return obj if isinstance(obj, (tuple, set)): return type(obj)(encode_intenums_inplace(val) for val in obj) return obj def get_module_name_from_object(obj): mod = obj.__class__.__module__ if mod == '__main__': mod = None warning(('class {0:} seems to have been defined in the main file; unfortunately this means' ' that it\'s module/import path is unknown, so you might have to provide cls_lookup_map when ' 'decoding').format(obj.__class__)) return mod json_tricks-3.11.0/json_tricks/__init__.py0000640000175000017500000000257313210331034020464 0ustar markmark00000000000000 try: from json import JSONDecodeError # imported for convenience except ImportError: """ Older versions of Python use ValueError, of which JSONDecodeError is a subclass; it's recommended to catch ValueError. 
""" from .utils import hashodict, NoEnumException, NoNumpyException, NoPandasException, get_scalar_repr, encode_intenums_inplace, encode_scalars_inplace from .comment import strip_comment_line_with_symbol, strip_comments from .encoders import TricksEncoder, json_date_time_encode, class_instance_encode, json_complex_encode, \ numeric_types_encode, ClassInstanceEncoder, json_set_encode, pandas_encode, nopandas_encode, \ numpy_encode, NumpyEncoder, nonumpy_encode, NoNumpyEncoder from .decoders import DuplicateJsonKeyException, TricksPairHook, json_date_time_hook, json_complex_hook, \ numeric_types_hook, ClassInstanceHook, json_set_hook, pandas_hook, nopandas_hook, json_numpy_obj_hook, \ json_nonumpy_obj_hook from .nonp import dumps, dump, loads, load try: # find_module takes just as long as importing, so no optimization possible import numpy except ImportError: NUMPY_MODE = False # from .nonp import dumps, dump, loads, load, nonumpy_encode as numpy_encode, json_nonumpy_obj_hook as json_numpy_obj_hook else: NUMPY_MODE = True # from .np import dumps, dump, loads, load, numpy_encode, NumpyEncoder, json_numpy_obj_hook # from .np_utils import encode_scalars_inplace json_tricks-3.11.0/json_tricks/encoders.py0000640000175000017500000003015213211011431020516 0ustar markmark00000000000000 from datetime import datetime, date, time, timedelta from fractions import Fraction from logging import warning from json import JSONEncoder from sys import version from decimal import Decimal from .utils import hashodict, call_with_optional_kwargs, get_module_name_from_object, NoEnumException, NoPandasException, NoNumpyException class TricksEncoder(JSONEncoder): """ Encoder that runs any number of encoder functions or instances on the objects that are being encoded. Each encoder should make any appropriate changes and return an object, changed or not. This will be passes to the other encoders. 
""" def __init__(self, obj_encoders=None, silence_typeerror=False, primitives=False, **json_kwargs): """ :param obj_encoders: An iterable of functions or encoder instances to try. :param silence_typeerror: If set to True, ignore the TypeErrors that Encoder instances throw (default False). """ self.obj_encoders = [] if obj_encoders: self.obj_encoders = list(obj_encoders) self.silence_typeerror = silence_typeerror self.primitives = primitives super(TricksEncoder, self).__init__(**json_kwargs) def default(self, obj, *args, **kwargs): """ This is the method of JSONEncoders that is called for each object; it calls all the encoders with the previous one's output used as input. It works for Encoder instances, but they are expected not to throw `TypeError` for unrecognized types (the super method does that by default). It never calls the `super` method so if there are non-primitive types left at the end, you'll get an encoding error. """ prev_id = id(obj) for encoder in self.obj_encoders: if hasattr(encoder, 'default'): #todo: write test for this scenario (maybe ClassInstanceEncoder?) try: obj = call_with_optional_kwargs(encoder.default, obj, primitives=self.primitives) except TypeError as err: if not self.silence_typeerror: raise elif hasattr(encoder, '__call__'): obj = call_with_optional_kwargs(encoder, obj, primitives=self.primitives) else: raise TypeError('`obj_encoder` {0:} does not have `default` method and is not callable'.format(encoder)) if id(obj) == prev_id: #todo: test raise TypeError('Object of type {0:} could not be encoded by {1:} using encoders [{2:s}]'.format( type(obj), self.__class__.__name__, ', '.join(str(encoder) for encoder in self.obj_encoders))) return obj def json_date_time_encode(obj, primitives=False): """ Encode a date, time, datetime or timedelta to a string of a json dictionary, including optional timezone. 
:param obj: date/time/datetime/timedelta obj :return: (dict) json primitives representation of date, time, datetime or timedelta """ if primitives and isinstance(obj, (date, time, datetime)): return obj.isoformat() if isinstance(obj, datetime): dct = hashodict([('__datetime__', None), ('year', obj.year), ('month', obj.month), ('day', obj.day), ('hour', obj.hour), ('minute', obj.minute), ('second', obj.second), ('microsecond', obj.microsecond)]) if obj.tzinfo: dct['tzinfo'] = obj.tzinfo.zone elif isinstance(obj, date): dct = hashodict([('__date__', None), ('year', obj.year), ('month', obj.month), ('day', obj.day)]) elif isinstance(obj, time): dct = hashodict([('__time__', None), ('hour', obj.hour), ('minute', obj.minute), ('second', obj.second), ('microsecond', obj.microsecond)]) if obj.tzinfo: dct['tzinfo'] = obj.tzinfo.zone elif isinstance(obj, timedelta): if primitives: return obj.total_seconds() else: dct = hashodict([('__timedelta__', None), ('days', obj.days), ('seconds', obj.seconds), ('microseconds', obj.microseconds)]) else: return obj for key, val in tuple(dct.items()): if not key.startswith('__') and not val: del dct[key] return dct def enum_instance_encode(obj, primitives=False, with_enum_value=False): """Encodes an enum instance to json. Note that it can only be recovered if the environment allows the enum to be imported in the same way. :param primitives: If true, encode the enum values as primitive (more readable, but cannot be restored automatically). :param with_enum_value: If true, the value of the enum is also exported (it is not used during import, as it should be constant). """ from enum import Enum if not isinstance(obj, Enum): return obj if primitives: return {obj.name: obj.value} mod = get_module_name_from_object(obj) representation = dict( __enum__=dict( # Don't use __instance_type__ here since enums members cannot be created with __new__ # Ie we can't rely on class deserialization to read them. 
__enum_instance_type__=[mod, type(obj).__name__], name=obj.name, ), ) if with_enum_value: representation['__enum__']['value'] = obj.value return representation def noenum_instance_encode(obj, primitives=False): if type(obj.__class__).__name__ == 'EnumMeta': raise NoEnumException(('Trying to encode an object of type {0:} which appears to be ' 'an enum, but enum support is not enabled, perhaps it is not installed.').format(type(obj))) return obj def class_instance_encode(obj, primitives=False): """ Encodes a class instance to json. Note that it can only be recovered if the environment allows the class to be imported in the same way. """ if isinstance(obj, list) or isinstance(obj, dict): return obj if hasattr(obj, '__class__') and (hasattr(obj, '__dict__') or hasattr(obj, '__slots__')): if not hasattr(obj, '__new__'): raise TypeError('class "{0:s}" does not have a __new__ method; '.format(obj.__class__) + ('perhaps it is an old-style class not derived from `object`; add `object` as a base class to encode it.' 
if (version[:2] == '2.') else 'this should not happen in Python3')) try: obj.__new__(obj.__class__) except TypeError: raise TypeError(('instance "{0:}" of class "{1:}" cannot be encoded because it\'s __new__ method ' 'cannot be called, perhaps it requires extra parameters').format(obj, obj.__class__)) mod = get_module_name_from_object(obj) name = obj.__class__.__name__ if hasattr(obj, '__json_encode__'): attrs = obj.__json_encode__() if primitives: return attrs else: return hashodict((('__instance_type__', (mod, name)), ('attributes', attrs))) dct = hashodict([('__instance_type__',(mod, name))]) if hasattr(obj, '__slots__'): slots = obj.__slots__ if isinstance(slots, str): slots = [slots] slots = list(item for item in slots if item != '__dict__') dct['slots'] = hashodict([]) for s in slots: dct['slots'][s] = getattr(obj, s) if hasattr(obj, '__dict__'): dct['attributes'] = hashodict(obj.__dict__) if primitives: attrs = dct.get('attributes',{}) attrs.update(dct.get('slots',{})) return attrs else: return dct return obj def json_complex_encode(obj, primitives=False): """ Encode a complex number as a json dictionary of it's real and imaginary part. :param obj: complex number, e.g. `2+1j` :return: (dict) json primitives representation of `obj` """ if isinstance(obj, complex): if primitives: return [obj.real, obj.imag] else: return hashodict(__complex__=[obj.real, obj.imag]) return obj def numeric_types_encode(obj, primitives=False): """ Encode Decimal and Fraction. :param primitives: Encode decimals and fractions as standard floats. You may lose precision. If you do this, you may need to enable `allow_nan` (decimals always allow NaNs but floats do not). 
""" if isinstance(obj, Decimal): if primitives: return float(obj) else: return { '__decimal__': str(obj.canonical()), } if isinstance(obj, Fraction): if primitives: return float(obj) else: return hashodict(( ('__fraction__', True), ('numerator', obj.numerator), ('denominator', obj.denominator), )) return obj class ClassInstanceEncoder(JSONEncoder): """ See `class_instance_encoder`. """ # Not covered in tests since `class_instance_encode` is recommended way. def __init__(self, obj, encode_cls_instances=True, **kwargs): self.encode_cls_instances = encode_cls_instances super(ClassInstanceEncoder, self).__init__(obj, **kwargs) def default(self, obj, *args, **kwargs): if self.encode_cls_instances: obj = class_instance_encode(obj) return super(ClassInstanceEncoder, self).default(obj, *args, **kwargs) def json_set_encode(obj, primitives=False): """ Encode python sets as dictionary with key __set__ and a list of the values. Try to sort the set to get a consistent json representation, use arbitrary order if the data is not ordinal. 
""" if isinstance(obj, set): try: repr = sorted(obj) except Exception: repr = list(obj) if primitives: return repr else: return hashodict(__set__=repr) return obj def pandas_encode(obj, primitives=False): from pandas import DataFrame, Series if isinstance(obj, (DataFrame, Series)): #todo: this is experimental if not getattr(pandas_encode, '_warned', False): pandas_encode._warned = True warning('Pandas dumping support in json-tricks is experimental and may change in future versions.') if isinstance(obj, DataFrame): repr = hashodict() if not primitives: repr['__pandas_dataframe__'] = hashodict(( ('column_order', tuple(obj.columns.values)), ('types', tuple(str(dt) for dt in obj.dtypes)), )) repr['index'] = tuple(obj.index.values) for k, name in enumerate(obj.columns.values): repr[name] = tuple(obj.ix[:, k].values) return repr if isinstance(obj, Series): repr = hashodict() if not primitives: repr['__pandas_series__'] = hashodict(( ('name', str(obj.name)), ('type', str(obj.dtype)), )) repr['index'] = tuple(obj.index.values) repr['data'] = tuple(obj.values) return repr return obj def nopandas_encode(obj): if ('DataFrame' in getattr(obj.__class__, '__name__', '') or 'Series' in getattr(obj.__class__, '__name__', '')) \ and 'pandas.' in getattr(obj.__class__, '__module__', ''): raise NoPandasException(('Trying to encode an object of type {0:} which appears to be ' 'a numpy array, but numpy support is not enabled, perhaps it is not installed.').format(type(obj))) return obj def numpy_encode(obj, primitives=False): """ Encodes numpy `ndarray`s as lists with meta data. Encodes numpy scalar types as Python equivalents. Special encoding is not possible, because int64 (in py2) and float64 (in py2 and py3) are subclasses of primitives, which never reach the encoder. :param primitives: If True, arrays are serialized as (nested) lists without meta info. 
""" from numpy import ndarray, generic if isinstance(obj, ndarray): if primitives: return obj.tolist() else: dct = hashodict(( ('__ndarray__', obj.tolist()), ('dtype', str(obj.dtype)), ('shape', obj.shape), )) if len(obj.shape) > 1: dct['Corder'] = obj.flags['C_CONTIGUOUS'] return dct elif isinstance(obj, generic): if NumpyEncoder.SHOW_SCALAR_WARNING: NumpyEncoder.SHOW_SCALAR_WARNING = False warning('json-tricks: numpy scalar serialization is experimental and may work differently in future versions') return obj.item() return obj class NumpyEncoder(ClassInstanceEncoder): """ JSON encoder for numpy arrays. """ SHOW_SCALAR_WARNING = True # show a warning that numpy scalar serialization is experimental def default(self, obj, *args, **kwargs): """ If input object is a ndarray it will be converted into a dict holding data type, shape and the data. The object can be restored using json_numpy_obj_hook. """ warning('`NumpyEncoder` is deprecated, use `numpy_encode`') #todo obj = numpy_encode(obj) return super(NumpyEncoder, self).default(obj, *args, **kwargs) def nonumpy_encode(obj): """ Raises an error for numpy arrays. """ if 'ndarray' in getattr(obj.__class__, '__name__', '') and 'numpy.' in getattr(obj.__class__, '__module__', ''): raise NoNumpyException(('Trying to encode an object of type {0:} which appears to be ' 'a pandas data stucture, but pandas support is not enabled, perhaps it is not installed.').format(type(obj))) return obj class NoNumpyEncoder(JSONEncoder): """ See `nonumpy_encode`. 
""" def default(self, obj, *args, **kwargs): warning('`NoNumpyEncoder` is deprecated, use `nonumpy_encode`') #todo obj = nonumpy_encode(obj) return super(NoNumpyEncoder, self).default(obj, *args, **kwargs) json_tricks-3.11.0/json_tricks/decoders.py0000640000175000017500000002063313211010726020515 0ustar markmark00000000000000 from datetime import datetime, date, time, timedelta from fractions import Fraction from collections import OrderedDict from decimal import Decimal from logging import warning from json_tricks import NoEnumException, NoPandasException, NoNumpyException from .utils import ClassInstanceHookBase class DuplicateJsonKeyException(Exception): """ Trying to load a json map which contains duplicate keys, but allow_duplicates is False """ class TricksPairHook(object): """ Hook that converts json maps to the appropriate python type (dict or OrderedDict) and then runs any number of hooks on the individual maps. """ def __init__(self, ordered=True, obj_pairs_hooks=None, allow_duplicates=True): """ :param ordered: True if maps should retain their ordering. :param obj_pairs_hooks: An iterable of hooks to apply to elements. """ self.map_type = OrderedDict if not ordered: self.map_type = dict self.obj_pairs_hooks = [] if obj_pairs_hooks: self.obj_pairs_hooks = list(obj_pairs_hooks) self.allow_duplicates = allow_duplicates def __call__(self, pairs): if not self.allow_duplicates: known = set() for key, value in pairs: if key in known: raise DuplicateJsonKeyException(('Trying to load a json map which contains a' + ' duplicate key "{0:}" (but allow_duplicates is False)').format(key)) known.add(key) map = self.map_type(pairs) for hook in self.obj_pairs_hooks: map = hook(map) return map def json_date_time_hook(dct): """ Return an encoded date, time, datetime or timedelta to it's python representation, including optional timezone. 
:param dct: (dict) json encoded date, time, datetime or timedelta :return: (date/time/datetime/timedelta obj) python representation of the above """ def get_tz(dct): if not 'tzinfo' in dct: return None try: import pytz except ImportError as err: raise ImportError(('Tried to load a json object which has a timezone-aware (date)time. ' 'However, `pytz` could not be imported, so the object could not be loaded. ' 'Error: {0:}').format(str(err))) return pytz.timezone(dct['tzinfo']) if not isinstance(dct, dict): return dct if '__date__' in dct: return date(year=dct.get('year', 0), month=dct.get('month', 0), day=dct.get('day', 0)) elif '__time__' in dct: tzinfo = get_tz(dct) return time(hour=dct.get('hour', 0), minute=dct.get('minute', 0), second=dct.get('second', 0), microsecond=dct.get('microsecond', 0), tzinfo=tzinfo) elif '__datetime__' in dct: tzinfo = get_tz(dct) return datetime(year=dct.get('year', 0), month=dct.get('month', 0), day=dct.get('day', 0), hour=dct.get('hour', 0), minute=dct.get('minute', 0), second=dct.get('second', 0), microsecond=dct.get('microsecond', 0), tzinfo=tzinfo) elif '__timedelta__' in dct: return timedelta(days=dct.get('days', 0), seconds=dct.get('seconds', 0), microseconds=dct.get('microseconds', 0)) return dct def json_complex_hook(dct): """ Return an encoded complex number to it's python representation. 
def numeric_types_hook(dct):
	"""
	Turn a json map that encodes a ``Decimal`` (``__decimal__`` key) or a
	``Fraction`` (``__fraction__`` key) back into that numeric type.

	Maps without these special keys, and non-dict values, pass through untouched.
	"""
	if isinstance(dct, dict):
		if '__decimal__' in dct:
			return Decimal(dct['__decimal__'])
		if '__fraction__' in dct:
			return Fraction(numerator=dct['numerator'], denominator=dct['denominator'])
	return dct
""" def __call__(self, dct): if not isinstance(dct, dict): return dct if '__instance_type__' not in dct: return dct mod, name = dct['__instance_type__'] Cls = self.get_cls_from_instance_type(mod, name) try: obj = Cls.__new__(Cls) except TypeError: raise TypeError(('problem while decoding instance of "{0:s}"; this instance has a special ' '__new__ method and can\'t be restored').format(name)) if hasattr(obj, '__json_decode__'): properties = {} if 'slots' in dct: properties.update(dct['slots']) if 'attributes' in dct: properties.update(dct['attributes']) obj.__json_decode__(**properties) else: if 'slots' in dct: for slot,value in dct['slots'].items(): setattr(obj, slot, value) if 'attributes' in dct: obj.__dict__ = dict(dct['attributes']) return obj def json_set_hook(dct): """ Return an encoded set to it's python representation. """ if not isinstance(dct, dict): return dct if '__set__' not in dct: return dct return set((tuple(item) if isinstance(item, list) else item) for item in dct['__set__']) def pandas_hook(dct): if not isinstance(dct, dict): return dct if '__pandas_dataframe__' not in dct and '__pandas_series__' not in dct: return dct # todo: this is experimental if not getattr(pandas_hook, '_warned', False): pandas_hook._warned = True warning('Pandas loading support in json-tricks is experimental and may change in future versions.') if '__pandas_dataframe__' in dct: try: from pandas import DataFrame except ImportError: raise NoPandasException('Trying to decode a map which appears to repr esent a pandas data structure, but pandas appears not to be installed.') from numpy import dtype, array meta = dct.pop('__pandas_dataframe__') indx = dct.pop('index') if 'index' in dct else None dtypes = dict((colname, dtype(tp)) for colname, tp in zip(meta['column_order'], meta['types'])) data = OrderedDict() for name, col in dct.items(): data[name] = array(col, dtype=dtypes[name]) return DataFrame( data=data, index=indx, columns=meta['column_order'], # mixed `dtypes` argument 
def json_numpy_obj_hook(dct):
	"""
	Turn a map previously written by NumpyEncoder back into a numpy array
	(or numpy scalar), restoring shape, data type and memory order.

	:param dct: (dict) json encoded ndarray
	:return: (ndarray) if input was an encoded ndarray
	"""
	if not isinstance(dct, dict) or '__ndarray__' not in dct:
		return dct
	try:
		from numpy import asarray
		import numpy as nptypes
	except ImportError:
		raise NoNumpyException('Trying to decode a map which appears to represent a numpy '
			'array, but numpy appears not to be installed.')
	# 'Corder' is optional in the encoding; without it, let numpy pick ('A').
	if 'Corder' in dct:
		order = 'C' if dct['Corder'] else 'F'
	else:
		order = 'A'
	if dct['shape']:
		return asarray(dct['__ndarray__'], dtype=dct['dtype'], order=order)
	# An empty shape means a scalar; instantiate the numpy scalar type by name.
	scalar_type = getattr(nptypes, dct['dtype'])
	return scalar_type(dct['__ndarray__'])
""" if isinstance(dct, dict) and '__ndarray__' in dct: raise NoNumpyException(('Trying to decode a map which appears to represent a numpy array, ' 'but numpy support is not enabled, perhaps it is not installed.')) return dct json_tricks-3.11.0/json_tricks/comment.py0000640000175000017500000000150113210325740020364 0ustar markmark00000000000000 from re import findall def strip_comment_line_with_symbol(line, start): parts = line.split(start) counts = [len(findall(r'(?:^|[^"\\]|(?:\\\\|\\")+)(")', part)) for part in parts] total = 0 for nr, count in enumerate(counts): total += count if total % 2 == 0: return start.join(parts[:nr+1]).rstrip() else: return line.rstrip() def strip_comments(string, comment_symbols=frozenset(('#', '//'))): """ :param string: A string containing json with comments started by comment_symbols. :param comment_symbols: Iterable of symbols that start a line comment (default # or //). :return: The string with the comments removed. """ lines = string.splitlines() for k in range(len(lines)): for symbol in comment_symbols: lines[k] = strip_comment_line_with_symbol(lines[k], start=symbol) return '\n'.join(lines) json_tricks-3.11.0/PKG-INFO0000640000175000017500000004407013211041626015124 0ustar markmark00000000000000Metadata-Version: 1.1 Name: json_tricks Version: 3.11.0 Summary: Extra features for Python's JSON: comments, order, numpy, pandas, datetimes, and many more! Simple but customizable. Home-page: https://github.com/mverleg/pyjson_tricks Author: Mark V Author-email: markv.nl.dev@gmail.com License: Revised BSD License (LICENSE.txt) Description: JSON tricks (python) --------------------------------------- The `pyjson-tricks` package brings several pieces of functionality to python handling of json files: 1. **Store and load numpy arrays** in human-readable format. 2. **Store and load class instances** both generic and customized. 3. **Store and load date/times** as a dictionary (including timezone). 4. 
**Preserve map order** ``{}`` using ``OrderedDict``. 5. **Allow for comments** in json files by starting lines with ``#``. `6. Sets, complex numbers, Decimal, Fraction, enums, compression, duplicate keys, ... As well as compression and disallowing duplicate keys. * Code: https://github.com/mverleg/pyjson_tricks * Documentation: http://json-tricks.readthedocs.org/en/latest/ * PIP: https://pypi.python.org/pypi/json_tricks The 2.0 series added some of the above features and broke backward compatibility. The version 3.0 series is a more readable rewrite that also makes it easier to combine encoders, again not fully backward compatible. Several keys of the format ``__keyname__`` have special meanings, and more might be added in future releases. If you're considering JSON-but-with-comments as a config file format, have a look at HJSON_, it might be more appropriate. For other purposes, keep reading! Thanks for all the Github stars! Installation and use --------------------------------------- You can install using .. code-block:: bash pip install json-tricks # or e.g. 'json-tricks<3.0' for older versions Decoding of some data types needs the corresponding package to be installed, e.g. ``numpy`` for arrays, ``pandas`` for dataframes and ``pytz`` for timezone-aware datetimes. You can import the usual json functions dump(s) and load(s), as well as a separate comment removal function, as follows: .. code-block:: bash from json_tricks import dump, dumps, load, loads, strip_comments The exact signatures of these and other functions are in the documentation_. ``json-tricks`` supports Python 2.7, and Python 3.4 and later, and is automatically tested on 2.7, 3.4, 3.5 and 3.6. Pypy is supported without numpy and pandas. Preserve type vs use primitive ------------------------------- By default, types are encoded such that they can be restored to their original type when loaded with ``json-tricks``. Example encodings in this documentation refer to that format. 
You can also choose to store things as their closest primitive type (e.g. arrays and sets as lists, decimals as floats). This may be desirable if you don't care about the exact type, or you are loading the json in another language (which doesn't restore python types). It's also smaller. To forego meta data and store primitives instead, pass ``primitives`` to ``dump(s)``. This is available in version ``3.8`` and later. Example: .. code-block:: python data = [ arange(0, 10, 1, dtype=int).reshape((2, 5)), datetime(year=2017, month=1, day=19, hour=23, minute=00, second=00), 1 + 2j, Decimal(42), Fraction(1, 3), MyTestCls(s='ub', dct={'7': 7}), # see later set(range(7)), ] # Encode with metadata to preserve types when decoding print(dumps(data)) .. code-block:: javascript // (comments added and indenting changed) [ // numpy array { "__ndarray__": [ [0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], "dtype": "int64", "shape": [2, 5], "Corder": true }, // datetime (naive) { "__datetime__": null, "year": 2017, "month": 1, "day": 19, "hour": 23 }, // complex number { "__complex__": [1.0, 2.0] }, // decimal & fraction { "__decimal__": "42" }, { "__fraction__": true "numerator": 1, "denominator": 3, }, // class instance { "__instance_type__": [ "tests.test_class", "MyTestCls" ], "attributes": { "s": "ub", "dct": {"7": 7} } }, // set { "__set__": [0, 1, 2, 3, 4, 5, 6] } ] .. code-block:: python # Encode as primitive types; more simple but loses type information print(dumps(data, primitives=True)) .. code-block:: javascript // (comments added and indentation changed) [ // numpy array [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], // datetime (naive) "2017-01-19T23:00:00", // complex number [1.0, 2.0], // decimal & fraction 42.0, 0.3333333333333333, // class instance { "s": "ub", "dct": {"7": 7} }, // set [0, 1, 2, 3, 4, 5, 6] ] Note that valid json is produced either way: ``json-tricks`` stores meta data as normal json, but other packages probably won't interpret it. 
Features --------------------------------------- Numpy arrays +++++++++++++++++++++++++++++++++++++++ The array is encoded in sort-of-readable and very flexible and portable format, like so: .. code-block:: python arr = arange(0, 10, 1, dtype=uint8).reshape((2, 5)) print(dumps({'mydata': arr})) this yields: .. code-block:: javascript { "mydata": { "dtype": "uint8", "shape": [2, 5], "Corder": true, "__ndarray__": [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] } } which will be converted back to a numpy array when using ``json_tricks.loads``. Note that the memory order (``Corder``) is only stored in v3.1 and later and for arrays with at least 2 dimensions. As you've seen, this uses the magic key ``__ndarray__``. Don't use ``__ndarray__`` as a dictionary key unless you're trying to make a numpy array (and know what you're doing). Numpy scalars are also serialized (v3.5+). They are represented by the closest python primitive type. A special representation was not feasible, because Python's json implementation serializes some numpy types as primitives, without consulting custom encoders. If you want to preverse the exact numpy type, use encode_scalars_inplace_. **Performance**: this method has slow write times similar to other human-readable formats, although read time is worse than csv. File size (with compression) is high on a relative scale, but it's only around 30% above binary. See this benchmark_ (it's called JSONGzip). A binary alternative `might be added`_, but is not yet available. This implementation is inspired by an answer by tlausch on stackoverflow_ that you could read for details. Class instances +++++++++++++++++++++++++++++++++++++++ ``json_tricks`` can serialize class instances. If the class behaves normally (not generated dynamic, no ``__new__`` or ``__metaclass__`` magic, etc) *and* all it's attributes are serializable, then this should work by default. .. 
code-block:: python # json_tricks/test_class.py class MyTestCls: def __init__(self, **kwargs): for k, v in kwargs.items(): setattr(self, k, v) cls_instance = MyTestCls(s='ub', dct={'7': 7}) json = dumps(cls_instance, indent=4) cls_instance_again = loads(json) You'll get your instance back. Here the json looks like this: .. code-block:: javascript { "__instance_type__": [ "json_tricks.test_class", "MyTestCls" ], "attributes": { "s": "ub", "dct": { "7": 7 } } } As you can see, this stores the module and class name. The class must be importable from the same module when decoding (and should not have changed). If it isn't, you have to manually provide a dictionary to ``cls_lookup_map`` when loading in which the class name can be looked up. Note that if the class is imported, then ``globals()`` is such a dictionary (so try ``loads(json, cls_lookup_map=glboals())``). Also note that if the class is defined in the 'top' script (that you're calling directly), then this isn't a module and the import part cannot be extracted. Only the class name will be stored; it can then only be deserialized in the same script, or if you provide ``cls_lookup_map``. Note that this also works with ``slots`` without having to do anything (thanks to ``koffie``), which encodes like this (custom indentation): .. code-block:: javascript { "__instance_type__": ["module.path", "ClassName"], "slots": {"slotattr": 37}, "attributes": {"dictattr": 42} } If the instance doesn't serialize automatically, or if you want custom behaviour, then you can implement ``__json__encode__(self)`` and ``__json_decode__(self, **attributes)`` methods, like so: .. code-block:: python class CustomEncodeCls: def __init__(self): self.relevant = 42 self.irrelevant = 37 def __json_encode__(self): # should return primitive, serializable types like dict, list, int, string, float... 
return {'relevant': self.relevant} def __json_decode__(self, **attrs): # should initialize all properties; note that __init__ is not called implicitly self.relevant = attrs['relevant'] self.irrelevant = 12 As you've seen, this uses the magic key ``__instance_type__``. Don't use ``__instance_type__`` as a dictionary key unless you know what you're doing. Date, time, datetime and timedelta +++++++++++++++++++++++++++++++++++++++ Date, time, datetime and timedelta objects are stored as dictionaries of "day", "hour", "millisecond" etc keys, for each nonzero property. Timezone name is also stored in case it is set. You'll need to have ``pytz`` installed to use timezone-aware date/times, it's not needed for naive date/times. .. code-block:: javascript { "__datetime__": null, "year": 1988, "month": 3, "day": 15, "hour": 8, "minute": 3, "second": 59, "microsecond": 7, "tzinfo": "Europe/Amsterdam" } This approach was chosen over timestamps for readability and consistency between date and time, and over a single string to prevent parsing problems and reduce dependencies. Note that if ``primitives=True``, date/times are encoded as ISO 8601, but they won't be restored automatically. Don't use ``__date__``, ``__time__``, ``__datetime__``, ``__timedelta__`` or ``__tzinfo__`` as dictionary keys unless you know what you're doing, as they have special meaning. Order +++++++++++++++++++++++++++++++++++++++ Given an ordered dictionary like this (see the tests for a longer one): .. code-block:: python ordered = OrderedDict(( ('elephant', None), ('chicken', None), ('tortoise', None), )) Converting to json and back will preserve the order: .. code-block:: python from json_tricks import dumps, loads json = dumps(ordered) ordered = loads(json, preserve_order=True) where ``preserve_order=True`` is added for emphasis; it can be left out since it's the default. As a note on performance_, both dicts and OrderedDicts have the same scaling for getting and setting items (``O(1)``). 
In Python versions before 3.5, OrderedDicts were implemented in Python rather than C, so were somewhat slower; since Python 3.5 both are implemented in C. In summary, you should have no scaling problems and probably no performance problems at all, especially for 3.5 and later. Python 3.6+ preserve order of dictionaries by default making this redundant, but this is an implementation detail that should not be relied on. Comments +++++++++++++++++++++++++++++++++++++++ This package uses ``#`` and ``//`` for comments, which seem to be the most common conventions, though only the latter is valid javascript. For example, you could call ``loads`` on the following string:: { # "comment 1 "hello": "Wor#d", "Bye": "\"M#rk\"", "yes\\\"": 5,# comment" 2 "quote": "\"th#t's\" what she said", // comment "3" "list": [1, 1, "#", "\"", "\\", 8], "dict": {"q": 7} #" comment 4 with quotes } // comment 5 And it would return the de-commented version: .. code-block:: javascript { "hello": "Wor#d", "Bye": "\"M#rk\"", "yes\\\"": 5, "quote": "\"th#t's\" what she said", "list": [1, 1, "#", "\"", "\\", 8], "dict": {"q": 7} } Since comments aren't stored in the Python representation of the data, loading and then saving a json file will remove the comments (it also likely changes the indentation). The implementation of comments is not particularly efficient, but it does handle all the special cases I could think of. For a few files you shouldn't notice any performance problems, but if you're reading hundreds of files, then they are presumably computer-generated, and you could consider turning comments off (``ignore_comments=False``). Other features +++++++++++++++++++++++++++++++++++++++ * Sets are serializable and can be loaded. By default the set json representation is sorted, to have a consistent representation. * Save and load complex numbers (version 3.2) with ``1+2j`` serializing as ``{'__complex__': [1, 2]}``. 
* Save and load ``Decimal`` and ``Fraction`` (including NaN, infinity, -0 for Decimal). * Save and load ``Enum`` (thanks to ``Jenselme``), either built-in in python3.4+, or with the enum34_ package in earlier versions. ``IntEnum`` needs encode_intenums_inplace_. * ``json_tricks`` allows for gzip compression using the ``compression=True`` argument (off by default). * ``json_tricks`` can check for duplicate keys in maps by setting ``allow_duplicates`` to False. These are `kind of allowed`_, but are handled inconsistently between json implementations. In Python, for ``dict`` and ``OrderedDict``, duplicate keys are silently overwritten. Usage & contributions --------------------------------------- Revised BSD License; at your own risk, you can mostly do whatever you want with this code, just don't use my name for promotion and do keep the license file. Contributions (ideas, issues, pull requests) are welcome! .. image:: https://travis-ci.org/mverleg/pyjson_tricks.svg?branch=master :target: https://travis-ci.org/mverleg/pyjson_tricks .. _HJSON: https://github.com/hjson/hjson-py .. _documentation: http://json-tricks.readthedocs.org/en/latest/#main-components .. _stackoverflow: http://stackoverflow.com/questions/3488934/simplejson-and-numpy-array .. _performance: http://stackoverflow.com/a/8177061/723090 .. _`kind of allowed`: http://stackoverflow.com/questions/21832701/does-json-syntax-allow-duplicate-keys-in-an-object .. _benchmark: https://github.com/mverleg/array_storage_benchmark .. _`might be added`: https://github.com/mverleg/pyjson_tricks/issues/9 .. _encode_scalars_inplace: https://json-tricks.readthedocs.io/en/latest/#json_tricks.np_utils.encode_scalars_inplace .. _encode_intenums_inplace: https://json-tricks.readthedocs.io/en/latest/#json_tricks.utils.encode_intenums_inplace .. 
_enum34: https://pypi.org/project/enum34/ Keywords: json,numpy,OrderedDict,comments,pandas,pytz,enum,encode,decode,serialize,deserialize Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable Classifier: Development Status :: 6 - Mature Classifier: Intended Audience :: Developers Classifier: Natural Language :: English Classifier: License :: OSI Approved :: BSD License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Topic :: Software Development :: Libraries :: Python Modules json_tricks-3.11.0/README.rst0000640000175000017500000003364413211005562015522 0ustar markmark00000000000000 JSON tricks (python) --------------------------------------- The `pyjson-tricks` package brings several pieces of functionality to python handling of json files: 1. **Store and load numpy arrays** in human-readable format. 2. **Store and load class instances** both generic and customized. 3. **Store and load date/times** as a dictionary (including timezone). 4. **Preserve map order** ``{}`` using ``OrderedDict``. 5. **Allow for comments** in json files by starting lines with ``#``. `6. Sets, complex numbers, Decimal, Fraction, enums, compression, duplicate keys, ... As well as compression and disallowing duplicate keys. * Code: https://github.com/mverleg/pyjson_tricks * Documentation: http://json-tricks.readthedocs.org/en/latest/ * PIP: https://pypi.python.org/pypi/json_tricks The 2.0 series added some of the above features and broke backward compatibility. 
The version 3.0 series is a more readable rewrite that also makes it easier to combine encoders, again not fully backward compatible. Several keys of the format ``__keyname__`` have special meanings, and more might be added in future releases. If you're considering JSON-but-with-comments as a config file format, have a look at HJSON_, it might be more appropriate. For other purposes, keep reading! Thanks for all the Github stars! Installation and use --------------------------------------- You can install using .. code-block:: bash pip install json-tricks # or e.g. 'json-tricks<3.0' for older versions Decoding of some data types needs the corresponding package to be installed, e.g. ``numpy`` for arrays, ``pandas`` for dataframes and ``pytz`` for timezone-aware datetimes. You can import the usual json functions dump(s) and load(s), as well as a separate comment removal function, as follows: .. code-block:: bash from json_tricks import dump, dumps, load, loads, strip_comments The exact signatures of these and other functions are in the documentation_. ``json-tricks`` supports Python 2.7, and Python 3.4 and later, and is automatically tested on 2.7, 3.4, 3.5 and 3.6. Pypy is supported without numpy and pandas. Preserve type vs use primitive ------------------------------- By default, types are encoded such that they can be restored to their original type when loaded with ``json-tricks``. Example encodings in this documentation refer to that format. You can also choose to store things as their closest primitive type (e.g. arrays and sets as lists, decimals as floats). This may be desirable if you don't care about the exact type, or you are loading the json in another language (which doesn't restore python types). It's also smaller. To forego meta data and store primitives instead, pass ``primitives`` to ``dump(s)``. This is available in version ``3.8`` and later. Example: .. 
code-block:: python data = [ arange(0, 10, 1, dtype=int).reshape((2, 5)), datetime(year=2017, month=1, day=19, hour=23, minute=00, second=00), 1 + 2j, Decimal(42), Fraction(1, 3), MyTestCls(s='ub', dct={'7': 7}), # see later set(range(7)), ] # Encode with metadata to preserve types when decoding print(dumps(data)) .. code-block:: javascript // (comments added and indenting changed) [ // numpy array { "__ndarray__": [ [0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], "dtype": "int64", "shape": [2, 5], "Corder": true }, // datetime (naive) { "__datetime__": null, "year": 2017, "month": 1, "day": 19, "hour": 23 }, // complex number { "__complex__": [1.0, 2.0] }, // decimal & fraction { "__decimal__": "42" }, { "__fraction__": true "numerator": 1, "denominator": 3, }, // class instance { "__instance_type__": [ "tests.test_class", "MyTestCls" ], "attributes": { "s": "ub", "dct": {"7": 7} } }, // set { "__set__": [0, 1, 2, 3, 4, 5, 6] } ] .. code-block:: python # Encode as primitive types; more simple but loses type information print(dumps(data, primitives=True)) .. code-block:: javascript // (comments added and indentation changed) [ // numpy array [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], // datetime (naive) "2017-01-19T23:00:00", // complex number [1.0, 2.0], // decimal & fraction 42.0, 0.3333333333333333, // class instance { "s": "ub", "dct": {"7": 7} }, // set [0, 1, 2, 3, 4, 5, 6] ] Note that valid json is produced either way: ``json-tricks`` stores meta data as normal json, but other packages probably won't interpret it. Features --------------------------------------- Numpy arrays +++++++++++++++++++++++++++++++++++++++ The array is encoded in sort-of-readable and very flexible and portable format, like so: .. code-block:: python arr = arange(0, 10, 1, dtype=uint8).reshape((2, 5)) print(dumps({'mydata': arr})) this yields: .. 
code-block:: javascript { "mydata": { "dtype": "uint8", "shape": [2, 5], "Corder": true, "__ndarray__": [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] } } which will be converted back to a numpy array when using ``json_tricks.loads``. Note that the memory order (``Corder``) is only stored in v3.1 and later and for arrays with at least 2 dimensions. As you've seen, this uses the magic key ``__ndarray__``. Don't use ``__ndarray__`` as a dictionary key unless you're trying to make a numpy array (and know what you're doing). Numpy scalars are also serialized (v3.5+). They are represented by the closest python primitive type. A special representation was not feasible, because Python's json implementation serializes some numpy types as primitives, without consulting custom encoders. If you want to preverse the exact numpy type, use encode_scalars_inplace_. **Performance**: this method has slow write times similar to other human-readable formats, although read time is worse than csv. File size (with compression) is high on a relative scale, but it's only around 30% above binary. See this benchmark_ (it's called JSONGzip). A binary alternative `might be added`_, but is not yet available. This implementation is inspired by an answer by tlausch on stackoverflow_ that you could read for details. Class instances +++++++++++++++++++++++++++++++++++++++ ``json_tricks`` can serialize class instances. If the class behaves normally (not generated dynamic, no ``__new__`` or ``__metaclass__`` magic, etc) *and* all it's attributes are serializable, then this should work by default. .. code-block:: python # json_tricks/test_class.py class MyTestCls: def __init__(self, **kwargs): for k, v in kwargs.items(): setattr(self, k, v) cls_instance = MyTestCls(s='ub', dct={'7': 7}) json = dumps(cls_instance, indent=4) cls_instance_again = loads(json) You'll get your instance back. Here the json looks like this: .. 
code-block:: javascript { "__instance_type__": [ "json_tricks.test_class", "MyTestCls" ], "attributes": { "s": "ub", "dct": { "7": 7 } } } As you can see, this stores the module and class name. The class must be importable from the same module when decoding (and should not have changed). If it isn't, you have to manually provide a dictionary to ``cls_lookup_map`` when loading in which the class name can be looked up. Note that if the class is imported, then ``globals()`` is such a dictionary (so try ``loads(json, cls_lookup_map=glboals())``). Also note that if the class is defined in the 'top' script (that you're calling directly), then this isn't a module and the import part cannot be extracted. Only the class name will be stored; it can then only be deserialized in the same script, or if you provide ``cls_lookup_map``. Note that this also works with ``slots`` without having to do anything (thanks to ``koffie``), which encodes like this (custom indentation): .. code-block:: javascript { "__instance_type__": ["module.path", "ClassName"], "slots": {"slotattr": 37}, "attributes": {"dictattr": 42} } If the instance doesn't serialize automatically, or if you want custom behaviour, then you can implement ``__json__encode__(self)`` and ``__json_decode__(self, **attributes)`` methods, like so: .. code-block:: python class CustomEncodeCls: def __init__(self): self.relevant = 42 self.irrelevant = 37 def __json_encode__(self): # should return primitive, serializable types like dict, list, int, string, float... return {'relevant': self.relevant} def __json_decode__(self, **attrs): # should initialize all properties; note that __init__ is not called implicitly self.relevant = attrs['relevant'] self.irrelevant = 12 As you've seen, this uses the magic key ``__instance_type__``. Don't use ``__instance_type__`` as a dictionary key unless you know what you're doing. 
Date, time, datetime and timedelta +++++++++++++++++++++++++++++++++++++++ Date, time, datetime and timedelta objects are stored as dictionaries of "day", "hour", "millisecond" etc keys, for each nonzero property. Timezone name is also stored in case it is set. You'll need to have ``pytz`` installed to use timezone-aware date/times, it's not needed for naive date/times. .. code-block:: javascript { "__datetime__": null, "year": 1988, "month": 3, "day": 15, "hour": 8, "minute": 3, "second": 59, "microsecond": 7, "tzinfo": "Europe/Amsterdam" } This approach was chosen over timestamps for readability and consistency between date and time, and over a single string to prevent parsing problems and reduce dependencies. Note that if ``primitives=True``, date/times are encoded as ISO 8601, but they won't be restored automatically. Don't use ``__date__``, ``__time__``, ``__datetime__``, ``__timedelta__`` or ``__tzinfo__`` as dictionary keys unless you know what you're doing, as they have special meaning. Order +++++++++++++++++++++++++++++++++++++++ Given an ordered dictionary like this (see the tests for a longer one): .. code-block:: python ordered = OrderedDict(( ('elephant', None), ('chicken', None), ('tortoise', None), )) Converting to json and back will preserve the order: .. code-block:: python from json_tricks import dumps, loads json = dumps(ordered) ordered = loads(json, preserve_order=True) where ``preserve_order=True`` is added for emphasis; it can be left out since it's the default. As a note on performance_, both dicts and OrderedDicts have the same scaling for getting and setting items (``O(1)``). In Python versions before 3.5, OrderedDicts were implemented in Python rather than C, so were somewhat slower; since Python 3.5 both are implemented in C. In summary, you should have no scaling problems and probably no performance problems at all, especially for 3.5 and later. 
Python 3.6+ preserve order of dictionaries by default making this redundant, but this is an implementation detail that should not be relied on. Comments +++++++++++++++++++++++++++++++++++++++ This package uses ``#`` and ``//`` for comments, which seem to be the most common conventions, though only the latter is valid javascript. For example, you could call ``loads`` on the following string:: { # "comment 1 "hello": "Wor#d", "Bye": "\"M#rk\"", "yes\\\"": 5,# comment" 2 "quote": "\"th#t's\" what she said", // comment "3" "list": [1, 1, "#", "\"", "\\", 8], "dict": {"q": 7} #" comment 4 with quotes } // comment 5 And it would return the de-commented version: .. code-block:: javascript { "hello": "Wor#d", "Bye": "\"M#rk\"", "yes\\\"": 5, "quote": "\"th#t's\" what she said", "list": [1, 1, "#", "\"", "\\", 8], "dict": {"q": 7} } Since comments aren't stored in the Python representation of the data, loading and then saving a json file will remove the comments (it also likely changes the indentation). The implementation of comments is not particularly efficient, but it does handle all the special cases I could think of. For a few files you shouldn't notice any performance problems, but if you're reading hundreds of files, then they are presumably computer-generated, and you could consider turning comments off (``ignore_comments=False``). Other features +++++++++++++++++++++++++++++++++++++++ * Sets are serializable and can be loaded. By default the set json representation is sorted, to have a consistent representation. * Save and load complex numbers (version 3.2) with ``1+2j`` serializing as ``{'__complex__': [1, 2]}``. * Save and load ``Decimal`` and ``Fraction`` (including NaN, infinity, -0 for Decimal). * Save and load ``Enum`` (thanks to ``Jenselme``), either built-in in python3.4+, or with the enum34_ package in earlier versions. ``IntEnum`` needs encode_intenums_inplace_. 
* ``json_tricks`` allows for gzip compression using the ``compression=True`` argument (off by default). * ``json_tricks`` can check for duplicate keys in maps by setting ``allow_duplicates`` to False. These are `kind of allowed`_, but are handled inconsistently between json implementations. In Python, for ``dict`` and ``OrderedDict``, duplicate keys are silently overwritten. Usage & contributions --------------------------------------- Revised BSD License; at your own risk, you can mostly do whatever you want with this code, just don't use my name for promotion and do keep the license file. Contributions (ideas, issues, pull requests) are welcome! .. image:: https://travis-ci.org/mverleg/pyjson_tricks.svg?branch=master :target: https://travis-ci.org/mverleg/pyjson_tricks .. _HJSON: https://github.com/hjson/hjson-py .. _documentation: http://json-tricks.readthedocs.org/en/latest/#main-components .. _stackoverflow: http://stackoverflow.com/questions/3488934/simplejson-and-numpy-array .. _performance: http://stackoverflow.com/a/8177061/723090 .. _`kind of allowed`: http://stackoverflow.com/questions/21832701/does-json-syntax-allow-duplicate-keys-in-an-object .. _benchmark: https://github.com/mverleg/array_storage_benchmark .. _`might be added`: https://github.com/mverleg/pyjson_tricks/issues/9 .. _encode_scalars_inplace: https://json-tricks.readthedocs.io/en/latest/#json_tricks.np_utils.encode_scalars_inplace .. _encode_intenums_inplace: https://json-tricks.readthedocs.io/en/latest/#json_tricks.utils.encode_intenums_inplace .. 
_enum34: https://pypi.org/project/enum34/ json_tricks-3.11.0/setup.cfg0000700000175000017500000000015513211041626015641 0ustar markmark00000000000000[bdist_wheel] universal = 1 [metadata] description-file = README.rst [egg_info] tag_build = tag_date = 0 json_tricks-3.11.0/json_tricks.egg-info/0002750000175000017500000000000013211041626020045 5ustar markmark00000000000000json_tricks-3.11.0/json_tricks.egg-info/SOURCES.txt0000640000175000017500000000057513211041626021737 0ustar markmark00000000000000README.rst setup.cfg setup.py json_tricks/__init__.py json_tricks/comment.py json_tricks/decoders.py json_tricks/encoders.py json_tricks/nonp.py json_tricks/np.py json_tricks/np_utils.py json_tricks/utils.py json_tricks.egg-info/PKG-INFO json_tricks.egg-info/SOURCES.txt json_tricks.egg-info/dependency_links.txt json_tricks.egg-info/not-zip-safe json_tricks.egg-info/top_level.txtjson_tricks-3.11.0/json_tricks.egg-info/PKG-INFO0000640000175000017500000004407013211041626021146 0ustar markmark00000000000000Metadata-Version: 1.1 Name: json-tricks Version: 3.11.0 Summary: Extra features for Python's JSON: comments, order, numpy, pandas, datetimes, and many more! Simple but customizable. Home-page: https://github.com/mverleg/pyjson_tricks Author: Mark V Author-email: markv.nl.dev@gmail.com License: Revised BSD License (LICENSE.txt) Description: JSON tricks (python) --------------------------------------- The `pyjson-tricks` package brings several pieces of functionality to python handling of json files: 1. **Store and load numpy arrays** in human-readable format. 2. **Store and load class instances** both generic and customized. 3. **Store and load date/times** as a dictionary (including timezone). 4. **Preserve map order** ``{}`` using ``OrderedDict``. 5. **Allow for comments** in json files by starting lines with ``#``. `6. Sets, complex numbers, Decimal, Fraction, enums, compression, duplicate keys, ... As well as compression and disallowing duplicate keys. 
* Code: https://github.com/mverleg/pyjson_tricks * Documentation: http://json-tricks.readthedocs.org/en/latest/ * PIP: https://pypi.python.org/pypi/json_tricks The 2.0 series added some of the above features and broke backward compatibility. The version 3.0 series is a more readable rewrite that also makes it easier to combine encoders, again not fully backward compatible. Several keys of the format ``__keyname__`` have special meanings, and more might be added in future releases. If you're considering JSON-but-with-comments as a config file format, have a look at HJSON_, it might be more appropriate. For other purposes, keep reading! Thanks for all the Github stars! Installation and use --------------------------------------- You can install using .. code-block:: bash pip install json-tricks # or e.g. 'json-tricks<3.0' for older versions Decoding of some data types needs the corresponding package to be installed, e.g. ``numpy`` for arrays, ``pandas`` for dataframes and ``pytz`` for timezone-aware datetimes. You can import the usual json functions dump(s) and load(s), as well as a separate comment removal function, as follows: .. code-block:: bash from json_tricks import dump, dumps, load, loads, strip_comments The exact signatures of these and other functions are in the documentation_. ``json-tricks`` supports Python 2.7, and Python 3.4 and later, and is automatically tested on 2.7, 3.4, 3.5 and 3.6. Pypy is supported without numpy and pandas. Preserve type vs use primitive ------------------------------- By default, types are encoded such that they can be restored to their original type when loaded with ``json-tricks``. Example encodings in this documentation refer to that format. You can also choose to store things as their closest primitive type (e.g. arrays and sets as lists, decimals as floats). This may be desirable if you don't care about the exact type, or you are loading the json in another language (which doesn't restore python types). It's also smaller. 
To forego meta data and store primitives instead, pass ``primitives`` to ``dump(s)``. This is available in version ``3.8`` and later. Example: .. code-block:: python data = [ arange(0, 10, 1, dtype=int).reshape((2, 5)), datetime(year=2017, month=1, day=19, hour=23, minute=00, second=00), 1 + 2j, Decimal(42), Fraction(1, 3), MyTestCls(s='ub', dct={'7': 7}), # see later set(range(7)), ] # Encode with metadata to preserve types when decoding print(dumps(data)) .. code-block:: javascript // (comments added and indenting changed) [ // numpy array { "__ndarray__": [ [0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], "dtype": "int64", "shape": [2, 5], "Corder": true }, // datetime (naive) { "__datetime__": null, "year": 2017, "month": 1, "day": 19, "hour": 23 }, // complex number { "__complex__": [1.0, 2.0] }, // decimal & fraction { "__decimal__": "42" }, { "__fraction__": true "numerator": 1, "denominator": 3, }, // class instance { "__instance_type__": [ "tests.test_class", "MyTestCls" ], "attributes": { "s": "ub", "dct": {"7": 7} } }, // set { "__set__": [0, 1, 2, 3, 4, 5, 6] } ] .. code-block:: python # Encode as primitive types; more simple but loses type information print(dumps(data, primitives=True)) .. code-block:: javascript // (comments added and indentation changed) [ // numpy array [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], // datetime (naive) "2017-01-19T23:00:00", // complex number [1.0, 2.0], // decimal & fraction 42.0, 0.3333333333333333, // class instance { "s": "ub", "dct": {"7": 7} }, // set [0, 1, 2, 3, 4, 5, 6] ] Note that valid json is produced either way: ``json-tricks`` stores meta data as normal json, but other packages probably won't interpret it. Features --------------------------------------- Numpy arrays +++++++++++++++++++++++++++++++++++++++ The array is encoded in sort-of-readable and very flexible and portable format, like so: .. code-block:: python arr = arange(0, 10, 1, dtype=uint8).reshape((2, 5)) print(dumps({'mydata': arr})) this yields: .. 
code-block:: javascript { "mydata": { "dtype": "uint8", "shape": [2, 5], "Corder": true, "__ndarray__": [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]] } } which will be converted back to a numpy array when using ``json_tricks.loads``. Note that the memory order (``Corder``) is only stored in v3.1 and later and for arrays with at least 2 dimensions. As you've seen, this uses the magic key ``__ndarray__``. Don't use ``__ndarray__`` as a dictionary key unless you're trying to make a numpy array (and know what you're doing). Numpy scalars are also serialized (v3.5+). They are represented by the closest python primitive type. A special representation was not feasible, because Python's json implementation serializes some numpy types as primitives, without consulting custom encoders. If you want to preverse the exact numpy type, use encode_scalars_inplace_. **Performance**: this method has slow write times similar to other human-readable formats, although read time is worse than csv. File size (with compression) is high on a relative scale, but it's only around 30% above binary. See this benchmark_ (it's called JSONGzip). A binary alternative `might be added`_, but is not yet available. This implementation is inspired by an answer by tlausch on stackoverflow_ that you could read for details. Class instances +++++++++++++++++++++++++++++++++++++++ ``json_tricks`` can serialize class instances. If the class behaves normally (not generated dynamic, no ``__new__`` or ``__metaclass__`` magic, etc) *and* all it's attributes are serializable, then this should work by default. .. code-block:: python # json_tricks/test_class.py class MyTestCls: def __init__(self, **kwargs): for k, v in kwargs.items(): setattr(self, k, v) cls_instance = MyTestCls(s='ub', dct={'7': 7}) json = dumps(cls_instance, indent=4) cls_instance_again = loads(json) You'll get your instance back. Here the json looks like this: .. 
code-block:: javascript { "__instance_type__": [ "json_tricks.test_class", "MyTestCls" ], "attributes": { "s": "ub", "dct": { "7": 7 } } } As you can see, this stores the module and class name. The class must be importable from the same module when decoding (and should not have changed). If it isn't, you have to manually provide a dictionary to ``cls_lookup_map`` when loading in which the class name can be looked up. Note that if the class is imported, then ``globals()`` is such a dictionary (so try ``loads(json, cls_lookup_map=glboals())``). Also note that if the class is defined in the 'top' script (that you're calling directly), then this isn't a module and the import part cannot be extracted. Only the class name will be stored; it can then only be deserialized in the same script, or if you provide ``cls_lookup_map``. Note that this also works with ``slots`` without having to do anything (thanks to ``koffie``), which encodes like this (custom indentation): .. code-block:: javascript { "__instance_type__": ["module.path", "ClassName"], "slots": {"slotattr": 37}, "attributes": {"dictattr": 42} } If the instance doesn't serialize automatically, or if you want custom behaviour, then you can implement ``__json__encode__(self)`` and ``__json_decode__(self, **attributes)`` methods, like so: .. code-block:: python class CustomEncodeCls: def __init__(self): self.relevant = 42 self.irrelevant = 37 def __json_encode__(self): # should return primitive, serializable types like dict, list, int, string, float... return {'relevant': self.relevant} def __json_decode__(self, **attrs): # should initialize all properties; note that __init__ is not called implicitly self.relevant = attrs['relevant'] self.irrelevant = 12 As you've seen, this uses the magic key ``__instance_type__``. Don't use ``__instance_type__`` as a dictionary key unless you know what you're doing. 
Date, time, datetime and timedelta +++++++++++++++++++++++++++++++++++++++ Date, time, datetime and timedelta objects are stored as dictionaries of "day", "hour", "millisecond" etc keys, for each nonzero property. Timezone name is also stored in case it is set. You'll need to have ``pytz`` installed to use timezone-aware date/times, it's not needed for naive date/times. .. code-block:: javascript { "__datetime__": null, "year": 1988, "month": 3, "day": 15, "hour": 8, "minute": 3, "second": 59, "microsecond": 7, "tzinfo": "Europe/Amsterdam" } This approach was chosen over timestamps for readability and consistency between date and time, and over a single string to prevent parsing problems and reduce dependencies. Note that if ``primitives=True``, date/times are encoded as ISO 8601, but they won't be restored automatically. Don't use ``__date__``, ``__time__``, ``__datetime__``, ``__timedelta__`` or ``__tzinfo__`` as dictionary keys unless you know what you're doing, as they have special meaning. Order +++++++++++++++++++++++++++++++++++++++ Given an ordered dictionary like this (see the tests for a longer one): .. code-block:: python ordered = OrderedDict(( ('elephant', None), ('chicken', None), ('tortoise', None), )) Converting to json and back will preserve the order: .. code-block:: python from json_tricks import dumps, loads json = dumps(ordered) ordered = loads(json, preserve_order=True) where ``preserve_order=True`` is added for emphasis; it can be left out since it's the default. As a note on performance_, both dicts and OrderedDicts have the same scaling for getting and setting items (``O(1)``). In Python versions before 3.5, OrderedDicts were implemented in Python rather than C, so were somewhat slower; since Python 3.5 both are implemented in C. In summary, you should have no scaling problems and probably no performance problems at all, especially for 3.5 and later. 
Python 3.6+ preserves the order of dictionaries by default, making this redundant,
* ``json_tricks`` allows for gzip compression using the ``compression=True`` argument (off by default). * ``json_tricks`` can check for duplicate keys in maps by setting ``allow_duplicates`` to False. These are `kind of allowed`_, but are handled inconsistently between json implementations. In Python, for ``dict`` and ``OrderedDict``, duplicate keys are silently overwritten. Usage & contributions --------------------------------------- Revised BSD License; at your own risk, you can mostly do whatever you want with this code, just don't use my name for promotion and do keep the license file. Contributions (ideas, issues, pull requests) are welcome! .. image:: https://travis-ci.org/mverleg/pyjson_tricks.svg?branch=master :target: https://travis-ci.org/mverleg/pyjson_tricks .. _HJSON: https://github.com/hjson/hjson-py .. _documentation: http://json-tricks.readthedocs.org/en/latest/#main-components .. _stackoverflow: http://stackoverflow.com/questions/3488934/simplejson-and-numpy-array .. _performance: http://stackoverflow.com/a/8177061/723090 .. _`kind of allowed`: http://stackoverflow.com/questions/21832701/does-json-syntax-allow-duplicate-keys-in-an-object .. _benchmark: https://github.com/mverleg/array_storage_benchmark .. _`might be added`: https://github.com/mverleg/pyjson_tricks/issues/9 .. _encode_scalars_inplace: https://json-tricks.readthedocs.io/en/latest/#json_tricks.np_utils.encode_scalars_inplace .. _encode_intenums_inplace: https://json-tricks.readthedocs.io/en/latest/#json_tricks.utils.encode_intenums_inplace .. 
_enum34: https://pypi.org/project/enum34/ Keywords: json,numpy,OrderedDict,comments,pandas,pytz,enum,encode,decode,serialize,deserialize Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable Classifier: Development Status :: 6 - Mature Classifier: Intended Audience :: Developers Classifier: Natural Language :: English Classifier: License :: OSI Approved :: BSD License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Topic :: Software Development :: Libraries :: Python Modules json_tricks-3.11.0/json_tricks.egg-info/dependency_links.txt0000640000175000017500000000000113211041626024112 0ustar markmark00000000000000 json_tricks-3.11.0/json_tricks.egg-info/not-zip-safe0000640000175000017500000000000113210752321022272 0ustar markmark00000000000000 json_tricks-3.11.0/json_tricks.egg-info/top_level.txt0000640000175000017500000000001413211041626022571 0ustar markmark00000000000000json_tricks json_tricks-3.11.0/setup.py0000640000175000017500000000430113211035540015530 0ustar markmark00000000000000# -*- coding: utf-8 -*- from sys import version_info from logging import warn from setuptools import setup with open('README.rst', 'r') as fh: readme = fh.read() requires = [] if version_info < (2, 7, 0): requires.append('ordereddict') if (version_info[0] == 2 and version_info[1] < 7) or \ (version_info[0] == 3 and version_info[1] < 4) or \ version_info[0] not in (2, 3): raise warn('`json_tricks` does not support Python version {}.{}' .format(version_info[0], version_info[1])) 
# Distribution metadata for json_tricks. The long description (`readme`)
# and the conditional dependency list (`requires`) are prepared earlier
# in this script, before this call.
_TROVE_CLASSIFIERS = [
	'Development Status :: 5 - Production/Stable',
	'Development Status :: 6 - Mature',
	'Intended Audience :: Developers',
	'Natural Language :: English',
	'License :: OSI Approved :: BSD License',
	'Operating System :: OS Independent',
	'Programming Language :: Python',
	'Programming Language :: Python :: 2',
	'Programming Language :: Python :: 2.7',
	'Programming Language :: Python :: 3',
	'Programming Language :: Python :: 3.4',
	'Programming Language :: Python :: 3.5',
	'Programming Language :: Python :: 3.6',
	'Programming Language :: Python :: Implementation :: CPython',
	'Programming Language :: Python :: Implementation :: PyPy',
	'Topic :: Software Development :: Libraries :: Python Modules',
]

setup(
	name='json_tricks',
	version='3.11.0',
	description='Extra features for Python\'s JSON: comments, order, numpy, '
		'pandas, datetimes, and many more! Simple but customizable.',
	long_description=readme,
	url='https://github.com/mverleg/pyjson_tricks',
	author='Mark V',
	maintainer='Mark V',
	author_email='markv.nl.dev@gmail.com',
	license='Revised BSD License (LICENSE.txt)',
	keywords=[
		'json', 'numpy', 'OrderedDict', 'comments', 'pandas', 'pytz',
		'enum', 'encode', 'decode', 'serialize', 'deserialize',
	],
	packages=['json_tricks'],
	include_package_data=True,
	zip_safe=False,
	classifiers=_TROVE_CLASSIFIERS,
	# Optional runtime extras (deliberately NOT in install_requires):
	#   numpy  - array (de)serialization     pytz   - timezone-aware datetimes
	#   pandas - dataframe support           enum34 - enums on python 2.x
	# Dev tooling: pytest, pytest-cov, tox, detox, sphinx.
	install_requires=requires,
)