import argparse import json import sys from difflib import unified_diff from typing import List from typing import Mapping from typing import Optional from typing import Sequence from typing import Tuple from typing import Union INFINITY = float('inf') def _make_iterencode( markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, ## HACK: hand-optimized bytecode; turn globals into locals ValueError=ValueError, dict=dict, float=float, id=id, int=int, isinstance=isinstance, list=list, str=str, tuple=tuple, _intstr=int.__str__, ): if _indent is not None and not isinstance(_indent, str): _indent = ' ' * _indent def _iterencode_list(lst, _current_indent_level): if not lst: yield '[]' return if markers is not None: markerid = id(lst) if markerid in markers: raise ValueError('Circular reference detected') markers[markerid] = lst buf = '[' if _indent is not None: _current_indent_level += 1 newline_indent = '\n' + _indent * _current_indent_level separator = _item_separator.rstrip() + newline_indent buf += newline_indent else: newline_indent = None separator = _item_separator first = True for value in lst: if first: first = False else: buf = separator if isinstance(value, str): yield buf + _encoder(value) elif value is None: yield buf + 'null' elif value is True: yield buf + 'true' elif value is False: yield buf + 'false' elif isinstance(value, int): # Subclasses of int/float may override __str__, but we still # want to encode them as integers/floats in JSON. One example # within the standard library is IntEnum. yield buf + _intstr(value) elif isinstance(value, float): # see comment above for int yield buf + _floatstr(value) else: yield buf if isinstance(value, (list, tuple)): chunks = _iterencode_list(value, _current_indent_level) elif isinstance(value, dict): chunks = _iterencode_dict(value, _current_indent_level) else: chunks = _iterencode(value, _current_indent_level) yield from chunks if newline_indent is not None: _current_indent_level -= 1 yield '\n' + _indent * _current_indent_level yield ']' if markers is not None: del markers[markerid] def _iterencode_dict(dct, _current_indent_level): if not dct: yield '{}' return _indent = None # No newlines or indentation for the k-v pairs. if markers is not None: markerid = id(dct) if markerid in markers: raise ValueError('Circular reference detected') markers[markerid] = dct yield '{' if _indent is not None: _current_indent_level += 1 newline_indent = '\n' + _indent * _current_indent_level item_separator = _item_separator + newline_indent yield newline_indent else: newline_indent = None item_separator = _item_separator first = True if _sort_keys: items = sorted(dct.items(), key=lambda kv: kv[0]) else: items = dct.items() for key, value in items: if isinstance(key, str): pass # JavaScript is weakly typed for these, so it makes sense to # also allow them. Many encoders seem to do something like this. elif isinstance(key, float): # see comment for int/float in _make_iterencode key = _floatstr(key) elif key is True: key = 'true' elif key is False: key = 'false' elif key is None: key = 'null' elif isinstance(key, int): # see comment for int/float in _make_iterencode key = _intstr(key) elif _skipkeys: continue else: raise TypeError( f'keys must be str, int, float, bool or None, ' f'not {key.__class__.__name__}', ) if first: first = False else: yield item_separator yield _encoder(key) yield _key_separator if isinstance(value, str): yield _encoder(value) elif value is None: yield 'null' elif value is True: yield 'true' elif value is False: yield 'false' elif isinstance(value, int): # see comment for int/float in _make_iterencode yield _intstr(value) elif isinstance(value, float): # see comment for int/float in _make_iterencode yield _floatstr(value) else: if isinstance(value, (list, tuple)): chunks = _iterencode_list(value, _current_indent_level) elif isinstance(value, dict): chunks = _iterencode_dict(value, _current_indent_level) else: chunks = _iterencode(value, _current_indent_level) yield from chunks if newline_indent is not None: _current_indent_level -= 1 yield '\n' + _indent * _current_indent_level yield '}' if markers is not None: del markers[markerid] def _iterencode(o, _current_indent_level): if isinstance(o, str): yield _encoder(o) elif o is None: yield 'null' elif o is True: yield 'true' elif o is False: yield 'false' elif isinstance(o, int): # see comment for int/float in _make_iterencode yield _intstr(o) elif isinstance(o, float): # see comment for int/float in _make_iterencode yield _floatstr(o) elif isinstance(o, (list, tuple)): yield from _iterencode_list(o, _current_indent_level) elif isinstance(o, dict): yield from _iterencode_dict(o, _current_indent_level) else: if markers is not None: markerid = id(o) if markerid in markers: raise ValueError('Circular reference detected') markers[markerid] = o o = _default(o) yield from _iterencode(o, _current_indent_level) if markers is not None: del markers[markerid] return _iterencode class CustomJSONEncoder(json.JSONEncoder): def iterencode(self, o, _one_shot=False): """Encode the given object and yield each string representation as available. For example:: for chunk in JSONEncoder().iterencode(bigobject): mysocket.write(chunk) """ if self.check_circular: markers = {} else: markers = None def floatstr( o, allow_nan=self.allow_nan, _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY, ): # Check for specials. Note that this type of test is processor # and/or platform-specific, so do tests which don't depend on the # internals. if o != o: text = 'NaN' elif o == _inf: text = 'Infinity' elif o == _neginf: text = '-Infinity' else: return _repr(o) if not allow_nan: raise ValueError( 'Out of range float values are not JSON compliant: ' + repr(o), ) return text _encoder = json.encoder.py_encode_basestring _iterencode = _make_iterencode( markers, self.default, _encoder, self.indent, floatstr, self.key_separator, self.item_separator, self.sort_keys, self.skipkeys, _one_shot, ) return _iterencode(o, 0) def _get_pretty_format( contents: str, indent: str, ensure_ascii: bool = True, sort_keys: bool = True, top_keys: Sequence[str] = (), sort_by_first_key: bool = False, ) -> str: def pairs_first(pairs: Sequence[Tuple[str, str]]) -> Mapping[str, str]: before = [pair for pair in pairs if pair[0] in top_keys] before = sorted(before, key=lambda x: top_keys.index(x[0])) after = [pair for pair in pairs if pair[0] not in top_keys] if sort_keys: after.sort() return dict(before + after) json_contents = json.loads(contents, object_pairs_hook=pairs_first) if sort_by_first_key: json_contents.sort(key=lambda row: list(row.values())[0]) json_pretty = json.dumps( json_contents, indent=indent, ensure_ascii=ensure_ascii, cls=CustomJSONEncoder, separators=(', ', ': '), ) return f'{json_pretty}\n' def _autofix(filename: str, new_contents: str) -> None: print(f'Fixing file {filename}') with open(filename, 'w', encoding='UTF-8') as f: f.write(new_contents) def parse_num_to_int(s: str) -> Union[int, str]: """Convert string numbers to int, leaving strings as is.""" try: return int(s) except ValueError: return s def parse_topkeys(s: str) -> List[str]: return s.split(',') def get_diff(source: str, target: str, file: str) -> str: source_lines = source.splitlines(True) target_lines = target.splitlines(True) diff = unified_diff(source_lines, target_lines, fromfile=file, tofile=file) return ''.join(diff) def main(argv: Optional[Sequence[str]] = None) -> int: parser = argparse.ArgumentParser() parser.add_argument( '--autofix', action='store_true', dest='autofix', help='Automatically fixes encountered not-pretty-formatted files', ) parser.add_argument( '--indent', type=parse_num_to_int, default='2', help=( 'The number of indent spaces or a string to be used as delimiter' ' for indentation level e.g. 4 or "\t" (Default: 2)' ), ) parser.add_argument( '--no-ensure-ascii', action='store_true', dest='no_ensure_ascii', default=False, help=( 'Do NOT convert non-ASCII characters to Unicode escape sequences ' '(\\uXXXX)' ), ) parser.add_argument( '--no-sort-keys', action='store_true', dest='no_sort_keys', default=False, help='Keep JSON nodes in the same order', ) parser.add_argument( '--top-keys', type=parse_topkeys, dest='top_keys', default=[], help='Ordered list of keys to keep at the top of JSON hashes', ) parser.add_argument( '--sort-by-first-key', dest='sort_by_first_key', action='store_true', default=False, help='Sort the json by a specific key', ) parser.add_argument('filenames', nargs='*', help='Filenames to fix') args = parser.parse_args(argv) status = 0 for json_file in args.filenames: with open(json_file, encoding='UTF-8') as f: contents = f.read() try: pretty_contents = _get_pretty_format( contents, args.indent, ensure_ascii=not args.no_ensure_ascii, sort_keys=not args.no_sort_keys, top_keys=args.top_keys, sort_by_first_key=args.sort_by_first_key, ) except ValueError: print( f'Input File {json_file} is not a valid JSON, consider using ' f'check-json', ) return 1 if contents != pretty_contents: if args.autofix: _autofix(json_file, pretty_contents) else: diff_output = get_diff(contents, pretty_contents, json_file) sys.stdout.buffer.write(diff_output.encode()) status = 1 return status if __name__ == '__main__': exit(main())