mirror of
https://github.com/pre-commit/pre-commit-hooks.git
synced 2026-03-30 02:16:52 +00:00
Added a custom JSON encoder based on json.JSONEncoder. Changes from the default behaviour: - Using `_make_iterencode` as the only `_iterencode`. - Change `_make_iterencode`, setting `_indent` in `_iterencode_dict` to `None`. - Use `py_encode_basestring` as the only `encoder`. Also, updated the json writer to sort keys according to requested top_keys. Signed-off-by: Md Safiyat Reza <safiyat@voereir.com>
374 lines
12 KiB
Python
374 lines
12 KiB
Python
import argparse
|
|
import json
|
|
import sys
|
|
from difflib import unified_diff
|
|
from typing import List
|
|
from typing import Mapping
|
|
from typing import Optional
|
|
from typing import Sequence
|
|
from typing import Tuple
|
|
from typing import Union
|
|
|
|
# Positive infinity. ``CustomJSONEncoder.iterencode`` binds this (and its
# negation) as defaults of its ``floatstr`` helper to detect non-finite floats.
INFINITY = float('inf')
|
|
|
|
|
|
def _make_iterencode(
        markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys,
        _one_shot,
        # HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        _intstr=int.__str__,
):
    """Build the recursive generator that serialises an object to JSON chunks.

    Lists/tuples honour ``_indent`` (one element per line), whereas dicts are
    always written on a single line: no newline or indentation is ever
    emitted between their key/value pairs.

    Args:
        markers: dict used to detect circular references (keyed by ``id``),
            or ``None`` to disable the check.
        _default: callable invoked for objects that are not str, None, bool,
            int, float, list, tuple or dict; its return value is encoded.
        _encoder: callable turning a ``str`` into a JSON string literal; used
            for both string values and dict keys.
        _indent: ``None`` for no list indentation, a string used once per
            level, or an int converted to that many spaces.
        _floatstr: callable turning a float into its JSON text.
        _key_separator: text emitted between a dict key and its value.
        _item_separator: text emitted between items. For indented lists its
            trailing whitespace is stripped before the newline is appended.
        _sort_keys: when true, dict items are emitted sorted by key.
        _skipkeys: when true, dict keys of unsupported types are skipped
            instead of raising ``TypeError``.
        _one_shot: accepted for signature compatibility; not used here.

    Returns:
        A generator function ``_iterencode(o, _current_indent_level)``
        yielding string chunks whose concatenation is the JSON document.

    Raises (from the returned generator):
        ValueError: a circular reference was detected via ``markers``.
        TypeError: a dict key has an unsupported type and ``_skipkeys`` is
            false.
    """
    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _mark(obj):
        """Record ``obj`` as being encoded; return its marker id (or None)."""
        if markers is None:
            return None
        markerid = id(obj)
        if markerid in markers:
            raise ValueError("Circular reference detected")
        markers[markerid] = obj
        return markerid

    def _unmark(markerid):
        """Forget a marker previously returned by ``_mark``."""
        if markers is not None:
            del markers[markerid]

    def _encode_scalar(value):
        """Return the JSON text of a scalar, or None if it is not a scalar."""
        if isinstance(value, str):
            return _encoder(value)
        if value is None:
            return 'null'
        # The bool singletons must be tested before int: bool subclasses int.
        if value is True:
            return 'true'
        if value is False:
            return 'false'
        if isinstance(value, int):
            # Subclasses of int/float may override __str__, but we still
            # want to encode them as integers/floats in JSON. One example
            # within the standard library is IntEnum.
            return _intstr(value)
        if isinstance(value, float):
            return _floatstr(value)
        return None

    def _encode_compound(value, level):
        """Return the chunk generator matching a non-scalar value."""
        if isinstance(value, (list, tuple)):
            return _iterencode_list(value, level)
        if isinstance(value, dict):
            return _iterencode_dict(value, level)
        return _iterencode(value, level)

    def _iterencode_list(lst, _current_indent_level):
        if not lst:
            yield '[]'
            return
        markerid = _mark(lst)
        # ``buf`` is the prefix glued to the next element: the opening
        # bracket for the first one, the separator for all the others.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            separator = _item_separator.rstrip() + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            text = _encode_scalar(value)
            if text is not None:
                yield buf + text
            else:
                yield buf
                yield from _encode_compound(value, _current_indent_level)
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield ']'
        _unmark(markerid)

    def _iterencode_dict(dct, _current_indent_level):
        # Dicts are deliberately kept on one line, so the indent string is
        # never consulted here; the indent level is only forwarded to nested
        # values.
        if not dct:
            yield '{}'
            return
        markerid = _mark(dct)
        yield '{'
        if _sort_keys:
            items = sorted(dct.items(), key=lambda kv: kv[0])
        else:
            items = dct.items()
        first = True
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them. Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            if first:
                first = False
            else:
                yield _item_separator
            yield _encoder(key)
            yield _key_separator
            text = _encode_scalar(value)
            if text is not None:
                yield text
            else:
                yield from _encode_compound(value, _current_indent_level)
        yield '}'
        _unmark(markerid)

    def _iterencode(o, _current_indent_level):
        text = _encode_scalar(o)
        if text is not None:
            yield text
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            # Unknown type: let the caller-supplied hook convert it, guarding
            # against a hook that keeps returning the same object.
            markerid = _mark(o)
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
            _unmark(markerid)

    return _iterencode
|
|
|
|
|
|
class CustomJSONEncoder(json.JSONEncoder):
    """``json.JSONEncoder`` variant driven by this module's ``_make_iterencode``.

    Encoding always goes through the module-level ``_make_iterencode``
    generator instead of the standard library's encoder selection.
    """

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in CustomJSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        Raises:
            ValueError: on a circular reference (when ``check_circular`` is
                set) or on NaN/Infinity floats when ``allow_nan`` is false.
        """
        markers = {} if self.check_circular else None

        def floatstr(o, allow_nan=self.allow_nan,
                     _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials. Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.
            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # Honour ``ensure_ascii``: previously the non-escaping encoder was
        # used unconditionally, so non-ASCII text nested in a list or dict
        # was never escaped even when ``ensure_ascii`` was requested.
        if self.ensure_ascii:
            _encoder = json.encoder.py_encode_basestring_ascii
        else:
            _encoder = json.encoder.py_encode_basestring

        _iterencode = _make_iterencode(
            markers, self.default, _encoder, self.indent, floatstr,
            self.key_separator, self.item_separator, self.sort_keys,
            self.skipkeys, _one_shot)
        return _iterencode(o, 0)
|
|
|
|
|
|
def _get_pretty_format(
        contents: str,
        indent: str,
        ensure_ascii: bool = True,
        sort_keys: bool = True,
        top_keys: Sequence[str] = (),
) -> str:
    """Return ``contents`` re-serialised in the canonical pretty format.

    Args:
        contents: JSON document text.
        indent: indentation passed through to ``json.dumps``.
        ensure_ascii: passed through to ``json.dumps``.
        sort_keys: sort the keys of every object that are not in
            ``top_keys``.
        top_keys: keys placed first in every object, in the given order.

    Returns:
        The formatted document followed by a single trailing newline.

    Raises:
        ValueError: ``contents`` is not valid JSON (raised by ``json.loads``).
    """
    def pairs_first(pairs: Sequence[Tuple[str, str]]) -> Mapping[str, str]:
        before = [pair for pair in pairs if pair[0] in top_keys]
        before = sorted(before, key=lambda x: top_keys.index(x[0]))
        after = [pair for pair in pairs if pair[0] not in top_keys]
        if sort_keys:
            # Sort on the key only. Comparing whole (key, value) tuples
            # raises TypeError when a duplicated key carries unorderable
            # values (dicts, None, mixed types). The sort is stable, so for
            # duplicated keys the last occurrence still wins in dict().
            after.sort(key=lambda pair: pair[0])
        return dict(before + after)

    json_pretty = json.dumps(
        json.loads(contents, object_pairs_hook=pairs_first),
        indent=indent,
        ensure_ascii=ensure_ascii,
        cls=CustomJSONEncoder,
        separators=(', ', ': '),
    )
    return f'{json_pretty}\n'
|
|
|
|
|
|
def _autofix(filename: str, new_contents: str) -> None:
    """Report the fix on stdout, then overwrite ``filename`` (UTF-8)."""
    print(f'Fixing file {filename}')
    with open(filename, mode='w', encoding='UTF-8') as out:
        out.write(new_contents)
|
|
|
|
|
|
def parse_num_to_int(s: str) -> Union[int, str]:
    """Return ``int(s)`` when ``s`` parses as an integer, else ``s`` itself."""
    try:
        number = int(s)
    except ValueError:
        # Not numeric (e.g. a literal indent string): hand it back untouched.
        return s
    return number
|
|
|
|
|
|
def parse_topkeys(s: str) -> List[str]:
    """Split a comma-separated key list into its individual keys."""
    delimiter = ','
    return s.split(delimiter)
|
|
|
|
|
|
def get_diff(source: str, target: str, file: str) -> str:
    """Return the unified diff from ``source`` to ``target`` as one string.

    ``file`` labels both sides of the diff header. The result is empty when
    the two texts are identical.
    """
    before, after = (
        text.splitlines(keepends=True) for text in (source, target)
    )
    return ''.join(unified_diff(before, after, fromfile=file, tofile=file))
|
|
|
|
|
|
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Check (and optionally rewrite) the formatting of JSON files.

    Args:
        argv: command-line arguments; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        ``0`` when every file is already formatted, ``1`` when at least one
        file was invalid JSON, differed from the pretty format, or was fixed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--autofix',
        action='store_true',
        dest='autofix',
        help='Automatically fixes encountered not-pretty-formatted files',
    )
    parser.add_argument(
        '--indent',
        type=parse_num_to_int,
        default='2',
        help=(
            'The number of indent spaces or a string to be used as delimiter'
            ' for indentation level e.g. 4 or "\t" (Default: 2)'
        ),
    )
    parser.add_argument(
        '--no-ensure-ascii',
        action='store_true',
        dest='no_ensure_ascii',
        default=False,
        help=(
            'Do NOT convert non-ASCII characters to Unicode escape sequences '
            '(\\uXXXX)'
        ),
    )
    parser.add_argument(
        '--no-sort-keys',
        action='store_true',
        dest='no_sort_keys',
        default=False,
        help='Keep JSON nodes in the same order',
    )
    parser.add_argument(
        '--top-keys',
        type=parse_topkeys,
        dest='top_keys',
        default=[],
        help='Ordered list of keys to keep at the top of JSON hashes',
    )
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

    status = 0

    for json_file in args.filenames:
        with open(json_file, encoding='UTF-8') as f:
            contents = f.read()

        try:
            pretty_contents = _get_pretty_format(
                contents, args.indent, ensure_ascii=not args.no_ensure_ascii,
                sort_keys=not args.no_sort_keys, top_keys=args.top_keys,
            )
        except ValueError:
            print(
                f'Input File {json_file} is not a valid JSON, consider using '
                f'check-json',
            )
            # Record the failure but keep going: one broken file must not
            # leave the remaining files unchecked or unfixed.
            status = 1
            continue

        if contents != pretty_contents:
            if args.autofix:
                _autofix(json_file, pretty_contents)
            else:
                diff_output = get_diff(contents, pretty_contents, json_file)
                # Write bytes directly to the underlying stdout buffer.
                sys.stdout.buffer.write(diff_output.encode())

            status = 1

    return status
|
|
|
|
|
|
if __name__ == '__main__':
    # ``exit`` is injected by the ``site`` module and is missing under
    # ``python -S`` or in frozen builds; raising SystemExit always works.
    raise SystemExit(main())
|