[pretty_format_json] Add compact array feature

- Find non-nested arrays of scalar values (strings, numbers, booleans,
  null) using a regex based on the JSON grammar:
  https://www.json.org/json-en.html
This commit is contained in:
Adithya Balaji 2025-03-18 14:37:31 -07:00
parent 31903eabdb
commit eeaa627f56
No known key found for this signature in database
2 changed files with 150 additions and 0 deletions

View file

@ -2,6 +2,7 @@ from __future__ import annotations
import argparse import argparse
import json import json
import re
import sys import sys
from collections.abc import Mapping from collections.abc import Mapping
from collections.abc import Sequence from collections.abc import Sequence
@ -14,6 +15,7 @@ def _get_pretty_format(
ensure_ascii: bool = True, ensure_ascii: bool = True,
sort_keys: bool = True, sort_keys: bool = True,
top_keys: Sequence[str] = (), top_keys: Sequence[str] = (),
compact_arrays: bool = False,
) -> str: ) -> str:
def pairs_first(pairs: Sequence[tuple[str, str]]) -> Mapping[str, str]: def pairs_first(pairs: Sequence[tuple[str, str]]) -> Mapping[str, str]:
before = [pair for pair in pairs if pair[0] in top_keys] before = [pair for pair in pairs if pair[0] in top_keys]
@ -22,14 +24,58 @@ def _get_pretty_format(
if sort_keys: if sort_keys:
after.sort() after.sort()
return dict(before + after) return dict(before + after)
json_pretty = json.dumps( json_pretty = json.dumps(
json.loads(contents, object_pairs_hook=pairs_first), json.loads(contents, object_pairs_hook=pairs_first),
indent=indent, indent=indent,
ensure_ascii=ensure_ascii, ensure_ascii=ensure_ascii,
) )
if compact_arrays:
json_pretty = _compact_arrays(json_pretty)
return f'{json_pretty}\n' return f'{json_pretty}\n'
def _compact_arrays(json_text: str) -> str:
"""Convert arrays with simple values to a single line format."""
pattern = re.compile(
r'''
( # Capturing group for the entire array
\[ # Opening bracket
\s* # Optional whitespace
(?: # Non-capturing group for array elements
(?: # Non-capturing group for each value type
"[^"]*" # String: anything in quotes
|
-? # Optional negative sign
(?:
0|[1-9]\d* # Integer part: 0 or non-zero digit
# followed by digits
)
(?:\.\d+)? # Optional fractional part
(?:[eE][+-]?\d+)? # Optional exponent part
|
true|false # Boolean
|
null # Null
)
(?:\s*,\s*)? # Optional comma and whitespace
)++ # One or more elements
\s* # Optional whitespace
\] # Closing bracket
)
''', re.VERBOSE,
)
def compact_match(match: re.Match[str]) -> str:
array_content = match.group(0)
compact = re.sub(r'\s*\n\s*', ' ', array_content)
return compact
return re.sub(pattern, compact_match, json_text)
def _autofix(filename: str, new_contents: str) -> None: def _autofix(filename: str, new_contents: str) -> None:
print(f'Fixing file {filename}') print(f'Fixing file {filename}')
with open(filename, 'w', encoding='UTF-8') as f: with open(filename, 'w', encoding='UTF-8') as f:
@ -96,6 +142,16 @@ def main(argv: Sequence[str] | None = None) -> int:
default=[], default=[],
help='Ordered list of keys to keep at the top of JSON hashes', help='Ordered list of keys to keep at the top of JSON hashes',
) )
parser.add_argument(
'--compact-arrays',
action='store_true',
dest='compact_arrays',
default=False,
help=(
'Format simple arrays on a single line for more '
'compact representation'
),
)
parser.add_argument('filenames', nargs='*', help='Filenames to fix') parser.add_argument('filenames', nargs='*', help='Filenames to fix')
args = parser.parse_args(argv) args = parser.parse_args(argv)
@ -109,6 +165,7 @@ def main(argv: Sequence[str] | None = None) -> int:
pretty_contents = _get_pretty_format( pretty_contents = _get_pretty_format(
contents, args.indent, ensure_ascii=not args.no_ensure_ascii, contents, args.indent, ensure_ascii=not args.no_ensure_ascii,
sort_keys=not args.no_sort_keys, top_keys=args.top_keys, sort_keys=not args.no_sort_keys, top_keys=args.top_keys,
compact_arrays=args.compact_arrays,
) )
except ValueError: except ValueError:
print( print(

View file

@ -155,3 +155,96 @@ def test_diffing_output(capsys):
assert actual_retval == expected_retval assert actual_retval == expected_retval
assert actual_out == expected_out assert actual_out == expected_out
assert actual_err == '' assert actual_err == ''
def test_compact_arrays_main(tmpdir):
    """Autofixing with ``--compact-arrays`` collapses scalar arrays only."""
    # TODO: Intentionally don't address round trip bug caused by
    # using `json.loads(json.dumps(data))`. This will need to be
    # resolved separately.
    expanded_json = (
        '{\n'
        ' "simple_array": [\n'
        ' 1,\n'
        ' 2,\n'
        ' 3\n'
        ' ],\n'
        ' "string_array": [\n'
        ' "a",\n'
        ' "b",\n'
        ' "c"\n'
        ' ],\n'
        ' "mixed_array": [\n'
        ' 1,\n'
        ' "string",\n'
        ' true,\n'
        ' null\n'
        ' ],\n'
        ' "nested_objects": [\n'
        ' {\n'
        ' "a": 1\n'
        ' },\n'
        ' {\n'
        ' "b": 2\n'
        ' }\n'
        ' ]\n'
        '}'
    )
    json_path = tmpdir.join('to_be_compacted.json')
    json_path.write(expanded_json)

    # The file needed reformatting, so the hook reports failure (1).
    exit_code = main(['--compact-arrays', '--autofix', str(json_path)])
    assert exit_code == 1

    with open(str(json_path), encoding='UTF-8') as stream:
        contents = stream.read()

    # Arrays made only of scalars end up on a single line ...
    compacted_snippets = (
        '"simple_array": [ 1, 2, 3 ]',
        '"string_array": [ "a", "b", "c" ]',
        '"mixed_array": [ 1, "string", true, null ]',
    )
    for snippet in compacted_snippets:
        assert snippet in contents

    # ... while an array holding objects keeps its multi-line layout.
    expanded_snippets = (
        ' "nested_objects": [\n',
        ' "a": 1\n',
    )
    for snippet in expanded_snippets:
        assert snippet in contents
def test_compact_arrays_diff_output(tmpdir, capsys):
    """Without ``--autofix`` the compacted form is reported as a diff."""
    json_path = tmpdir.join('expanded_arrays.json')
    json_path.write(
        '{\n'
        ' "array": [\n'
        ' 1,\n'
        ' 2,\n'
        ' 3\n'
        ' ]\n'
        '}',
    )

    exit_code = main(['--compact-arrays', str(json_path)])
    assert exit_code == 1

    out = capsys.readouterr().out

    # The single-line array is the proposed addition ...
    assert '+ "array": [ 1, 2, 3 ]' in out

    # ... and every line of the expanded array is marked for removal.
    removed_lines = (
        '- 1,',
        '- 2,',
        '- 3',
        '- "array": [',
        '- ]',
    )
    for removed in removed_lines:
        assert removed in out
def test_compact_arrays_disabled(tmpdir):
    """Without ``--compact-arrays`` a one-line array is expanded again."""
    json_path = tmpdir.join('already_compact.json')
    json_path.write('{\n "array": [ 1, 2, 3 ]\n}')

    exit_code = main(['--autofix', str(json_path)])
    assert exit_code == 1

    with open(str(json_path), encoding='UTF-8') as stream:
        contents = stream.read()

    # Default formatting puts every element on its own line.
    expected_snippets = (
        '"array": [\n',
        ' 1,',
        ' 2,',
        ' 3\n ]',
    )
    for snippet in expected_snippets:
        assert snippet in contents