From 1cf0b2d22d9848295e804b09d19c2078f24e5259 Mon Sep 17 00:00:00 2001
From: Alexander Fischer
Date: Tue, 14 Sep 2021 11:44:38 +0200
Subject: [PATCH] Add unique values/sort values to pretty_formatted_json

Add two options to the pretty-format-json hook:

  --sort-values=KEY[,KEY...]    sort the list stored under each given key
  --unique-values=KEY[,KEY...]  drop duplicate entries from the list stored
                                under each given key, keeping the first
                                occurrence of every entry

Both options apply to matching keys at any nesting depth, because the
transformation runs inside the object_pairs_hook. A list is left
untouched when it mixes entry types or contains nested lists/objects;
sorting additionally skips lists containing null, which cannot be ordered.

---
 .vscode/settings.json                         |  4 +
 pre_commit_hooks/pretty_format_json.py        | 93 ++++++++++++++++++-
 .../sort_values_pretty_formatted_json.json    | 25 +++++
 .../unique_values_pretty_formatted_json.json  | 25 +++++
 tests/pretty_format_json_test.py              | 52 +++++++++++
 5 files changed, 195 insertions(+), 4 deletions(-)
 create mode 100644 .vscode/settings.json
 create mode 100644 testing/resources/sort_values_pretty_formatted_json.json
 create mode 100644 testing/resources/unique_values_pretty_formatted_json.json

diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..e137fad
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,4 @@
+{
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
+}
\ No newline at end of file
diff --git a/pre_commit_hooks/pretty_format_json.py b/pre_commit_hooks/pretty_format_json.py
index 61b0169..15dbc38 100644
--- a/pre_commit_hooks/pretty_format_json.py
+++ b/pre_commit_hooks/pretty_format_json.py
@@ -2,6 +2,7 @@ import argparse
 import json
 import sys
 from difflib import unified_diff
+from typing import Any
 from typing import List
 from typing import Mapping
 from typing import Optional
@@ -15,17 +16,78 @@ def _get_pretty_format(
         indent: str,
         ensure_ascii: bool = True,
         sort_keys: bool = True,
+        sort_values: Sequence[str] = (),
         top_keys: Sequence[str] = (),
+        unique_values: Sequence[str] = (),
 ) -> str:
-    def pairs_first(pairs: Sequence[Tuple[str, str]]) -> Mapping[str, str]:
+    def transform_top_keys(pairs: Sequence[Tuple[str, Any]]) -> Sequence[Tuple[str, Any]]:
+        transformed_pairs = []
         before = [pair for pair in pairs if pair[0] in top_keys]
         before = sorted(before, key=lambda x: top_keys.index(x[0]))
         after = [pair for pair in pairs if pair[0] not in top_keys]
         if sort_keys:
             after.sort()
-        return dict(before + after)
+        transformed_pairs.extend(before)
+        transformed_pairs.extend(after)
+        return transformed_pairs
+
+    def transform_sort_values(pairs: Sequence[Tuple[str, Any]]) -> Sequence[Tuple[str, Any]]:
+        if not sort_values:
+            return pairs
+        transformed_pairs = []
+        for (key, value) in pairs:
+            if key not in sort_values:
+                # No sorting requested
+                transformed_pairs.append((key, value))
+                continue
+            if not isinstance(value, list):
+                # Value is no list, sorting makes no sense
+                transformed_pairs.append((key, value))
+                continue
+            if len({type(x) for x in value}) > 1:
+                # Only sort if all list entries are of the same type
+                transformed_pairs.append((key, value))
+                continue
+            if any(x is None or isinstance(x, (list, dict)) for x in value):
+                # Only sort orderable scalars: null, list and mapping entries are skipped
+                transformed_pairs.append((key, value))
+                continue
+            transformed_pairs.append((key, sorted(value)))
+        return transformed_pairs
+
+    def transform_unique_values(pairs: Sequence[Tuple[str, Any]]) -> Sequence[Tuple[str, Any]]:
+        if not unique_values:
+            return pairs
+        transformed_pairs = []
+        for (key, value) in pairs:
+            if key not in unique_values:
+                # No deduplication requested
+                transformed_pairs.append((key, value))
+                continue
+            if not isinstance(value, list):
+                # Value is no list, there is nothing to deduplicate
+                transformed_pairs.append((key, value))
+                continue
+            if len({type(x) for x in value}) > 1:
+                # Only deduplicate if all list entries are of the same type
+                transformed_pairs.append((key, value))
+                continue
+            if any(isinstance(x, (list, dict)) for x in value):
+                # Lists and mappings are unhashable and cannot be deduplicated
+                transformed_pairs.append((key, value))
+                continue
+            transformed_pairs.append((key, list(dict.fromkeys(value))))
+        return transformed_pairs
+
+    def pairs_first(pairs: Sequence[Tuple[str, Any]]) -> Mapping[str, Any]:
+        transformed_pairs = transform_unique_values(pairs)
+        transformed_pairs = transform_sort_values(transformed_pairs)
+        transformed_pairs = transform_top_keys(transformed_pairs)
+        return dict(transformed_pairs)
+
+    load = json.loads(contents, object_pairs_hook=pairs_first)
     json_pretty = json.dumps(
-        json.loads(contents, object_pairs_hook=pairs_first),
+        load,
         indent=indent,
         ensure_ascii=ensure_ascii,
     )
@@ -50,6 +112,14 @@ def parse_topkeys(s: str) -> List[str]:
     return s.split(',')
 
 
+def parse_sortvalues(s: str) -> List[str]:
+    return s.split(',')
+
+
+def parse_uniquevalues(s: str) -> List[str]:
+    return s.split(',')
+
+
 def get_diff(source: str, target: str, file: str) -> str:
     source_lines = source.splitlines(True)
     target_lines = target.splitlines(True)
@@ -91,6 +161,20 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
         default=False,
         help='Keep JSON nodes in the same order',
     )
+    parser.add_argument(
+        '--sort-values',
+        type=parse_sortvalues,
+        dest='sort_values',
+        default=[],
+        help='The values of the given dict keys are sorted',
+    )
+    parser.add_argument(
+        '--unique-values',
+        type=parse_uniquevalues,
+        dest='unique_values',
+        default=[],
+        help='The values of the given dict keys are made unique',
+    )
     parser.add_argument(
         '--top-keys',
         type=parse_topkeys,
@@ -110,7 +194,8 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
         try:
             pretty_contents = _get_pretty_format(
                 contents, args.indent, ensure_ascii=not args.no_ensure_ascii,
-                sort_keys=not args.no_sort_keys, top_keys=args.top_keys,
+                sort_keys=not args.no_sort_keys, sort_values=args.sort_values,
+                top_keys=args.top_keys, unique_values=args.unique_values,
             )
         except ValueError:
             print(
diff --git a/testing/resources/sort_values_pretty_formatted_json.json b/testing/resources/sort_values_pretty_formatted_json.json
new file mode 100644
index 0000000..7ac0881
--- /dev/null
+++ b/testing/resources/sort_values_pretty_formatted_json.json
@@ -0,0 +1,25 @@
+{
+  "foo": "bar",
+  "sort_list": [
+    34,
+    2,
+    234
+  ],
+  "wont_sort_list": [
+    34,
+    2,
+    ""
+  ],
+  "sub_dict": {
+    "sub_sub_dict": {
+      "sort_sub_sub_sub_dict": [
+        "foo",
+        "bar",
+        "bar",
+        "baz"
+      ],
+      "do_not_sort": []
+    }
+  },
+  "blah": null
+}
diff --git a/testing/resources/unique_values_pretty_formatted_json.json b/testing/resources/unique_values_pretty_formatted_json.json
new file mode 100644
index 0000000..790def9
--- /dev/null
+++ b/testing/resources/unique_values_pretty_formatted_json.json
@@ -0,0 +1,25 @@
+{
+  "unique_list": [
+    234,
+    34,
+    2,
+    234
+  ],
+  "sub_dict": {
+    "sub_list": [
+      "foo",
+      "foo",
+      {
+        "sub_sub_dict_entry": "sub_sub_dict_entry_value"
+      },
+      "bar",
+      [
+        1,
+        2,
+        3
+      ]
+    ],
+    "sub_list_2": "sub_list_2_value"
+  },
+  "foo": "bar"
+}
diff --git a/tests/pretty_format_json_test.py b/tests/pretty_format_json_test.py
index 7fda23b..c8dee32 100644
--- a/tests/pretty_format_json_test.py
+++ b/tests/pretty_format_json_test.py
@@ -106,6 +106,58 @@ def test_badfile_main():
     assert ret == 1
 
 
+def test_sort_values_get_pretty_format():
+    ret = main((
+        '--no-sort-keys', '--sort-values=sort_list,wont_sort_list,sort_sub_sub_sub_dict', get_resource_path('sort_values_pretty_formatted_json.json')))
+    assert ret == 1
+
+
+def test_sort_values_diffing_output(capsys):
+    resource_path = get_resource_path('sort_values_pretty_formatted_json.json')
+    expected_retval = 1
+    a = os.path.join('a', resource_path)
+    b = os.path.join('b', resource_path)
+    expected_out = f'''\
+--- {a}
++++ {b}
+@@ -1,8 +1,8 @@
+ {{
+   "foo": "bar",
+   "sort_list": [
++    2,
+     34,
+-    2,
+     234
+   ],
+   "wont_sort_list": [
+@@ -13,10 +13,10 @@
+   "sub_dict": {{
+     "sub_sub_dict": {{
+       "sort_sub_sub_sub_dict": [
+-        "foo",
+         "bar",
+         "bar",
+-        "baz"
++        "baz",
++        "foo"
+       ],
+       "do_not_sort": []
+     }}
+'''
+    actual_retval = main(['--no-sort-keys', '--sort-values=sort_list,wont_sort_list,sort_sub_sub_sub_dict', resource_path])
+    actual_out, actual_err = capsys.readouterr()
+
+    assert actual_retval == expected_retval
+    assert actual_out == expected_out
+    assert actual_err == ''
+
+
+def test_unique_values_get_pretty_format():
+    ret = main([
+        '--no-sort-keys', '--unique-values=unique_list,sub_list', get_resource_path('unique_values_pretty_formatted_json.json')])
+    assert ret == 1
+
+
 def test_diffing_output(capsys):
     resource_path = get_resource_path('not_pretty_formatted_json.json')
     expected_retval = 1