[Delivers #183229025] Add duplicate entry and sort feature for json

Signed-off-by: Tanmay Pandey <tanmay@voereir.com>
2026-05-15 21:50:33 +00:00 · 2022-09-15 11:25:52 +05:30 · 2022-09-15 11:25:52 +05:30 · 2331edcb1d
commit 2331edcb1d
parent 78506ced37
4 changed files with 112 additions and 27 deletions
--- a/.pre-commit-hooks.yaml
+++ b/.pre-commit-hooks.yaml
@ -191,3 +191,8 @@
    language: python
    types: [text]
    stages: [commit, push, manual]
+-   id: notify-duplicate-entry
+    name: Notify duplicate entry
+    description: Notifies duplicate entry in the same file
+    entry: notify-duplicate-entry
+    language: python
+    # The hook parses every file it receives with json.load, so restrict it
+    # to JSON files instead of every staged file.
+    types: [json]
--- a/pre_commit_hooks/notify_duplicate_entry.py
+++ b/pre_commit_hooks/notify_duplicate_entry.py
@ -0,0 +1,59 @@
+import argparse
+import json
+from typing import Optional
+from typing import Sequence
+from pathlib import Path
+
+def _check_duplicate_entry(json_contents, key):
+    """Collect the values of ``key`` that occur in more than one row.
+
+    Args:
+        json_contents: iterable of rows; each row is indexed with ``key``.
+        key: the field whose value identifies a row.
+
+    Returns:
+        A ``(duplicates, count)`` tuple: the set of values seen more than
+        once, and the size of that set.
+
+    Raises:
+        KeyError: if a row is a dict that does not contain ``key``.
+    """
+    seen_values = set()
+    repeated = set()
+    for entry in json_contents:
+        identifier = entry[key]
+        if identifier in seen_values:
+            repeated.add(identifier)
+        else:
+            seen_values.add(identifier)
+    return repeated, len(repeated)
+
+
+def main(argv: Optional[Sequence[str]] = None) -> int:
+    """Report duplicate key values in the given JSON table files.
+
+    Each file's stem (its name without directory and extension) selects the
+    field whose value must be unique across the rows of that file.
+
+    Args:
+        argv: command-line arguments; ``None`` makes argparse read
+            ``sys.argv``.
+
+    Returns:
+        ``1`` if any checked file contains duplicates or cannot be decoded,
+        otherwise ``0``.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'filenames', nargs='*', type=str,
+        help='Names of the JSON files to check duplicate entries',
+    )
+    # File stem -> field whose value must be unique within that file.
+    table_uuid_mapping = {
+        'action': 'uuid', 'env_property_group': 'uuid',
+        'environment': 'uuid', 'environment_property': 'code',
+        'report_summary': 'uuid',
+        'runner': 'uuid', 'scenario': 'uuid',
+        'sla': 'uuid', 'sla_scenario_association': 'sla', 'tag': 'uuid',
+        'tag_action_association': 'tag_uuid',
+        'tag_case_association': 'test_case_uuid',
+        'teams': 'uuid',
+        'test_case': 'uuid',
+        'test_suit': 'uuid', 'test_supported_version': 'test_case_uuid',
+        'testcase_workload_association': 'uuid', 'user': 'uuid',
+        'user_tokens': 'user_token', 'workflow_task': 'workflow_id',
+    }
+
+    args = parser.parse_args(argv)
+    retval = 0
+
+    for json_file in args.filenames:
+        key = table_uuid_mapping.get(Path(json_file).stem)
+        if key is None:
+            # No uniqueness key is configured for this file name, so there
+            # is nothing to check. Indexing the mapping directly would raise
+            # KeyError and abort the run for every remaining file.
+            continue
+
+        try:
+            with open(json_file, encoding='UTF-8') as f:
+                contents = json.load(f)
+        except ValueError as exc:
+            # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
+            # report the file and fail instead of dying with a traceback.
+            print(f'{json_file}: Failed to json decode ({exc})')
+            retval = 1
+            continue
+
+        duplicate_uuids, status = _check_duplicate_entry(contents, key)
+
+        if status:
+            print(f"Duplicate UUIDs found - {duplicate_uuids} in file "
+                  f"{json_file}")
+            retval = 1
+
+    return retval
+
+
+# Script entry point: propagate main()'s status as the process exit code.
+# SystemExit is raised directly because the ``exit`` helper is injected by
+# the ``site`` module and is not available when Python runs with ``-S``.
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/pre_commit_hooks/pretty_format_json.py
+++ b/pre_commit_hooks/pretty_format_json.py
@ -12,21 +12,22 @@ from typing import Union
 INFINITY = float('inf')


-def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
-                     _key_separator, _item_separator, _sort_keys, _skipkeys,
-                     _one_shot,
-                     ## HACK: hand-optimized bytecode; turn globals into locals
-                     ValueError=ValueError,
-                     dict=dict,
-                     float=float,
-                     id=id,
-                     int=int,
-                     isinstance=isinstance,
-                     list=list,
-                     str=str,
-                     tuple=tuple,
-                     _intstr=int.__str__,
-                     ):
+def _make_iterencode(
+    markers, _default, _encoder, _indent, _floatstr,
+    _key_separator, _item_separator, _sort_keys, _skipkeys,
+    _one_shot,
+    ## HACK: hand-optimized bytecode; turn globals into locals
+    ValueError=ValueError,
+    dict=dict,
+    float=float,
+    id=id,
+    int=int,
+    isinstance=isinstance,
+    list=list,
+    str=str,
+    tuple=tuple,
+    _intstr=int.__str__,
+):

    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent
@ -38,7 +39,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
-                raise ValueError("Circular reference detected")
+                raise ValueError('Circular reference detected')
            markers[markerid] = lst
        buf = '['
        if _indent is not None:
@ -95,7 +96,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
-                raise ValueError("Circular reference detected")
+                raise ValueError('Circular reference detected')
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
@ -131,8 +132,10 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
            elif _skipkeys:
                continue
            else:
-                raise TypeError(f'keys must be str, int, float, bool or None, '
-                                f'not {key.__class__.__name__}')
+                raise TypeError(
+                    f'keys must be str, int, float, bool or None, '
+                    f'not {key.__class__.__name__}',
+                )
            if first:
                first = False
            else:
@ -191,7 +194,7 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
-                    raise ValueError("Circular reference detected")
+                    raise ValueError('Circular reference detected')
                markers[markerid] = o
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
@ -216,8 +219,10 @@ class CustomJSONEncoder(json.JSONEncoder):
        else:
            markers = None

-        def floatstr(o, allow_nan=self.allow_nan,
-                     _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
+        def floatstr(
+            o, allow_nan=self.allow_nan,
+            _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY,
+        ):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.
@ -233,8 +238,9 @@ class CustomJSONEncoder(json.JSONEncoder):

            if not allow_nan:
                raise ValueError(
-                    "Out of range float values are not JSON compliant: " +
-                    repr(o))
+                    'Out of range float values are not JSON compliant: ' +
+                    repr(o),
+                )

            return text

@ -243,7 +249,8 @@ class CustomJSONEncoder(json.JSONEncoder):
        _iterencode = _make_iterencode(
            markers, self.default, _encoder, self.indent, floatstr,
            self.key_separator, self.item_separator, self.sort_keys,
-            self.skipkeys, _one_shot)
+            self.skipkeys, _one_shot,
+        )
        return _iterencode(o, 0)


@ -253,6 +260,7 @@ def _get_pretty_format(
        ensure_ascii: bool = True,
        sort_keys: bool = True,
        top_keys: Sequence[str] = (),
+        sort_by_first_key: bool = False,
 ) -> str:
    def pairs_first(pairs: Sequence[Tuple[str, str]]) -> Mapping[str, str]:
        before = [pair for pair in pairs if pair[0] in top_keys]
@ -261,12 +269,16 @@ def _get_pretty_format(
        if sort_keys:
            after.sort()
        return dict(before + after)
+
+    json_contents = json.loads(contents, object_pairs_hook=pairs_first)
+    if sort_by_first_key:
+        json_contents.sort(key=lambda row: list(row.values())[0])
    json_pretty = json.dumps(
-        json.loads(contents, object_pairs_hook=pairs_first),
+        json_contents,
        indent=indent,
        ensure_ascii=ensure_ascii,
        cls=CustomJSONEncoder,
-        separators=(', ', ': ')
+        separators=(', ', ': '),
    )
    return f'{json_pretty}\n'

@ -337,6 +349,13 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
        default=[],
        help='Ordered list of keys to keep at the top of JSON hashes',
    )
+    parser.add_argument(
+        '--sort-by-first-key',
+        dest='sort_by_first_key',
+        action='store_true',
+        default=False,
+        help='Sort the json by a specific key',
+    )
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

@ -350,6 +369,7 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
            pretty_contents = _get_pretty_format(
                contents, args.indent, ensure_ascii=not args.no_ensure_ascii,
                sort_keys=not args.no_sort_keys, top_keys=args.top_keys,
+                sort_by_first_key=args.sort_by_first_key,
            )
        except ValueError:
            print(
--- a/setup.cfg
+++ b/setup.cfg
@ -61,6 +61,7 @@ console_scripts =
    forbid-new-submodules = pre_commit_hooks.forbid_new_submodules:main
    mixed-line-ending = pre_commit_hooks.mixed_line_ending:main
    name-tests-test = pre_commit_hooks.tests_should_end_in_test:main
+    notify-duplicate-entry = pre_commit_hooks.notify_duplicate_entry:main
    no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main
    pre-commit-hooks-removed = pre_commit_hooks.removed:main
    pretty-format-json = pre_commit_hooks.pretty_format_json:main