diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index c0d811c..46034c6 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -95,6 +95,12 @@ entry: check-yaml language: python types: [yaml] +- id: check-yaml-sorted + name: check yaml sorted + description: checks top-level items of yaml files are sorted. + entry: check-yaml-sorted + language: python + types: [yaml] - id: debug-statements name: debug statements (python) description: checks for debugger imports and py37+ `breakpoint()` calls in python source. diff --git a/README.md b/README.md index 9ae7ec5..c75c4f0 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,11 @@ Attempts to load all yaml files to verify syntax. portability to other yaml implementations. Implies `--allow-multiple-documents`. +#### `check-yaml-sorted` +Checks that top-level items in yaml files are sorted. This is done on the +stringified representation, which means it can handle lists of scalars, lists +of dicts by first key or val, etc. + #### `debug-statements` Check for debugger imports and py37+ `breakpoint()` calls in python source. diff --git a/pre_commit_hooks/check_yaml_sorted.py b/pre_commit_hooks/check_yaml_sorted.py new file mode 100644 index 0000000..bc69283 --- /dev/null +++ b/pre_commit_hooks/check_yaml_sorted.py @@ -0,0 +1,51 @@ +"""Pre-commit hook to check that yaml files are sorted at the top-level. + +Does not modify files. Simply parses and compares the stringified values of +each element of the top-level object. + +This allows it to handle all kinds of cases +- lists of scalars +- dicts by top-level key +- lists of dicts by first key name +- lists of dicts with same keys by first value... 
def is_sorted(iterable: Iterable[Any] | None) -> bool:
    """Return whether the items of *iterable* are in ascending string order.

    Every item is converted with ``str()`` and compared with its successor.
    As a consequence a dict is checked by its keys, a list of dicts by the
    string form of each dict, and numbers compare lexicographically (``10``
    sorts before ``9``).

    Args:
        iterable: The top-level object of a parsed YAML document.  ``None``
            (an empty document) has no items and is reported as sorted.

    Returns:
        ``True`` when no item is greater than the one following it.  On the
        first out-of-order pair both items are printed, truncated to 32
        characters, and ``False`` is returned.
    """
    if iterable is None:
        return True

    # ``None`` marks "no previous item yet"; an empty string is a valid item,
    # so truthiness cannot be used for that purpose.
    previous: str | None = None
    for item in iterable:
        current = str(item)
        if previous is not None and previous > current:
            print(f'Items ({previous[:32]}..., {current[:32]}...)')
            return False
        previous = current
    return True


def main(argv: Sequence[str] | None = None) -> int:
    """Check that the top-level items of each given YAML file are sorted.

    Files are parsed with ``yaml.safe_load``; parse errors are not caught
    here and propagate to the caller.

    Args:
        argv: Command-line arguments.  ``None`` lets argparse fall back to
            ``sys.argv``.

    Returns:
        ``0`` when every file is sorted, ``1`` when at least one is not.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to check')
    args = parser.parse_args(argv)

    retval = 0

    for filename in args.filenames:
        # Decode explicitly instead of relying on the locale's default
        # encoding, which differs between platforms.
        with open(filename, encoding='UTF-8') as fp:
            data = yaml.safe_load(fp)
        # Only lists and dicts have ordered top-level items.  An empty
        # document (None) or a bare scalar has nothing to sort; iterating a
        # scalar string would compare its characters.
        if isinstance(data, (list, dict)) and not is_sorted(data):
            print(f'In file {filename}, items are out of order.')
            # Use a flag rather than a count: exit statuses are 8 bits wide
            # on POSIX, so a count of 256 would be reported as success.
            retval = 1

    return retval


if __name__ == '__main__':
    raise SystemExit(main())
def test_sort_dicts_by_keys():
    """A dict is judged by the order of its top-level keys."""
    in_order = {'a': 1, 'b': ['nested', 'list'], 'c': 3}
    out_of_order = {'a': 1, 'c': ['nested', 'list'], 'b': 3}

    assert is_sorted(in_order)
    assert not is_sorted(out_of_order)


_list_dicts_first_key_sorted = """
- first_dict:
    some: stuff
- second_dict:
    other: stuff
- third_dict:
    even: more stuff
"""

_list_dicts_first_key_unsorted = """
- second_dict:
    some: stuff
- first_dict:
    other: stuff
- third_dict:
    even: more stuff
"""


def test_sort_list_of_dicts_by_first_key():
    """A list of single-key dicts is judged by each dict's first key."""
    ordered = yaml.safe_load(_list_dicts_first_key_sorted)
    shuffled = yaml.safe_load(_list_dicts_first_key_unsorted)

    assert is_sorted(ordered)
    assert not is_sorted(shuffled)


_sorted_yaml = """
- id: check-builtin-literals
- id: check-case-conflict
- id: check-docstring-first
"""

_sorted_yaml_long = """
- id: check-builtin-literals
  name: check builtin type constructor use
  description: requires literal syntax when initializing empty or zero....
  entry: check-builtin-literals
  language: python
  types: [python]
- id: check-case-conflict
  name: check for case conflicts
  description: checks for files that would conflict in case-insensitive...
  entry: check-case-conflict
  language: python
- id: check-docstring-first
  name: check docstring is first
  description: checks a common error of defining a docstring after code.
  entry: check-docstring-first
  language: python
  types: [python]
"""

_unsorted_yaml = """
- id: check-builtin-literals
- id: check-docstring-first
- id: check-case-conflict
"""

_unsorted_yaml_long = """
- id: check-builtin-literals
  name: check builtin type constructor use
  description: requires literal syntax when initializing empty or zero....
  entry: check-builtin-literals
  language: python
  types: [python]
- id: check-docstring-first
  name: check docstring is first
  description: checks a common error of defining a docstring after code.
  entry: check-docstring-first
  language: python
  types: [python]
- id: check-case-conflict
  name: check for case conflicts
  description: checks for files that would conflict in case-insensitive...
  entry: check-case-conflict
  language: python
"""


def test_sort_list_of_dicts_same_first_key_by_val():
    """Dicts sharing a first key are judged by that key's value."""
    # Ordered fixtures, short and long form.
    for document in (_sorted_yaml, _sorted_yaml_long):
        assert is_sorted(yaml.safe_load(document))

    # Fixtures with the last two entries swapped, short and long form.
    for document in (_unsorted_yaml, _unsorted_yaml_long):
        assert not is_sorted(yaml.safe_load(document))