From 69d0dfbab2fcbb1dbc222a535f2eb2fab5955824 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Skytt=C3=A4?= Date: Thu, 11 Nov 2021 19:23:42 +0200 Subject: [PATCH] Add check for text file encodings --- .pre-commit-hooks.yaml | 6 ++++++ README.md | 5 +++++ pre_commit_hooks/check_encoding.py | 29 +++++++++++++++++++++++++++++ setup.cfg | 1 + tests/check_encoding_test.py | 17 +++++++++++++++++ 5 files changed, 58 insertions(+) create mode 100644 pre_commit_hooks/check_encoding.py create mode 100644 tests/check_encoding_test.py diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 5690660..22dcd4f 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -32,6 +32,12 @@ entry: check-docstring-first language: python types: [python] +- id: check-encoding + name: check text files have desired encoding + description: checks that text files decode cleanly using an encoding. + entry: check-encoding + language: python + types: [text] - id: check-executables-have-shebangs name: check that executables have shebangs description: ensures that (non-binary) executables have a shebang. diff --git a/README.md b/README.md index 7486aba..daf3a96 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,11 @@ Check for files with names that would conflict on a case-insensitive filesystem #### `check-docstring-first` Checks for a common error of placing code before the docstring. +#### `check-encoding` +Checks that text files have the desired encoding. + - `--encoding` - specify the encoding to assert; if not specified, the default is + [platform dependent](https://docs.python.org/3/library/functions.html?highlight=encoding#open) + #### `check-executables-have-shebangs` Checks that non-binary executables have a proper shebang. 
diff --git a/pre_commit_hooks/check_encoding.py b/pre_commit_hooks/check_encoding.py new file mode 100644 index 0000000..f472e2e --- /dev/null +++ b/pre_commit_hooks/check_encoding.py @@ -0,0 +1,29 @@ +import argparse +from typing import Optional +from typing import Sequence + + +def main(argv: Optional[Sequence[str]] = None) -> int: + parser = argparse.ArgumentParser() + parser.add_argument('filenames', nargs='*', help='Filenames to check.') + parser.add_argument('--encoding', help='Encoding to assert.') + args = parser.parse_args(argv) + + retval = 0 + for filename in args.filenames: + try: + with open(filename, encoding=args.encoding) as f: + f.read() + except LookupError as exc: + # Unknown encoding, don't bother with the rest + print(f'{__file__}: {exc}') + retval = 2 + break + except Exception as exc: + print(f'{filename}: {exc}') + retval = 1 + return retval + + +if __name__ == '__main__': + raise SystemExit(main()) diff --git a/setup.cfg b/setup.cfg index 890d629..b7cbb8f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,6 +41,7 @@ console_scripts = check-byte-order-marker = pre_commit_hooks.check_byte_order_marker:main check-case-conflict = pre_commit_hooks.check_case_conflict:main check-docstring-first = pre_commit_hooks.check_docstring_first:main + check-encoding = pre_commit_hooks.check_encoding:main check-executables-have-shebangs = pre_commit_hooks.check_executables_have_shebangs:main check-json = pre_commit_hooks.check_json:main check-merge-conflict = pre_commit_hooks.check_merge_conflict:main diff --git a/tests/check_encoding_test.py b/tests/check_encoding_test.py new file mode 100644 index 0000000..bd9dd32 --- /dev/null +++ b/tests/check_encoding_test.py @@ -0,0 +1,17 @@ +import pytest + +from pre_commit_hooks.check_encoding import main + + +@pytest.mark.parametrize( + ('content', 'encoding', 'expected'), + ( + (b'Hello!', 'ascii', 0), + (b'Hello!', 'unknown-encoding', 2), + ('Hello ☃!'.encode(), 'ascii', 1), + ), +) +def test_has_encoding(content, 
encoding, expected, tmpdir): + path = tmpdir.join('path') + path.write(content, 'wb') + assert main(('--encoding', encoding, str(path))) == expected