# Single-file reconstruction of a patch that adds the
# ``check-permitted-path-characters`` pre-commit hook.  The patch touched
# several files; its non-Python hunks are preserved here as comments.
#
# .pre-commit-hooks.yaml -- entry added between ``check-json`` and
# ``check-shebang-scripts-are-executable``:
#
#     - id: check-permitted-path-characters
#       name: check for permitted path characters
#       description: checks pathnames for allowed characters
#       entry: check-permitted-path-characters
#       language: python
#       args: ['--']
#
# README.md -- section added between ``check-merge-conflict`` and
# ``check-shebang-scripts-are-executable``:
#
#     #### `check-permitted-path-characters`
#     Check for allowed characters in pathnames.
#
# setup.cfg -- ``console_scripts`` entry added between
# ``check-merge-conflict`` and ``check-shebang-scripts-are-executable``:
#
#     check-permitted-path-characters = pre_commit_hooks.check_permitted_path_characters:main
#
# The hook below comes from
# ``pre_commit_hooks/check_permitted_path_characters.py``; the tests at the
# bottom come from ``tests/check_permitted_path_characters_test.py``.
from __future__ import annotations

import argparse
import string
from typing import Sequence

DEFAULT_ALLOWLIST = string.ascii_letters + string.digits + '-_./'


def main(argv: Sequence[str] | None = None) -> int:
    """Check that every given pathname uses only permitted characters.

    The entire path is checked, not just the final component, so that
    directories with disallowed characters are caught as well.

    Args:
        argv: Command-line arguments.  ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        ``0`` when every pathname passes, ``1`` when at least one pathname
        contains a character outside the allowlist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*')
    parser.add_argument(
        '--allowlist',
        default=DEFAULT_ALLOWLIST,
        help=(
            'Override the default allowlist of permitted characters. The'
            ' default is %(default)s'
        ),
    )
    parser.add_argument(
        '--extra-allowlist',
        default='',
        help=(
            'Extend the allowlist (the default one, or the one given via'
            ' --allowlist) with these characters.'
        ),
    )
    args = parser.parse_args(argv)

    allowlist = frozenset(args.allowlist + args.extra_allowlist)
    # Loop-invariant: sorted once so every report shows it the same way.
    pretty_allowlist = ''.join(sorted(allowlist))

    retcode = 0
    for filename in args.filenames:
        banned = set(filename) - allowlist
        if banned:
            pretty_banlist = ''.join(sorted(banned))
            # repr() (via !r) supplies the quotes and escapes control
            # characters, so the report stays readable even for the
            # pathological names this hook is meant to flag.
            print(
                f'{filename!r} contains characters not in the allowlist:'
                f' {pretty_banlist!r}. Allowlist is: {pretty_allowlist!r}.',
            )
            retcode = 1

    return retcode


# --- tests (tests/check_permitted_path_characters_test.py) ---------------
# In the patch these import ``main`` from
# ``pre_commit_hooks.check_permitted_path_characters``; here ``main`` is
# defined above.


def test_main_all_pass():
    ret = main(
        [
            '/some/path/foo_test.py',
            './relative/path/bar_test.py',
            'filename-only.py',
        ],
    )
    assert ret == 0


def test_main_default_chars():
    # use '--' for separating pathnames from args, so pathnames with leading
    # '-' are not interpreted as flags
    ret = main(
        [
            '--',
            '-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_',
            'abcdefghijklmnopqrstuvwxyz',
        ],
    )
    assert ret == 0

    ret = main(['+'])
    assert ret == 1


def test_main_invalid_dir():
    ret = main(['--', '/some+funky%%dir/pathname'])
    assert ret == 1


def test_main_allowlist():
    ret = main(['--allowlist', 'abc', 'invalid.py'])
    assert ret == 1
    ret = main(['--allowlist', 'abc', 'cba'])
    assert ret == 0
    # a pathological case
    ret = main(['--allowlist', '\b\x01\t/.', '\b.\x01/\t'])
    assert ret == 0


def test_main_extra_allowlist():
    ret = main(['--extra-allowlist', '+', 'valid+'])
    assert ret == 0


if __name__ == '__main__':
    raise SystemExit(main())