Add check for permitted characters in pathnames

Add a new checker that checks for allowed characters in pathnames.

Includes arguments for overriding the default allowed list and extending
it.

Default allow list is alphanumeric plus `.-_` and the `/` path separator.
This commit is contained in:
Noah Pendleton 2022-03-01 18:13:56 -05:00
parent 69b4df5589
commit 9bb6ebc34d
5 changed files with 109 additions and 0 deletions

View file

@ -45,6 +45,12 @@
entry: check-json
language: python
types: [json]
- id: check-permitted-path-characters
name: check for permitted path characters
description: checks pathnames for allowed characters
entry: check-permitted-path-characters
language: python
args: ['--']
- id: check-shebang-scripts-are-executable
name: check that scripts with shebangs are executable
description: ensures that (non-binary) files with a shebang are executable.

View file

@ -58,6 +58,9 @@ Attempts to load all json files to verify syntax.
#### `check-merge-conflict`
Check for files that contain merge conflict strings.
#### `check-permitted-path-characters`
Check for allowed characters in pathnames.
#### `check-shebang-scripts-are-executable`
Checks that scripts with shebangs are executable.

View file

@ -0,0 +1,49 @@
from __future__ import annotations
import argparse
import string
from typing import Sequence
# Characters permitted in a path when no --allowlist override is given:
# ASCII letters, digits, and '-', '_', '.', '/'.
DEFAULT_ALLOWLIST = string.ascii_letters + string.digits + '-_./'


def main(argv: Sequence[str] | None = None) -> int:
    """Check that every given pathname uses only allowlisted characters.

    The whole path string is checked, not just the final component, so a
    directory name containing a disallowed character is reported as well.

    Args:
        argv: Command-line arguments (options followed by pathnames). When
            ``None``, argparse falls back to ``sys.argv[1:]``.

    Returns:
        ``0`` if every pathname passes, ``1`` if at least one pathname
        contains a character outside the allowlist. One message is printed
        per offending pathname.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*')
    parser.add_argument(
        '--allowlist',
        default=DEFAULT_ALLOWLIST,
        help=(
            'Override the default allowlist of permitted characters. The'
            ' default is %(default)s'
        ),
    )
    parser.add_argument(
        '--extra-allowlist',
        default='',
        help='Extend the default allowlist with these characters.',
    )
    args = parser.parse_args(argv)

    # The effective allowlist is the (possibly overridden) base list plus
    # any extra characters; it never changes while the files are scanned.
    allowlist = frozenset(args.allowlist + args.extra_allowlist)
    # Sorted and stringified once for readability in every error message.
    pretty_allowlist = ''.join(sorted(allowlist))

    retcode = 0
    for filename in args.filenames:
        # Characters of the full path that are not permitted.
        disallowed = set(filename) - allowlist
        if not disallowed:
            continue
        # repr() keeps control characters and whitespace visible.
        pretty_banlist = repr(''.join(sorted(disallowed)))
        print(
            f'"{filename}" contains characters not in the allowlist:'
            f' "{pretty_banlist}". Allowlist is: "{pretty_allowlist}".',
        )
        retcode = 1
    return retcode
# Script entry point: run the check and exit with its status code.
if __name__ == '__main__':
    exit_status = main()
    raise SystemExit(exit_status)

View file

@ -43,6 +43,7 @@ console_scripts =
check-executables-have-shebangs = pre_commit_hooks.check_executables_have_shebangs:main
check-json = pre_commit_hooks.check_json:main
check-merge-conflict = pre_commit_hooks.check_merge_conflict:main
check-permitted-path-characters = pre_commit_hooks.check_permitted_path_characters:main
check-shebang-scripts-are-executable = pre_commit_hooks.check_shebang_scripts_are_executable:main
check-symlinks = pre_commit_hooks.check_symlinks:main
check-toml = pre_commit_hooks.check_toml:main

View file

@ -0,0 +1,50 @@
from __future__ import annotations
from pre_commit_hooks.check_permitted_path_characters import main
def test_main_all_pass():
    """Absolute, relative and bare pathnames of allowed characters pass."""
    clean_paths = [
        '/some/path/foo_test.py',
        './relative/path/bar_test.py',
        'filename-only.py',
    ]
    assert main(clean_paths) == 0
def test_main_default_chars():
    """Default letters, digits and '-._' pass; a lone '+' is rejected."""
    # The leading '--' ends option parsing, so a pathname that starts with
    # '-' is treated as a filename instead of a flag.
    default_char_argv = [
        '--',
        '-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_',
        'abcdefghijklmnopqrstuvwxyz',
    ]
    assert main(default_char_argv) == 0

    plus_only_argv = ['+']
    assert main(plus_only_argv) == 1
def test_main_invalid_dir():
    """Disallowed characters in a directory component fail the check."""
    argv = ['--', '/some+funky%%dir/pathname']
    assert main(argv) == 1
def test_main_allowlist():
    """--allowlist replaces the default set of permitted characters."""
    # '.py' and most letters are not in the overriding allowlist.
    rejected = main(['--allowlist', 'abc', 'invalid.py'])
    assert rejected == 1

    # Only characters from the overriding allowlist are used.
    accepted = main(['--allowlist', 'abc', 'cba'])
    assert accepted == 0

    # A pathological case: control characters are allowed when listed.
    control_chars = main(['--allowlist', '\b\x01\t/.', '\b.\x01/\t'])
    assert control_chars == 0
def test_main_extra_allowlist():
    """--extra-allowlist permits '+' on top of the default characters."""
    argv = ['--extra-allowlist', '+', 'valid+']
    assert main(argv) == 0