mirror of
https://github.com/pre-commit/pre-commit-hooks.git
synced 2026-04-05 11:36:54 +00:00
Add check for unicode replacement characters
This commit is contained in:
parent
a18c5af5d4
commit
9e417077fc
5 changed files with 55 additions and 0 deletions
|
|
@ -22,6 +22,12 @@
|
||||||
entry: check-byte-order-marker
|
entry: check-byte-order-marker
|
||||||
language: python
|
language: python
|
||||||
types: [text]
|
types: [text]
|
||||||
|
- id: check-unicode-replacement-char
|
||||||
|
name: Check for Unicode replacement character
|
||||||
|
description: Forbid files which have a UTF-8 Unicode replacement character
|
||||||
|
entry: check-unicode-replacement-char
|
||||||
|
language: python
|
||||||
|
types: [text]
|
||||||
- id: check-builtin-literals
|
- id: check-builtin-literals
|
||||||
name: Check builtin type constructor use
|
name: Check builtin type constructor use
|
||||||
description: Require literal syntax when initializing empty or zero Python builtin types.
|
description: Require literal syntax when initializing empty or zero Python builtin types.
|
||||||
|
|
|
||||||
|
|
@ -62,6 +62,9 @@ Checks for symlinks which do not point to anything.
|
||||||
#### `check-toml`
|
#### `check-toml`
|
||||||
Attempts to load all TOML files to verify syntax.
|
Attempts to load all TOML files to verify syntax.
|
||||||
|
|
||||||
|
#### `check-unicode-replacement-char`
|
||||||
|
Forbid files which contain the Unicode replacement character (U+FFFD) encoded as UTF-8 (bytes `EF BF BD`).
|
||||||
|
|
||||||
#### `check-vcs-permalinks`
|
#### `check-vcs-permalinks`
|
||||||
Ensures that links to vcs websites are permalinks.
|
Ensures that links to vcs websites are permalinks.
|
||||||
|
|
||||||
|
|
|
||||||
32
pre_commit_hooks/check_unicode_replacement_char.py
Normal file
32
pre_commit_hooks/check_unicode_replacement_char.py
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
import argparse
|
||||||
|
import fileinput
|
||||||
|
from typing import Optional
|
||||||
|
from typing import Sequence
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Report lines containing the UTF-8 encoded Unicode replacement character.

    Each file is read in binary mode and scanned line by line for the byte
    sequence ``EF BF BD`` (U+FFFD encoded as UTF-8).  For every line that
    contains it, one message is printed giving the file name, the line
    number within that file, and the 0-based byte offset of the first
    occurrence on the line.

    Args:
        argv: Command-line arguments (file names).  ``None`` lets argparse
            fall back to ``sys.argv[1:]``.

    Returns:
        1 if any file contains the replacement character, otherwise 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to check')
    args = parser.parse_args(argv)

    # fileinput treats an empty file list as "read stdin"; with nothing to
    # check there is nothing to report, so never block on stdin.
    if not args.filenames:
        return 0

    retv = 0

    # The context manager closes the stream (and resets fileinput's global
    # state) even if a file cannot be opened part-way through.
    with fileinput.input(files=args.filenames, mode='rb') as lines:
        for line in lines:
            col = line.find(b'\xEF\xBF\xBD')
            if col == -1:
                continue
            retv = 1
            # Deliberately not using the filename:line:col: format: that
            # usually implies a character offset, whereas this is a byte
            # offset which may differ, so the message says "byte".
            # filelineno() is the line number within the current file;
            # lineno() would be cumulative across all files.
            print(
                f'{lines.filename()}:{lines.filelineno()}: '
                f'UTF-8 Unicode replacement character at byte {col}',
            )

    return retv
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Raise SystemExit directly: the ``exit`` builtin is injected by the
    # ``site`` module for interactive use and is not guaranteed to exist
    # (e.g. under ``python -S``).
    raise SystemExit(main())
|
||||||
|
|
@ -34,6 +34,7 @@ console_scripts =
|
||||||
check-ast = pre_commit_hooks.check_ast:main
|
check-ast = pre_commit_hooks.check_ast:main
|
||||||
check-builtin-literals = pre_commit_hooks.check_builtin_literals:main
|
check-builtin-literals = pre_commit_hooks.check_builtin_literals:main
|
||||||
check-byte-order-marker = pre_commit_hooks.check_byte_order_marker:main
|
check-byte-order-marker = pre_commit_hooks.check_byte_order_marker:main
|
||||||
|
check-unicode-replacement-char = pre_commit_hooks.check_unicode_replacement_char:main
|
||||||
check-case-conflict = pre_commit_hooks.check_case_conflict:main
|
check-case-conflict = pre_commit_hooks.check_case_conflict:main
|
||||||
check-docstring-first = pre_commit_hooks.check_docstring_first:main
|
check-docstring-first = pre_commit_hooks.check_docstring_first:main
|
||||||
check-executables-have-shebangs = pre_commit_hooks.check_executables_have_shebangs:main
|
check-executables-have-shebangs = pre_commit_hooks.check_executables_have_shebangs:main
|
||||||
|
|
|
||||||
13
tests/check_unicode_replacement_char_test.py
Normal file
13
tests/check_unicode_replacement_char_test.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
from pre_commit_hooks import check_unicode_replacement_char
|
||||||
|
|
||||||
|
|
||||||
|
def test_failure(tmpdir):
    """A file containing U+FFFD makes the hook return 1."""
    path = tmpdir.join('f.txt')
    # Decoding the invalid byte 0x80 with errors='replace' yields U+FFFD.
    contents = b'\x80abc'.decode(errors='replace')
    path.write_text(contents, encoding='utf-8')
    result = check_unicode_replacement_char.main((path.strpath,))
    assert result == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_success(tmpdir):
    """A file without U+FFFD makes the hook return 0."""
    path = tmpdir.join('f.txt')
    # errors='backslashreplace' turns the invalid byte into the literal
    # text "\x80", so no replacement character is written.
    contents = b'\x80abc'.decode(errors='backslashreplace')
    path.write_text(contents, encoding='utf-8')
    result = check_unicode_replacement_char.main((path.strpath,))
    assert result == 0
|
||||||
Loading…
Add table
Add a link
Reference in a new issue