mirror of
https://github.com/pre-commit/pre-commit-hooks.git
synced 2026-04-05 11:36:54 +00:00
Add check for unicode replacement characters
This commit is contained in:
parent
a18c5af5d4
commit
9e417077fc
5 changed files with 55 additions and 0 deletions
|
|
@ -22,6 +22,12 @@
|
|||
entry: check-byte-order-marker
|
||||
language: python
|
||||
types: [text]
|
||||
- id: check-unicode-replacement-char
|
||||
name: Check for Unicode replacement character
|
||||
description: Forbid files which have a UTF-8 Unicode replacement character
|
||||
entry: check-unicode-replacement-char
|
||||
language: python
|
||||
types: [text]
|
||||
- id: check-builtin-literals
|
||||
name: Check builtin type constructor use
|
||||
description: Require literal syntax when initializing empty or zero Python builtin types.
|
||||
|
|
|
|||
|
|
@ -62,6 +62,9 @@ Checks for symlinks which do not point to anything.
|
|||
#### `check-toml`
|
||||
Attempts to load all TOML files to verify syntax.
|
||||
|
||||
#### `check-unicode-replacement-char`
|
||||
Forbids files which contain the Unicode replacement character (U+FFFD) encoded as UTF-8.
|
||||
|
||||
#### `check-vcs-permalinks`
|
||||
Ensures that links to vcs websites are permalinks.
|
||||
|
||||
|
|
|
|||
32
pre_commit_hooks/check_unicode_replacement_char.py
Normal file
32
pre_commit_hooks/check_unicode_replacement_char.py
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
import argparse
|
||||
import fileinput
|
||||
from typing import Optional
|
||||
from typing import Sequence
|
||||
|
||||
|
||||
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Report lines containing a UTF-8 encoded Unicode replacement character.

    Every file is read line by line in binary mode. For each line holding
    the byte sequence ``EF BF BD`` (U+FFFD encoded as UTF-8), the first
    occurrence on that line is printed together with the file name and the
    line number within that file.

    Args:
        argv: Command-line arguments (the filenames to check). ``None``
            lets argparse fall back to ``sys.argv[1:]``.

    Returns:
        1 if any checked file contains the sequence, otherwise 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to check')
    args = parser.parse_args(argv)

    # fileinput silently falls back to reading stdin when it is given an
    # empty file list; with no filenames there is simply nothing to check.
    if not args.filenames:
        return 0

    replacement_char = b'\xEF\xBF\xBD'
    retv = 0

    # A private FileInput instance avoids the module-level fileinput state
    # and the context manager closes the current file even on errors.
    with fileinput.FileInput(files=args.filenames, mode='rb') as lines:
        for line in lines:
            col = line.find(replacement_char)
            if col == -1:
                continue
            retv = 1
            # Not saying filename:line:col: because that kind of format is
            # usually used for character offsets, and we have a byte offset
            # which might be different, emphasize that.
            # filelineno() is relative to the current file, whereas lineno()
            # keeps counting across all files processed so far.
            print(
                f'{lines.filename()}:{lines.filelineno()}: '
                f'UTF-8 Unicode replacement character at byte {col}',
            )

    return retv
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Raise SystemExit directly: the exit() helper is injected by the site
    # module and is not guaranteed to exist (e.g. under ``python -S``).
    raise SystemExit(main())
|
||||
|
|
@ -34,6 +34,7 @@ console_scripts =
|
|||
check-ast = pre_commit_hooks.check_ast:main
|
||||
check-builtin-literals = pre_commit_hooks.check_builtin_literals:main
|
||||
check-byte-order-marker = pre_commit_hooks.check_byte_order_marker:main
|
||||
check-unicode-replacement-char = pre_commit_hooks.check_unicode_replacement_char:main
|
||||
check-case-conflict = pre_commit_hooks.check_case_conflict:main
|
||||
check-docstring-first = pre_commit_hooks.check_docstring_first:main
|
||||
check-executables-have-shebangs = pre_commit_hooks.check_executables_have_shebangs:main
|
||||
|
|
|
|||
13
tests/check_unicode_replacement_char_test.py
Normal file
13
tests/check_unicode_replacement_char_test.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
from pre_commit_hooks import check_unicode_replacement_char
|
||||
|
||||
|
||||
def test_failure(tmpdir):
    """The hook returns 1 for a file that contains U+FFFD."""
    # Decoding the invalid byte 0x80 with errors='replace' produces U+FFFD.
    contents = str(b'\x80abc', errors='replace')
    path = tmpdir.join('f.txt')
    path.write_text(contents, encoding='utf-8')
    retv = check_unicode_replacement_char.main((path.strpath,))
    assert retv == 1
|
||||
|
||||
|
||||
def test_success(tmpdir):
    """The hook returns 0 for a file without U+FFFD."""
    # errors='backslashreplace' turns the invalid byte into an escape
    # sequence, so no replacement character ends up in the file.
    contents = str(b'\x80abc', errors='backslashreplace')
    path = tmpdir.join('f.txt')
    path.write_text(contents, encoding='utf-8')
    retv = check_unicode_replacement_char.main((path.strpath,))
    assert retv == 0
|
||||
Loading…
Add table
Add a link
Reference in a new issue