mirror of
https://github.com/pre-commit/pre-commit-hooks.git
synced 2026-04-05 19:46:54 +00:00
Add check for unicode replacement characters
This commit is contained in:
parent
a18c5af5d4
commit
9e417077fc
5 changed files with 55 additions and 0 deletions
32
pre_commit_hooks/check_unicode_replacement_char.py
Normal file
32
pre_commit_hooks/check_unicode_replacement_char.py
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
import argparse
|
||||
import fileinput
|
||||
from typing import Optional
|
||||
from typing import Sequence
|
||||
|
||||
|
||||
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Report lines containing a UTF-8 encoded U+FFFD replacement character.

    Each file is scanned line by line as raw bytes. For every line that
    contains the byte sequence ``EF BF BD``, one message naming the file,
    the line number within that file, and the byte offset of the first
    occurrence on that line is printed to stdout.

    Args:
        argv: Command-line arguments (filenames to check). ``None`` lets
            argparse read ``sys.argv[1:]``.

    Returns:
        1 if any checked line contains the replacement character, else 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to check')
    args = parser.parse_args(argv)

    # fileinput falls back to reading stdin when handed an empty file list,
    # which would make the hook wait on input; nothing to check is a pass.
    if not args.filenames:
        return 0

    retv = 0

    # The context manager closes the current file even if reading raises.
    with fileinput.input(files=args.filenames, mode='rb') as lines:
        for line in lines:
            col = line.find(b'\xEF\xBF\xBD')
            if col == -1:
                continue
            retv = 1
            # Not saying filename:line:col: because that kind of format is
            # usually used for character offsets, and we have a byte offset
            # which might be different, emphasize that.
            # filelineno() is relative to the current file; lineno() would
            # keep counting across all files and misreport later ones.
            print(
                f'{lines.filename()}:{lines.filelineno()}: '
                f'UTF-8 Unicode replacement character at byte {col}'
            )

    return retv
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The bare exit() helper is injected by the site module and is missing
    # under `python -S` or in frozen builds; SystemExit is always available
    # and carries main()'s return value as the process exit status.
    raise SystemExit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue