mirror of
https://github.com/pre-commit/pre-commit-hooks.git
synced 2026-04-04 19:26:52 +00:00
Added handling for files with mixed line endings: in that case, default to LF for the end-of-file newline.
82 lines
2.4 KiB
Python
82 lines
2.4 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
import os
|
|
from collections.abc import Sequence
|
|
from typing import IO
|
|
|
|
# Byte sequences treated as line endings by the fixer.
CR = b'\r'
LF = b'\n'
CRLF = CR + LF
|
|
|
|
|
|
def fix_file(file_obj: IO[bytes]) -> int:
    """Make the file end with exactly one line ending.

    A file lacking a final line ending gets one appended: CRLF when every
    LF in the file belongs to a CRLF pair, LF otherwise (including files
    with mixed endings).  Surplus trailing line endings are trimmed down to
    the first one, and a file made up solely of line breaks is emptied.

    Args:
        file_obj: Seekable binary file object open for reading and writing.

    Returns:
        1 if the file was modified, 0 if it was left untouched.
    """
    line_break_bytes = {LF, CR}

    # Seeking to one byte before the end raises OSError on an empty file.
    try:
        file_obj.seek(-1, os.SEEK_END)
    except OSError:
        return 0

    tail_byte = file_obj.read(1)

    # An empty read (b'') means there is nothing to terminate.
    if tail_byte != b'' and tail_byte not in line_break_bytes:
        # No final line ending: inspect the whole content to pick one.
        file_obj.seek(0, os.SEEK_SET)
        data = file_obj.read()
        crlf_total = data.count(CRLF)
        # The counts match only when every LF is part of a CRLF pair.
        if crlf_total > 0 and crlf_total == data.count(LF):
            terminator = CRLF
        else:
            terminator = LF
        # Windows needs this explicit seek before writing, otherwise the
        # write fails.
        file_obj.seek(0, os.SEEK_END)
        file_obj.write(terminator)
        return 1

    # Walk backwards over the trailing run of line-break bytes.
    while tail_byte in line_break_bytes:
        if file_obj.tell() == 1:
            # The byte just read was the first byte of the file, so the
            # file is nothing but line breaks: empty it.
            file_obj.seek(0)
            file_obj.truncate()
            return 1

        # Step back over the byte just read plus one more, then re-read.
        file_obj.seek(-2, os.SEEK_CUR)
        tail_byte = file_obj.read(1)

    # The cursor now sits right after the last non-line-break byte; whatever
    # follows is the trailing run of line endings.
    content_end = file_obj.tell()
    trailing = file_obj.read()

    # First line ending (tried in the order LF, CRLF, CR) that opens the run.
    leading_break = next(
        (seq for seq in (LF, CRLF, CR) if trailing.startswith(seq)),
        None,
    )
    if leading_break is None or trailing == leading_break:
        # Either no trailing run at all, or exactly one line ending.
        return 0

    # Keep the first line ending and drop everything after it.
    file_obj.seek(content_end + len(leading_break))
    file_obj.truncate()
    return 1
|
|
|
|
|
|
def main(argv: Sequence[str] | None = None) -> int:
    """Run the end-of-file fixer over the files named on the command line.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use
            ``sys.argv``.

    Returns:
        The bitwise OR of the per-file ``fix_file`` results, so the value is
        non-zero when at least one file was modified.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    options = arg_parser.parse_args(argv)

    exit_status = 0
    for path in options.filenames:
        # Binary read/write mode so the fixer can work byte-by-byte.
        with open(path, 'rb+') as handle:
            changed = fix_file(handle)
            if changed:
                print(f'Fixing {path}')
            exit_status |= changed

    return exit_status
|
|
|
|
|
|
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|