pre-commit-hooks/pre_commit_hooks/end_of_file_fixer.py
Edoardo Bezzeccheri 203735ecf7 Edge case with mixed endings
Added a case for files that have mixed line endings.
In this case, default to using LF
for the end-of-file line.
2025-05-07 09:00:23 +00:00

82 lines
2.4 KiB
Python

from __future__ import annotations
import argparse
import os
from collections.abc import Sequence
from typing import IO
# Byte sequences treated as line terminators by this hook.
LF = b'\n'
CR = b'\r'
CRLF = b'\r\n'


def _pick_line_ending(file_obj: IO[bytes]) -> bytes:
    """Return the line ending to append to a file lacking a final newline.

    CRLF is returned only when the file already contains line endings and
    every one of them is CRLF.  Files with no line endings at all, or with
    any bare LF or bare CR (i.e. mixed endings), fall back to LF.

    The whole file is read into memory to count the terminators, and the
    file position is left at the end of the file.
    """
    file_obj.seek(0, os.SEEK_SET)
    content = file_obj.read()
    crlf_count = content.count(CRLF)
    lf_count = content.count(LF)
    cr_count = content.count(CR)
    # Each CRLF contributes exactly one CR and one LF, so the three counts
    # agree only when the file has no bare LF and no bare CR anywhere.
    if crlf_count > 0 and crlf_count == lf_count == cr_count:
        return CRLF
    return LF


def fix_file(file_obj: IO[bytes]) -> int:
    """Make the file end with exactly one line ending, or be empty.

    Args:
        file_obj: A seekable binary stream opened for reading and writing.

    Returns:
        1 if the file was modified (a line ending was appended, surplus
        trailing line endings were trimmed, or a file consisting solely of
        line breaks was emptied); 0 if it was left untouched.
    """
    # Test for newline at end of file
    # Empty files will throw IOError here
    try:
        file_obj.seek(-1, os.SEEK_END)
    except OSError:
        return 0

    last_character = file_obj.read(1)
    # last_character will be b'' for an empty file whose seek did not raise
    if last_character not in {LF, CR} and last_character != b'':
        # Match the file's own convention: CRLF only if it is used
        # consistently, LF otherwise (including mixed endings).
        ending = _pick_line_ending(file_obj)
        # Needs this seek for windows, otherwise IOError
        file_obj.seek(0, os.SEEK_END)
        file_obj.write(ending)
        return 1

    # Walk backwards over the trailing run of line-break bytes.
    while last_character in {LF, CR}:
        # Deal with the beginning of the file
        if file_obj.tell() == 1:
            # If we've reached the beginning of the file and it is all
            # linebreaks then we can make this file empty
            file_obj.seek(0)
            file_obj.truncate()
            return 1

        # Go back two bytes and read a character
        file_obj.seek(-2, os.SEEK_CUR)
        last_character = file_obj.read(1)

    # Our current position is at the end of the file just before any amount of
    # newlines.  If we find extraneous newlines, then backtrack and trim them.
    position = file_obj.tell()
    remaining = file_obj.read()
    for sequence in (LF, CRLF, CR):
        if remaining == sequence:
            # Exactly one terminator already: nothing to do.
            return 0
        elif remaining.startswith(sequence):
            # Keep the first terminator and drop everything after it.
            file_obj.seek(position + len(sequence))
            file_obj.truncate()
            return 1

    return 0
def main(argv: Sequence[str] | None = None) -> int:
parser = argparse.ArgumentParser()
parser.add_argument('filenames', nargs='*', help='Filenames to fix')
args = parser.parse_args(argv)
retv = 0
for filename in args.filenames:
# Read as binary so we can read byte-by-byte
with open(filename, 'rb+') as file_obj:
ret_for_file = fix_file(file_obj)
if ret_for_file:
print(f'Fixing {filename}')
retv |= ret_for_file
return retv
if __name__ == '__main__':
    # Script entry point: run the hook and hand main()'s return value to the
    # interpreter as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)