From 336f2ed6d1549422ce735ffdb027d09b91e16740 Mon Sep 17 00:00:00 2001
From: Bernhard Walle
Date: Wed, 19 Apr 2023 11:29:31 +0200
Subject: [PATCH] end_of_file_fixer: detect line ending

Instead of writing \n we try to detect the line ending used in the file.
We use the first line ending sequence we find.
---
 pre_commit_hooks/end_of_file_fixer.py | 26 ++++++++++++++++++++++++--
 tests/end_of_file_fixer_test.py       |  5 +++++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/pre_commit_hooks/end_of_file_fixer.py b/pre_commit_hooks/end_of_file_fixer.py
index a30dce9..aca4226 100644
--- a/pre_commit_hooks/end_of_file_fixer.py
+++ b/pre_commit_hooks/end_of_file_fixer.py
@@ -6,6 +6,27 @@ from typing import IO
 from typing import Sequence
 
 
+def detect_eol_equence(file_obj: IO[bytes]) -> bytes:
+    """Return the first line-ending sequence found in ``file_obj``.
+
+    Reads one byte at a time from the current position.  CRLF, lone CR
+    and LF are recognized; LF is returned when the stream contains no
+    line ending at all.
+    """
+    # readline() doesn't work because it doesn't get \r right
+    previous = b''
+    while True:
+        current = file_obj.read(1)
+        if previous == b'\r':
+            # A CR is only a complete ending once we know whether an LF
+            # follows it (also covers a CR that is the very last byte).
+            return b'\r\n' if current == b'\n' else b'\r'
+        if current in (b'', b'\n'):
+            # Either a plain LF, or EOF without any ending: default LF.
+            return b'\n'
+        previous = current
+
+
 def fix_file(file_obj: IO[bytes]) -> int:
     # Test for newline at end of file
     # Empty files will throw IOError here
@@ -16,9 +37,10 @@ def fix_file(file_obj: IO[bytes]) -> int:
     last_character = file_obj.read(1)
     # last_character will be '' for an empty file
     if last_character not in {b'\n', b'\r'} and last_character != b'':
-        # Needs this seek for windows, otherwise IOError
+        file_obj.seek(0, os.SEEK_SET)
+        eol_seq = detect_eol_equence(file_obj)
         file_obj.seek(0, os.SEEK_END)
-        file_obj.write(b'\n')
+        file_obj.write(eol_seq)
         return 1
 
     while last_character in {b'\n', b'\r'}:
diff --git a/tests/end_of_file_fixer_test.py b/tests/end_of_file_fixer_test.py
index 8a5d889..8896dd3 100644
--- a/tests/end_of_file_fixer_test.py
+++ b/tests/end_of_file_fixer_test.py
@@ -21,6 +21,11 @@ TESTS = (
     (b'foo\r\n\r\n\r\n', 1, b'foo\r\n'),
     (b'foo\r', 0, b'foo\r'),
     (b'foo\r\r\r\r', 1, b'foo\r'),
+    (b'foo\r\nbar', 1, b'foo\r\nbar\r\n'),
+    (b'foo\nbar', 1, b'foo\nbar\n'),
+    (b'foo\rbar', 1, b'foo\rbar\r'),
+    (b'foo\n\nbar', 1, b'foo\n\nbar\n'),
+    (b'foo\r\n\r\nbar', 1, b'foo\r\n\r\nbar\r\n'),
 )
 
 