Don't convert Windows-style \r\n to \n in file-contents-sorter

Instead preserve the original line endings.

With the `--unique` flag, lines that are identical except for their
line endings are collapsed to the first entry.
This commit is contained in:
Mészáros Gergely 2023-09-05 21:10:59 +00:00
parent 8ef58bed01
commit 7400b45161
2 changed files with 79 additions and 7 deletions

View file

@ -22,6 +22,55 @@ PASS = 0
FAIL = 1
class Line:
    """Wrapper to ignore end-of-line characters for sorting and comparison.

    The raw bytes (including the line ending) are kept so they can be
    written back via ``unwrap``; equality, hashing and the sort key only
    look at the content with trailing CR/LF bytes removed.
    """

    def __init__(self, value: bytes, eol: bytes):
        """Store *value*, appending *eol* when it has no trailing newline.

        Args:
            value: one raw line, normally still carrying its line ending.
            eol: line ending to append when *value* does not end in LF.
        """
        # Add an EOL if none present (can only happen to the last line)
        if not value.endswith(b'\n'):
            # A stray trailing CR is already treated as line-ending bytes by
            # `without_eol`; drop it so we never emit a doubled CR before
            # the appended ending.
            value = value.rstrip(b'\r') + eol
        self._value = value

    def __repr__(self) -> str:
        return f'{type(self).__name__}({self._value!r})'

    def without_eol(self) -> bytes:
        """Return the line with all trailing CR/LF bytes stripped."""
        return self._value.rstrip(b'\n\r')

    def unwrap(self) -> bytes:
        """Return the stored bytes, line ending included."""
        return self._value

    @classmethod
    def key(
            cls,
            key: Callable[[bytes], Any] | None = None,
    ) -> Callable[[Line], Any]:
        """Build a sort key function that operates on ``Line`` objects.

        Args:
            key: optional key function taking the line content (without
                its line ending).  When omitted, the content itself is
                used as the sort key.

        Returns:
            A callable suitable for ``sorted(..., key=...)`` over ``Line``
            instances.
        """
        if key is None:
            return cls.without_eol

        def eol_key(val: Line) -> Any:
            return key(val.without_eol())

        return eol_key

    def __eq__(self, o: object) -> bool:
        if not isinstance(o, Line):
            return NotImplemented
        return self.without_eol() == o.without_eol()

    def __hash__(self) -> int:
        # Must agree with __eq__: lines differing only in EOL hash alike.
        return hash(self.without_eol())
def guess_eol(lines: list[bytes]) -> bytes:
    """Pick the line ending to use, based on the first line only.

    Returns CRLF when the first line ends with it.  An empty input, a
    first line ending in a bare LF, or a first (only) line without any
    line ending all yield LF.
    """
    first_is_crlf = bool(lines) and lines[0].endswith(b'\r\n')
    return b'\r\n' if first_is_crlf else b'\n'
def sort_file_contents(
f: IO[bytes],
key: Callable[[bytes], Any] | None,
@ -29,18 +78,16 @@ def sort_file_contents(
unique: bool = False,
) -> int:
before = list(f)
lines: Iterable[bytes] = (
line.rstrip(b'\n\r') for line in before if line.strip()
eol = guess_eol(before)
lines: Iterable[Line] = (
Line(line, eol) for line in before if line.strip()
)
if unique:
lines = set(lines)
after = sorted(lines, key=key)
after = sorted(lines, key=Line.key(key))
before_string = b''.join(before)
after_string = b'\n'.join(after)
if after_string:
after_string += b'\n'
after_string = b''.join(line.unwrap() for line in after)
if before_string == after_string:
return PASS

View file

@ -17,6 +17,7 @@ from pre_commit_hooks.file_contents_sorter import PASS
(b'missing_newline', [], FAIL, b'missing_newline\n'),
(b'newline\nmissing', [], FAIL, b'missing\nnewline\n'),
(b'missing\nnewline', [], FAIL, b'missing\nnewline\n'),
(b'missing\r\nnewline', [], FAIL, b'missing\r\nnewline\r\n'),
(b'alpha\nbeta\n', [], PASS, b'alpha\nbeta\n'),
(b'beta\nalpha\n', [], FAIL, b'alpha\nbeta\n'),
(b'C\nc\n', [], PASS, b'C\nc\n'),
@ -67,6 +68,12 @@ from pre_commit_hooks.file_contents_sorter import PASS
FAIL,
b'Fie\nFoe\nfee\nfum\n',
),
(
b'Fie\r\nFie\nFoe\nfee\nfee\r\nfum\n',
['--unique'],
FAIL,
b'Fie\r\nFoe\nfee\nfum\n',
),
(
b'fee\nFie\nFoe\nfum\n',
['--unique', '--ignore-case'],
@ -79,6 +86,24 @@ from pre_commit_hooks.file_contents_sorter import PASS
FAIL,
b'fee\nFie\nFoe\nfum\n',
),
(
b'linefeed\r\ncarriage_return\r\n',
[],
FAIL,
b'carriage_return\r\nlinefeed\r\n',
),
(
b'carriage_return\r\nlinefeed\r\n',
[],
PASS,
b'carriage_return\r\nlinefeed\r\n',
),
(
b'a\na\r\na\r\na\na\r\na\n',
[],
PASS,
b'a\na\r\na\r\na\na\r\na\n',
),
),
)
def test_integration(input_s, argv, expected_retval, output, tmpdir):