mirror of
https://github.com/pre-commit/pre-commit-hooks.git
synced 2026-03-29 18:16:52 +00:00
Simplify mixed-line-ending hook
This commit is contained in:
parent
47c4d9ebed
commit
fbcd096ea9
3 changed files with 134 additions and 315 deletions
|
|
@ -1,212 +1,83 @@
|
|||
from __future__ import absolute_import
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
|
||||
from enum import Enum
|
||||
import collections
|
||||
|
||||
|
||||
class LineEnding(Enum):
|
||||
CR = b'\r', 'cr', re.compile(b'\r(?!\n)', re.DOTALL)
|
||||
CRLF = b'\r\n', 'crlf', re.compile(b'\r\n', re.DOTALL)
|
||||
LF = b'\n', 'lf', re.compile(b'(?<!\r)\n', re.DOTALL)
|
||||
|
||||
def __init__(self, string, opt_name, regex):
|
||||
self.string = string
|
||||
self.str_print = repr(string)
|
||||
self.opt_name = opt_name
|
||||
self.regex = regex
|
||||
CRLF = b'\r\n'
|
||||
LF = b'\n'
|
||||
CR = b'\r'
|
||||
# Prefer LF to CRLF to CR, but detect CRLF before LF
|
||||
ALL_ENDINGS = (CR, CRLF, LF)
|
||||
FIX_TO_LINE_ENDING = {'cr': CR, 'crlf': CRLF, 'lf': LF}
|
||||
|
||||
|
||||
class MixedLineEndingOption(Enum):
|
||||
AUTO = 'auto', None
|
||||
NO = 'no', None
|
||||
CRLF = LineEnding.CRLF.opt_name, LineEnding.CRLF
|
||||
LF = LineEnding.LF.opt_name, LineEnding.LF
|
||||
|
||||
def __init__(self, opt_name, line_ending_enum):
|
||||
self.opt_name = opt_name
|
||||
self.line_ending_enum = line_ending_enum
|
||||
def _fix(filename, contents, ending):
|
||||
new_contents = b''.join(
|
||||
line.rstrip(b'\r\n') + ending for line in contents.splitlines(True)
|
||||
)
|
||||
with open(filename, 'wb') as f:
|
||||
f.write(new_contents)
|
||||
|
||||
|
||||
class MixedLineDetection(Enum):
|
||||
NOT_MIXED = 1, False, None
|
||||
UNKNOWN = 2, False, None
|
||||
MIXED_MOSTLY_CRLF = 3, True, LineEnding.CRLF
|
||||
MIXED_MOSTLY_LF = 4, True, LineEnding.LF
|
||||
MIXED_MOSTLY_CR = 5, True, LineEnding.CR
|
||||
def fix_filename(filename, fix):
|
||||
with open(filename, 'rb') as f:
|
||||
contents = f.read()
|
||||
|
||||
def __init__(self, index, mle_found, line_ending_enum):
|
||||
# TODO hack to prevent enum overriding
|
||||
self.index = index
|
||||
self.mle_found = mle_found
|
||||
self.line_ending_enum = line_ending_enum
|
||||
counts = collections.defaultdict(int)
|
||||
|
||||
for line in contents.splitlines(True):
|
||||
for ending in ALL_ENDINGS:
|
||||
if line.endswith(ending):
|
||||
counts[ending] += 1
|
||||
break
|
||||
|
||||
ANY_LINE_ENDING_PATTERN = re.compile(
|
||||
b'(' + LineEnding.CRLF.regex.pattern +
|
||||
b'|' + LineEnding.LF.regex.pattern +
|
||||
b'|' + LineEnding.CR.regex.pattern + b')',
|
||||
)
|
||||
# Some amount of mixed line endings
|
||||
mixed = sum(bool(x) for x in counts.values()) > 1
|
||||
|
||||
if fix == 'no' or (fix == 'auto' and not mixed):
|
||||
return mixed
|
||||
|
||||
def mixed_line_ending(argv=None):
|
||||
options = _parse_arguments(argv)
|
||||
if fix == 'auto':
|
||||
max_ending = LF
|
||||
max_lines = 0
|
||||
# ordering is important here such that lf > crlf > cr
|
||||
for ending_type in ALL_ENDINGS:
|
||||
# also important, using >= to find a max that prefers the last
|
||||
if counts[ending_type] >= max_lines:
|
||||
max_ending = ending_type
|
||||
max_lines = counts[ending_type]
|
||||
|
||||
filenames = options['filenames']
|
||||
fix_option = options['fix']
|
||||
|
||||
if fix_option == MixedLineEndingOption.NO:
|
||||
return _process_no_fix(filenames)
|
||||
elif fix_option == MixedLineEndingOption.AUTO:
|
||||
return _process_fix_auto(filenames)
|
||||
# when a line ending character is forced with --fix option
|
||||
_fix(filename, contents, max_ending)
|
||||
return 1
|
||||
else:
|
||||
return _process_fix_force(filenames, fix_option.line_ending_enum)
|
||||
target_ending = FIX_TO_LINE_ENDING[fix]
|
||||
# find if there are lines with *other* endings
|
||||
del counts[target_ending]
|
||||
other_endings = bool(sum(counts.values()))
|
||||
if other_endings:
|
||||
_fix(filename, contents, target_ending)
|
||||
return other_endings
|
||||
|
||||
|
||||
def _parse_arguments(argv=None):
|
||||
def main(argv=None):
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
'-f',
|
||||
'--fix',
|
||||
choices=[m.opt_name for m in MixedLineEndingOption],
|
||||
default=MixedLineEndingOption.AUTO.opt_name,
|
||||
'-f', '--fix',
|
||||
choices=('auto', 'no') + tuple(FIX_TO_LINE_ENDING),
|
||||
default='auto',
|
||||
help='Replace line ending with the specified. Default is "auto"',
|
||||
)
|
||||
parser.add_argument('filenames', nargs='*', help='Filenames to fix')
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
fix, = (
|
||||
member for name, member
|
||||
in MixedLineEndingOption.__members__.items()
|
||||
if member.opt_name == args.fix
|
||||
)
|
||||
|
||||
options = {
|
||||
'fix': fix, 'filenames': args.filenames,
|
||||
}
|
||||
|
||||
return options
|
||||
|
||||
|
||||
def _detect_line_ending(filename):
|
||||
with open(filename, 'rb') as f:
|
||||
buf = f.read()
|
||||
|
||||
le_counts = {}
|
||||
|
||||
for le_enum in LineEnding:
|
||||
le_counts[le_enum] = len(le_enum.regex.findall(buf))
|
||||
|
||||
mixed = False
|
||||
le_found_previously = False
|
||||
most_le = None
|
||||
max_le_count = 0
|
||||
|
||||
for le, le_count in le_counts.items():
|
||||
le_found_cur = le_count > 0
|
||||
|
||||
mixed |= le_found_previously and le_found_cur
|
||||
le_found_previously |= le_found_cur
|
||||
|
||||
if le_count == max_le_count:
|
||||
most_le = None
|
||||
elif le_count > max_le_count:
|
||||
max_le_count = le_count
|
||||
most_le = le
|
||||
|
||||
if not mixed:
|
||||
return MixedLineDetection.NOT_MIXED
|
||||
|
||||
for mld in MixedLineDetection:
|
||||
if (
|
||||
mld.line_ending_enum is not None and
|
||||
mld.line_ending_enum == most_le
|
||||
):
|
||||
return mld
|
||||
|
||||
return MixedLineDetection.UNKNOWN
|
||||
|
||||
|
||||
def _process_no_fix(filenames):
|
||||
print('Checking if the files have mixed line ending.')
|
||||
|
||||
mle_filenames = []
|
||||
for filename in filenames:
|
||||
detect_result = _detect_line_ending(filename)
|
||||
|
||||
if detect_result.mle_found:
|
||||
mle_filenames.append(filename)
|
||||
|
||||
mle_found = len(mle_filenames) > 0
|
||||
|
||||
if mle_found:
|
||||
print(
|
||||
'The following files have mixed line endings:\n\t%s',
|
||||
'\n\t'.join(mle_filenames),
|
||||
)
|
||||
|
||||
return 1 if mle_found else 0
|
||||
|
||||
|
||||
def _process_fix_auto(filenames):
|
||||
mle_found = False
|
||||
|
||||
for filename in filenames:
|
||||
detect_result = _detect_line_ending(filename)
|
||||
|
||||
if detect_result == MixedLineDetection.NOT_MIXED:
|
||||
print('The file %s has no mixed line ending', filename)
|
||||
elif detect_result == MixedLineDetection.UNKNOWN:
|
||||
print(
|
||||
'Could not define most frequent line ending in '
|
||||
'file %s. File skiped.', filename,
|
||||
)
|
||||
|
||||
mle_found = True
|
||||
else:
|
||||
le_enum = detect_result.line_ending_enum
|
||||
|
||||
print(
|
||||
'The file %s has mixed line ending with a '
|
||||
'majority of %s. Converting...', filename, le_enum.str_print,
|
||||
)
|
||||
|
||||
_convert_line_ending(filename, le_enum.string)
|
||||
mle_found = True
|
||||
|
||||
print(
|
||||
'The file %s has been converted to %s line ending.',
|
||||
filename, le_enum.str_print,
|
||||
)
|
||||
|
||||
return 1 if mle_found else 0
|
||||
|
||||
|
||||
def _process_fix_force(filenames, line_ending_enum):
|
||||
for filename in filenames:
|
||||
_convert_line_ending(filename, line_ending_enum.string)
|
||||
|
||||
print(
|
||||
'The file %s has been forced to %s line ending.',
|
||||
filename, line_ending_enum.str_print,
|
||||
)
|
||||
|
||||
return 1
|
||||
|
||||
|
||||
def _convert_line_ending(filename, line_ending):
|
||||
with open(filename, 'rb+') as f:
|
||||
bufin = f.read()
|
||||
|
||||
# convert line ending
|
||||
bufout = ANY_LINE_ENDING_PATTERN.sub(line_ending, bufin)
|
||||
|
||||
# write the result in the file replacing the existing content
|
||||
f.seek(0)
|
||||
f.write(bufout)
|
||||
f.truncate()
|
||||
retv = 0
|
||||
for filename in args.filenames:
|
||||
retv |= fix_filename(filename, args.fix)
|
||||
return retv
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(mixed_line_ending())
|
||||
exit(main())
|
||||
|
|
|
|||
1
setup.py
1
setup.py
|
|
@ -31,7 +31,6 @@ setup(
|
|||
'simplejson',
|
||||
'six',
|
||||
],
|
||||
extras_require={':python_version=="2.7"': ['enum34']},
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
'autopep8-wrapper = pre_commit_hooks.autopep8_wrapper:main',
|
||||
|
|
|
|||
|
|
@ -1,154 +1,103 @@
|
|||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import pytest
|
||||
|
||||
from pre_commit_hooks.mixed_line_ending import mixed_line_ending
|
||||
|
||||
# Input, expected return value, expected output
|
||||
TESTS_FIX_AUTO = (
|
||||
# only 'LF'
|
||||
(b'foo\nbar\nbaz\n', 0, b'foo\nbar\nbaz\n'),
|
||||
# only 'CRLF'
|
||||
(b'foo\r\nbar\r\nbaz\r\n', 0, b'foo\r\nbar\r\nbaz\r\n'),
|
||||
# only 'CR'
|
||||
(b'foo\rbar\rbaz\r', 0, b'foo\rbar\rbaz\r'),
|
||||
# mixed with majority of 'LF'
|
||||
(b'foo\r\nbar\nbaz\n', 1, b'foo\nbar\nbaz\n'),
|
||||
# mixed with majority of 'CRLF'
|
||||
(b'foo\r\nbar\nbaz\r\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
|
||||
# mixed with majority of 'CR'
|
||||
(b'foo\rbar\nbaz\r', 1, b'foo\rbar\rbaz\r'),
|
||||
# mixed with as much 'LF' as 'CRLF'
|
||||
(b'foo\r\nbar\nbaz', 1, b'foo\r\nbar\nbaz'),
|
||||
# mixed with as much 'LF' as 'CR'
|
||||
(b'foo\rbar\nbaz', 1, b'foo\rbar\nbaz'),
|
||||
# mixed with as much 'CRLF' as 'CR'
|
||||
(b'foo\r\nbar\nbaz', 1, b'foo\r\nbar\nbaz'),
|
||||
# mixed with as much 'CRLF' as 'LF' as 'CR'
|
||||
(b'foo\r\nbar\nbaz\r', 1, b'foo\r\nbar\nbaz\r'),
|
||||
)
|
||||
from pre_commit_hooks.mixed_line_ending import main
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
('input_s', 'expected_retval', 'output'),
|
||||
TESTS_FIX_AUTO,
|
||||
('input_s', 'output'),
|
||||
(
|
||||
# mixed with majority of 'LF'
|
||||
(b'foo\r\nbar\nbaz\n', b'foo\nbar\nbaz\n'),
|
||||
# mixed with majority of 'CRLF'
|
||||
(b'foo\r\nbar\nbaz\r\n', b'foo\r\nbar\r\nbaz\r\n'),
|
||||
# mixed with majority of 'CR'
|
||||
(b'foo\rbar\nbaz\r', b'foo\rbar\rbaz\r'),
|
||||
# mixed with as much 'LF' as 'CRLF'
|
||||
(b'foo\r\nbar\n', b'foo\nbar\n'),
|
||||
# mixed with as much 'LF' as 'CR'
|
||||
(b'foo\rbar\n', b'foo\nbar\n'),
|
||||
# mixed with as much 'CRLF' as 'CR'
|
||||
(b'foo\r\nbar\r', b'foo\r\nbar\r\n'),
|
||||
# mixed with as much 'CRLF' as 'LF' as 'CR'
|
||||
(b'foo\r\nbar\nbaz\r', b'foo\nbar\nbaz\n'),
|
||||
),
|
||||
)
|
||||
def test_mixed_line_ending_fix_auto(input_s, expected_retval, output, tmpdir):
|
||||
def test_mixed_line_ending_fixes_auto(input_s, output, tmpdir):
|
||||
path = tmpdir.join('file.txt')
|
||||
path.write_binary(input_s)
|
||||
ret = mixed_line_ending(('--fix=auto', path.strpath))
|
||||
ret = main((path.strpath,))
|
||||
|
||||
assert ret == expected_retval
|
||||
assert ret == 1
|
||||
assert path.read_binary() == output
|
||||
|
||||
|
||||
# Input, expected return value, expected output
|
||||
TESTS_NO_FIX = (
|
||||
# only 'LF'
|
||||
(b'foo\nbar\nbaz\n', 0, b'foo\nbar\nbaz\n'),
|
||||
# only 'CRLF'
|
||||
(b'foo\r\nbar\r\nbaz\r\n', 0, b'foo\r\nbar\r\nbaz\r\n'),
|
||||
# only 'CR'
|
||||
(b'foo\rbar\rbaz\r', 0, b'foo\rbar\rbaz\r'),
|
||||
# mixed with majority of 'LF'
|
||||
(b'foo\r\nbar\nbaz\n', 1, b'foo\r\nbar\nbaz\n'),
|
||||
# mixed with majority of 'CRLF'
|
||||
(b'foo\r\nbar\nbaz\r\n', 1, b'foo\r\nbar\nbaz\r\n'),
|
||||
# mixed with majority of 'CR'
|
||||
(b'foo\rbar\nbaz\r', 1, b'foo\rbar\nbaz\r'),
|
||||
# mixed with as much 'LF' as 'CR'
|
||||
(b'foo\rbar\nbaz', 0, b'foo\rbar\nbaz'),
|
||||
# mixed with as much 'CRLF' as 'CR'
|
||||
(b'foo\r\nbar\nbaz', 0, b'foo\r\nbar\nbaz'),
|
||||
# mixed with as much 'CRLF' as 'LF' as 'CR'
|
||||
(b'foo\r\nbar\nbaz\r', 0, b'foo\r\nbar\nbaz\r'),
|
||||
)
|
||||
def test_non_mixed_no_newline_end_of_file(tmpdir):
|
||||
path = tmpdir.join('f.txt')
|
||||
path.write_binary(b'foo\nbar\nbaz')
|
||||
assert not main((path.strpath,))
|
||||
# the hook *could* fix the end of the file, but leaves it alone
|
||||
# this is mostly to document the current behaviour
|
||||
assert path.read_binary() == b'foo\nbar\nbaz'
|
||||
|
||||
|
||||
def test_mixed_no_newline_end_of_file(tmpdir):
|
||||
path = tmpdir.join('f.txt')
|
||||
path.write_binary(b'foo\r\nbar\nbaz')
|
||||
assert main((path.strpath,))
|
||||
# the hook rewrites the end of the file, this is slightly inconsistent
|
||||
# with the non-mixed case but I think this is the better behaviour
|
||||
# this is mostly to document the current behaviour
|
||||
assert path.read_binary() == b'foo\nbar\nbaz\n'
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
('input_s', 'expected_retval', 'output'),
|
||||
TESTS_NO_FIX,
|
||||
('fix_option', 'input_s'),
|
||||
(
|
||||
# All --fix=auto with uniform line endings should be ok
|
||||
('--fix=auto', b'foo\r\nbar\r\nbaz\r\n'),
|
||||
('--fix=auto', b'foo\rbar\rbaz\r'),
|
||||
('--fix=auto', b'foo\nbar\nbaz\n'),
|
||||
# --fix=crlf with crlf endings
|
||||
('--fix=crlf', b'foo\r\nbar\r\nbaz\r\n'),
|
||||
# --fix=lf with lf endings
|
||||
('--fix=lf', b'foo\nbar\nbaz\n'),
|
||||
),
|
||||
)
|
||||
def test_detect_mixed_line_ending(input_s, expected_retval, output, tmpdir):
|
||||
path = tmpdir.join('file.txt')
|
||||
def test_line_endings_ok(fix_option, input_s, tmpdir):
|
||||
path = tmpdir.join('input.txt')
|
||||
path.write_binary(input_s)
|
||||
ret = mixed_line_ending(('--fix=no', path.strpath))
|
||||
ret = main((fix_option, path.strpath))
|
||||
|
||||
assert ret == expected_retval
|
||||
assert path.read_binary() == output
|
||||
assert ret == 0
|
||||
assert path.read_binary() == input_s
|
||||
|
||||
|
||||
# Input, expected return value, expected output
|
||||
TESTS_FIX_FORCE_LF = (
|
||||
# only 'LF'
|
||||
(b'foo\nbar\nbaz\n', 1, b'foo\nbar\nbaz\n'),
|
||||
# only 'CRLF'
|
||||
(b'foo\r\nbar\r\nbaz\r\n', 1, b'foo\nbar\nbaz\n'),
|
||||
# only 'CR'
|
||||
(b'foo\rbar\rbaz\r', 1, b'foo\nbar\nbaz\n'),
|
||||
# mixed with majority of 'LF'
|
||||
(b'foo\r\nbar\nbaz\n', 1, b'foo\nbar\nbaz\n'),
|
||||
# mixed with majority of 'CRLF'
|
||||
(b'foo\r\nbar\nbaz\r\n', 1, b'foo\nbar\nbaz\n'),
|
||||
# mixed with majority of 'CR'
|
||||
(b'foo\rbar\nbaz\r', 1, b'foo\nbar\nbaz\n'),
|
||||
# mixed with as much 'LF' as 'CR'
|
||||
(b'foo\rbar\nbaz', 1, b'foo\nbar\nbaz'),
|
||||
# mixed with as much 'CRLF' as 'CR'
|
||||
(b'foo\r\nbar\nbaz', 1, b'foo\nbar\nbaz'),
|
||||
# mixed with as much 'CRLF' as 'LF' as 'CR'
|
||||
(b'foo\r\nbar\nbaz\r', 1, b'foo\nbar\nbaz\n'),
|
||||
)
|
||||
def test_no_fix_does_not_modify(tmpdir):
|
||||
path = tmpdir.join('input.txt')
|
||||
contents = b'foo\r\nbar\rbaz\nwomp\n'
|
||||
path.write_binary(contents)
|
||||
ret = main(('--fix=no', path.strpath))
|
||||
|
||||
assert ret == 1
|
||||
assert path.read_binary() == contents
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
('input_s', 'expected_retval', 'output'),
|
||||
TESTS_FIX_FORCE_LF,
|
||||
)
|
||||
def test_mixed_line_ending_fix_force_lf(
|
||||
input_s, expected_retval, output,
|
||||
tmpdir,
|
||||
):
|
||||
path = tmpdir.join('file.txt')
|
||||
path.write_binary(input_s)
|
||||
ret = mixed_line_ending(('--fix=lf', path.strpath))
|
||||
def test_fix_lf(tmpdir):
|
||||
path = tmpdir.join('input.txt')
|
||||
path.write_binary(b'foo\r\nbar\rbaz\n')
|
||||
ret = main(('--fix=lf', path.strpath))
|
||||
|
||||
assert ret == expected_retval
|
||||
assert path.read_binary() == output
|
||||
assert ret == 1
|
||||
assert path.read_binary() == b'foo\nbar\nbaz\n'
|
||||
|
||||
|
||||
# Input, expected return value, expected output
|
||||
TESTS_FIX_FORCE_CRLF = (
|
||||
# only 'LF'
|
||||
(b'foo\nbar\nbaz\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
|
||||
# only 'CRLF'
|
||||
(b'foo\r\nbar\r\nbaz\r\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
|
||||
# only 'CR'
|
||||
(b'foo\rbar\rbaz\r', 1, b'foo\r\nbar\r\nbaz\r\n'),
|
||||
# mixed with majority of 'LF'
|
||||
(b'foo\r\nbar\nbaz\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
|
||||
# mixed with majority of 'CRLF'
|
||||
(b'foo\r\nbar\nbaz\r\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
|
||||
# mixed with majority of 'CR'
|
||||
(b'foo\rbar\nbaz\r', 1, b'foo\r\nbar\r\nbaz\r\n'),
|
||||
# mixed with as much 'LF' as 'CR'
|
||||
(b'foo\rbar\nbaz', 1, b'foo\r\nbar\r\nbaz'),
|
||||
# mixed with as much 'CRLF' as 'CR'
|
||||
(b'foo\r\nbar\nbaz', 1, b'foo\r\nbar\r\nbaz'),
|
||||
# mixed with as much 'CRLF' as 'LF' as 'CR'
|
||||
(b'foo\r\nbar\nbaz\r', 1, b'foo\r\nbar\r\nbaz\r\n'),
|
||||
)
|
||||
def test_fix_crlf(tmpdir):
|
||||
path = tmpdir.join('input.txt')
|
||||
path.write_binary(b'foo\r\nbar\rbaz\n')
|
||||
ret = main(('--fix=crlf', path.strpath))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
('input_s', 'expected_retval', 'output'),
|
||||
TESTS_FIX_FORCE_CRLF,
|
||||
)
|
||||
def test_mixed_line_ending_fix_force_crlf(
|
||||
input_s, expected_retval, output,
|
||||
tmpdir,
|
||||
):
|
||||
path = tmpdir.join('file.txt')
|
||||
path.write_binary(input_s)
|
||||
ret = mixed_line_ending(('--fix=crlf', path.strpath))
|
||||
|
||||
assert ret == expected_retval
|
||||
assert path.read_binary() == output
|
||||
assert ret == 1
|
||||
assert path.read_binary() == b'foo\r\nbar\r\nbaz\r\n'
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue