Add mixed-line-ending hook

This commit is contained in:
Morgan Courbet 2017-06-13 21:38:14 +02:00 committed by Anthony Sottile
parent 78dffcc819
commit fc8a5b27e9
6 changed files with 388 additions and 0 deletions

View file

@ -191,6 +191,15 @@
# for backward compatibility
files: ''
minimum_pre_commit_version: 0.15.0
# Hook entry for `mixed-line-ending`: replaces or checks mixed line endings
# in text files (see `description` below).
- id: mixed-line-ending
name: Mixed line ending
description: Replaces or checks mixed line ending
entry: mixed-line-ending
language: python
types: [text]
# for backward compatibility
files: ''
minimum_pre_commit_version: 0.15.0
- id: name-tests-test
name: Tests should end in _test.py
description: This verifies that test files are named correctly

View file

@ -58,6 +58,11 @@ Add this to your `.pre-commit-config.yaml`
- `file-contents-sorter` - Sort the lines in specified files (defaults to alphabetical). You must provide list of target files as input to it. Note that this hook WILL remove blank lines and does NOT respect any comments.
- `flake8` - Run flake8 on your python files.
- `forbid-new-submodules` - Prevent addition of new git submodules.
- `mixed-line-ending` - Replaces or checks mixed line endings.
- `--fix={auto,crlf,lf,no}`
- `auto` - Automatically replaces all line endings with the most frequent one. This is the default argument.
- `crlf`, `lf` - Forces all line endings to be replaced by CRLF or LF, respectively.
- `no` - Checks whether any file has mixed line endings, without modifying any file.
- `name-tests-test` - Assert that files in tests/ end in `_test.py`.
- Use `args: ['--django']` to match `test*.py` instead.
- `no-commit-to-branch` - Protect specific branches from direct checkins.

View file

@ -130,6 +130,12 @@
entry: upgrade-your-pre-commit-version
files: ''
minimum_pre_commit_version: 0.15.0
# Placeholder entry for `mixed-line-ending`: it runs the
# `upgrade-your-pre-commit-version` command instead of the real hook.
# NOTE(review): presumably only read by pre-commit versions that use this
# legacy manifest -- confirm.
- id: mixed-line-ending
language: system
name: upgrade-your-pre-commit-version
entry: upgrade-your-pre-commit-version
files: ''
minimum_pre_commit_version: 0.15.0
- id: name-tests-test
language: system
name: upgrade-your-pre-commit-version

View file

@ -0,0 +1,212 @@
import argparse
import re
import sys
from enum import Enum
class LineEnding(Enum):
    """Line-ending flavours recognised by the hook.

    Every member is declared as a ``(bytes, option name, regex)`` tuple,
    which ``__init__`` unpacks into the attributes ``string``,
    ``opt_name`` and ``regex``; ``str_print`` is the ``repr`` of the bytes
    and is meant for messages.
    """

    # lone carriage return: a CR that is not the first half of a CRLF
    CR = b'\r', 'cr', re.compile(b'\r(?!\n)', re.DOTALL)
    CRLF = b'\r\n', 'crlf', re.compile(b'\r\n', re.DOTALL)
    # lone line feed: an LF that is not the second half of a CRLF
    LF = b'\n', 'lf', re.compile(b'(?<!\r)\n', re.DOTALL)

    def __init__(self, raw, cli_name, matcher):
        self.regex = matcher
        self.opt_name = cli_name
        self.string = raw
        self.str_print = repr(raw)
class MixedLineEndingOption(Enum):
    """Values accepted by the ``--fix`` command line option.

    Each member is declared as ``(option name, line ending)``.  The line
    ending is the ``LineEnding`` member to force, or ``None`` for the
    modes that do not force one (``auto`` and ``no``).
    """

    AUTO = 'auto', None
    NO = 'no', None
    CRLF = LineEnding.CRLF.opt_name, LineEnding.CRLF
    LF = LineEnding.LF.opt_name, LineEnding.LF

    def __init__(self, cli_name, forced_ending):
        self.line_ending_enum = forced_ending
        self.opt_name = cli_name
class MixedLineDetection(Enum):
    """Possible outcomes of inspecting a file's line endings.

    Each member is declared as ``(index, mle_found, line_ending_enum)``:
    ``mle_found`` tells whether the outcome counts as "mixed line ending
    found", and ``line_ending_enum`` is the dominant ``LineEnding`` (or
    ``None`` when there is none).
    """

    NOT_MIXED = 1, False, None
    UNKNOWN = 2, False, None
    MIXED_MOSTLY_CRLF = 3, True, LineEnding.CRLF
    MIXED_MOSTLY_LF = 4, True, LineEnding.LF
    MIXED_MOSTLY_CR = 5, True, LineEnding.CR

    def __init__(self, ordinal, found, dominant_ending):
        # The leading index only keeps member values distinct: without it
        # NOT_MIXED and UNKNOWN would share the same value and Enum would
        # turn the second one into an alias of the first.
        self.index = ordinal
        self.mle_found = found
        self.line_ending_enum = dominant_ending
# Matches one line ending of any flavour (CRLF, lone LF or lone CR).  The
# alternation is assembled from the per-flavour patterns of LineEnding.
ANY_LINE_ENDING_PATTERN = re.compile(
    b'(' + b'|'.join(
        flavour.regex.pattern
        for flavour in (LineEnding.CRLF, LineEnding.LF, LineEnding.CR)
    ) + b')',
)
def mixed_line_ending(argv=None):
    """Run the hook and return its exit status.

    :param argv: command line arguments; ``None`` lets argparse fall back
        to ``sys.argv``.
    :return: the integer returned by the processing function selected by
        the ``--fix`` option.
    """
    parsed = _parse_arguments(argv)
    mode, paths = parsed['fix'], parsed['filenames']

    if mode == MixedLineEndingOption.NO:
        return _process_no_fix(paths)
    if mode == MixedLineEndingOption.AUTO:
        return _process_fix_auto(paths)
    # every remaining option forces one specific line ending
    return _process_fix_force(paths, mode.line_ending_enum)
def _parse_arguments(argv=None):
    """Parse the command line of the hook.

    :param argv: argument list handed to argparse (``None`` means
        ``sys.argv``).
    :return: a dict with two keys: ``'fix'`` holds the
        ``MixedLineEndingOption`` member selected with ``--fix`` and
        ``'filenames'`` holds the list of positional file names.
    """
    allowed = [option.opt_name for option in MixedLineEndingOption]

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f', '--fix',
        choices=allowed,
        default=MixedLineEndingOption.AUTO.opt_name,
        help='Replace line ending with the specified. Default is "auto"',
    )
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    namespace = parser.parse_args(argv)

    # argparse restricted the value to `choices`, so exactly one member
    # matches; the single-element unpacking asserts that.
    [selected] = [
        option for option in MixedLineEndingOption.__members__.values()
        if option.opt_name == namespace.fix
    ]

    return {'fix': selected, 'filenames': namespace.filenames}
def _detect_line_ending(filename):
    """Classify the line endings used in a file.

    :param filename: path of the file, read in binary mode.
    :return: ``MixedLineDetection.NOT_MIXED`` when fewer than two kinds of
        line ending occur, the ``MixedLineDetection`` member associated
        with the single most frequent line ending when the file is mixed,
        or ``MixedLineDetection.UNKNOWN`` when the file is mixed but
        several line endings tie for the highest count.
    """
    with open(filename, 'rb') as stream:
        content = stream.read()

    counts = {
        ending: len(ending.regex.findall(content)) for ending in LineEnding
    }

    # a file is "mixed" as soon as two different endings occur in it
    present = [ending for ending in LineEnding if counts[ending] > 0]
    if len(present) < 2:
        return MixedLineDetection.NOT_MIXED

    highest = max(counts.values())
    leaders = [ending for ending in present if counts[ending] == highest]
    if len(leaders) != 1:
        # tie for first place: no single dominant ending
        return MixedLineDetection.UNKNOWN

    dominant = leaders[0]
    for verdict in MixedLineDetection:
        if verdict.line_ending_enum == dominant:
            return verdict

    return MixedLineDetection.UNKNOWN
def _process_no_fix(filenames):
    """Report files with mixed line endings without modifying them.

    A file is reported when the result of ``_detect_line_ending`` for it
    has ``mle_found`` set.

    :param filenames: iterable of paths to inspect.
    :return: 1 if at least one file was reported, 0 otherwise.
    """
    print('Checking if the files have mixed line ending.')

    mle_filenames = [
        filename for filename in filenames
        if _detect_line_ending(filename).mle_found
    ]

    if not mle_filenames:
        return 0

    # Interpolate explicitly: print() does not apply %-formatting to its
    # arguments, so passing the names as a second argument would print a
    # literal "%s".
    message = (
        'The following files have mixed line endings:\n\t%s' %
        '\n\t'.join(mle_filenames)
    )
    print(message)
    return 1
def _process_fix_auto(filenames):
    """Convert mixed files to their most frequent line ending.

    Files that are not mixed are left alone.  Mixed files without a single
    most frequent line ending are skipped (not modified) but still count
    as a failure.

    :param filenames: iterable of paths to inspect and possibly rewrite.
    :return: 1 if at least one file was mixed (converted or skipped),
        0 otherwise.
    """
    mle_found = False

    for filename in filenames:
        detect_result = _detect_line_ending(filename)

        # Messages are built with the % operator: print() does not
        # interpolate "%s" placeholders from extra arguments.
        if detect_result == MixedLineDetection.NOT_MIXED:
            message = 'The file %s has no mixed line ending' % filename
            print(message)
        elif detect_result == MixedLineDetection.UNKNOWN:
            message = (
                'Could not define most frequent line ending in '
                'file %s. File skipped.' % filename
            )
            print(message)

            mle_found = True
        else:
            le_enum = detect_result.line_ending_enum

            message = (
                'The file %s has mixed line ending with a '
                'majority of %s. Converting...' %
                (filename, le_enum.str_print)
            )
            print(message)

            _convert_line_ending(filename, le_enum.string)
            mle_found = True

            message = (
                'The file %s has been converted to %s line ending.' %
                (filename, le_enum.str_print)
            )
            print(message)

    return 1 if mle_found else 0
def _process_fix_force(filenames, line_ending_enum):
    """Rewrite every given file with the requested line ending.

    :param filenames: iterable of paths to rewrite.
    :param line_ending_enum: ``LineEnding`` member to convert to.
    :return: always 1, regardless of whether any file content actually
        changed.
    """
    for filename in filenames:
        _convert_line_ending(filename, line_ending_enum.string)

        # Interpolate explicitly: print() does not apply %-formatting to
        # its arguments.
        message = (
            'The file %s has been forced to %s line ending.' %
            (filename, line_ending_enum.str_print)
        )
        print(message)

    return 1
def _convert_line_ending(filename, line_ending):
    """Rewrite a file in place so every line ending is ``line_ending``.

    :param filename: path of the file to rewrite.
    :param line_ending: bytes substituted for each match of
        ``ANY_LINE_ENDING_PATTERN``.
    """
    with open(filename, 'rb+') as stream:
        original = stream.read()
        converted = ANY_LINE_ENDING_PATTERN.sub(line_ending, original)

        # rewind and overwrite from the start; truncate() drops leftover
        # bytes when the converted content is shorter than the original
        stream.seek(0)
        stream.write(converted)
        stream.truncate()
if __name__ == '__main__':
    # Executed as a script: run the hook and use its return value as the
    # process exit status.
    exit_status = mixed_line_ending()
    sys.exit(exit_status)

View file

@ -31,6 +31,7 @@ setup(
'simplejson',
'six',
],
extras_require={':python_version=="2.7"': ['enum34']},
entry_points={
'console_scripts': [
'autopep8-wrapper = pre_commit_hooks.autopep8_wrapper:main',
@ -53,6 +54,7 @@ setup(
'file-contents-sorter = pre_commit_hooks.file_contents_sorter:main',
'fix-encoding-pragma = pre_commit_hooks.fix_encoding_pragma:main',
'forbid-new-submodules = pre_commit_hooks.forbid_new_submodules:main',
'mixed-line-ending = pre_commit_hooks.mixed_line_ending:mixed_line_ending',
'name-tests-test = pre_commit_hooks.tests_should_end_in_test:validate_files',
'no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main',
'pretty-format-json = pre_commit_hooks.pretty_format_json:pretty_format_json',

View file

@ -0,0 +1,154 @@
import pytest
from pre_commit_hooks.mixed_line_ending import mixed_line_ending
# Cases for --fix=auto: (input, expected return value, expected output).
# When several line endings tie for the highest count the file must be left
# untouched, but the hook still returns 1.
TESTS_FIX_AUTO = (
    # only 'LF'
    (b'foo\nbar\nbaz\n', 0, b'foo\nbar\nbaz\n'),
    # only 'CRLF'
    (b'foo\r\nbar\r\nbaz\r\n', 0, b'foo\r\nbar\r\nbaz\r\n'),
    # only 'CR'
    (b'foo\rbar\rbaz\r', 0, b'foo\rbar\rbaz\r'),
    # mixed with majority of 'LF'
    (b'foo\r\nbar\nbaz\n', 1, b'foo\nbar\nbaz\n'),
    # mixed with majority of 'CRLF'
    (b'foo\r\nbar\nbaz\r\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # mixed with majority of 'CR'
    (b'foo\rbar\nbaz\r', 1, b'foo\rbar\rbaz\r'),
    # mixed with as much 'LF' as 'CRLF'
    (b'foo\r\nbar\nbaz', 1, b'foo\r\nbar\nbaz'),
    # mixed with as much 'LF' as 'CR'
    (b'foo\rbar\nbaz', 1, b'foo\rbar\nbaz'),
    # mixed with as much 'CRLF' as 'CR'
    (b'foo\r\nbar\rbaz', 1, b'foo\r\nbar\rbaz'),
    # mixed with as much 'CRLF' as 'LF' as 'CR'
    (b'foo\r\nbar\nbaz\r', 1, b'foo\r\nbar\nbaz\r'),
)
@pytest.mark.parametrize(
    ('input_s', 'expected_retval', 'output'),
    TESTS_FIX_AUTO,
)
def test_mixed_line_ending_fix_auto(input_s, expected_retval, output, tmpdir):
    """Check the return value and resulting file content of ``--fix=auto``."""
    path = tmpdir.join('file.txt')
    # The fixture is bytes: write it in binary mode so that no text-mode
    # newline translation can alter the line endings under test.
    path.write_binary(input_s)
    ret = mixed_line_ending(('--fix=auto', path.strpath))

    assert ret == expected_retval
    assert path.read_binary() == output
# Cases for --fix=no: (input, expected return value, expected output).
# The file must never be modified.  When several line endings tie for the
# highest count the hook returns 0.
TESTS_NO_FIX = (
    # only 'LF'
    (b'foo\nbar\nbaz\n', 0, b'foo\nbar\nbaz\n'),
    # only 'CRLF'
    (b'foo\r\nbar\r\nbaz\r\n', 0, b'foo\r\nbar\r\nbaz\r\n'),
    # only 'CR'
    (b'foo\rbar\rbaz\r', 0, b'foo\rbar\rbaz\r'),
    # mixed with majority of 'LF'
    (b'foo\r\nbar\nbaz\n', 1, b'foo\r\nbar\nbaz\n'),
    # mixed with majority of 'CRLF'
    (b'foo\r\nbar\nbaz\r\n', 1, b'foo\r\nbar\nbaz\r\n'),
    # mixed with majority of 'CR'
    (b'foo\rbar\nbaz\r', 1, b'foo\rbar\nbaz\r'),
    # mixed with as much 'LF' as 'CRLF'
    (b'foo\r\nbar\nbaz', 0, b'foo\r\nbar\nbaz'),
    # mixed with as much 'LF' as 'CR'
    (b'foo\rbar\nbaz', 0, b'foo\rbar\nbaz'),
    # mixed with as much 'CRLF' as 'CR'
    (b'foo\r\nbar\rbaz', 0, b'foo\r\nbar\rbaz'),
    # mixed with as much 'CRLF' as 'LF' as 'CR'
    (b'foo\r\nbar\nbaz\r', 0, b'foo\r\nbar\nbaz\r'),
)
@pytest.mark.parametrize(
    ('input_s', 'expected_retval', 'output'),
    TESTS_NO_FIX,
)
def test_detect_mixed_line_ending(input_s, expected_retval, output, tmpdir):
    """Check the return value of ``--fix=no`` and that the file is untouched."""
    path = tmpdir.join('file.txt')
    # The fixture is bytes: write it in binary mode so that no text-mode
    # newline translation can alter the line endings under test.
    path.write_binary(input_s)
    ret = mixed_line_ending(('--fix=no', path.strpath))

    assert ret == expected_retval
    assert path.read_binary() == output
# Cases for --fix=lf: (input, expected return value, expected output).
# Every line ending is converted to LF and the hook always returns 1.
TESTS_FIX_FORCE_LF = (
    # only 'LF'
    (b'foo\nbar\nbaz\n', 1, b'foo\nbar\nbaz\n'),
    # only 'CRLF'
    (b'foo\r\nbar\r\nbaz\r\n', 1, b'foo\nbar\nbaz\n'),
    # only 'CR'
    (b'foo\rbar\rbaz\r', 1, b'foo\nbar\nbaz\n'),
    # mixed with majority of 'LF'
    (b'foo\r\nbar\nbaz\n', 1, b'foo\nbar\nbaz\n'),
    # mixed with majority of 'CRLF'
    (b'foo\r\nbar\nbaz\r\n', 1, b'foo\nbar\nbaz\n'),
    # mixed with majority of 'CR'
    (b'foo\rbar\nbaz\r', 1, b'foo\nbar\nbaz\n'),
    # mixed with as much 'LF' as 'CRLF'
    (b'foo\r\nbar\nbaz', 1, b'foo\nbar\nbaz'),
    # mixed with as much 'LF' as 'CR'
    (b'foo\rbar\nbaz', 1, b'foo\nbar\nbaz'),
    # mixed with as much 'CRLF' as 'CR'
    (b'foo\r\nbar\rbaz', 1, b'foo\nbar\nbaz'),
    # mixed with as much 'CRLF' as 'LF' as 'CR'
    (b'foo\r\nbar\nbaz\r', 1, b'foo\nbar\nbaz\n'),
)
@pytest.mark.parametrize(
    ('input_s', 'expected_retval', 'output'),
    TESTS_FIX_FORCE_LF,
)
def test_mixed_line_ending_fix_force_lf(
        input_s, expected_retval, output,
        tmpdir,
):
    """Check the return value and resulting file content of ``--fix=lf``."""
    path = tmpdir.join('file.txt')
    # The fixture is bytes: write it in binary mode so that no text-mode
    # newline translation can alter the line endings under test.
    path.write_binary(input_s)
    ret = mixed_line_ending(('--fix=lf', path.strpath))

    assert ret == expected_retval
    assert path.read_binary() == output
# Cases for --fix=crlf: (input, expected return value, expected output).
# Every line ending is converted to CRLF and the hook always returns 1.
TESTS_FIX_FORCE_CRLF = (
    # only 'LF'
    (b'foo\nbar\nbaz\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # only 'CRLF'
    (b'foo\r\nbar\r\nbaz\r\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # only 'CR'
    (b'foo\rbar\rbaz\r', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # mixed with majority of 'LF'
    (b'foo\r\nbar\nbaz\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # mixed with majority of 'CRLF'
    (b'foo\r\nbar\nbaz\r\n', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # mixed with majority of 'CR'
    (b'foo\rbar\nbaz\r', 1, b'foo\r\nbar\r\nbaz\r\n'),
    # mixed with as much 'LF' as 'CRLF'
    (b'foo\r\nbar\nbaz', 1, b'foo\r\nbar\r\nbaz'),
    # mixed with as much 'LF' as 'CR'
    (b'foo\rbar\nbaz', 1, b'foo\r\nbar\r\nbaz'),
    # mixed with as much 'CRLF' as 'CR'
    (b'foo\r\nbar\rbaz', 1, b'foo\r\nbar\r\nbaz'),
    # mixed with as much 'CRLF' as 'LF' as 'CR'
    (b'foo\r\nbar\nbaz\r', 1, b'foo\r\nbar\r\nbaz\r\n'),
)
@pytest.mark.parametrize(
    ('input_s', 'expected_retval', 'output'),
    TESTS_FIX_FORCE_CRLF,
)
def test_mixed_line_ending_fix_force_crlf(
        input_s, expected_retval, output,
        tmpdir,
):
    """Check the return value and resulting file content of ``--fix=crlf``."""
    path = tmpdir.join('file.txt')
    # The fixture is bytes: write it in binary mode so that no text-mode
    # newline translation can alter the line endings under test.
    path.write_binary(input_s)
    ret = mixed_line_ending(('--fix=crlf', path.strpath))

    assert ret == expected_retval
    assert path.read_binary() == output