Simplify mixed-line-ending hook

This commit is contained in:
Anthony Sottile 2017-09-05 20:20:43 -07:00
parent 47c4d9ebed
commit fbcd096ea9
3 changed files with 134 additions and 315 deletions

View file

@ -1,212 +1,83 @@
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import re
import sys
from enum import Enum
import collections
class LineEnding(Enum):
CR = b'\r', 'cr', re.compile(b'\r(?!\n)', re.DOTALL)
CRLF = b'\r\n', 'crlf', re.compile(b'\r\n', re.DOTALL)
LF = b'\n', 'lf', re.compile(b'(?<!\r)\n', re.DOTALL)
def __init__(self, string, opt_name, regex):
self.string = string
self.str_print = repr(string)
self.opt_name = opt_name
self.regex = regex
CRLF = b'\r\n'
LF = b'\n'
CR = b'\r'
# Prefer LF to CRLF to CR, but detect CRLF before LF
ALL_ENDINGS = (CR, CRLF, LF)
FIX_TO_LINE_ENDING = {'cr': CR, 'crlf': CRLF, 'lf': LF}
class MixedLineEndingOption(Enum):
AUTO = 'auto', None
NO = 'no', None
CRLF = LineEnding.CRLF.opt_name, LineEnding.CRLF
LF = LineEnding.LF.opt_name, LineEnding.LF
def __init__(self, opt_name, line_ending_enum):
self.opt_name = opt_name
self.line_ending_enum = line_ending_enum
def _fix(filename, contents, ending):
new_contents = b''.join(
line.rstrip(b'\r\n') + ending for line in contents.splitlines(True)
)
with open(filename, 'wb') as f:
f.write(new_contents)
class MixedLineDetection(Enum):
NOT_MIXED = 1, False, None
UNKNOWN = 2, False, None
MIXED_MOSTLY_CRLF = 3, True, LineEnding.CRLF
MIXED_MOSTLY_LF = 4, True, LineEnding.LF
MIXED_MOSTLY_CR = 5, True, LineEnding.CR
def fix_filename(filename, fix):
with open(filename, 'rb') as f:
contents = f.read()
def __init__(self, index, mle_found, line_ending_enum):
# TODO hack to prevent enum overriding
self.index = index
self.mle_found = mle_found
self.line_ending_enum = line_ending_enum
counts = collections.defaultdict(int)
for line in contents.splitlines(True):
for ending in ALL_ENDINGS:
if line.endswith(ending):
counts[ending] += 1
break
ANY_LINE_ENDING_PATTERN = re.compile(
b'(' + LineEnding.CRLF.regex.pattern +
b'|' + LineEnding.LF.regex.pattern +
b'|' + LineEnding.CR.regex.pattern + b')',
)
# Some amount of mixed line endings
mixed = sum(bool(x) for x in counts.values()) > 1
if fix == 'no' or (fix == 'auto' and not mixed):
return mixed
def mixed_line_ending(argv=None):
options = _parse_arguments(argv)
if fix == 'auto':
max_ending = LF
max_lines = 0
# ordering is important here such that lf > crlf > cr
for ending_type in ALL_ENDINGS:
# also important, using >= to find a max that prefers the last
if counts[ending_type] >= max_lines:
max_ending = ending_type
max_lines = counts[ending_type]
filenames = options['filenames']
fix_option = options['fix']
if fix_option == MixedLineEndingOption.NO:
return _process_no_fix(filenames)
elif fix_option == MixedLineEndingOption.AUTO:
return _process_fix_auto(filenames)
# when a line ending character is forced with --fix option
_fix(filename, contents, max_ending)
return 1
else:
return _process_fix_force(filenames, fix_option.line_ending_enum)
target_ending = FIX_TO_LINE_ENDING[fix]
# find if there are lines with *other* endings
del counts[target_ending]
other_endings = bool(sum(counts.values()))
if other_endings:
_fix(filename, contents, target_ending)
return other_endings
def _parse_arguments(argv=None):
def main(argv=None):
parser = argparse.ArgumentParser()
parser.add_argument(
'-f',
'--fix',
choices=[m.opt_name for m in MixedLineEndingOption],
default=MixedLineEndingOption.AUTO.opt_name,
'-f', '--fix',
choices=('auto', 'no') + tuple(FIX_TO_LINE_ENDING),
default='auto',
help='Replace line ending with the specified. Default is "auto"',
)
parser.add_argument('filenames', nargs='*', help='Filenames to fix')
args = parser.parse_args(argv)
fix, = (
member for name, member
in MixedLineEndingOption.__members__.items()
if member.opt_name == args.fix
)
options = {
'fix': fix, 'filenames': args.filenames,
}
return options
def _detect_line_ending(filename):
with open(filename, 'rb') as f:
buf = f.read()
le_counts = {}
for le_enum in LineEnding:
le_counts[le_enum] = len(le_enum.regex.findall(buf))
mixed = False
le_found_previously = False
most_le = None
max_le_count = 0
for le, le_count in le_counts.items():
le_found_cur = le_count > 0
mixed |= le_found_previously and le_found_cur
le_found_previously |= le_found_cur
if le_count == max_le_count:
most_le = None
elif le_count > max_le_count:
max_le_count = le_count
most_le = le
if not mixed:
return MixedLineDetection.NOT_MIXED
for mld in MixedLineDetection:
if (
mld.line_ending_enum is not None and
mld.line_ending_enum == most_le
):
return mld
return MixedLineDetection.UNKNOWN
def _process_no_fix(filenames):
print('Checking if the files have mixed line ending.')
mle_filenames = []
for filename in filenames:
detect_result = _detect_line_ending(filename)
if detect_result.mle_found:
mle_filenames.append(filename)
mle_found = len(mle_filenames) > 0
if mle_found:
print(
'The following files have mixed line endings:\n\t%s',
'\n\t'.join(mle_filenames),
)
return 1 if mle_found else 0
def _process_fix_auto(filenames):
mle_found = False
for filename in filenames:
detect_result = _detect_line_ending(filename)
if detect_result == MixedLineDetection.NOT_MIXED:
print('The file %s has no mixed line ending', filename)
elif detect_result == MixedLineDetection.UNKNOWN:
print(
'Could not define most frequent line ending in '
'file %s. File skiped.', filename,
)
mle_found = True
else:
le_enum = detect_result.line_ending_enum
print(
'The file %s has mixed line ending with a '
'majority of %s. Converting...', filename, le_enum.str_print,
)
_convert_line_ending(filename, le_enum.string)
mle_found = True
print(
'The file %s has been converted to %s line ending.',
filename, le_enum.str_print,
)
return 1 if mle_found else 0
def _process_fix_force(filenames, line_ending_enum):
for filename in filenames:
_convert_line_ending(filename, line_ending_enum.string)
print(
'The file %s has been forced to %s line ending.',
filename, line_ending_enum.str_print,
)
return 1
def _convert_line_ending(filename, line_ending):
with open(filename, 'rb+') as f:
bufin = f.read()
# convert line ending
bufout = ANY_LINE_ENDING_PATTERN.sub(line_ending, bufin)
# write the result in the file replacing the existing content
f.seek(0)
f.write(bufout)
f.truncate()
retv = 0
for filename in args.filenames:
retv |= fix_filename(filename, args.fix)
return retv
if __name__ == '__main__':
sys.exit(mixed_line_ending())
exit(main())