Add line ending detection

This commit is contained in:
Morgan Courbet 2017-06-26 23:06:57 +02:00
parent ad0062a3bb
commit 466f9e1732
No known key found for this signature in database
GPG key ID: 467299D324A21B24

View file

@ -1,4 +1,6 @@
import argparse
import os
import re
import sys
from enum import Enum
@ -10,6 +12,7 @@ class CLIOption(Enum):
class LineEnding(CLIOption):
    """Line endings known to the tool: CR, CRLF and LF.

    Every member is a 3-tuple passed on to ``CLIOption``. Presumably the
    fields are (raw string, escaped printable form, command-line option
    name) -- confirm against ``CLIOption.__init__``, which is not shown here.
    """

    CR = ('\r', '\\r', 'cr')
    CRLF = ('\r\n', '\\r\\n', 'crlf')
    LF = ('\n', '\\n', 'lf')
@ -26,11 +29,33 @@ class MixedLineEndingOption(CLIOption):
LF = LineEnding.LF.optName
class MixedLineDetection(Enum):
    """Outcome of scanning a file for its line endings.

    Each member's value is unpacked into ``__init__`` and exposed as:

    * ``conversion`` -- ``True`` for the two ``MIXED_*`` members, ``False``
      for ``NOT_MIXED`` and ``UNKNOWN``.
    * ``line_ending_char`` -- ``LineEnding.CRLF.string`` or
      ``LineEnding.LF.string`` for the matching ``MIXED_*`` member, ``None``
      for ``NOT_MIXED`` and ``UNKNOWN``.
    """

    MIXED_MOSTLY_CRLF = True, LineEnding.CRLF.string
    MIXED_MOSTLY_LF = True, LineEnding.LF.string
    NOT_MIXED = False, None
    # Enum turns members that share a value into aliases, so a bare
    # ``False, None`` here would make UNKNOWN the very same object as
    # NOT_MIXED. The extra tag keeps the value, and therefore the member,
    # distinct while leaving both attributes unchanged.
    UNKNOWN = False, None, 'unknown'

    def __init__(self, conversion, line_ending_char, tag=None):
        # ``tag`` exists only to disambiguate member values; it is not stored.
        self.conversion = conversion
        self.line_ending_char = line_ending_char
# Regex for a CRLF pair.
CRLF_PATTERN = re.compile(LineEnding.CRLF.string, flags=re.DOTALL)
# Regex for an LF that is not preceded by a CR (negative lookbehind).
LF_PATTERN = re.compile(
    '(?<!{cr}){lf}'.format(
        cr=LineEnding.CR.strPrint,
        lf=LineEnding.LF.strPrint,
    ),
    flags=re.DOTALL,
)
def mixed_line_ending(argv=None):
    """Parse the arguments, validate the files and report each detection.

    Prints the parsed options, then prints the value returned by
    ``_detect_line_ending`` for every entry of ``options['filenames']``.

    :param argv: forwarded unchanged to ``_parse_arguments``.
    :return: always ``0``.
    """
    options = _parse_arguments(argv)
    print(options)

    filenames = options['filenames']
    _check_filenames(filenames)

    for path in filenames:
        detection = _detect_line_ending(path)
        print(detection)

    return 0
@ -65,5 +90,33 @@ def _parse_arguments(argv=None):
return options
def _check_filenames(filenames):
    """Ensure every given path refers to an existing regular file.

    :param filenames: iterable of paths to check, in order.
    :raises IOError: for the first path that ``os.path.isfile`` rejects.
    """
    for path in filenames:
        if os.path.isfile(path):
            continue
        raise IOError('The file "{}" does not exist'.format(path))
def _detect_line_ending(filename):
    """Classify the line endings used in ``filename``.

    Counts CRLF pairs with ``CRLF_PATTERN`` and the LF matches of
    ``LF_PATTERN``, then compares the two counts.

    :param filename: path of the file to scan.
    :return: ``MixedLineDetection.UNKNOWN`` when both counts are equal
        (including a file with neither kind), ``NOT_MIXED`` when only one
        kind occurs, otherwise ``MIXED_MOSTLY_CRLF`` or ``MIXED_MOSTLY_LF``
        according to the larger count.
    """
    # newline='' switches off universal-newline translation. Without it,
    # text mode rewrites every '\r\n' to '\n' while reading, so CRLF could
    # never be counted. The ``with`` block also guarantees the file is closed.
    with open(filename, 'r', newline='') as f:
        buf = f.read()

    crlf_nb = len(CRLF_PATTERN.findall(buf))
    lf_nb = len(LF_PATTERN.findall(buf))

    if crlf_nb == lf_nb:
        return MixedLineDetection.UNKNOWN

    # The counts differ from here on, so at most one of them can be zero;
    # a zero count means only a single kind of line ending is present.
    if crlf_nb == 0 or lf_nb == 0:
        return MixedLineDetection.NOT_MIXED

    if crlf_nb > lf_nb:
        return MixedLineDetection.MIXED_MOSTLY_CRLF
    return MixedLineDetection.MIXED_MOSTLY_LF
# Script entry point: run the checker and use its return value as the
# process exit status.
if __name__ == '__main__':
    exit_code = mixed_line_ending()
    sys.exit(exit_code)