mirror of
https://github.com/PyCQA/flake8.git
synced 2026-04-10 06:44:18 +00:00
Add line splitting and file reading
Add some tests around reading lines and stripping UTF BOMs
This commit is contained in:
parent
28f4811cb9
commit
5ee061b810
2 changed files with 79 additions and 15 deletions
|
|
@ -187,17 +187,11 @@ class FileChecker(object):
|
||||||
return self.read_lines_from_stdin()
|
return self.read_lines_from_stdin()
|
||||||
return self.read_lines_from_filename()
|
return self.read_lines_from_filename()
|
||||||
|
|
||||||
def read_lines_from_stdin(self):
|
def _readlines_py2(self):
|
||||||
"""Read the lines from standard in."""
|
|
||||||
return utils.stdin_get_value().splitlines(True)
|
|
||||||
|
|
||||||
def read_lines_from_filename(self):
|
|
||||||
"""Read the lines for a file."""
|
|
||||||
if (2, 6) <= sys.version_info < (3, 0):
|
|
||||||
with open(self.filename, 'rU') as fd:
|
with open(self.filename, 'rU') as fd:
|
||||||
return fd.readlines()
|
return fd.readlines()
|
||||||
|
|
||||||
elif (3, 0) <= sys.version_info < (4, 0):
|
def _readlines_py3(self):
|
||||||
try:
|
try:
|
||||||
with open(self.filename, 'rb') as fd:
|
with open(self.filename, 'rb') as fd:
|
||||||
(coding, lines) = tokenize.detect_encoding(fd.readline)
|
(coding, lines) = tokenize.detect_encoding(fd.readline)
|
||||||
|
|
@ -205,9 +199,53 @@ class FileChecker(object):
|
||||||
return ([l.decode(coding) for l in lines] +
|
return ([l.decode(coding) for l in lines] +
|
||||||
textfd.readlines())
|
textfd.readlines())
|
||||||
except (LookupError, SyntaxError, UnicodeError):
|
except (LookupError, SyntaxError, UnicodeError):
|
||||||
|
# If we can't detect the codec with tokenize.detect_encoding, or
|
||||||
|
# the detected encoding is incorrect, just fallback to latin-1.
|
||||||
with open(self.filename, encoding='latin-1') as fd:
|
with open(self.filename, encoding='latin-1') as fd:
|
||||||
return fd.readlines()
|
return fd.readlines()
|
||||||
|
|
||||||
|
def read_lines_from_filename(self):
    """Read the lines for a file.

    Picks the reader that matches the running interpreter
    (``_readlines_py2`` before Python 3.0, ``_readlines_py3`` otherwise)
    and returns whatever that reader returns.

    :returns:
        The lines produced by the selected reader, or an empty list when
        reading raised an ``IOError``.
    """
    # Branch on "older than 3.0" rather than on two closed version ranges
    # so that ``readlines`` is always bound, whichever interpreter runs us.
    if sys.version_info < (3, 0):
        readlines = self._readlines_py2
    else:
        readlines = self._readlines_py3

    try:
        return readlines()
    except IOError as exception:
        # If we can not read the file due to an IOError (e.g., the file
        # does not exist or we do not have the permissions to open it)
        # then we need to format that exception for the user.
        # NOTE(sigmavirus24): Historically, pep8 has always reported this
        # as an E902. We probably *want* a better error code for this
        # going forward.
        message = '{0}: {1}'.format(type(exception).__name__, exception)
        # NOTE(review): this passes five positional arguments to
        # ``results.append``; a plain list would reject that call. Confirm
        # what type ``self.results`` is before relying on this path.
        self.results.append('E902', self.filename, 0, 0, message)
        return []
|
||||||
|
|
||||||
|
def read_lines_from_stdin(self):
    """Return the value read from standard in, split into lines.

    Line endings are kept on every returned line.
    """
    stdin_text = utils.stdin_get_value()
    return stdin_text.splitlines(True)
|
||||||
|
|
||||||
def run_checks(self):
    """Load the file's lines and prepare them for the checks.

    The lines are read, stored on ``self.lines``, and then passed through
    ``strip_utf_bom``.
    """
    file_lines = self.read_lines()
    self.lines = file_lines
    self.strip_utf_bom()
|
||||||
|
|
||||||
|
def strip_utf_bom(self):
    """Strip the UTF bom from the lines of the file.

    Only the first entry of ``self.lines`` is inspected. When it starts
    with a byte order mark, that entry is replaced with the same text
    minus the mark; every other line is left untouched.
    """
    if not self.lines or not self.lines[0]:
        # If we have nothing to analyze (no lines at all, or an empty
        # first line that cannot be indexed) quit early
        return

    first_line = self.lines[0]
    first_char = ord(first_line[0])

    if first_char == 0xFEFF:
        # The BOM is a single U+FEFF character at the start of the line
        self.lines[0] = first_line[1:]
    elif first_char == 0xEF and first_line[:3] == '\xEF\xBB\xBF':
        # The BOM is the three character sequence EF BB BF. Checking the
        # first character before comparing keeps the slice comparison
        # limited to lines that can actually match.
        self.lines[0] = first_line[3:]
|
||||||
|
|
|
||||||
26
tests/unit/test_file_checker.py
Normal file
26
tests/unit/test_file_checker.py
Normal file
|
|
@ -0,0 +1,26 @@
|
||||||
|
"""Tests for the FileChecker class."""
|
||||||
|
from flake8 import checker
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_lines_splits_lines():
    """Check that read_lines returns this file as separate lines."""
    module_docstring_line = '"""Tests for the FileChecker class."""\n'
    lines = checker.FileChecker(__file__, []).read_lines()
    assert len(lines) > 5
    assert module_docstring_line in lines
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('first_line', [
    '\xEF\xBB\xBF"""Module docstring."""\n',
    '\uFEFF"""Module docstring."""\n',
])
def test_strip_utf_bom(first_line):
    r"""Check that a leading '\xEF\xBB\xBF' or '\uFEFF' is stripped."""
    original = [first_line]
    subject = checker.FileChecker('stdin', [])
    subject.lines = list(original)
    subject.strip_utf_bom()
    assert subject.lines != original
    assert subject.lines[0] == '"""Module docstring."""\n'
|
||||||
Loading…
Add table
Add a link
Reference in a new issue