Add line splitting and file reading

Add some tests around reading lines and stripping UTF BOMs
2026-04-10 06:44:18 +00:00 · 2016-02-23 11:17:11 -06:00 · 2016-02-23 11:17:11 -06:00 · 5ee061b810
commit 5ee061b810
parent 28f4811cb9
2 changed files with 79 additions and 15 deletions
--- a/flake8/checker.py
+++ b/flake8/checker.py
@ -187,17 +187,11 @@ class FileChecker(object):
            return self.read_lines_from_stdin()
        return self.read_lines_from_filename()
-    def read_lines_from_stdin(self):
+    def _readlines_py2(self):
        """Read the lines from standard in."""
        return utils.stdin_get_value().splitlines(True)
    def read_lines_from_filename(self):
        """Read the lines for a file."""
        if (2, 6) <= sys.version_info < (3, 0):
        with open(self.filename, 'rU') as fd:
            return fd.readlines()
-        elif (3, 0) <= sys.version_info < (4, 0):
+    def _readlines_py3(self):
        try:
            with open(self.filename, 'rb') as fd:
                (coding, lines) = tokenize.detect_encoding(fd.readline)
@ -205,9 +199,53 @@ class FileChecker(object):
                return ([l.decode(coding) for l in lines] +
                        textfd.readlines())
        except (LookupError, SyntaxError, UnicodeError):
            # If we can't detect the codec with tokenize.detect_encoding, or
            # the detected encoding is incorrect, just fallback to latin-1.
            with open(self.filename, encoding='latin-1') as fd:
                return fd.readlines()
    def read_lines_from_filename(self):
        """Read the lines for a file."""
        if (2, 6) <= sys.version_info < (3, 0):
            readlines = self._readlines_py2
        elif (3, 0) <= sys.version_info < (4, 0):
            readlines = self._readlines_py3
        try:
            return readlines()
        except IOError:
            # If we can not read the file due to an IOError (e.g., the file
            # does not exist or we do not have the permissions to open it)
            # then we need to format that exception for the user.
            # NOTE(sigmavirus24): Historically, pep8 has always reported this
            # as an E902. We probably *want* a better error code for this
            # going forward.
            (exc_type, exception) = sys.exc_info()[:2]
            message = '{0}: {1}'.format(exc_type.__name__, exception)
            self.results.append('E902', self.filename, 0, 0, message)
            return []
    def read_lines_from_stdin(self):
        """Return the value read from standard in, split into lines.

        Line endings are kept on each returned line.
        """
        stdin_text = utils.stdin_get_value()
        keep_line_endings = True
        return stdin_text.splitlines(keep_line_endings)
    def run_checks(self):
        """Run checks against the file."""
        self.lines = self.read_lines()
        self.strip_utf_bom()
    def strip_utf_bom(self):
        """Strip the UTF bom from the lines of the file."""
        if not self.lines:
            # If we have nothing to analyze quit early
            return
        first_byte = ord(self.lines[0][0])
        if first_byte not in (0xEF, 0xFEFF):
            return
        # If the first byte of the file is a UTF-8 BOM, strip it
        if first_byte == 0xFEFF:
            self.lines[0] = self.lines[0][1:]
        elif self.lines[0][:3] == '\xEF\xBB\xBF':
            self.lines[0] = self.lines[0][3:]
--- a/tests/unit/test_file_checker.py
+++ b/tests/unit/test_file_checker.py
@ -0,0 +1,26 @@
 """Tests for the FileChecker class."""
 from flake8 import checker
 import pytest
 def test_read_lines_splits_lines():
    """Verify that read_lines splits the lines of the file."""
    file_checker = checker.FileChecker(__file__, [])
    lines = file_checker.read_lines()
    assert len(lines) > 5
    assert '"""Tests for the FileChecker class."""\n' in lines
@pytest.mark.parametrize('first_line', [
    '\xEF\xBB\xBF"""Module docstring."""\n',
    '\uFEFF"""Module docstring."""\n',
])
def test_strip_utf_bom(first_line):
    """Verify that a leading BOM is removed from the first line."""
    original_lines = [first_line]
    file_checker = checker.FileChecker('stdin', [])
    # Give the checker its own copy so the comparison below is meaningful.
    file_checker.lines = list(original_lines)
    file_checker.strip_utf_bom()
    assert file_checker.lines != original_lines
    assert file_checker.lines[0] == '"""Module docstring."""\n'