mirror of
https://github.com/PyCQA/flake8.git
synced 2026-04-09 14:24:17 +00:00
Merge branch 'match_newlines_py3' into 'master'
Fix inconsistent newlines read from a file in python3. Closes #457. See merge request pycqa/flake8!253.
This commit is contained in:
commit
63b91c95ea
2 changed files with 29 additions and 6 deletions
|
|
@ -1,6 +1,5 @@
|
||||||
"""Module containing our file processor that tokenizes a file for checks."""
|
"""Module containing our file processor that tokenizes a file for checks."""
|
||||||
import contextlib
|
import contextlib
|
||||||
import io
|
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
import tokenize
|
import tokenize
|
||||||
|
|
@ -308,11 +307,9 @@ class FileProcessor(object):
|
||||||
def _readlines_py3(self):
|
def _readlines_py3(self):
|
||||||
# type: () -> List[str]
|
# type: () -> List[str]
|
||||||
try:
|
try:
|
||||||
with open(self.filename, "rb") as fd:
|
with tokenize.open(self.filename) as fd:
|
||||||
(coding, lines) = tokenize.detect_encoding(fd.readline)
|
return fd.readlines()
|
||||||
textfd = io.TextIOWrapper(fd, coding, line_buffering=True)
|
except (SyntaxError, UnicodeError):
|
||||||
return [l.decode(coding) for l in lines] + textfd.readlines()
|
|
||||||
except (LookupError, SyntaxError, UnicodeError):
|
|
||||||
# If we can't detect the codec with tokenize.detect_encoding, or
|
# If we can't detect the codec with tokenize.detect_encoding, or
|
||||||
# the detected encoding is incorrect, just fallback to latin-1.
|
# the detected encoding is incorrect, just fallback to latin-1.
|
||||||
with open(self.filename, encoding="latin-1") as fd:
|
with open(self.filename, encoding="latin-1") as fd:
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,32 @@ def test_read_lines_splits_lines():
|
||||||
for line in lines)
|
for line in lines)
|
||||||
|
|
||||||
|
|
||||||
|
def lines_from_file(tmpdir, lines):
    """Write *lines* into a temporary file and return FileProcessor's view of it."""
    target = tmpdir.join('f.py')
    target.write(''.join(lines))
    file_processor = processor.FileProcessor(target.strpath, options_from())
    return file_processor.lines
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_lines_universal_newlines(tmpdir):
    r"""Verify that line endings are translated to \n."""
    result = lines_from_file(tmpdir, ['# coding: utf-8\r\n', 'x = 1\r\n'])
    expected = ['# coding: utf-8\n', 'x = 1\n']
    assert result == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_lines_incorrect_utf_16(tmpdir):
    """Verify that a file which incorrectly claims it is utf16 is still read
    as latin-1.
    """
    source = ['# coding: utf16\n', 'x = 1\n']
    result = lines_from_file(tmpdir, source)
    # latin-1 maps every byte, so the original text comes back unchanged.
    assert result == ['# coding: utf16\n', 'x = 1\n']
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_lines_unknown_encoding(tmpdir):
    """Verify that an unknown encoding is still read as latin-1."""
    source = ['# coding: fake-encoding\n', 'x = 1\n']
    result = lines_from_file(tmpdir, source)
    # The bogus cookie triggers the latin-1 fallback, which is lossless.
    assert result == ['# coding: fake-encoding\n', 'x = 1\n']
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('first_line', [
|
@pytest.mark.parametrize('first_line', [
|
||||||
'\xEF\xBB\xBF"""Module docstring."""\n',
|
'\xEF\xBB\xBF"""Module docstring."""\n',
|
||||||
u'\uFEFF"""Module docstring."""\n',
|
u'\uFEFF"""Module docstring."""\n',
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue