From a42bfdf6d2a9479648831dda619e179516827a93 Mon Sep 17 00:00:00 2001
From: Anthony Sottile <asottile@umich.edu>
Date: Sun, 21 Oct 2018 08:56:52 -0700
Subject: [PATCH] Fix inconsistent newlines read from a file in python3

---
 src/flake8/processor.py           |  9 +++------
 tests/unit/test_file_processor.py | 26 ++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/src/flake8/processor.py b/src/flake8/processor.py
index fdf0a98..18f9f1d 100644
--- a/src/flake8/processor.py
+++ b/src/flake8/processor.py
@@ -1,6 +1,5 @@
 """Module containing our file processor that tokenizes a file for checks."""
 import contextlib
-import io
 import logging
 import sys
 import tokenize
@@ -308,11 +307,9 @@ class FileProcessor(object):
     def _readlines_py3(self):
         # type: () -> List[str]
         try:
-            with open(self.filename, "rb") as fd:
-                (coding, lines) = tokenize.detect_encoding(fd.readline)
-                textfd = io.TextIOWrapper(fd, coding, line_buffering=True)
-                return [l.decode(coding) for l in lines] + textfd.readlines()
-        except (LookupError, SyntaxError, UnicodeError):
+            with tokenize.open(self.filename) as fd:
+                return fd.readlines()
+        except (SyntaxError, UnicodeError):
             # If we can't detect the codec with tokenize.detect_encoding, or
             # the detected encoding is incorrect, just fallback to latin-1.
             with open(self.filename, encoding="latin-1") as fd:
diff --git a/tests/unit/test_file_processor.py b/tests/unit/test_file_processor.py
index 312827b..b5e8323 100644
--- a/tests/unit/test_file_processor.py
+++ b/tests/unit/test_file_processor.py
@@ -27,6 +27,32 @@ def test_read_lines_splits_lines():
                for line in lines)
 
 
+def lines_from_file(tmpdir, lines):
+    f = tmpdir.join('f.py')
+    f.write_binary(''.join(lines).encode('utf-8'))  # no newline translation
+    return processor.FileProcessor(f.strpath, options_from()).lines
+
+
+def test_read_lines_universal_newlines(tmpdir):
+    r"""Check that \r\n line endings are normalized to \n when read."""
+    source = ['# coding: utf-8\r\n', 'x = 1\r\n']
+    assert lines_from_file(tmpdir, source) == ['# coding: utf-8\n', 'x = 1\n']
+
+
+def test_read_lines_incorrect_utf_16(tmpdir):
+    """Check that a file falsely declaring utf16 is read as latin-1."""
+    source = ['# coding: utf16\n', 'x = 1\n']
+    result = lines_from_file(tmpdir, source)
+    expected = ['# coding: utf16\n', 'x = 1\n']
+    assert result == expected
+
+
+def test_read_lines_unknown_encoding(tmpdir):
+    """Check that a file declaring an unknown codec is read as latin-1."""
+    result = lines_from_file(tmpdir, ['# coding: fake-encoding\n', 'x = 1\n'])
+    assert result == ['# coding: fake-encoding\n', 'x = 1\n']
+
+
 @pytest.mark.parametrize('first_line', [
     '\xEF\xBB\xBF"""Module docstring."""\n',
     u'\uFEFF"""Module docstring."""\n',