From 879d3fc0d77f5179ee56d742ef820f93817cd854 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Sun, 19 Feb 2017 14:21:56 -0600 Subject: [PATCH] Modify stdin retriever to detect coding pragma When we read in a file on Python 3, we use the tokenize module to detect the coding pragma at the top of the file. We then use that to decode the rest of the file. However, when we were receiving stdin, we would not do that. This updates ``stdin_get_value`` in a backwards compatible way to check for that coding pragma and then fall back to UTF-8 if necessary. Closes #306 --- src/flake8/utils.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/flake8/utils.py b/src/flake8/utils.py index 55c699e..b06b659 100644 --- a/src/flake8/utils.py +++ b/src/flake8/utils.py @@ -7,6 +7,7 @@ import os import platform import re import sys +import tokenize DIFF_HUNK_REGEXP = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$') @@ -65,17 +66,26 @@ def normalize_path(path, parent=os.curdir): return path.rstrip(separator + alternate_separator) +def _stdin_get_value_py3(): + stdin_value = sys.stdin.buffer.read() + fd = io.BytesIO(stdin_value) + try: + (coding, lines) = tokenize.detect_encoding(fd.readline) + return io.StringIO(stdin_value.decode(coding)) + except (LookupError, SyntaxError, UnicodeError): + return io.StringIO(stdin_value.decode('utf-8')) + + def stdin_get_value(): # type: () -> str """Get and cache it so plugins can use it.""" cached_value = getattr(stdin_get_value, 'cached_stdin', None) if cached_value is None: - stdin_value = sys.stdin.read() if sys.version_info < (3, 0): - cached_type = io.BytesIO + stdin_value = io.BytesIO(sys.stdin.read()) else: - cached_type = io.StringIO - stdin_get_value.cached_stdin = cached_type(stdin_value) + stdin_value = _stdin_get_value_py3() + stdin_get_value.cached_stdin = stdin_value cached_value = stdin_get_value.cached_stdin return cached_value.getvalue()