Modify stdin retriever to detect coding pragma

When we read in a file on Python 3, we use the tokenize module to detect
the coding pragma at the top of the file. We then use that encoding to
decode the rest of the file. However, when the source was received on
stdin, we did not perform this detection.

This updates ``stdin_get_value`` in a backwards compatible way to check
for that coding pragma and then fall back to UTF-8 if necessary.

Closes #306
This commit is contained in:
Ian Cordasco 2017-02-19 14:21:56 -06:00
parent 218a0b6980
commit 879d3fc0d7
No known key found for this signature in database
GPG key ID: 656D3395E4A9791A

View file

@ -7,6 +7,7 @@ import os
import platform
import re
import sys
import tokenize
# Matches a unified-diff hunk header such as "@@ -7,6 +7,7 @@ import os".
# Group 1 captures the number following "+"; group 2 captures the optional
# number after the comma that follows it (None when that part is absent).
DIFF_HUNK_REGEXP = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$')
@ -65,17 +66,26 @@ def normalize_path(path, parent=os.curdir):
return path.rstrip(separator + alternate_separator)
def _stdin_get_value_py3():
    # type: () -> io.StringIO
    """Read all of stdin as bytes and decode it like a Python source file.

    The raw bytes are taken from ``sys.stdin.buffer`` and
    ``tokenize.detect_encoding`` is used to find the encoding declared by
    a coding pragma (or BOM) at the top of the input. If detection or
    decoding with the detected encoding fails, the bytes are decoded as
    UTF-8 instead.

    :returns:
        An ``io.StringIO`` wrapping the decoded text.
    :raises UnicodeDecodeError:
        If the fallback UTF-8 decode also fails.
    """
    stdin_value = sys.stdin.buffer.read()
    try:
        # detect_encoding() consumes at most the first two lines; the lines
        # it returns are not needed because the whole buffer is decoded.
        coding, _ = tokenize.detect_encoding(io.BytesIO(stdin_value).readline)
        return io.StringIO(stdin_value.decode(coding))
    except (LookupError, SyntaxError, UnicodeError):
        # Unknown/invalid pragma or undecodable bytes: fall back to UTF-8.
        return io.StringIO(stdin_value.decode('utf-8'))
def stdin_get_value():
    # type: () -> str
    """Get the contents of stdin and cache them so plugins can use them.

    stdin is read only on the first call; the resulting buffer object is
    stored on the function as ``stdin_get_value.cached_stdin`` and reused
    by every later call.

    On Python 2 the raw value of ``sys.stdin.read()`` is wrapped in an
    ``io.BytesIO``. On Python 3 the value comes from
    ``_stdin_get_value_py3``.

    :returns:
        The full contents of the cached buffer.
    """
    cached_value = getattr(stdin_get_value, 'cached_stdin', None)
    if cached_value is None:
        # Read stdin exactly once, choosing the reader by interpreter version.
        if sys.version_info < (3, 0):
            stdin_value = io.BytesIO(sys.stdin.read())
        else:
            stdin_value = _stdin_get_value_py3()
        stdin_get_value.cached_stdin = stdin_value
        cached_value = stdin_get_value.cached_stdin
    return cached_value.getvalue()