Don't require an encoding for check-docstring-first

This commit is contained in:
Anthony Sottile 2019-03-30 15:31:42 -07:00
parent cbc17d19d9
commit 2f6a2515ec
2 changed files with 35 additions and 18 deletions

View file

@@ -8,14 +8,23 @@ import tokenize
from typing import Optional from typing import Optional
from typing import Sequence from typing import Sequence
import six
NON_CODE_TOKENS = frozenset(( if six.PY2: # pragma: no cover (PY2)
tokenize.COMMENT, tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL, from tokenize import generate_tokens as tokenize_tokenize
)) OTHER_NON_CODE = ()
else: # pragma: no cover (PY3)
from tokenize import tokenize as tokenize_tokenize
OTHER_NON_CODE = (tokenize.ENCODING,)
NON_CODE_TOKENS = frozenset(
(tokenize.COMMENT, tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL) +
OTHER_NON_CODE,
)
def check_docstring_first(src, filename='<unknown>'): def check_docstring_first(src, filename='<unknown>'):
# type: (str, str) -> int # type: (bytes, str) -> int
"""Returns nonzero if the source has what looks like a docstring that is """Returns nonzero if the source has what looks like a docstring that is
not at the beginning of the source. not at the beginning of the source.
@@ -25,7 +34,7 @@ def check_docstring_first(src, filename='<unknown>'):
found_docstring_line = None found_docstring_line = None
found_code_line = None found_code_line = None
tok_gen = tokenize.generate_tokens(io.StringIO(src).readline) tok_gen = tokenize_tokenize(io.BytesIO(src).readline)
for tok_type, _, (sline, scol), _, _ in tok_gen: for tok_type, _, (sline, scol), _, _ in tok_gen:
# Looks like a docstring! # Looks like a docstring!
if tok_type == tokenize.STRING and scol == 0: if tok_type == tokenize.STRING and scol == 0:
@@ -61,7 +70,7 @@ def main(argv=None): # type: (Optional[Sequence[str]]) -> int
retv = 0 retv = 0
for filename in args.filenames: for filename in args.filenames:
with io.open(filename, encoding='UTF-8') as f: with open(filename, 'rb') as f:
contents = f.read() contents = f.read()
retv |= check_docstring_first(contents, filename=filename) retv |= check_docstring_first(contents, filename=filename)

View file

@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import unicode_literals from __future__ import unicode_literals
@@ -10,37 +11,37 @@ from pre_commit_hooks.check_docstring_first import main
# Contents, expected, expected_output # Contents, expected, expected_output
TESTS = ( TESTS = (
# trivial # trivial
('', 0, ''), (b'', 0, ''),
# Acceptable # Acceptable
('"foo"', 0, ''), (b'"foo"', 0, ''),
# Docstring after code # Docstring after code
( (
'from __future__ import unicode_literals\n' b'from __future__ import unicode_literals\n'
'"foo"\n', b'"foo"\n',
1, 1,
'{filename}:2 Module docstring appears after code ' '{filename}:2 Module docstring appears after code '
'(code seen on line 1).\n', '(code seen on line 1).\n',
), ),
# Test double docstring # Test double docstring
( (
'"The real docstring"\n' b'"The real docstring"\n'
'from __future__ import absolute_import\n' b'from __future__ import absolute_import\n'
'"fake docstring"\n', b'"fake docstring"\n',
1, 1,
'{filename}:3 Multiple module docstrings ' '{filename}:3 Multiple module docstrings '
'(first docstring on line 1).\n', '(first docstring on line 1).\n',
), ),
# Test multiple lines of code above # Test multiple lines of code above
( (
'import os\n' b'import os\n'
'import sys\n' b'import sys\n'
'"docstring"\n', b'"docstring"\n',
1, 1,
'{filename}:3 Module docstring appears after code ' '{filename}:3 Module docstring appears after code '
'(code seen on line 1).\n', '(code seen on line 1).\n',
), ),
# String literals in expressions are ok. # String literals in expressions are ok.
('x = "foo"\n', 0, ''), (b'x = "foo"\n', 0, ''),
) )
@@ -58,6 +59,13 @@ def test_unit(capsys, contents, expected, expected_out):
@all_tests @all_tests
def test_integration(tmpdir, capsys, contents, expected, expected_out): def test_integration(tmpdir, capsys, contents, expected, expected_out):
f = tmpdir.join('test.py') f = tmpdir.join('test.py')
f.write(contents) f.write_binary(contents)
assert main([f.strpath]) == expected assert main([f.strpath]) == expected
assert capsys.readouterr()[0] == expected_out.format(filename=f.strpath) assert capsys.readouterr()[0] == expected_out.format(filename=f.strpath)
def test_arbitrary_encoding(tmpdir):
    """Run the hook on a cp1252-encoded file and expect a zero exit code.

    The file declares its encoding with a coding cookie and contains a
    pound sign, which cp1252 stores as a single non-ASCII byte.
    """
    source_text = '# -*- coding: cp1252\nx = "£"'
    target = tmpdir.join('f.py')
    # Write raw bytes so the on-disk encoding is exactly cp1252.
    target.write_binary(source_text.encode('cp1252'))
    exit_code = main([target.strpath])
    assert exit_code == 0