Merge pull request #1320 from asottile/ast_syntax_error_earlier

short circuit on ast error before tokenization error
This commit is contained in:
Anthony Sottile 2021-04-18 10:32:45 -07:00 committed by GitHub
commit d1a4043c59
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 47 additions and 94 deletions

View file

@@ -424,13 +424,23 @@ class FileChecker:
) )
@staticmethod @staticmethod
def _extract_syntax_information(exception): def _extract_syntax_information(exception: Exception) -> Tuple[int, int]:
token = () if (
if len(exception.args) > 1: len(exception.args) > 1
and exception.args[1]
and len(exception.args[1]) > 2
):
token = exception.args[1] token = exception.args[1]
if token and len(token) > 2: row, column = token[1:3]
row, column = token[1:3] elif (
isinstance(exception, tokenize.TokenError)
and len(exception.args) == 2
and len(exception.args[1]) == 2
):
token = ()
row, column = exception.args[1]
else: else:
token = ()
row, column = (1, 0) row, column = (1, 0)
if column > 0 and token and isinstance(exception, SyntaxError): if column > 0 and token and isinstance(exception, SyntaxError):
@@ -463,14 +473,7 @@ class FileChecker:
def run_ast_checks(self) -> None: def run_ast_checks(self) -> None:
"""Run all checks expecting an abstract syntax tree.""" """Run all checks expecting an abstract syntax tree."""
assert self.processor is not None assert self.processor is not None
try: ast = self.processor.build_ast()
ast = self.processor.build_ast()
except (ValueError, SyntaxError, TypeError) as e:
row, column = self._extract_syntax_information(e)
self.report(
"E999", row, column, f"{type(e).__name__}: {e.args[0]}"
)
return
for plugin in self.checks["ast_plugins"]: for plugin in self.checks["ast_plugins"]:
checker = self.run_check(plugin, tree=ast) checker = self.run_check(plugin, tree=ast)
@@ -548,7 +551,6 @@ class FileChecker:
def process_tokens(self): def process_tokens(self):
"""Process tokens and trigger checks. """Process tokens and trigger checks.
This can raise a :class:`flake8.exceptions.InvalidSyntax` exception.
Instead of using this directly, you should use Instead of using this directly, you should use
:meth:`flake8.checker.FileChecker.run_checks`. :meth:`flake8.checker.FileChecker.run_checks`.
""" """
@@ -578,15 +580,13 @@ class FileChecker:
"""Run checks against the file.""" """Run checks against the file."""
assert self.processor is not None assert self.processor is not None
try: try:
self.process_tokens()
self.run_ast_checks() self.run_ast_checks()
except exceptions.InvalidSyntax as exc: self.process_tokens()
self.report( except (SyntaxError, tokenize.TokenError) as e:
exc.error_code, code = "E902" if isinstance(e, tokenize.TokenError) else "E999"
exc.line_number, row, column = self._extract_syntax_information(e)
exc.column_number, self.report(code, row, column, f"{type(e).__name__}: {e.args[0]}")
exc.error_message, return
)
logical_lines = self.processor.statistics["logical lines"] logical_lines = self.processor.statistics["logical lines"]
self.statistics["logical lines"] = logical_lines self.statistics["logical lines"] = logical_lines

View file

@@ -33,23 +33,6 @@ class FailedToLoadPlugin(Flake8Exception):
} }
class InvalidSyntax(Flake8Exception):
"""Exception raised when tokenizing a file fails."""
def __init__(self, exception: Exception) -> None:
"""Initialize our InvalidSyntax exception."""
self.original_exception = exception
self.error_message = f"{type(exception).__name__}: {exception.args[0]}"
self.error_code = "E902"
self.line_number = 1
self.column_number = 0
super().__init__(exception)
def __str__(self) -> str:
"""Format our exception message."""
return self.error_message
class PluginRequestedUnknownParameters(Flake8Exception): class PluginRequestedUnknownParameters(Flake8Exception):
"""The plugin requested unknown parameters.""" """The plugin requested unknown parameters."""

View file

@@ -13,7 +13,6 @@ from typing import Tuple
import flake8 import flake8
from flake8 import defaults from flake8 import defaults
from flake8 import exceptions
from flake8 import utils from flake8 import utils
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
@@ -125,20 +124,12 @@ class FileProcessor:
@property @property
def file_tokens(self) -> List[_Token]: def file_tokens(self) -> List[_Token]:
"""Return the complete set of tokens for a file. """Return the complete set of tokens for a file."""
Accessing this attribute *may* raise an InvalidSyntax exception.
:raises: flake8.exceptions.InvalidSyntax
"""
if self._file_tokens is None: if self._file_tokens is None:
line_iter = iter(self.lines) line_iter = iter(self.lines)
try: self._file_tokens = list(
self._file_tokens = list( tokenize.generate_tokens(lambda: next(line_iter))
tokenize.generate_tokens(lambda: next(line_iter)) )
)
except (tokenize.TokenError, SyntaxError) as exc:
raise exceptions.InvalidSyntax(exception=exc)
return self._file_tokens return self._file_tokens
@@ -274,20 +265,12 @@ class FileProcessor:
return arguments return arguments
def generate_tokens(self) -> Generator[_Token, None, None]: def generate_tokens(self) -> Generator[_Token, None, None]:
"""Tokenize the file and yield the tokens. """Tokenize the file and yield the tokens."""
for token in tokenize.generate_tokens(self.next_line):
:raises flake8.exceptions.InvalidSyntax: if token[2][0] > self.total_lines:
If a :class:`tokenize.TokenError` is raised while generating break
tokens. self.tokens.append(token)
""" yield token
try:
for token in tokenize.generate_tokens(self.next_line):
if token[2][0] > self.total_lines:
break
self.tokens.append(token)
yield token
except (tokenize.TokenError, SyntaxError) as exc:
raise exceptions.InvalidSyntax(exception=exc)
def _noqa_line_range(self, min_line: int, max_line: int) -> Dict[int, str]: def _noqa_line_range(self, min_line: int, max_line: int) -> Dict[int, str]:
line_range = range(min_line, max_line + 1) line_range = range(min_line, max_line + 1)
@@ -299,7 +282,7 @@ class FileProcessor:
if self._noqa_line_mapping is None: if self._noqa_line_mapping is None:
try: try:
file_tokens = self.file_tokens file_tokens = self.file_tokens
except exceptions.InvalidSyntax: except (tokenize.TokenError, SyntaxError):
# if we failed to parse the file tokens, we'll always fail in # if we failed to parse the file tokens, we'll always fail in
# the future, so set this so the code does not try again # the future, so set this so the code does not try again
self._noqa_line_mapping = {} self._noqa_line_mapping = {}

View file

@@ -1,6 +1,7 @@
"""Integration tests for the main entrypoint of flake8.""" """Integration tests for the main entrypoint of flake8."""
import json import json
import os import os
import sys
from unittest import mock from unittest import mock
import pytest import pytest
@@ -186,8 +187,15 @@ def test_tokenization_error_but_not_syntax_error(tmpdir, capsys):
tmpdir.join("t.py").write("b'foo' \\\n") tmpdir.join("t.py").write("b'foo' \\\n")
_call_main(["t.py"], retv=1) _call_main(["t.py"], retv=1)
if hasattr(sys, "pypy_version_info"): # pragma: no cover (pypy)
expected = "t.py:2:1: E999 SyntaxError: end of file (EOF) in multi-line statement\n" # noqa: E501
elif sys.version_info < (3, 8): # pragma: no cover (<cp38)
expected = "t.py:2:1: E902 TokenError: EOF in multi-line statement\n"
else: # pragma: no cover (cp38+)
expected = "t.py:1:8: E999 SyntaxError: unexpected EOF while parsing\n"
out, err = capsys.readouterr() out, err = capsys.readouterr()
assert out == "t.py:1:1: E902 TokenError: EOF in multi-line statement\n" assert out == expected
assert err == "" assert err == ""
@@ -197,8 +205,12 @@ def test_tokenization_error_is_a_syntax_error(tmpdir, capsys):
tmpdir.join("t.py").write("if True:\n pass\n pass\n") tmpdir.join("t.py").write("if True:\n pass\n pass\n")
_call_main(["t.py"], retv=1) _call_main(["t.py"], retv=1)
if hasattr(sys, "pypy_version_info"): # pragma: no cover (pypy)
expected = "t.py:3:2: E999 IndentationError: unindent does not match any outer indentation level\n" # noqa: E501
else: # pragma: no cover (cpython)
expected = "t.py:3:5: E999 IndentationError: unindent does not match any outer indentation level\n" # noqa: E501
out, err = capsys.readouterr() out, err = capsys.readouterr()
expected = "t.py:1:1: E902 IndentationError: unindent does not match any outer indentation level\n" # noqa: E501
assert out == expected assert out == expected
assert err == "" assert err == ""

View file

@@ -13,7 +13,6 @@ from flake8 import exceptions
plugin_name="plugin_name", plugin_name="plugin_name",
exception=ValueError("boom!"), exception=ValueError("boom!"),
), ),
exceptions.InvalidSyntax(exception=ValueError("Unexpected token: $")),
exceptions.PluginRequestedUnknownParameters( exceptions.PluginRequestedUnknownParameters(
plugin={"plugin_name": "plugin_name"}, plugin={"plugin_name": "plugin_name"},
exception=ValueError("boom!"), exception=ValueError("boom!"),

View file

@@ -7,30 +7,6 @@ import flake8
from flake8 import checker from flake8 import checker
@mock.patch("flake8.processor.FileProcessor")
def test_run_ast_checks_handles_SyntaxErrors(FileProcessor): # noqa: N802,N803
"""Stress our SyntaxError handling.
Related to: https://github.com/pycqa/flake8/issues/169
"""
processor = mock.Mock(lines=[])
FileProcessor.return_value = processor
processor.build_ast.side_effect = SyntaxError(
"Failed to build ast", ("", 1, 5, "foo(\n")
)
file_checker = checker.FileChecker(__file__, checks={}, options=object())
with mock.patch.object(file_checker, "report") as report:
file_checker.run_ast_checks()
report.assert_called_once_with(
"E999",
1,
3,
"SyntaxError: Failed to build ast",
)
@mock.patch("flake8.checker.FileChecker._make_processor", return_value=None) @mock.patch("flake8.checker.FileChecker._make_processor", return_value=None)
def test_repr(*args): def test_repr(*args):
"""Verify we generate a correct repr.""" """Verify we generate a correct repr."""