remove --statistics and --benchmarks

This commit is contained in:
Anthony Sottile 2022-09-29 15:04:51 -04:00
parent 2c1bfa1f3d
commit ce274fb742
12 changed files with 8 additions and 443 deletions

View file

@ -45,26 +45,12 @@ class Report:
assert application.guide is not None
self._application = application
self._style_guide = application.guide
self._stats = self._style_guide.stats
@property
def total_errors(self) -> int:
"""Return the total number of errors."""
return self._application.result_count
def get_statistics(self, violation: str) -> list[str]:
"""Get the list of occurrences of a violation.
:returns:
List of occurrences of a violation formatted as:
{Count} {Error Code} {Message}, e.g.,
``8 E531 Some error message about the error``
"""
return [
f"{s.count} {s.error_code} {s.message}"
for s in self._stats.statistics_for(violation)
]
class StyleGuide:
"""Public facing object that mimics Flake8 2.0's StyleGuide.

View file

@ -14,7 +14,6 @@ from typing import List
from typing import Optional
from typing import Tuple
from flake8 import defaults
from flake8 import exceptions
from flake8 import processor
from flake8 import utils
@ -74,22 +73,10 @@ class Manager:
self.jobs = self._job_count()
self._all_checkers: list[FileChecker] = []
self.checkers: list[FileChecker] = []
self.statistics = {
"files": 0,
"logical lines": 0,
"physical lines": 0,
"tokens": 0,
}
self.exclude = tuple(
itertools.chain(self.options.exclude, self.options.extend_exclude)
)
def _process_statistics(self) -> None:
for checker in self.checkers:
for statistic in defaults.STATISTIC_NAMES:
self.statistics[statistic] += checker.statistics[statistic]
self.statistics["files"] += len(self.checkers)
def _job_count(self) -> int:
# First we walk through all of our error cases:
# - multiprocessing library is not present
@ -197,7 +184,6 @@ class Manager:
"""Run the checkers in parallel."""
# fmt: off
final_results: dict[str, list[tuple[str, int, int, str, str | None]]] = collections.defaultdict(list) # noqa: E501
final_statistics: dict[str, dict[str, int]] = collections.defaultdict(dict) # noqa: E501
# fmt: on
pool = _try_initialize_processpool(self.jobs)
@ -216,9 +202,8 @@ class Manager:
),
)
for ret in pool_map:
filename, results, statistics = ret
filename, results = ret
final_results[filename] = results
final_statistics[filename] = statistics
pool.close()
pool.join()
pool_closed = True
@ -230,7 +215,6 @@ class Manager:
for checker in self.checkers:
filename = checker.display_name
checker.results = final_results[filename]
checker.statistics = final_statistics[filename]
def run_serial(self) -> None:
"""Run the checkers in serial."""
@ -265,10 +249,6 @@ class Manager:
LOG.info("Making checkers")
self.make_checkers(paths)
def stop(self) -> None:
"""Stop checking files."""
self._process_statistics()
class FileChecker:
"""Manage running checks for a file and aggregate the results."""
@ -285,18 +265,12 @@ class FileChecker:
self.filename = filename
self.plugins = plugins
self.results: Results = []
self.statistics = {
"tokens": 0,
"logical lines": 0,
"physical lines": 0,
}
self.processor = self._make_processor()
self.display_name = filename
self.should_process = False
if self.processor is not None:
self.display_name = self.processor.filename
self.should_process = not self.processor.should_ignore_file()
self.statistics["physical lines"] = len(self.processor.lines)
def __repr__(self) -> str:
"""Provide helpful debugging representation."""
@ -506,11 +480,9 @@ class FileChecker:
"""
assert self.processor is not None
parens = 0
statistics = self.statistics
file_processor = self.processor
prev_physical = ""
for token in file_processor.generate_tokens():
statistics["tokens"] += 1
self.check_physical_eol(token, prev_physical)
token_type, text = token[0:2]
if token_type == tokenize.OP:
@ -525,7 +497,7 @@ class FileChecker:
self.run_physical_checks(file_processor.lines[-1])
self.run_logical_checks()
def run_checks(self) -> tuple[str, Results, dict[str, int]]:
def run_checks(self) -> tuple[str, Results]:
"""Run checks against the file."""
assert self.processor is not None
try:
@ -535,11 +507,9 @@ class FileChecker:
code = "E902" if isinstance(e, tokenize.TokenError) else "E999"
row, column = self._extract_syntax_information(e)
self.report(code, row, column, f"{type(e).__name__}: {e.args[0]}")
return self.filename, self.results, self.statistics
return self.filename, self.results
logical_lines = self.processor.statistics["logical lines"]
self.statistics["logical lines"] = logical_lines
return self.filename, self.results, self.statistics
return self.filename, self.results
def handle_newline(self, token_type: int) -> None:
"""Handle the logic when encountering a newline token."""
@ -618,7 +588,7 @@ def calculate_pool_chunksize(num_checkers: int, num_jobs: int) -> int:
return max(num_checkers // (num_jobs * 2), 1)
def _run_checks(checker: FileChecker) -> tuple[str, Results, dict[str, int]]:
def _run_checks(checker: FileChecker) -> tuple[str, Results]:
return checker.run_checks()

View file

@ -23,8 +23,6 @@ INDENT_SIZE = 4
# Other constants
WHITESPACE = frozenset(" \t")
STATISTIC_NAMES = ("logical lines", "physical lines", "tokens")
NOQA_INLINE_REGEXP = re.compile(
# We're looking for items that look like this:
# ``# noqa``

View file

@ -7,7 +7,6 @@ import sys
from typing import IO
from flake8.formatting import _windows_color
from flake8.statistics import Statistics
from flake8.violation import Violation
@ -113,36 +112,6 @@ class BaseFormatter:
"Subclass of BaseFormatter did not implement" " format."
)
def show_statistics(self, statistics: Statistics) -> None:
"""Format and print the statistics."""
for error_code in statistics.error_codes():
stats_for_error_code = statistics.statistics_for(error_code)
statistic = next(stats_for_error_code)
count = statistic.count
count += sum(stat.count for stat in stats_for_error_code)
self._write(f"{count:<5} {error_code} {statistic.message}")
def show_benchmarks(self, benchmarks: list[tuple[str, float]]) -> None:
"""Format and print the benchmarks."""
# NOTE(sigmavirus24): The format strings are a little confusing, even
# to me, so here's a quick explanation:
# We specify the named value first followed by a ':' to indicate we're
# formatting the value.
# Next we use '<' to indicate we want the value left aligned.
# Then '10' is the width of the area.
# For floats, finally, we only want at most 3 digits after
# the decimal point to be displayed. This is the precision and it
# can not be specified for integers which is why we need two separate
# format strings.
float_format = "{value:<10.3} {statistic}".format
int_format = "{value:<10} {statistic}".format
for statistic, value in benchmarks:
if isinstance(value, int):
benchmark = int_format(statistic=statistic, value=value)
else:
benchmark = float_format(statistic=statistic, value=value)
self._write(benchmark)
def show_source(self, error: Violation) -> str | None:
"""Show the physical line generating the error.

View file

@ -10,7 +10,6 @@ from typing import Sequence
import flake8
from flake8 import checker
from flake8 import defaults
from flake8 import exceptions
from flake8 import style_guide
from flake8 import utils
@ -229,30 +228,8 @@ class Application:
print("Run flake8 with greater verbosity to see more details")
self.catastrophic_failure = True
LOG.info("Finished running")
self.file_checker_manager.stop()
self.end_time = time.time()
def report_benchmarks(self) -> None:
"""Aggregate, calculate, and report benchmarks for this run."""
assert self.options is not None
if not self.options.benchmark:
return
assert self.file_checker_manager is not None
assert self.end_time is not None
time_elapsed = self.end_time - self.start_time
statistics = [("seconds elapsed", time_elapsed)]
add_statistic = statistics.append
for statistic in defaults.STATISTIC_NAMES + ("files",):
value = self.file_checker_manager.statistics[statistic]
total_description = f"total {statistic} processed"
add_statistic((total_description, value))
per_second_description = f"{statistic} processed per second"
add_statistic((per_second_description, int(value / time_elapsed)))
assert self.formatter is not None
self.formatter.show_benchmarks(statistics)
def report_errors(self) -> None:
"""Report all the errors found by flake8 3.0.
@ -269,16 +246,6 @@ class Application:
self.result_count,
)
def report_statistics(self) -> None:
"""Aggregate and report statistics from this run."""
assert self.options is not None
if not self.options.statistics:
return
assert self.formatter is not None
assert self.guide is not None
self.formatter.show_statistics(self.guide.stats)
def initialize(self, argv: Sequence[str]) -> None:
"""Initialize the application to be run.
@ -309,12 +276,10 @@ class Application:
self.make_file_checker_manager()
def report(self) -> None:
"""Report errors, statistics, and benchmarks."""
"""Report errors."""
assert self.formatter is not None
self.formatter.start()
self.report_errors()
self.report_statistics()
self.report_benchmarks()
self.formatter.stop()
def _run(self, argv: Sequence[str]) -> None:
@ -344,7 +309,3 @@ class Application:
except exceptions.EarlyQuit:
self.catastrophic_failure = True
print("... stopped while processing files")
else:
assert self.options is not None
if self.options.count:
print(self.result_count)

View file

@ -111,8 +111,6 @@ class FileProcessor:
self.total_lines = len(self.lines)
#: Verbosity level of Flake8
self.verbose = options.verbose
#: Statistics dictionary
self.statistics = {"logical lines": 0}
self._file_tokens: list[tokenize.TokenInfo] | None = None
# map from line number to the line we'll search for `noqa` in
self._noqa_line_mapping: dict[int, str] | None = None
@ -222,7 +220,6 @@ class FileProcessor:
comments, logical, mapping_list = self.build_logical_line_tokens()
joined_comments = "".join(comments)
self.logical_line = "".join(logical)
self.statistics["logical lines"] += 1
return joined_comments, self.logical_line, mapping_list
def split_line(

View file

@ -1,131 +0,0 @@
"""Statistic collection logic for Flake8."""
from __future__ import annotations
from typing import Generator
from typing import NamedTuple
from flake8.violation import Violation
class Statistics:
"""Manager of aggregated statistics for a run of Flake8."""
def __init__(self) -> None:
"""Initialize the underlying dictionary for our statistics."""
self._store: dict[Key, Statistic] = {}
def error_codes(self) -> list[str]:
"""Return all unique error codes stored.
:returns:
Sorted list of error codes.
"""
return sorted({key.code for key in self._store})
def record(self, error: Violation) -> None:
"""Add the fact that the error was seen in the file.
:param error:
The Violation instance containing the information about the
violation.
"""
key = Key.create_from(error)
if key not in self._store:
self._store[key] = Statistic.create_from(error)
self._store[key].increment()
def statistics_for(
self, prefix: str, filename: str | None = None
) -> Generator[Statistic, None, None]:
"""Generate statistics for the prefix and filename.
If you have a :class:`Statistics` object that has recorded errors,
you can generate the statistics for a prefix (e.g., ``E``, ``E1``,
``W50``, ``W503``) with the optional filter of a filename as well.
.. code-block:: python
>>> stats = Statistics()
>>> stats.statistics_for('E12',
filename='src/flake8/statistics.py')
<generator ...>
>>> stats.statistics_for('W')
<generator ...>
:param prefix:
The error class or specific error code to find statistics for.
:param filename:
(Optional) The filename to further filter results by.
:returns:
Generator of instances of :class:`Statistic`
"""
matching_errors = sorted(
key for key in self._store if key.matches(prefix, filename)
)
for error_code in matching_errors:
yield self._store[error_code]
class Key(NamedTuple):
"""Simple key structure for the Statistics dictionary.
To make things clearer, easier to read, and more understandable, we use a
namedtuple here for all Keys in the underlying dictionary for the
Statistics object.
"""
filename: str
code: str
@classmethod
def create_from(cls, error: Violation) -> Key:
"""Create a Key from :class:`flake8.violation.Violation`."""
return cls(filename=error.filename, code=error.code)
def matches(self, prefix: str, filename: str | None) -> bool:
"""Determine if this key matches some constraints.
:param prefix:
The error code prefix that this key's error code should start with.
:param filename:
The filename that we potentially want to match on. This can be
None to only match on error prefix.
:returns:
True if the Key's code starts with the prefix and either filename
is None, or the Key's filename matches the value passed in.
"""
return self.code.startswith(prefix) and (
filename is None or self.filename == filename
)
class Statistic:
"""Simple wrapper around the logic of each statistic.
Instead of maintaining a simple but potentially hard to reason about
tuple, we create a class which has attributes and a couple
convenience methods on it.
"""
def __init__(
self, error_code: str, filename: str, message: str, count: int
) -> None:
"""Initialize our Statistic."""
self.error_code = error_code
self.filename = filename
self.message = message
self.count = count
@classmethod
def create_from(cls, error: Violation) -> Statistic:
"""Create a Statistic from a :class:`flake8.violation.Violation`."""
return cls(
error_code=error.code,
filename=error.filename,
message=error.text,
count=0,
)
def increment(self) -> None:
"""Increment the number of times we've seen this error in this file."""
self.count += 1

View file

@ -12,7 +12,6 @@ from typing import Generator
from typing import Sequence
from flake8 import defaults
from flake8 import statistics
from flake8 import utils
from flake8.formatting import base as base_formatter
from flake8.violation import Violation
@ -215,11 +214,10 @@ class StyleGuideManager:
"""
self.options = options
self.formatter = formatter
self.stats = statistics.Statistics()
self.decider = decider or DecisionEngine(options)
self.style_guides: list[StyleGuide] = []
self.default_style_guide = StyleGuide(
options, formatter, self.stats, decider=decider
options, formatter, decider=decider
)
self.style_guides = list(
itertools.chain(
@ -318,7 +316,6 @@ class StyleGuide:
self,
options: argparse.Namespace,
formatter: base_formatter.BaseFormatter,
stats: statistics.Statistics,
filename: str | None = None,
decider: DecisionEngine | None = None,
):
@ -328,7 +325,6 @@ class StyleGuide:
"""
self.options = options
self.formatter = formatter
self.stats = stats
self.decider = decider or DecisionEngine(options)
self.filename = filename
if self.filename:
@ -349,9 +345,7 @@ class StyleGuide:
options = copy.deepcopy(self.options)
options.extend_ignore = options.extend_ignore or []
options.extend_ignore.extend(extend_ignore_with or [])
return StyleGuide(
options, self.formatter, self.stats, filename=filename
)
return StyleGuide(options, self.formatter, filename=filename)
@contextlib.contextmanager
def processing_file(
@ -443,7 +437,6 @@ class StyleGuide:
is_included_in_diff = error.is_in(self._parsed_diff)
if error_is_selected and is_not_inline_ignored and is_included_in_diff:
self.formatter.handle(error)
self.stats.record(error)
return 1
return 0

View file

@ -91,22 +91,6 @@ if True:
assert cli.main(["t.py"]) == 0
def test_statistics_option(tmpdir, capsys):
"""Ensure that `flake8 --statistics` works."""
with tmpdir.as_cwd():
tmpdir.join("t.py").write("import os\nimport sys\n")
assert cli.main(["--statistics", "t.py"]) == 1
expected = """\
t.py:1:1: F401 'os' imported but unused
t.py:2:1: F401 'sys' imported but unused
2 F401 'os' imported but unused
"""
out, err = capsys.readouterr()
assert out == expected
assert err == ""
def test_show_source_option(tmpdir, capsys):
"""Ensure that --show-source and --no-show-source work."""
with tmpdir.as_cwd():
@ -226,29 +210,6 @@ def test_bug_report_successful(capsys):
assert err == ""
def test_benchmark_successful(tmp_path, capsys):
"""Test that --benchmark does not crash."""
fname = tmp_path.joinpath("t.py")
fname.write_text("print('hello world')\n")
assert cli.main(["--benchmark", str(fname)]) == 0
out, err = capsys.readouterr()
parts = [line.split(maxsplit=1) for line in out.splitlines()]
assert parts == [
[mock.ANY, "seconds elapsed"],
["1", "total logical lines processed"],
[mock.ANY, "logical lines processed per second"],
["1", "total physical lines processed"],
[mock.ANY, "physical lines processed per second"],
["5", "total tokens processed"],
[mock.ANY, "tokens processed per second"],
["1", "total files processed"],
[mock.ANY, "files processed per second"],
]
assert err == ""
def test_specific_noqa_does_not_clobber_pycodestyle_noqa(tmpdir, capsys):
"""See https://github.com/pycqa/flake8/issues/1104."""
with tmpdir.as_cwd():

View file

@ -143,15 +143,3 @@ def test_report_total_errors():
app = mock.Mock(result_count="Fake count")
report = api.Report(app)
assert report.total_errors == "Fake count"
def test_report_get_statistics():
"""Verify that we use the statistics object."""
stats = mock.Mock()
stats.statistics_for.return_value = []
style_guide = mock.Mock(stats=stats)
app = mock.Mock(guide=style_guide)
report = api.Report(app)
assert report.get_statistics("E") == []
stats.statistics_for.assert_called_once_with("E")

View file

@ -1,124 +0,0 @@
"""Tests for the statistics module in Flake8."""
from __future__ import annotations
import pytest
from flake8 import statistics as stats
from flake8.violation import Violation
DEFAULT_ERROR_CODE = "E100"
DEFAULT_FILENAME = "file.py"
DEFAULT_TEXT = "Default text"
def make_error(**kwargs):
"""Create errors with a bunch of default values."""
kwargs.setdefault("code", DEFAULT_ERROR_CODE)
kwargs.setdefault("filename", DEFAULT_FILENAME)
kwargs.setdefault("line_number", 1)
kwargs.setdefault("column_number", 1)
kwargs.setdefault("text", DEFAULT_TEXT)
return Violation(**kwargs, physical_line=None)
def test_key_creation():
"""Verify how we create Keys from Errors."""
key = stats.Key.create_from(make_error())
assert key == (DEFAULT_FILENAME, DEFAULT_ERROR_CODE)
assert key.filename == DEFAULT_FILENAME
assert key.code == DEFAULT_ERROR_CODE
@pytest.mark.parametrize(
"code, filename, args, expected_result",
[
# Error prefix matches
("E123", "file000.py", ("E", None), True),
("E123", "file000.py", ("E1", None), True),
("E123", "file000.py", ("E12", None), True),
("E123", "file000.py", ("E123", None), True),
# Error prefix and filename match
("E123", "file000.py", ("E", "file000.py"), True),
("E123", "file000.py", ("E1", "file000.py"), True),
("E123", "file000.py", ("E12", "file000.py"), True),
("E123", "file000.py", ("E123", "file000.py"), True),
# Error prefix does not match
("E123", "file000.py", ("W", None), False),
# Error prefix matches but filename does not
("E123", "file000.py", ("E", "file001.py"), False),
# Error prefix does not match but filename does
("E123", "file000.py", ("W", "file000.py"), False),
# Neither error prefix match nor filename
("E123", "file000.py", ("W", "file001.py"), False),
],
)
def test_key_matching(code, filename, args, expected_result):
"""Verify Key#matches behaves as we expect with the above input."""
key = stats.Key.create_from(make_error(code=code, filename=filename))
assert key.matches(*args) is expected_result
def test_statistic_creation():
"""Verify how we create Statistic objects from Errors."""
stat = stats.Statistic.create_from(make_error())
assert stat.error_code == DEFAULT_ERROR_CODE
assert stat.message == DEFAULT_TEXT
assert stat.filename == DEFAULT_FILENAME
assert stat.count == 0
def test_statistic_increment():
"""Verify we update the count."""
stat = stats.Statistic.create_from(make_error())
assert stat.count == 0
stat.increment()
assert stat.count == 1
def test_recording_statistics():
"""Verify that we appropriately create a new Statistic and store it."""
aggregator = stats.Statistics()
assert list(aggregator.statistics_for("E")) == []
aggregator.record(make_error())
storage = aggregator._store
for key, value in storage.items():
assert isinstance(key, stats.Key)
assert isinstance(value, stats.Statistic)
assert storage[stats.Key(DEFAULT_FILENAME, DEFAULT_ERROR_CODE)].count == 1
def test_statistics_for_single_record():
"""Show we can retrieve the only statistic recorded."""
aggregator = stats.Statistics()
assert list(aggregator.statistics_for("E")) == []
aggregator.record(make_error())
statistics = list(aggregator.statistics_for("E"))
assert len(statistics) == 1
assert isinstance(statistics[0], stats.Statistic)
def test_statistics_for_filters_by_filename():
"""Show we can retrieve the only statistic recorded."""
aggregator = stats.Statistics()
assert list(aggregator.statistics_for("E")) == []
aggregator.record(make_error())
aggregator.record(make_error(filename="example.py"))
statistics = list(aggregator.statistics_for("E", DEFAULT_FILENAME))
assert len(statistics) == 1
assert isinstance(statistics[0], stats.Statistic)
def test_statistic_for_retrieves_more_than_one_value():
"""Show this works for more than a couple statistic values."""
aggregator = stats.Statistics()
for i in range(50):
aggregator.record(make_error(code=f"E1{i:02d}"))
aggregator.record(make_error(code=f"W2{i:02d}"))
statistics = list(aggregator.statistics_for("E"))
assert len(statistics) == 50
statistics = list(aggregator.statistics_for("W22"))
assert len(statistics) == 10

View file

@ -6,7 +6,6 @@ from unittest import mock
import pytest
from flake8 import statistics
from flake8 import style_guide
from flake8 import utils
from flake8.formatting import base
@ -32,7 +31,6 @@ def test_handle_error_does_not_raise_type_errors():
guide = style_guide.StyleGuide(
create_options(select=["T111"], ignore=[]),
formatter=formatter,
stats=statistics.Statistics(),
)
assert 1 == guide.handle_error(
@ -74,7 +72,6 @@ def test_style_guide_applies_to(style_guide_file, filename, expected):
guide = style_guide.StyleGuide(
options,
formatter=formatter,
stats=statistics.Statistics(),
filename=style_guide_file,
)
assert guide.applies_to(filename) is expected