diff --git a/src/flake8/api/legacy.py b/src/flake8/api/legacy.py index 35c7101..9fc05bb 100644 --- a/src/flake8/api/legacy.py +++ b/src/flake8/api/legacy.py @@ -141,6 +141,8 @@ class Report(object): .. warning:: This should not be instantiated by users. """ self._application = application + self._style_guide = application.guide + self._stats = self._style_guide.stats @property def total_errors(self): @@ -149,4 +151,7 @@ class Report(object): def get_statistics(self, violation): """Get the number of occurences of a violation.""" - raise NotImplementedError('Statistics capturing needs to happen first') + return [ + '{} {} {}'.format(s.count, s.error_code, s.message) + for s in self._stats.statistics_for(violation) + ] diff --git a/src/flake8/statistics.py b/src/flake8/statistics.py new file mode 100644 index 0000000..2512089 --- /dev/null +++ b/src/flake8/statistics.py @@ -0,0 +1,118 @@ +"""Statistic collection logic for Flake8.""" +import collections + + +class Statistics(object): + """Manager of aggregated statistics for a run of Flake8.""" + + def __init__(self): + """Initialize the underlying dictionary for our statistics.""" + self._store = {} + + def record(self, error): + """Add the fact that the error was seen in the file. + + :param error: + The Error instance containing the information about the violation. + :type error: + flake8.style_guide.Error + """ + key = Key.create_from(error) + if key not in self._store: + self._store[key] = Statistic.create_from(error) + self._store[key].increment() + + def statistics_for(self, prefix, filename=None): + """Generate statistics for the prefix and filename. + + If you have a :class:`Statistics` object that has recorded errors, + you can generate the statistics for a prefix (e.g., ``E``, ``E1``, + ``W50``, ``W503``) with the optional filter of a filename as well. + + .. code-block:: python + + >>> stats = Statistics() + >>> stats.statistics_for('E12', + filename='src/flake8/statistics.py') + + >>> stats.statistics_for('W') + + + :param str prefix: + The error class or specific error code to find statistics for. + :param str filename: + (Optional) The filename to further filter results by. + :returns: + Generator of instances of :class:`Statistic` + """ + matching_errors = sorted(key for key in self._store.keys() + if key.matches(prefix, filename)) + for error_code in matching_errors: + yield self._store[error_code] + + +class Key(collections.namedtuple('Key', ['filename', 'code'])): + """Simple key structure for the Statistics dictionary. + + To make things clearer, easier to read, and more understandable, we use a + namedtuple here for all Keys in the underlying dictionary for the + Statistics object. + """ + + __slots__ = () + + @classmethod + def create_from(cls, error): + """Create a Key from :class:`flake8.style_guide.Error`.""" + return cls( + filename=error.filename, + code=error.code, + ) + + def matches(self, prefix, filename): + """Determine if this key matches some constraints. + + :param str prefix: + The error code prefix that this key's error code should start with. + :param str filename: + The filename that we potentially want to match on. This can be + None to only match on error prefix. + :returns: + True if the Key's code starts with the prefix and either filename + is None, or the Key's filename matches the value passed in. + :rtype: + bool + """ + return (self.code.startswith(prefix) and + (filename is None or + self.filename == filename)) + + +class Statistic(object): + """Simple wrapper around the logic of each statistic. + + Instead of maintaining a simple but potentially hard to reason about + tuple, we create a namedtuple which has attributes and a couple + convenience methods on it. + """ + + def __init__(self, error_code, filename, message, count): + """Initialize our Statistic.""" + self.error_code = error_code + self.filename = filename + self.message = message + self.count = count + + @classmethod + def create_from(cls, error): + """Create a Statistic from a :class:`flake8.style_guide.Error`.""" + return cls( + error_code=error.code, + filename=error.filename, + message=error.text, + count=0, + ) + + def increment(self): + """Increment the number of times we've seen this error in this file.""" + self.count += 1 diff --git a/src/flake8/style_guide.py b/src/flake8/style_guide.py index 89890ba..ed1b844 100644 --- a/src/flake8/style_guide.py +++ b/src/flake8/style_guide.py @@ -5,6 +5,7 @@ import linecache import logging import re +from flake8 import statistics from flake8 import utils __all__ = ( @@ -74,6 +75,7 @@ class StyleGuide(object): self.options = options self.listener = listener_trie self.formatter = formatter + self.stats = statistics.Statistics() self._selected = tuple(options.select) self._ignored = tuple(options.ignore) self._decision_cache = {} @@ -267,6 +269,7 @@ class StyleGuide(object): if (error_is_selected and is_not_inline_ignored and is_included_in_diff): self.formatter.handle(error) + self.stats.record(error) self.listener.notify(error.code, error) return 1 return 0 diff --git a/tests/unit/test_statistics.py b/tests/unit/test_statistics.py new file mode 100644 index 0000000..f95c638 --- /dev/null +++ b/tests/unit/test_statistics.py @@ -0,0 +1,121 @@ +"""Tests for the statistics module in Flake8.""" +import pytest + +from flake8 import statistics as stats +from flake8 import style_guide + +DEFAULT_ERROR_CODE = 'E100' +DEFAULT_FILENAME = 'file.py' +DEFAULT_TEXT = 'Default text' + + +def make_error(**kwargs): + """Create errors with a bunch of default values.""" + return style_guide.Error( + code=kwargs.pop('code', DEFAULT_ERROR_CODE), + filename=kwargs.pop('filename', DEFAULT_FILENAME), + line_number=kwargs.pop('line_number', 1), + column_number=kwargs.pop('column_number', 1), + text=kwargs.pop('text', DEFAULT_TEXT), + physical_line=None, + ) + + +def test_key_creation(): + """Verify how we create Keys from Errors.""" + key = stats.Key.create_from(make_error()) + assert key == (DEFAULT_FILENAME, DEFAULT_ERROR_CODE) + assert key.filename == DEFAULT_FILENAME + assert key.code == DEFAULT_ERROR_CODE + + +@pytest.mark.parametrize('code, filename, args, expected_result', [ + # Error prefix matches + ('E123', 'file000.py', ('E', None), True), + ('E123', 'file000.py', ('E1', None), True), + ('E123', 'file000.py', ('E12', None), True), + ('E123', 'file000.py', ('E123', None), True), + # Error prefix and filename match + ('E123', 'file000.py', ('E', 'file000.py'), True), + ('E123', 'file000.py', ('E1', 'file000.py'), True), + ('E123', 'file000.py', ('E12', 'file000.py'), True), + ('E123', 'file000.py', ('E123', 'file000.py'), True), + # Error prefix does not match + ('E123', 'file000.py', ('W', None), False), + # Error prefix matches but filename does not + ('E123', 'file000.py', ('E', 'file001.py'), False), + # Error prefix does not match but filename does + ('E123', 'file000.py', ('W', 'file000.py'), False), + # Neither error prefix match nor filename + ('E123', 'file000.py', ('W', 'file001.py'), False), +]) +def test_key_matching(code, filename, args, expected_result): + """Verify Key#matches behaves as we expect with fthe above input.""" + key = stats.Key.create_from(make_error(code=code, filename=filename)) + assert key.matches(*args) is expected_result + + +def test_statistic_creation(): + """Verify how we create Statistic objects from Errors.""" + stat = stats.Statistic.create_from(make_error()) + assert stat.error_code == DEFAULT_ERROR_CODE + assert stat.message == DEFAULT_TEXT + assert stat.filename == DEFAULT_FILENAME + assert stat.count == 0 + + +def test_statistic_increment(): + """Verify we update the count.""" + stat = stats.Statistic.create_from(make_error()) + assert stat.count == 0 + stat.increment() + assert stat.count == 1 + + +def test_recording_statistics(): + """Verify that we appropriately create a new Statistic and store it.""" + aggregator = stats.Statistics() + assert list(aggregator.statistics_for('E')) == [] + aggregator.record(make_error()) + storage = aggregator._store + for key, value in storage.items(): + assert isinstance(key, stats.Key) + assert isinstance(value, stats.Statistic) + + assert storage[(DEFAULT_FILENAME, DEFAULT_ERROR_CODE)].count == 1 + + +def test_statistics_for_single_record(): + """Show we can retrieve the only statistic recorded.""" + aggregator = stats.Statistics() + assert list(aggregator.statistics_for('E')) == [] + aggregator.record(make_error()) + statistics = list(aggregator.statistics_for('E')) + assert len(statistics) == 1 + assert isinstance(statistics[0], stats.Statistic) + + +def test_statistics_for_filters_by_filename(): + """Show we can retrieve the only statistic recorded.""" + aggregator = stats.Statistics() + assert list(aggregator.statistics_for('E')) == [] + aggregator.record(make_error()) + aggregator.record(make_error(filename='example.py')) + + statistics = list(aggregator.statistics_for('E', DEFAULT_FILENAME)) + assert len(statistics) == 1 + assert isinstance(statistics[0], stats.Statistic) + + +def test_statistic_for_retrieves_more_than_one_value(): + """Show this works for more than a couple statistic values.""" + aggregator = stats.Statistics() + for i in range(50): + aggregator.record(make_error(code='E1{:02d}'.format(i))) + aggregator.record(make_error(code='W2{:02d}'.format(i))) + + statistics = list(aggregator.statistics_for('E')) + assert len(statistics) == 50 + + statistics = list(aggregator.statistics_for('W22')) + assert len(statistics) == 10