From 6cfb292b1b0e4f956b53e76b83f567b77cd1525e Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Mon, 11 Jul 2016 20:13:41 -0500 Subject: [PATCH 1/6] Add the statistics module --- src/flake8/statistics.py | 101 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 src/flake8/statistics.py diff --git a/src/flake8/statistics.py b/src/flake8/statistics.py new file mode 100644 index 0000000..2ae6c64 --- /dev/null +++ b/src/flake8/statistics.py @@ -0,0 +1,101 @@ +"""Statistic collection logic for Flake8.""" +import collections + + +class Statistics(object): + """Manager of aggregated statistics for a run of Flake8.""" + + def __init__(self): + """Initialize the underlying dictionary for our statistics.""" + self._store = {} + + def record(self, error): + """Add the fact that the error was seen in the file. + + :param error: + The Error instance containing the information about the violation. + :type error: + flake8.style_guide.Error + """ + key = Key.create_from(error) + if key in self._store: + statistic = self._store[key] + else: + statistic = Statistic.create_from(error) + self._store[key] = statistic.increment() + + def statistics_for(self, prefix, filename=None): + """Generate statistics for the prefix and filename. + + If you have a :class:`Statistics` object that has recorded errors, + you can generate the statistics for a prefix (e.g., ``E``, ``E1``, + ``W50``, ``W503``) with the optional filter of a filename as well. + + .. code-block:: python + + >>> stats = Statistics() + >>> stats.statistics_for('E12', + filename='src/flake8/statistics.py') + + >>> stats.statistics_for('W') + + + :param str prefix: + The error class or specific error code to find statistics for. + :param str filename: + (Optional) The filename to further filter results by. + :returns: + Generator of instances of :class:`Statistic` + """ + matching_errors = sorted(key for key in self._store.keys() + if key.matches(prefix, filename)) + for error_code in matching_errors: + yield self._store[error_code] + + +class Key(collections.namedtuple('Key', ['filename', 'code'])): + __slots__ = () + + @classmethod + def create_from(cls, error): + return cls( + filename=error.filename, + code=error.code, + ) + + def matches(self, prefix, filename): + return (self.code.startswith(prefix) and + (filename is None or + self.filename == filename)) + + +_Statistic = collections.namedtuple('Statistic', [ + 'error_code', + 'filename', + 'message', + 'count', +]) + + +class Statistic(_Statistic): + __slots__ = () + + @classmethod + def create_from(cls, error): + return cls( + error_code=error.code, + filename=error.filename, + message=error.message, + count=0, + ) + + def increment(self): + return Statistic( + error_code=self.error_code, + filename=self.filename, + message=self.message, + count=self.count + 1, + ) + + +del _Statistic From 1a722bbe7bb1918d26a073108ae362e731296a3e Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Mon, 11 Jul 2016 20:24:38 -0500 Subject: [PATCH 2/6] Add empty test file for our statistics module --- tests/unit/test_statistics.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 tests/unit/test_statistics.py diff --git a/tests/unit/test_statistics.py b/tests/unit/test_statistics.py new file mode 100644 index 0000000..015e94b --- /dev/null +++ b/tests/unit/test_statistics.py @@ -0,0 +1,5 @@ +from flake8 import statistics as stats + + +def test_nothing(): + pass From 2d3f06219199a4a7307a3c9a5c14b69be8608dca Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Tue, 12 Jul 2016 08:21:57 -0500 Subject: [PATCH 3/6] Add actual tests around statistics module Also refactor our statistics module to be a bit smarter and less namedtuple happy. The Statistic class had no reason to be a tuple, I have no clue why I wrote it that way last night. --- src/flake8/statistics.py | 63 +++++++++++++++++++------------ tests/unit/test_statistics.py | 71 ++++++++++++++++++++++++++++++++++- 2 files changed, 109 insertions(+), 25 deletions(-) diff --git a/src/flake8/statistics.py b/src/flake8/statistics.py index 2ae6c64..2512089 100644 --- a/src/flake8/statistics.py +++ b/src/flake8/statistics.py @@ -18,11 +18,9 @@ class Statistics(object): flake8.style_guide.Error """ key = Key.create_from(error) - if key in self._store: - statistic = self._store[key] - else: - statistic = Statistic.create_from(error) - self._store[key] = statistic.increment() + if key not in self._store: + self._store[key] = Statistic.create_from(error) + self._store[key].increment() def statistics_for(self, prefix, filename=None): """Generate statistics for the prefix and filename. @@ -54,48 +52,67 @@ class Statistics(object): class Key(collections.namedtuple('Key', ['filename', 'code'])): + """Simple key structure for the Statistics dictionary. + + To make things clearer, easier to read, and more understandable, we use a + namedtuple here for all Keys in the underlying dictionary for the + Statistics object. + """ + __slots__ = () @classmethod def create_from(cls, error): + """Create a Key from :class:`flake8.style_guide.Error`.""" return cls( filename=error.filename, code=error.code, ) def matches(self, prefix, filename): + """Determine if this key matches some constraints. + + :param str prefix: + The error code prefix that this key's error code should start with. + :param str filename: + The filename that we potentially want to match on. This can be + None to only match on error prefix. + :returns: + True if the Key's code starts with the prefix and either filename + is None, or the Key's filename matches the value passed in. + :rtype: + bool + """ return (self.code.startswith(prefix) and (filename is None or self.filename == filename)) -_Statistic = collections.namedtuple('Statistic', [ - 'error_code', - 'filename', - 'message', - 'count', -]) +class Statistic(object): + """Simple wrapper around the logic of each statistic. + Instead of maintaining a simple but potentially hard to reason about + tuple, we create a namedtuple which has attributes and a couple + convenience methods on it. + """ -class Statistic(_Statistic): - __slots__ = () + def __init__(self, error_code, filename, message, count): + """Initialize our Statistic.""" + self.error_code = error_code + self.filename = filename + self.message = message + self.count = count @classmethod def create_from(cls, error): + """Create a Statistic from a :class:`flake8.style_guide.Error`.""" return cls( error_code=error.code, filename=error.filename, - message=error.message, + message=error.text, count=0, ) def increment(self): - return Statistic( - error_code=self.error_code, - filename=self.filename, - message=self.message, - count=self.count + 1, - ) - - -del _Statistic + """Increment the number of times we've seen this error in this file.""" + self.count += 1 diff --git a/tests/unit/test_statistics.py b/tests/unit/test_statistics.py index 015e94b..c60af33 100644 --- a/tests/unit/test_statistics.py +++ b/tests/unit/test_statistics.py @@ -1,5 +1,72 @@ +"""Tests for the statistics module in Flake8.""" +import pytest + from flake8 import statistics as stats +from flake8 import style_guide + +DEFAULT_ERROR_CODE = 'E100' +DEFAULT_FILENAME = 'file.py' +DEFAULT_TEXT = 'Default text' -def test_nothing(): - pass +def make_error(**kwargs): + """Create errors with a bunch of default values.""" + return style_guide.Error( + code=kwargs.pop('code', DEFAULT_ERROR_CODE), + filename=kwargs.pop('filename', DEFAULT_FILENAME), + line_number=kwargs.pop('line_number', 1), + column_number=kwargs.pop('column_number', 1), + text=kwargs.pop('text', DEFAULT_TEXT), + physical_line=None, + ) + + +def test_key_creation(): + """Verify how we create Keys from Errors.""" + key = stats.Key.create_from(make_error()) + assert key == (DEFAULT_FILENAME, DEFAULT_ERROR_CODE) + assert key.filename == DEFAULT_FILENAME + assert key.code == DEFAULT_ERROR_CODE + + +@pytest.mark.parametrize('code, filename, args, expected_result', [ + # Error prefix matches + ('E123', 'file000.py', ('E', None), True), + ('E123', 'file000.py', ('E1', None), True), + ('E123', 'file000.py', ('E12', None), True), + ('E123', 'file000.py', ('E123', None), True), + # Error prefix and filename match + ('E123', 'file000.py', ('E', 'file000.py'), True), + ('E123', 'file000.py', ('E1', 'file000.py'), True), + ('E123', 'file000.py', ('E12', 'file000.py'), True), + ('E123', 'file000.py', ('E123', 'file000.py'), True), + # Error prefix does not match + ('E123', 'file000.py', ('W', None), False), + # Error prefix matches but filename does not + ('E123', 'file000.py', ('E', 'file001.py'), False), + # Error prefix does not match but filename does + ('E123', 'file000.py', ('W', 'file000.py'), False), + # Neither error prefix match nor filename + ('E123', 'file000.py', ('W', 'file001.py'), False), +]) +def test_key_matching(code, filename, args, expected_result): + """Verify Key#matches behaves as we expect with fthe above input.""" + key = stats.Key.create_from(make_error(code=code, filename=filename)) + assert key.matches(*args) is expected_result + + +def test_statistic_creation(): + """Verify how we create Statistic objects from Errors.""" + stat = stats.Statistic.create_from(make_error()) + assert stat.error_code == DEFAULT_ERROR_CODE + assert stat.message == DEFAULT_TEXT + assert stat.filename == DEFAULT_FILENAME + assert stat.count == 0 + + +def test_statistic_increment(): + """Verify we update the count.""" + stat = stats.Statistic.create_from(make_error()) + assert stat.count == 0 + stat.increment() + assert stat.count == 1 From 61430951ec13753aa49200222971ec646846b9b6 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Tue, 12 Jul 2016 19:58:44 -0500 Subject: [PATCH 4/6] Add more tests around our Statistics class --- tests/unit/test_statistics.py | 49 +++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/unit/test_statistics.py b/tests/unit/test_statistics.py index c60af33..f95c638 100644 --- a/tests/unit/test_statistics.py +++ b/tests/unit/test_statistics.py @@ -70,3 +70,52 @@ def test_statistic_increment(): assert stat.count == 0 stat.increment() assert stat.count == 1 + + +def test_recording_statistics(): + """Verify that we appropriately create a new Statistic and store it.""" + aggregator = stats.Statistics() + assert list(aggregator.statistics_for('E')) == [] + aggregator.record(make_error()) + storage = aggregator._store + for key, value in storage.items(): + assert isinstance(key, stats.Key) + assert isinstance(value, stats.Statistic) + + assert storage[(DEFAULT_FILENAME, DEFAULT_ERROR_CODE)].count == 1 + + +def test_statistics_for_single_record(): + """Show we can retrieve the only statistic recorded.""" + aggregator = stats.Statistics() + assert list(aggregator.statistics_for('E')) == [] + aggregator.record(make_error()) + statistics = list(aggregator.statistics_for('E')) + assert len(statistics) == 1 + assert isinstance(statistics[0], stats.Statistic) + + +def test_statistics_for_filters_by_filename(): + """Show we can retrieve the only statistic recorded.""" + aggregator = stats.Statistics() + assert list(aggregator.statistics_for('E')) == [] + aggregator.record(make_error()) + aggregator.record(make_error(filename='example.py')) + + statistics = list(aggregator.statistics_for('E', DEFAULT_FILENAME)) + assert len(statistics) == 1 + assert isinstance(statistics[0], stats.Statistic) + + +def test_statistic_for_retrieves_more_than_one_value(): + """Show this works for more than a couple statistic values.""" + aggregator = stats.Statistics() + for i in range(50): + aggregator.record(make_error(code='E1{:02d}'.format(i))) + aggregator.record(make_error(code='W2{:02d}'.format(i))) + + statistics = list(aggregator.statistics_for('E')) + assert len(statistics) == 50 + + statistics = list(aggregator.statistics_for('W22')) + assert len(statistics) == 10 From 14cab7e81b1bed1022f4fdce32d4b1777b055988 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Tue, 12 Jul 2016 20:04:04 -0500 Subject: [PATCH 5/6] Add statistics collection to StyleGuide --- src/flake8/style_guide.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/flake8/style_guide.py b/src/flake8/style_guide.py index 89890ba..ed1b844 100644 --- a/src/flake8/style_guide.py +++ b/src/flake8/style_guide.py @@ -5,6 +5,7 @@ import linecache import logging import re +from flake8 import statistics from flake8 import utils __all__ = ( @@ -74,6 +75,7 @@ class StyleGuide(object): self.options = options self.listener = listener_trie self.formatter = formatter + self.stats = statistics.Statistics() self._selected = tuple(options.select) self._ignored = tuple(options.ignore) self._decision_cache = {} @@ -267,6 +269,7 @@ class StyleGuide(object): if (error_is_selected and is_not_inline_ignored and is_included_in_diff): self.formatter.handle(error) + self.stats.record(error) self.listener.notify(error.code, error) return 1 return 0 From 2ffcf96b4b7a1fbc761796df1336a94408a79ed0 Mon Sep 17 00:00:00 2001 From: Ian Cordasco Date: Tue, 12 Jul 2016 20:04:20 -0500 Subject: [PATCH 6/6] Use statistics in the legacy report class --- src/flake8/api/legacy.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/flake8/api/legacy.py b/src/flake8/api/legacy.py index 35c7101..9fc05bb 100644 --- a/src/flake8/api/legacy.py +++ b/src/flake8/api/legacy.py @@ -141,6 +141,8 @@ class Report(object): .. warning:: This should not be instantiated by users. """ self._application = application + self._style_guide = application.guide + self._stats = self._style_guide.stats @property def total_errors(self): @@ -149,4 +151,7 @@ class Report(object): def get_statistics(self, violation): """Get the number of occurences of a violation.""" - raise NotImplementedError('Statistics capturing needs to happen first') + return [ + '{} {} {}'.format(s.count, s.error_code, s.message) + for s in self._stats.statistics_for(violation) + ]