From adedd6c5cfd705d01e79a0f673ed95ee4bb26e08 Mon Sep 17 00:00:00 2001
From: Ian Cordasco <graffatcolmingov@gmail.com>
Date: Sun, 19 Jun 2016 09:25:27 -0500
Subject: [PATCH] Add --benchmark and formatting for its values

---
 flake8/checker.py          | 60 ++++++++++++++++++++++++++++++++++++--
 flake8/defaults.py         |  8 ++++-
 flake8/formatting/base.py  | 19 +++++++++++-
 flake8/main/application.py | 19 +++++++++++-
 flake8/main/options.py     |  7 +++++
 flake8/processor.py        |  5 ++++
 6 files changed, 112 insertions(+), 6 deletions(-)

diff --git a/flake8/checker.py b/flake8/checker.py
index 655387e..b875f44 100644
--- a/flake8/checker.py
+++ b/flake8/checker.py
@@ -10,6 +10,12 @@ try:
 except ImportError:
     multiprocessing = None
 
+try:
+    import Queue as queue
+except ImportError:
+    import queue
+
+from flake8 import defaults
 from flake8 import exceptions
 from flake8 import processor
 from flake8 import utils
@@ -72,14 +78,22 @@ class Manager(object):
         self.jobs = self._job_count()
         self.process_queue = None
         self.results_queue = None
+        self.statistics_queue = None
         self.using_multiprocessing = self.jobs > 1
         self.processes = []
         self.checkers = []
+        self.statistics = {
+            'files': 0,
+            'logical lines': 0,
+            'physical lines': 0,
+            'tokens': 0,
+        }
 
         if self.using_multiprocessing:
             try:
                 self.process_queue = multiprocessing.Queue()
                 self.results_queue = multiprocessing.Queue()
+                self.statistics_queue = multiprocessing.Queue()
             except OSError as oserr:
                 if oserr.errno not in SERIAL_RETRY_ERRNOS:
                     raise
@@ -96,6 +110,29 @@ class Manager(object):
                 proc.join(0.2)
             self._cleanup_queue(self.process_queue)
             self._cleanup_queue(self.results_queue)
+            self._cleanup_queue(self.statistics_queue)
+
+    def _process_statistics(self):
+        """Fold every checker's statistics into ``self.statistics``."""
+        totals = self.statistics
+        checker_count = len(self.checkers)
+        if self.using_multiprocessing:
+            # Worker processes report through the statistics queue. Take at
+            # most one entry per checker and stop as soon as a non-blocking
+            # read finds the queue empty.
+            gathered = []
+            while len(gathered) < checker_count:
+                try:
+                    gathered.append(self.statistics_queue.get(block=False))
+                except queue.Empty:
+                    break
+        else:
+            gathered = [checker.statistics for checker in self.checkers]
+
+        for reported in gathered:
+            for name in defaults.STATISTIC_NAMES:
+                totals[name] += reported[name]
+        totals['files'] += checker_count
 
     def _job_count(self):
         # type: () -> Union[int, NoneType]
@@ -182,7 +219,7 @@ class Manager(object):
         LOG.info('Running checks in parallel')
         for checker in iter(self.process_queue.get, 'DONE'):
             LOG.debug('Running checker for file "%s"', checker.filename)
-            checker.run_checks(self.results_queue)
+            checker.run_checks(self.results_queue, self.statistics_queue)
         self.results_queue.put('DONE')
 
     def is_path_excluded(self, path):
@@ -280,7 +317,7 @@ class Manager(object):
     def run_serial(self):
         """Run the checkers in serial."""
         for checker in self.checkers:
-            checker.run_checks(self.results_queue)
+            checker.run_checks(self.results_queue, self.statistics_queue)
 
     def run(self):
         """Run all the checkers.
@@ -325,6 +362,7 @@ class Manager(object):
 
     def stop(self):
         """Stop checking files."""
+        self._process_statistics()
         for proc in self.processes:
             LOG.info('Joining %s to the main process', proc.name)
             proc.join()
@@ -342,12 +380,21 @@ class FileChecker(object):
             The plugins registered to check the file.
         :type checks:
             flake8.plugins.manager.Checkers
+        :param style_guide:
+            The initialized StyleGuide for this particular run.
+        :type style_guide:
+            flake8.style_guide.StyleGuide
         """
         self.filename = filename
         self.checks = checks
         self.style_guide = style_guide
         self.results = []
         self.processor = self._make_processor()
+        self.statistics = {
+            'tokens': 0,
+            'logical lines': 0,
+            'physical lines': len(self.processor.lines),
+        }
 
     def _make_processor(self):
         try:
@@ -466,8 +513,10 @@ class FileChecker(object):
         :meth:`flake8.checker.FileChecker.run_checks`.
         """
         parens = 0
+        statistics = self.statistics
         file_processor = self.processor
         for token in file_processor.generate_tokens():
+            statistics['tokens'] += 1
             self.check_physical_eol(token)
             token_type, text = token[0:2]
             processor.log_token(LOG, token)
@@ -485,7 +534,7 @@ class FileChecker(object):
             self.run_physical_checks(file_processor.lines[-1])
             self.run_logical_checks()
 
-    def run_checks(self, results_queue):
+    def run_checks(self, results_queue, statistics_queue):
         """Run checks against the file."""
         if self.processor.should_ignore_file():
             return
@@ -501,6 +550,11 @@ class FileChecker(object):
         if results_queue is not None:
             results_queue.put((self.filename, self.results))
 
+        logical_lines = self.processor.statistics['logical lines']
+        self.statistics['logical lines'] = logical_lines
+        if statistics_queue is not None:
+            statistics_queue.put(self.statistics)
+
     def handle_comment(self, token, token_text):
         """Handle the logic when encountering a comment token."""
         # The comment also ends a physical line
diff --git a/flake8/defaults.py b/flake8/defaults.py
index 76829a5..d9f5a0b 100644
--- a/flake8/defaults.py
+++ b/flake8/defaults.py
@@ -7,5 +7,11 @@ MAX_LINE_LENGTH = 79
 
 TRUTHY_VALUES = set(['true', '1', 't'])
 
-# Other consants
+# Other constants
 WHITESPACE = frozenset(' \t')
+
+STATISTIC_NAMES = (
+    'logical lines',
+    'physical lines',
+    'tokens',
+)
diff --git a/flake8/formatting/base.py b/flake8/formatting/base.py
index 19e21e4..4fda6f4 100644
--- a/flake8/formatting/base.py
+++ b/flake8/formatting/base.py
@@ -87,7 +87,24 @@ class BaseFormatter(object):
 
     def show_benchmarks(self, benchmarks):
         """Format and print the benchmarks."""
-        pass
+        # NOTE(sigmavirus24): The format strings are a little confusing, even
+        # to me, so here's a quick explanation:
+        # We specify the named value first followed by a ':' to indicate we're
+        # formatting the value.
+        # Next we use '<' to indicate we want the value left aligned.
+        # Then '10' is the width of the area.
+        # For floats, finally, we want exactly 3 digits after the decimal
+        # point to be displayed, hence the '.3f' precision and type. That
+        # can not be specified for integers which is why we need two separate
+        # format strings.
+        float_format = '{value:<10.3f} {statistic}'.format
+        int_format = '{value:<10} {statistic}'.format
+        for statistic, value in benchmarks:
+            if isinstance(value, int):
+                benchmark = int_format(statistic=statistic, value=value)
+            else:
+                benchmark = float_format(statistic=statistic, value=value)
+            self._write(benchmark)
 
     def show_source(self, error):
         """Show the physical line generating the error.
diff --git a/flake8/main/application.py b/flake8/main/application.py
index 6739216..efb1049 100644
--- a/flake8/main/application.py
+++ b/flake8/main/application.py
@@ -7,6 +7,7 @@ import time
 
 import flake8
 from flake8 import checker
+from flake8 import defaults
 from flake8 import style_guide
 from flake8 import utils
 from flake8.main import options
@@ -225,6 +226,22 @@ class Application(object):
         self.file_checker_manager.run()
         LOG.info('Finished running')
         self.file_checker_manager.stop()
+        self.end_time = time.time()
+
+    def report_benchmarks(self):
+        """Aggregate and print the benchmarks when --benchmark is given."""
+        if not self.options.benchmark:
+            return
+        time_elapsed = self.end_time - self.start_time
+        statistics = [('seconds elapsed', time_elapsed)]
+        for statistic in (defaults.STATISTIC_NAMES + ('files',)):
+            value = self.file_checker_manager.statistics[statistic]
+            # Guard against a zero interval reported by a coarse clock.
+            per_second = int(value / time_elapsed) if time_elapsed else 0
+            statistics.append(('total ' + statistic + ' processed', value))
+            statistics.append(
+                (statistic + ' processed per second', per_second))
+        self.formatter.show_benchmarks(statistics)
 
     def report_errors(self):
         # type: () -> NoneType
@@ -259,7 +276,7 @@ class Application(object):
         self.initialize(argv)
         self.run_checks()
         self.report_errors()
-        self.end_time = time.time()
+        self.report_benchmarks()
 
     def run(self, argv=None):
         # type: (Union[NoneType, List[str]]) -> NoneType
diff --git a/flake8/main/options.py b/flake8/main/options.py
index 51db34e..c725c38 100644
--- a/flake8/main/options.py
+++ b/flake8/main/options.py
@@ -192,3 +192,10 @@ def register_default_options(option_manager):
         '--isolated', default=False, action='store_true',
         help='Ignore all found configuration files.',
     )
+
+    # Benchmarking
+
+    add_option(
+        '--benchmark', default=False, action='store_true',
+        help='Print benchmark information about this run of Flake8',
+    )
diff --git a/flake8/processor.py b/flake8/processor.py
index 8e6d897..0c33cc2 100644
--- a/flake8/processor.py
+++ b/flake8/processor.py
@@ -97,6 +97,10 @@ class FileProcessor(object):
         self.total_lines = len(self.lines)
         #: Verbosity level of Flake8
         self.verbose = options.verbose
+        #: Statistics dictionary
+        self.statistics = {
+            'logical lines': 0,
+        }
 
     @contextlib.contextmanager
     def inside_multiline(self, line_number):
@@ -186,6 +190,7 @@ class FileProcessor(object):
         """Build a logical line from the current tokens list."""
         comments, logical, mapping_list = self.build_logical_line_tokens()
         self.logical_line = ''.join(logical)
+        self.statistics['logical lines'] += 1
         return ''.join(comments), self.logical_line, mapping_list
 
     def split_line(self, token):