Add --benchmark and formatting for its values

This commit is contained in:
Ian Cordasco 2016-06-19 09:25:27 -05:00
parent 1f7a8081ad
commit adedd6c5cf
No known key found for this signature in database
GPG key ID: 656D3395E4A9791A
6 changed files with 112 additions and 6 deletions

View file

@ -10,6 +10,12 @@ try:
except ImportError:
multiprocessing = None
try:
import Queue as queue
except ImportError:
import queue
from flake8 import defaults
from flake8 import exceptions
from flake8 import processor
from flake8 import utils
@ -72,14 +78,22 @@ class Manager(object):
self.jobs = self._job_count()
self.process_queue = None
self.results_queue = None
self.statistics_queue = None
self.using_multiprocessing = self.jobs > 1
self.processes = []
self.checkers = []
self.statistics = {
'files': 0,
'logical lines': 0,
'physical lines': 0,
'tokens': 0,
}
if self.using_multiprocessing:
try:
self.process_queue = multiprocessing.Queue()
self.results_queue = multiprocessing.Queue()
self.statistics_queue = multiprocessing.Queue()
except OSError as oserr:
if oserr.errno not in SERIAL_RETRY_ERRNOS:
raise
@ -96,6 +110,29 @@ class Manager(object):
proc.join(0.2)
self._cleanup_queue(self.process_queue)
self._cleanup_queue(self.results_queue)
self._cleanup_queue(self.statistics_queue)
def _process_statistics(self):
    """Aggregate each checker's statistics into ``self.statistics``.

    In a multiprocessing run the per-file statistics dicts arrive on
    ``self.statistics_queue``; in a serial run they are read from each
    checker's ``statistics`` attribute directly.
    """
    all_statistics = self.statistics
    if self.using_multiprocessing:
        # Expect one statistics dict per checker on the queue.
        total_number_of_checkers = len(self.checkers)
        statistics_gathered = 0
        while statistics_gathered < total_number_of_checkers:
            try:
                # Non-blocking get: don't stall shutdown waiting on
                # sub-processes.
                statistics = self.statistics_queue.get(block=False)
                statistics_gathered += 1
            except queue.Empty:
                # Best effort: if the queue runs dry before every
                # checker has reported, stop rather than hang.
                break
            for statistic in defaults.STATISTIC_NAMES:
                all_statistics[statistic] += statistics[statistic]
    else:
        # Serial run: every checker ran in this process, so its
        # statistics attribute is directly accessible.
        statistics_generator = (checker.statistics
                                for checker in self.checkers)
        for statistics in statistics_generator:
            for statistic in defaults.STATISTIC_NAMES:
                all_statistics[statistic] += statistics[statistic]
    # 'files' is not in STATISTIC_NAMES; it is simply the checker count.
    all_statistics['files'] += len(self.checkers)
def _job_count(self):
# type: () -> Union[int, NoneType]
@ -182,7 +219,7 @@ class Manager(object):
LOG.info('Running checks in parallel')
for checker in iter(self.process_queue.get, 'DONE'):
LOG.debug('Running checker for file "%s"', checker.filename)
checker.run_checks(self.results_queue)
checker.run_checks(self.results_queue, self.statistics_queue)
self.results_queue.put('DONE')
def is_path_excluded(self, path):
@ -280,7 +317,7 @@ class Manager(object):
def run_serial(self):
    """Run the checkers in serial.

    Each checker writes its results to ``self.results_queue`` and its
    statistics to ``self.statistics_queue``.
    """
    # NOTE(review): the diff residue showed both the old single-argument
    # call and the new two-argument call; only the updated call survives.
    for checker in self.checkers:
        checker.run_checks(self.results_queue, self.statistics_queue)
def run(self):
"""Run all the checkers.
@ -325,6 +362,7 @@ class Manager(object):
def stop(self):
    """Stop checking files."""
    # Fold each checker's statistics into self.statistics before the
    # worker processes (and their queues) go away.
    self._process_statistics()
    for proc in self.processes:
        LOG.info('Joining %s to the main process', proc.name)
        proc.join()
@ -342,12 +380,21 @@ class FileChecker(object):
The plugins registered to check the file.
:type checks:
flake8.plugins.manager.Checkers
:param style_guide:
The initialized StyleGuide for this particular run.
:type style_guide:
flake8.style_guide.StyleGuide
"""
self.filename = filename
self.checks = checks
self.style_guide = style_guide
self.results = []
self.processor = self._make_processor()
self.statistics = {
'tokens': 0,
'logical lines': 0,
'physical lines': len(self.processor.lines),
}
def _make_processor(self):
try:
@ -466,8 +513,10 @@ class FileChecker(object):
:meth:`flake8.checker.FileChecker.run_checks`.
"""
parens = 0
statistics = self.statistics
file_processor = self.processor
for token in file_processor.generate_tokens():
statistics['tokens'] += 1
self.check_physical_eol(token)
token_type, text = token[0:2]
processor.log_token(LOG, token)
@ -485,7 +534,7 @@ class FileChecker(object):
self.run_physical_checks(file_processor.lines[-1])
self.run_logical_checks()
def run_checks(self, results_queue):
def run_checks(self, results_queue, statistics_queue):
"""Run checks against the file."""
if self.processor.should_ignore_file():
return
@ -501,6 +550,11 @@ class FileChecker(object):
if results_queue is not None:
results_queue.put((self.filename, self.results))
logical_lines = self.processor.statistics['logical lines']
self.statistics['logical lines'] = logical_lines
if statistics_queue is not None:
statistics_queue.put(self.statistics)
def handle_comment(self, token, token_text):
"""Handle the logic when encountering a comment token."""
# The comment also ends a physical line

View file

@ -7,5 +7,11 @@ MAX_LINE_LENGTH = 79
TRUTHY_VALUES = set(['true', '1', 't'])
# Other constants
WHITESPACE = frozenset(' \t')
# Keys of the per-file statistics gathered by each FileChecker and
# aggregated by the Manager; 'files' is counted separately.
STATISTIC_NAMES = (
    'logical lines',
    'physical lines',
    'tokens',
)

View file

@ -87,7 +87,24 @@ class BaseFormatter(object):
def show_benchmarks(self, benchmarks):
    """Format and print the benchmarks.

    :param benchmarks:
        Iterable of ``(statistic_name, value)`` pairs where the value is
        an int (a count) or a float (a timing).
    """
    # NOTE(sigmavirus24): The format strings are a little confusing, even
    # to me, so here's a quick explanation:
    # We specify the named value first followed by a ':' to indicate we're
    # formatting the value.
    # Next we use '<' to indicate we want the value left aligned.
    # Then '10' is the width of the area.
    # For floats, finally, the '.3' precision displays at most 3
    # significant digits (no presentation type means general format).
    # Precision cannot be specified for integers, which is why we need
    # two separate format strings.
    float_format = '{value:<10.3} {statistic}'.format
    int_format = '{value:<10} {statistic}'.format
    for statistic, value in benchmarks:
        if isinstance(value, int):
            benchmark = int_format(statistic=statistic, value=value)
        else:
            benchmark = float_format(statistic=statistic, value=value)
        self._write(benchmark)
def show_source(self, error):
"""Show the physical line generating the error.

View file

@ -7,6 +7,7 @@ import time
import flake8
from flake8 import checker
from flake8 import defaults
from flake8 import style_guide
from flake8 import utils
from flake8.main import options
@ -225,6 +226,22 @@ class Application(object):
self.file_checker_manager.run()
LOG.info('Finished running')
self.file_checker_manager.stop()
self.end_time = time.time()
def report_benchmarks(self):
    """Aggregate, format, and print benchmark statistics.

    Does nothing unless the user passed ``--benchmark``. Emits the
    elapsed wall-clock time plus, for each gathered statistic, both the
    total and a per-second rate via the formatter.
    """
    if not self.options.benchmark:
        return
    time_elapsed = self.end_time - self.start_time
    statistics = [('seconds elapsed', time_elapsed)]
    add_statistic = statistics.append
    for statistic in (defaults.STATISTIC_NAMES + ('files',)):
        value = self.file_checker_manager.statistics[statistic]
        total_description = 'total ' + statistic + ' processed'
        add_statistic((total_description, value))
        per_second_description = statistic + ' processed per second'
        # Guard against ZeroDivisionError: on platforms with coarse
        # time.time() resolution a very fast run can yield elapsed == 0.
        per_second = int(value / time_elapsed) if time_elapsed else 0
        add_statistic((per_second_description, per_second))
    self.formatter.show_benchmarks(statistics)
def report_errors(self):
# type: () -> NoneType
@ -259,7 +276,7 @@ class Application(object):
self.initialize(argv)
self.run_checks()
self.report_errors()
self.end_time = time.time()
self.report_benchmarks()
def run(self, argv=None):
# type: (Union[NoneType, List[str]]) -> NoneType

View file

@ -192,3 +192,10 @@ def register_default_options(option_manager):
'--isolated', default=False, action='store_true',
help='Ignore all found configuration files.',
)
# Benchmarking
add_option(
'--benchmark', default=False, action='store_true',
help='Print benchmark information about this run of Flake8',
)

View file

@ -97,6 +97,10 @@ class FileProcessor(object):
self.total_lines = len(self.lines)
#: Verbosity level of Flake8
self.verbose = options.verbose
#: Statistics dictionary
self.statistics = {
'logical lines': 0,
}
@contextlib.contextmanager
def inside_multiline(self, line_number):
@ -186,6 +190,7 @@ class FileProcessor(object):
"""Build a logical line from the current tokens list."""
comments, logical, mapping_list = self.build_logical_line_tokens()
self.logical_line = ''.join(logical)
self.statistics['logical lines'] += 1
return ''.join(comments), self.logical_line, mapping_list
def split_line(self, token):