Add --benchmark and formatting for its values

This commit is contained in:
Ian Cordasco 2016-06-19 09:25:27 -05:00
parent 1f7a8081ad
commit adedd6c5cf
No known key found for this signature in database
GPG key ID: 656D3395E4A9791A
6 changed files with 112 additions and 6 deletions

View file

@ -10,6 +10,12 @@ try:
except ImportError:
multiprocessing = None
try:
import Queue as queue
except ImportError:
import queue
from flake8 import defaults
from flake8 import exceptions
from flake8 import processor
from flake8 import utils
@ -72,14 +78,22 @@ class Manager(object):
self.jobs = self._job_count()
self.process_queue = None
self.results_queue = None
self.statistics_queue = None
self.using_multiprocessing = self.jobs > 1
self.processes = []
self.checkers = []
self.statistics = {
'files': 0,
'logical lines': 0,
'physical lines': 0,
'tokens': 0,
}
if self.using_multiprocessing:
try:
self.process_queue = multiprocessing.Queue()
self.results_queue = multiprocessing.Queue()
self.statistics_queue = multiprocessing.Queue()
except OSError as oserr:
if oserr.errno not in SERIAL_RETRY_ERRNOS:
raise
@ -96,6 +110,29 @@ class Manager(object):
proc.join(0.2)
self._cleanup_queue(self.process_queue)
self._cleanup_queue(self.results_queue)
self._cleanup_queue(self.statistics_queue)
def _process_statistics(self):
    """Aggregate each checker's statistics into ``self.statistics``.

    In a multiprocessing run the per-file statistics dicts arrive on
    ``self.statistics_queue``; in a serial run they are read from each
    checker's ``statistics`` attribute directly.
    """
    all_statistics = self.statistics
    if self.using_multiprocessing:
        # Expect one statistics dict per checker on the queue.
        total_number_of_checkers = len(self.checkers)
        statistics_gathered = 0
        while statistics_gathered < total_number_of_checkers:
            try:
                # Non-blocking get: don't stall shutdown waiting on
                # sub-processes.
                statistics = self.statistics_queue.get(block=False)
                statistics_gathered += 1
            except queue.Empty:
                # Best effort: if the queue runs dry before every
                # checker has reported, stop rather than hang.
                break
            for statistic in defaults.STATISTIC_NAMES:
                all_statistics[statistic] += statistics[statistic]
    else:
        # Serial run: every checker ran in this process, so its
        # statistics attribute is directly accessible.
        statistics_generator = (checker.statistics
                                for checker in self.checkers)
        for statistics in statistics_generator:
            for statistic in defaults.STATISTIC_NAMES:
                all_statistics[statistic] += statistics[statistic]
    # 'files' is not in STATISTIC_NAMES; it is simply the checker count.
    all_statistics['files'] += len(self.checkers)
def _job_count(self):
# type: () -> Union[int, NoneType]
@ -182,7 +219,7 @@ class Manager(object):
LOG.info('Running checks in parallel')
for checker in iter(self.process_queue.get, 'DONE'):
LOG.debug('Running checker for file "%s"', checker.filename)
checker.run_checks(self.results_queue)
checker.run_checks(self.results_queue, self.statistics_queue)
self.results_queue.put('DONE')
def is_path_excluded(self, path):
@ -280,7 +317,7 @@ class Manager(object):
def run_serial(self):
    """Run the checkers in serial.

    Each checker writes its results to ``self.results_queue`` and its
    statistics to ``self.statistics_queue``.
    """
    # NOTE(review): the diff residue showed both the old single-argument
    # call and the new two-argument call; only the updated call survives.
    for checker in self.checkers:
        checker.run_checks(self.results_queue, self.statistics_queue)
def run(self):
"""Run all the checkers.
@ -325,6 +362,7 @@ class Manager(object):
def stop(self):
    """Stop checking files."""
    # Fold each checker's statistics into self.statistics before the
    # worker processes (and their queues) go away.
    self._process_statistics()
    for proc in self.processes:
        LOG.info('Joining %s to the main process', proc.name)
        proc.join()
@ -342,12 +380,21 @@ class FileChecker(object):
The plugins registered to check the file.
:type checks:
flake8.plugins.manager.Checkers
:param style_guide:
The initialized StyleGuide for this particular run.
:type style_guide:
flake8.style_guide.StyleGuide
"""
self.filename = filename
self.checks = checks
self.style_guide = style_guide
self.results = []
self.processor = self._make_processor()
self.statistics = {
'tokens': 0,
'logical lines': 0,
'physical lines': len(self.processor.lines),
}
def _make_processor(self):
try:
@ -466,8 +513,10 @@ class FileChecker(object):
:meth:`flake8.checker.FileChecker.run_checks`.
"""
parens = 0
statistics = self.statistics
file_processor = self.processor
for token in file_processor.generate_tokens():
statistics['tokens'] += 1
self.check_physical_eol(token)
token_type, text = token[0:2]
processor.log_token(LOG, token)
@ -485,7 +534,7 @@ class FileChecker(object):
self.run_physical_checks(file_processor.lines[-1])
self.run_logical_checks()
def run_checks(self, results_queue):
def run_checks(self, results_queue, statistics_queue):
"""Run checks against the file."""
if self.processor.should_ignore_file():
return
@ -501,6 +550,11 @@ class FileChecker(object):
if results_queue is not None:
results_queue.put((self.filename, self.results))
logical_lines = self.processor.statistics['logical lines']
self.statistics['logical lines'] = logical_lines
if statistics_queue is not None:
statistics_queue.put(self.statistics)
def handle_comment(self, token, token_text):
"""Handle the logic when encountering a comment token."""
# The comment also ends a physical line

View file

@ -7,5 +7,11 @@ MAX_LINE_LENGTH = 79
TRUTHY_VALUES = set(['true', '1', 't'])
# Other constants
WHITESPACE = frozenset(' \t')
# Keys of the per-file statistics gathered by each FileChecker and
# aggregated by the Manager; 'files' is counted separately.
STATISTIC_NAMES = (
    'logical lines',
    'physical lines',
    'tokens',
)

View file

@ -87,7 +87,24 @@ class BaseFormatter(object):
def show_benchmarks(self, benchmarks):
    """Format and print the benchmarks.

    :param benchmarks:
        Iterable of ``(statistic_name, value)`` pairs where the value is
        an int (a count) or a float (a timing).
    """
    # NOTE(sigmavirus24): The format strings are a little confusing, even
    # to me, so here's a quick explanation:
    # We specify the named value first followed by a ':' to indicate we're
    # formatting the value.
    # Next we use '<' to indicate we want the value left aligned.
    # Then '10' is the width of the area.
    # For floats, finally, the '.3' precision displays at most 3
    # significant digits (no presentation type means general format).
    # Precision cannot be specified for integers, which is why we need
    # two separate format strings.
    float_format = '{value:<10.3} {statistic}'.format
    int_format = '{value:<10} {statistic}'.format
    for statistic, value in benchmarks:
        if isinstance(value, int):
            benchmark = int_format(statistic=statistic, value=value)
        else:
            benchmark = float_format(statistic=statistic, value=value)
        self._write(benchmark)
def show_source(self, error):
"""Show the physical line generating the error.

View file

@ -7,6 +7,7 @@ import time
import flake8
from flake8 import checker
from flake8 import defaults
from flake8 import style_guide
from flake8 import utils
from flake8.main import options
@ -225,6 +226,22 @@ class Application(object):
self.file_checker_manager.run()
LOG.info('Finished running')
self.file_checker_manager.stop()
self.end_time = time.time()
def report_benchmarks(self):
    """Aggregate, format, and print benchmark statistics.

    Does nothing unless the user passed ``--benchmark``. Emits the
    elapsed wall-clock time plus, for each gathered statistic, both the
    total and a per-second rate via the formatter.
    """
    if not self.options.benchmark:
        return
    time_elapsed = self.end_time - self.start_time
    statistics = [('seconds elapsed', time_elapsed)]
    add_statistic = statistics.append
    for statistic in (defaults.STATISTIC_NAMES + ('files',)):
        value = self.file_checker_manager.statistics[statistic]
        total_description = 'total ' + statistic + ' processed'
        add_statistic((total_description, value))
        per_second_description = statistic + ' processed per second'
        # Guard against ZeroDivisionError: on platforms with coarse
        # time.time() resolution a very fast run can yield elapsed == 0.
        per_second = int(value / time_elapsed) if time_elapsed else 0
        add_statistic((per_second_description, per_second))
    self.formatter.show_benchmarks(statistics)
def report_errors(self):
# type: () -> NoneType
@ -259,7 +276,7 @@ class Application(object):
self.initialize(argv)
self.run_checks()
self.report_errors()
self.end_time = time.time()
self.report_benchmarks()
def run(self, argv=None):
# type: (Union[NoneType, List[str]]) -> NoneType

View file

@ -192,3 +192,10 @@ def register_default_options(option_manager):
'--isolated', default=False, action='store_true',
help='Ignore all found configuration files.',
)
# Benchmarking
add_option(
'--benchmark', default=False, action='store_true',
help='Print benchmark information about this run of Flake8',
)

View file

@ -97,6 +97,10 @@ class FileProcessor(object):
self.total_lines = len(self.lines)
#: Verbosity level of Flake8
self.verbose = options.verbose
#: Statistics dictionary
self.statistics = {
'logical lines': 0,
}
@contextlib.contextmanager
def inside_multiline(self, line_number):
@ -186,6 +190,7 @@ class FileProcessor(object):
"""Build a logical line from the current tokens list."""
comments, logical, mapping_list = self.build_logical_line_tokens()
self.logical_line = ''.join(logical)
self.statistics['logical lines'] += 1
return ''.join(comments), self.logical_line, mapping_list
def split_line(self, token):