Restructure Checker Manager for serial retries

This commit is contained in:
Ian Cordasco 2016-04-02 11:30:12 -05:00
parent 81eb3e41cc
commit 09ad1d850e

View file

@@ -1,4 +1,5 @@
"""Checker Manager and Checker classes.""" """Checker Manager and Checker classes."""
import errno
import logging import logging
import os import os
import sys import sys
@@ -15,6 +16,21 @@ from flake8 import utils

LOG = logging.getLogger(__name__)
#: Error numbers that, when raised while starting the parallel run, cause
#: the checks to be retried in serial instead of aborting outright.
SERIAL_RETRY_ERRNOS = {
    # ENOSPC: Added by sigmavirus24
    # > On some operating systems (OSX), multiprocessing may cause an
    # > ENOSPC error while trying to create a Semaphore.
    # > In those cases, we should replace the customized Queue Report
    # > class with pep8's StandardReport class to ensure users don't run
    # > into this problem.
    # > (See also: https://gitlab.com/pycqa/flake8/issues/74)
    errno.ENOSPC,
    # NOTE(sigmavirus24): When adding to this list, include the reasoning
    # on the lines before the error code and always append your error
    # code. Further, please always add a trailing `,` to reduce the visual
    # noise in diffs.
}
class Manager(object):
    """Manage the parallelism and checker instances for each plugin and file.
@@ -62,8 +78,6 @@ class Manager(object):
         if self.jobs > 1:
             self.using_multiprocessing = True
-            self.process_queue = multiprocessing.Queue()
-            self.results_queue = multiprocessing.Queue()

     @staticmethod
     def _cleanup_queue(q):
@@ -232,28 +246,52 @@ class Manager(object):
         if not self.using_multiprocessing:
             self._report_after_serial()
def run_parallel(self):
    """Fan the checks out across worker processes plus a reporter process.

    Spawns ``jobs - 1`` daemonized workers that consume from the process
    queue, and one non-daemon process that aggregates and reports results.
    All spawned processes are tracked in ``self.processes``.
    """
    # NOTE(sigmavirus24): The queues are (re)built here, not in __init__,
    # so a serial retry after an OSError starts from a clean slate.
    self.process_queue = multiprocessing.Queue()
    self.results_queue = multiprocessing.Queue()
    worker_count = self.jobs - 1
    LOG.info('Starting %d process workers', worker_count)
    for _ in range(worker_count):
        worker = multiprocessing.Process(
            target=self._run_checks_from_queue
        )
        worker.daemon = True
        worker.start()
        self.processes.append(worker)
    reporter = multiprocessing.Process(target=self._report_after_parallel)
    reporter.start()
    LOG.info('Started process to report errors')
    self.processes.append(reporter)
def run_serial(self):
    """Run every checker sequentially in the current process."""
    for file_checker in self.checkers:
        file_checker.run_checks(self.results_queue)
def run(self):
    """Run all the checkers.

    This will intelligently decide whether to run the checks in parallel
    or whether to run them in serial.

    If running the checks in parallel causes a problem (e.g.,
    https://gitlab.com/pycqa/flake8/issues/74) this also implements
    fallback to serial processing.
    """
    try:
        if self.using_multiprocessing:
            self.run_parallel()
        else:
            self.run_serial()
    except OSError as oserr:
        # Only a known-benign set of errnos warrants a serial retry;
        # anything else is a real failure and must propagate.
        if oserr.errno not in SERIAL_RETRY_ERRNOS:
            LOG.exception(oserr)
            raise
        LOG.warning('Running in serial after OS exception, %r', oserr)
        self.run_serial()
    def start(self):
        """Start checking files."""