diff --git a/src/flake8/main/options.py b/src/flake8/main/options.py index 2aabab8..666f42a 100644 --- a/src/flake8/main/options.py +++ b/src/flake8/main/options.py @@ -1,6 +1,5 @@ """Contains the logic for all of the default options for Flake8.""" from flake8 import defaults -from flake8 import utils from flake8.main import debug from flake8.main import vcs @@ -146,9 +145,8 @@ def register_default_options(option_manager): add_option( "--per-file-ignores", + default="", parse_from_config=True, - comma_separated_list=True, - separator=utils.NEWLINE_SEPARATED_LIST_RE, help="A pairing of filenames and violation codes that defines which " "violations to ignore in a particular file. The filenames can be " "specified in a manner similar to the ``--exclude`` option and the " diff --git a/src/flake8/options/manager.py b/src/flake8/options/manager.py index 7be4315..3f4e883 100644 --- a/src/flake8/options/manager.py +++ b/src/flake8/options/manager.py @@ -32,7 +32,6 @@ class Option(object): parse_from_config=False, comma_separated_list=False, normalize_paths=False, - separator=None, ): """Initialize an Option instance wrapping optparse.Option. @@ -80,8 +79,6 @@ class Option(object): :param bool normalize_paths: Whether the option is expecting a path or list of paths and should attempt to normalize the paths to absolute paths. - :param separator: - The item that separates the "comma"-separated list. 
""" self.short_option_name = short_option_name self.long_option_name = long_option_name @@ -110,7 +107,6 @@ class Option(object): self.parse_from_config = parse_from_config self.comma_separated_list = comma_separated_list self.normalize_paths = normalize_paths - self.separator = separator or utils.COMMA_SEPARATED_LIST_RE self.config_name = None if parse_from_config: diff --git a/src/flake8/style_guide.py b/src/flake8/style_guide.py index 56fa832..01d85d7 100644 --- a/src/flake8/style_guide.py +++ b/src/flake8/style_guide.py @@ -357,9 +357,10 @@ class StyleGuideManager(object): :rtype: :class:`~flake8.style_guide.StyleGuide` """ - for value in options.per_file_ignores: - filename, violations_str = value.split(":") - violations = utils.parse_comma_separated_list(violations_str) + per_file = utils.parse_files_to_codes_mapping( + options.per_file_ignores + ) + for filename, violations in per_file: yield self.default_style_guide.copy( filename=filename, extend_ignore_with=violations ) diff --git a/src/flake8/utils.py b/src/flake8/utils.py index e5eef45..0c97a39 100644 --- a/src/flake8/utils.py +++ b/src/flake8/utils.py @@ -11,7 +11,6 @@ import tokenize DIFF_HUNK_REGEXP = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? 
@@.*$") COMMA_SEPARATED_LIST_RE = re.compile(r"[,\s]") -NEWLINE_SEPARATED_LIST_RE = re.compile(r"[\s]") LOCAL_PLUGIN_LIST_RE = re.compile(r"[,\t\n\r\f\v]") @@ -41,6 +40,100 @@ def parse_comma_separated_list(value, regexp=COMMA_SEPARATED_LIST_RE): return [item for item in item_gen if item] +_Token = collections.namedtuple("Token", ("tp", "src")) +_CODE, _FILE, _COLON, _COMMA, _WS = "code", "file", "colon", "comma", "ws" +_EOF = "eof" +_FILE_LIST_TOKEN_TYPES = [ + (re.compile(r"[A-Z]+[0-9]*"), _CODE), + (re.compile(r"[^\s:,]+"), _FILE), + (re.compile(r"\s*:\s*"), _COLON), + (re.compile(r"\s*,\s*"), _COMMA), + (re.compile(r"\s+"), _WS), +] + + +def _tokenize_files_to_codes_mapping(value): + # type: (str) -> List[_Token] + tokens = [] + i = 0 + while i < len(value): + for token_re, token_name in _FILE_LIST_TOKEN_TYPES: + match = token_re.match(value, i) + if match: + tokens.append(_Token(token_name, match.group().strip())) + i = match.end() + break + else: + raise AssertionError("unreachable", value, i) + tokens.append(_Token(_EOF, "")) + + return tokens + + +def parse_files_to_codes_mapping(value): # noqa: C901 + # type: (Union[Sequence[str], str]) -> List[Tuple[List[str], List[str]]] + """Parse a files-to-codes mapping. + + A files-to-codes mapping is a sequence of values specified as + `filenames list:codes list ...`. Each of the lists may be separated by + either comma or whitespace tokens. + + :param value: String to be parsed and normalized. 
+ :type value: str + """ + if isinstance(value, (list, tuple)): + value = "\n".join(value) + + ret = [] + if not value.strip(): + return ret + + class State: + seen_sep = True + seen_colon = False + filenames = [] + codes = [] + + def _reset(): + if State.codes: + for filename in State.filenames: + ret.append((filename, State.codes)) + State.seen_sep = True + State.seen_colon = False + State.filenames = [] + State.codes = [] + + for token in _tokenize_files_to_codes_mapping(value): + # legal in any state: separator sets the sep bit + if token.tp in {_COMMA, _WS}: + State.seen_sep = True + # looking for filenames + elif not State.seen_colon: + if token.tp == _COLON: + State.seen_colon = True + State.seen_sep = True + elif State.seen_sep and token.tp == _FILE: + State.filenames.append(token.src) + State.seen_sep = False + else: + raise ValueError("Unexpected token: {}".format(token)) + # looking for codes + else: + if token.tp == _EOF: + _reset() + elif State.seen_sep and token.tp == _CODE: + State.codes.append(token.src) + State.seen_sep = False + elif State.seen_sep and token.tp == _FILE: + _reset() + State.filenames.append(token.src) + State.seen_sep = False + else: + raise ValueError("Unexpected token: {}".format(token)) + + return ret + + def normalize_paths(paths, parent=os.curdir): # type: (Union[Sequence[str], str], str) -> List[str] """Parse a comma-separated list of paths. 
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 9f3976f..afd1b43 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -32,6 +32,82 @@ def test_parse_comma_separated_list(value, expected): assert utils.parse_comma_separated_list(value) == expected +@pytest.mark.parametrize( + ('value', 'expected'), + ( + # empty option configures nothing + ('', []), (' ', []), ('\n\n\n', []), + # basic case + ( + 'f.py:E123', + [('f.py', ['E123'])], + ), + # multiple filenames, multiple codes + ( + 'f.py,g.py:E,F', + [('f.py', ['E', 'F']), ('g.py', ['E', 'F'])], + ), + # demonstrate that whitespace is not important around tokens + ( + ' f.py , g.py : E , F ', + [('f.py', ['E', 'F']), ('g.py', ['E', 'F'])], + ), + # whitespace can separate groups of configuration + ( + 'f.py:E g.py:F', + [('f.py', ['E']), ('g.py', ['F'])], + ), + # newlines can separate groups of configuration + ( + 'f.py: E\ng.py: F\n', + [('f.py', ['E']), ('g.py', ['F'])], + ), + # whitespace can be used in place of commas + ( + 'f.py g.py: E F', + [('f.py', ['E', 'F']), ('g.py', ['E', 'F'])], + ), + # go ahead, indent your codes + ( + 'f.py:\n E,F\ng.py:\n G,H', + [('f.py', ['E', 'F']), ('g.py', ['G', 'H'])], + ), + # it's easier to allow zero filenames or zero codes than forbid it + (':E', []), ('f.py:', []), + (':E f.py:F', [('f.py', ['F'])]), + ('f.py: g.py:F', [('g.py', ['F'])]), + # sequences are also valid (?) 
+ ( + ['f.py:E,F', 'g.py:G,H'], + [('f.py', ['E', 'F']), ('g.py', ['G', 'H'])], + ), + ), +) +def test_parse_files_to_codes_mapping(value, expected): + """Test parsing of valid files-to-codes mappings.""" + assert utils.parse_files_to_codes_mapping(value) == expected + + +@pytest.mark.parametrize( + 'value', + ( + # code while looking for filenames + 'E123', 'f.py,E123', 'f.py E123', + # eof while looking for filenames + 'f.py', 'f.py:E,g.py', + # colon while looking for codes + 'f.py::', 'f.py:E:', + + # no separator between + 'f.py:Eg.py:F', 'f.py:E1F1', + ), +) +def test_invalid_file_list(value): + """Test parsing of invalid files-to-codes mappings.""" + with pytest.raises(ValueError): + utils.parse_files_to_codes_mapping(value) + + +@pytest.mark.parametrize("value,expected", [ ("flake8", "flake8"), ("../flake8", os.path.abspath("../flake8")),