Merge branch 'per_file_ignore' into 'master'

Support more syntaxes in per-file-ignores

Closes #471

See merge request pycqa/flake8!281
This commit is contained in:
Ian Stapleton Cordasco 2019-01-17 13:32:06 +00:00
commit 97e4927555
5 changed files with 175 additions and 11 deletions

View file

@ -1,6 +1,5 @@
"""Contains the logic for all of the default options for Flake8."""
from flake8 import defaults
from flake8 import utils
from flake8.main import debug
from flake8.main import vcs
@ -146,9 +145,8 @@ def register_default_options(option_manager):
add_option(
"--per-file-ignores",
default="",
parse_from_config=True,
comma_separated_list=True,
separator=utils.NEWLINE_SEPARATED_LIST_RE,
help="A pairing of filenames and violation codes that defines which "
"violations to ignore in a particular file. The filenames can be "
"specified in a manner similar to the ``--exclude`` option and the "

View file

@ -32,7 +32,6 @@ class Option(object):
parse_from_config=False,
comma_separated_list=False,
normalize_paths=False,
separator=None,
):
"""Initialize an Option instance wrapping optparse.Option.
@ -80,8 +79,6 @@ class Option(object):
:param bool normalize_paths:
Whether the option is expecting a path or list of paths and should
attempt to normalize the paths to absolute paths.
:param separator:
The item that separates the "comma"-separated list.
"""
self.short_option_name = short_option_name
self.long_option_name = long_option_name
@ -110,7 +107,6 @@ class Option(object):
self.parse_from_config = parse_from_config
self.comma_separated_list = comma_separated_list
self.normalize_paths = normalize_paths
self.separator = separator or utils.COMMA_SEPARATED_LIST_RE
self.config_name = None
if parse_from_config:

View file

@ -357,9 +357,10 @@ class StyleGuideManager(object):
:rtype:
:class:`~flake8.style_guide.StyleGuide`
"""
for value in options.per_file_ignores:
filename, violations_str = value.split(":")
violations = utils.parse_comma_separated_list(violations_str)
per_file = utils.parse_files_to_codes_mapping(
options.per_file_ignores
)
for filename, violations in per_file:
yield self.default_style_guide.copy(
filename=filename, extend_ignore_with=violations
)

View file

@ -11,7 +11,6 @@ import tokenize
# Matches a unified-diff hunk header; the two groups capture the new-file
# starting line number and the (optional) line count.
DIFF_HUNK_REGEXP = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$")
# Item delimiters for "comma"-separated option lists: a comma or any
# whitespace character.
COMMA_SEPARATED_LIST_RE = re.compile(r"[,\s]")
# Item delimiters for newline-separated option lists: any whitespace.
NEWLINE_SEPARATED_LIST_RE = re.compile(r"[\s]")
# Item delimiters for local-plugin lists: a comma or any whitespace
# character except the plain space.
LOCAL_PLUGIN_LIST_RE = re.compile(r"[,\t\n\r\f\v]")
@ -41,6 +40,100 @@ def parse_comma_separated_list(value, regexp=COMMA_SEPARATED_LIST_RE):
return [item for item in item_gen if item]
_Token = collections.namedtuple("Token", ("tp", "src"))
_CODE, _FILE, _COLON, _COMMA, _WS = "code", "file", "colon", "comma", "ws"
_EOF = "eof"
_FILE_LIST_TOKEN_TYPES = [
(re.compile(r"[A-Z][0-9]*"), _CODE),
(re.compile(r"[^\s:,]+"), _FILE),
(re.compile(r"\s*:\s*"), _COLON),
(re.compile(r"\s*,\s*"), _COMMA),
(re.compile(r"\s+"), _WS),
]
def _tokenize_files_to_codes_mapping(value):
# type: (str) -> List[_Token]
tokens = []
i = 0
while i < len(value):
for token_re, token_name in _FILE_LIST_TOKEN_TYPES:
match = token_re.match(value, i)
if match:
tokens.append(_Token(token_name, match.group().strip()))
i = match.end()
break
else:
raise AssertionError("unreachable", value, i)
tokens.append(_Token(_EOF, ""))
return tokens
def parse_files_to_codes_mapping(value):  # noqa: C901
    # type: (Union[Sequence[str], str]) -> List[Tuple[str, List[str]]]
    """Parse a files-to-codes mapping.

    A files-to-codes mapping is a sequence of values specified as
    `filenames list:codes list ...`. Each of the lists may be separated by
    either comma or whitespace tokens.

    :param value: String to be parsed and normalized.
    :type value: str
    :returns: List of ``(filename, codes)`` pairs; each filename of a group
        is paired with that group's list of codes.
    :raises ValueError: On an unexpected token (e.g. a code where a
        filename is expected, or a stray colon).
    """
    if isinstance(value, (list, tuple)):
        # A sequence of lines (e.g. from config parsing) is normalized to a
        # single newline-joined string before tokenizing.
        value = "\n".join(value)

    ret = []
    if not value.strip():
        # Empty or whitespace-only value configures nothing.
        return ret

    class State:
        # Mutable parser state, used as a plain namespace (never
        # instantiated) so the nested function can rebind it.
        seen_sep = True      # a separator/colon/start precedes the cursor
        seen_colon = False   # False: collecting filenames; True: codes
        filenames = []
        codes = []

    def _reset():
        # Flush the current group -- one (filename, codes) pair per
        # collected filename -- then start a fresh, empty group.
        # NOTE: all filenames of a group share one codes list object.
        if State.codes:
            for filename in State.filenames:
                ret.append((filename, State.codes))
        State.seen_sep = True
        State.seen_colon = False
        State.filenames = []
        State.codes = []

    for token in _tokenize_files_to_codes_mapping(value):
        # legal in any state: separator sets the sep bit
        if token.tp in {_COMMA, _WS}:
            State.seen_sep = True
        # looking for filenames
        elif not State.seen_colon:
            if token.tp == _COLON:
                State.seen_colon = True
                State.seen_sep = True
            elif State.seen_sep and token.tp == _FILE:
                State.filenames.append(token.src)
                State.seen_sep = False
            else:
                raise ValueError("Unexpected token: {}".format(token))
        # looking for codes
        else:
            if token.tp == _EOF:
                _reset()
            elif State.seen_sep and token.tp == _CODE:
                State.codes.append(token.src)
                State.seen_sep = False
            elif State.seen_sep and token.tp == _FILE:
                # A filename while collecting codes starts the next group.
                _reset()
                State.filenames.append(token.src)
                State.seen_sep = False
            else:
                raise ValueError("Unexpected token: {}".format(token))

    return ret
def normalize_paths(paths, parent=os.curdir):
# type: (Union[Sequence[str], str], str) -> List[str]
"""Parse a comma-separated list of paths.

View file

@ -32,6 +32,82 @@ def test_parse_comma_separated_list(value, expected):
assert utils.parse_comma_separated_list(value) == expected
# Each case maps an input ``value`` (string or sequence of strings) to the
# expected list of ``(filename, [codes])`` pairs.
@pytest.mark.parametrize(
    ('value', 'expected'),
    (
        # empty option configures nothing
        ('', []), (' ', []), ('\n\n\n', []),
        # basic case
        (
            'f.py:E123',
            [('f.py', ['E123'])],
        ),
        # multiple filenames, multiple codes
        (
            'f.py,g.py:E,F',
            [('f.py', ['E', 'F']), ('g.py', ['E', 'F'])],
        ),
        # demonstrate that whitespace is not important around tokens
        (
            ' f.py , g.py : E , F ',
            [('f.py', ['E', 'F']), ('g.py', ['E', 'F'])],
        ),
        # whitespace can separate groups of configuration
        (
            'f.py:E g.py:F',
            [('f.py', ['E']), ('g.py', ['F'])],
        ),
        # newlines can separate groups of configuration
        (
            'f.py: E\ng.py: F\n',
            [('f.py', ['E']), ('g.py', ['F'])],
        ),
        # whitespace can be used in place of commas
        (
            'f.py g.py: E F',
            [('f.py', ['E', 'F']), ('g.py', ['E', 'F'])],
        ),
        # go ahead, indent your codes
        (
            'f.py:\n E,F\ng.py:\n G,H',
            [('f.py', ['E', 'F']), ('g.py', ['G', 'H'])],
        ),
        # it's easier to allow zero filenames or zero codes than forbid it
        (':E', []), ('f.py:', []),
        (':E f.py:F', [('f.py', ['F'])]),
        ('f.py: g.py:F', [('g.py', ['F'])]),
        # sequences are also valid (joined with newlines before parsing)
        (
            ['f.py:E,F', 'g.py:G,H'],
            [('f.py', ['E', 'F']), ('g.py', ['G', 'H'])],
        ),
    ),
)
def test_parse_files_to_codes_mapping(value, expected):
    """Test parsing of valid files-to-codes mappings."""
    assert utils.parse_files_to_codes_mapping(value) == expected
# Every case is a single string that must raise ValueError.  Trailing
# commas matter here: without one, adjacent string literals would be
# implicitly concatenated into a single (unintended) test case.
@pytest.mark.parametrize(
    'value',
    (
        # code while looking for filenames
        'E123', 'f.py,E123', 'f.py E123',
        # eof while looking for filenames
        'f.py', 'f.py:E,g.py',
        # colon while looking for codes
        'f.py::', 'f.py:E:',
        # no separator between
        'f.py:Eg.py:F', 'f.py:E1F1',
    ),
)
def test_invalid_file_list(value):
    """Test parsing of invalid files-to-codes mappings."""
    with pytest.raises(ValueError):
        utils.parse_files_to_codes_mapping(value)
@pytest.mark.parametrize("value,expected", [
("flake8", "flake8"),
("../flake8", os.path.abspath("../flake8")),