From 9fbaf2d2ea49adb97cc45be83a966fb3c7f292f2 Mon Sep 17 00:00:00 2001 From: "Eric N. Vander Weele" Date: Sun, 28 Jul 2019 10:39:27 -0400 Subject: [PATCH 1/6] utils: Assert desired contract for `parse_comma_separated_list()` This is a separate commit so it can be dropped during a rebase or reverted independently. --- src/flake8/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/flake8/utils.py b/src/flake8/utils.py index 02f0607..cf88648 100644 --- a/src/flake8/utils.py +++ b/src/flake8/utils.py @@ -39,6 +39,10 @@ def parse_comma_separated_list(value, regexp=COMMA_SEPARATED_LIST_RE): :rtype: list """ + assert isinstance( # nosec (for bandit) + value, (string_types, type(None)) + ), value + if not value: return [] From 9283f2f03f5c8e61eb5a6bca2f10afadb0d90317 Mon Sep 17 00:00:00 2001 From: "Eric N. Vander Weele" Date: Sun, 28 Jul 2019 10:39:27 -0400 Subject: [PATCH 2/6] utils: Change `parse_comma_separated_list()` contract This is the initial incision point to only accept `str` (or `None`) for parsing out comma/whitespace/regexp separated values. --- docs/source/internal/utils.rst | 9 ++------- src/flake8/utils.py | 10 ++++------ tests/unit/test_utils.py | 5 ----- 3 files changed, 6 insertions(+), 18 deletions(-) diff --git a/docs/source/internal/utils.rst b/docs/source/internal/utils.rst index 31e0d39..2b2638e 100644 --- a/docs/source/internal/utils.rst +++ b/docs/source/internal/utils.rst @@ -25,13 +25,8 @@ "E121,W123,F904" "E121,\nW123,\nF804" - "E121,\n\tW123,\n\tF804" - -Or it will take a list of strings (potentially with whitespace) such as - -.. code-block:: python - - [" E121\n", "\t\nW123 ", "\n\tF904\n "] + " E121,\n\tW123,\n\tF804 " + " E121\n\tW123 \n\tF804" And converts it to a list that looks as follows diff --git a/src/flake8/utils.py b/src/flake8/utils.py index cf88648..06a1db1 100644 --- a/src/flake8/utils.py +++ b/src/flake8/utils.py @@ -24,11 +24,11 @@ string_types = (str, type(u"")) def parse_comma_separated_list(value, regexp=COMMA_SEPARATED_LIST_RE): - # type: (Union[Sequence[str], str], Pattern[str]) -> List[str] + # type: (Optional[str], Pattern[str]) -> List[str] """Parse a comma-separated list. :param value: - String or list of strings to be parsed and normalized. + String to be parsed and normalized. :param regexp: Compiled regular expression used to split the value when it is a string. @@ -46,10 +46,8 @@ def parse_comma_separated_list(value, regexp=COMMA_SEPARATED_LIST_RE): if not value: return [] - if isinstance(value, string_types): - value = regexp.split(value) - - item_gen = (item.strip() for item in value) + separated = regexp.split(value) + item_gen = (item.strip() for item in separated) return [item for item in item_gen if item] diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 20c5fb9..b4bd1a1 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -22,11 +22,6 @@ RELATIVE_PATHS = ["flake8", "pep8", "pyflakes", "mccabe"] ("E123,W234,,E206,,", ["E123", "W234", "E206"]), ("E123, W234,, E206,,", ["E123", "W234", "E206"]), ("E123,,W234,,E206,,", ["E123", "W234", "E206"]), - (["E123", "W234", "E206"], ["E123", "W234", "E206"]), - (["E123", "\n\tW234", "\n E206"], ["E123", "W234", "E206"]), - (["E123", "\n\tW234", "\n E206", "\n"], ["E123", "W234", "E206"]), - (["E123", "\n\tW234", "", "\n E206", "\n"], ["E123", "W234", "E206"]), - (["E123", "\n\tW234", "", "\n E206", ""], ["E123", "W234", "E206"]), ]) def test_parse_comma_separated_list(value, expected): """Verify that similar inputs produce identical outputs.""" From a0cd55fd6d2697f4823d1de24c9dda5858b27610 Mon Sep 17 00:00:00 2001 From: "Eric N. Vander Weele" Date: Sun, 28 Jul 2019 10:39:27 -0400 Subject: [PATCH 3/6] utils: Assert desired contract for `normalize_paths()` This is a separate commit so it can be dropped during a rebase or revert independently. --- src/flake8/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/flake8/utils.py b/src/flake8/utils.py index 06a1db1..bb6ba2e 100644 --- a/src/flake8/utils.py +++ b/src/flake8/utils.py @@ -168,6 +168,7 @@ def normalize_paths(paths, parent=os.curdir): :rtype: [str] """ + assert isinstance(paths, list), paths # nosec (for bandit) return [ normalize_path(p, parent) for p in parse_comma_separated_list(paths) ] From 1ba56b9056fd60dbb0f991fe5a4ac0bb6ad85cb9 Mon Sep 17 00:00:00 2001 From: "Eric N. Vander Weele" Date: Sun, 28 Jul 2019 10:39:27 -0400 Subject: [PATCH 4/6] utils: Change `normalize_paths()` contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `normalize_paths()` utility was doing too much — parsing unstructured configuration path data and dispatching the scrubbed paths to be normalized. Towards moving the parsing of unstructured configuration path data closer towards were configuration occurs, have the utility accept only structured input for normalizing paths. --- docs/source/internal/utils.rst | 6 +++--- src/flake8/utils.py | 8 +++----- tests/unit/test_utils.py | 7 +++---- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/docs/source/internal/utils.rst b/docs/source/internal/utils.rst index 2b2638e..fed9a47 100644 --- a/docs/source/internal/utils.rst +++ b/docs/source/internal/utils.rst @@ -45,9 +45,9 @@ path if the string has a ``/`` in it. It also removes trailing ``/``\ s. .. autofunction:: flake8.utils.normalize_paths -This function utilizes :func:`~flake8.utils.parse_comma_separated_list` and -:func:`~flake8.utils.normalize_path` to normalize its input to a list of -strings that should be paths. +This function utilizes :func:`~flake8.utils.normalize_path` to normalize a +sequence of paths. See :func:`~flake8.utils.normalize_path` for what defines a +normalized path. .. autofunction:: flake8.utils.stdin_get_value diff --git a/src/flake8/utils.py b/src/flake8/utils.py index bb6ba2e..5ed78e1 100644 --- a/src/flake8/utils.py +++ b/src/flake8/utils.py @@ -160,8 +160,8 @@ def parse_files_to_codes_mapping(value_): # noqa: C901 def normalize_paths(paths, parent=os.curdir): - # type: (Union[Sequence[str], str], str) -> List[str] - """Parse a comma-separated list of paths. + # type: (Sequence[str], str) -> List[str] + """Normalize a list of paths relative to a parent directory. :returns: The normalized paths. @@ -169,9 +169,7 @@ def normalize_paths(paths, parent=os.curdir): [str] """ assert isinstance(paths, list), paths # nosec (for bandit) - return [ - normalize_path(p, parent) for p in parse_comma_separated_list(paths) - ] + return [normalize_path(p, parent) for p in paths] def normalize_path(path, parent=os.curdir): diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index b4bd1a1..9fd03b2 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -127,14 +127,13 @@ def test_normalize_path(value, expected): @pytest.mark.parametrize("value,expected", [ - ("flake8,pep8,pyflakes,mccabe", ["flake8", "pep8", "pyflakes", "mccabe"]), - ("flake8,\n\tpep8,\n pyflakes,\n\n mccabe", + (["flake8", "pep8", "pyflakes", "mccabe"], ["flake8", "pep8", "pyflakes", "mccabe"]), - ("../flake8,../pep8,../pyflakes,../mccabe", + (["../flake8", "../pep8", "../pyflakes", "../mccabe"], [os.path.abspath("../" + p) for p in RELATIVE_PATHS]), ]) def test_normalize_paths(value, expected): - """Verify we normalize comma-separated paths provided to the tool.""" + """Verify we normalizes a sequence of paths provided to the tool.""" assert utils.normalize_paths(value) == expected From f01011a403fd806571b8b0fa172ec39f54bb9e83 Mon Sep 17 00:00:00 2001 From: "Eric N. Vander Weele" Date: Sun, 28 Jul 2019 10:39:27 -0400 Subject: [PATCH 5/6] Normalize option values additionally by type Now that the contract has narrowed for `utils.normalize_paths()` and `utils.parse_comma_separated_list()`, `Option.normalize()` must handle when the option value is either a singular value or a sequence (i.e., `list`) of values. The paths where `Option.normalize()` is called are: 1. options/config.py: `MergedConfigParser.parse_*_config()` There are 3 paths wehre eventually `_normalize_value()` is called. 2. options/manager.py: `OptionManager.parse_args()` For (1), values are coming from the `configparser` module. For (2), values are coming from `optparse.OptionParser`. Rudimentary investigation seems to implicate that `optparse.OptionParser.parse_args()` always returns values in a `list` because it accumulates values. However, for `configparser`, the values are a string due to the key-value nature of the INI format. Given that `Option.normalize()` is the convergence point where normalization of an option occurs, it is acceptable for the method to also handle the parsing comma-separated values and path normalization by the option value's type. --- src/flake8/options/manager.py | 19 +++++++++++-------- src/flake8/utils.py | 4 +--- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/flake8/options/manager.py b/src/flake8/options/manager.py index 5d21127..cf94927 100644 --- a/src/flake8/options/manager.py +++ b/src/flake8/options/manager.py @@ -2,7 +2,7 @@ import collections import logging import optparse # pylint: disable=deprecated-module -from typing import Any, Callable, Dict, List, Optional, Set +from typing import Dict, List, Optional, Set from flake8 import utils @@ -142,14 +142,17 @@ class Option(object): def normalize(self, value, *normalize_args): """Normalize the value based on the option configuration.""" + if self.comma_separated_list and isinstance( + value, utils.string_types + ): + value = utils.parse_comma_separated_list(value) + if self.normalize_paths: - # Decide whether to parse a list of paths or a single path - normalize = utils.normalize_path # type: Callable[..., Any] - if self.comma_separated_list: - normalize = utils.normalize_paths - return normalize(value, *normalize_args) - elif self.comma_separated_list: - return utils.parse_comma_separated_list(value) + if isinstance(value, list): + value = utils.normalize_paths(value, *normalize_args) + else: + value = utils.normalize_path(value, *normalize_args) + return value def normalize_from_setuptools(self, value): diff --git a/src/flake8/utils.py b/src/flake8/utils.py index 5ed78e1..619c83d 100644 --- a/src/flake8/utils.py +++ b/src/flake8/utils.py @@ -39,9 +39,7 @@ def parse_comma_separated_list(value, regexp=COMMA_SEPARATED_LIST_RE): :rtype: list """ - assert isinstance( # nosec (for bandit) - value, (string_types, type(None)) - ), value + assert isinstance(value, string_types), value # nosec (for bandit) if not value: return [] From 1757bce321c8276b6fad77ce15766f0c81e46957 Mon Sep 17 00:00:00 2001 From: "Eric N. Vander Weele" Date: Sun, 28 Jul 2019 10:39:27 -0400 Subject: [PATCH 6/6] utils: Tighten `parse_comma_separated_list()` contract further Now that callers are ensuring that `value` is not `None`, we can further tighten the contract and remove the conditional to account when `None` is passed-in for `value`. Additionally, add a new test vector to account for when an empty string is passed in, which would fail `if no value`. --- src/flake8/utils.py | 5 +---- tests/unit/test_utils.py | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/flake8/utils.py b/src/flake8/utils.py index 619c83d..92fec71 100644 --- a/src/flake8/utils.py +++ b/src/flake8/utils.py @@ -24,7 +24,7 @@ string_types = (str, type(u"")) def parse_comma_separated_list(value, regexp=COMMA_SEPARATED_LIST_RE): - # type: (Optional[str], Pattern[str]) -> List[str] + # type: (str, Pattern[str]) -> List[str] """Parse a comma-separated list. :param value: @@ -41,9 +41,6 @@ def parse_comma_separated_list(value, regexp=COMMA_SEPARATED_LIST_RE): """ assert isinstance(value, string_types), value # nosec (for bandit) - if not value: - return [] - separated = regexp.split(value) item_gen = (item.strip() for item in separated) return [item for item in item_gen if item] diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 9fd03b2..151a76d 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -22,6 +22,7 @@ RELATIVE_PATHS = ["flake8", "pep8", "pyflakes", "mccabe"] ("E123,W234,,E206,,", ["E123", "W234", "E206"]), ("E123, W234,, E206,,", ["E123", "W234", "E206"]), ("E123,,W234,,E206,,", ["E123", "W234", "E206"]), + ("", []), ]) def test_parse_comma_separated_list(value, expected): """Verify that similar inputs produce identical outputs."""