From 35996b7a256ac7445edc2492457e45b17c896c34 Mon Sep 17 00:00:00 2001 From: Ben Webber Date: Sun, 26 Nov 2017 00:17:47 +0000 Subject: [PATCH] Add check to enforce literal syntax for Python builtin types This check requires authors to initialize empty or zero builtin types using the literal syntax (e.g., `{}` instead of `dict()`). Authors may ignore this requirement for certain builtins using the `--ignore` option. Authors may also forbid calling `dict()` with keyword arguments (`dict(a=1, b=2)`) using the `--no-allow-dict-kwargs` flag. --- .pre-commit-hooks.yaml | 9 ++ README.md | 4 + hooks.yaml | 6 ++ pre_commit_hooks/check_builtin_literals.py | 90 +++++++++++++++++ setup.py | 1 + testing/resources/builtin_constructors.py | 7 ++ testing/resources/builtin_literals.py | 7 ++ tests/check_builtin_literals_test.py | 107 +++++++++++++++++++++ 8 files changed, 231 insertions(+) create mode 100644 pre_commit_hooks/check_builtin_literals.py create mode 100644 testing/resources/builtin_constructors.py create mode 100644 testing/resources/builtin_literals.py create mode 100644 tests/check_builtin_literals_test.py diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index f5b73d3..f1a901e 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -34,6 +34,15 @@ # for backward compatibility files: '' minimum_pre_commit_version: 0.15.0 +- id: check-builtin-literals + name: Check builtin type constructor use + description: Require literal syntax when initializing empty or zero Python builtin types. + entry: check-builtin-literals + language: python + types: [python] + # for backward compatibility + files: '' + minimum_pre_commit_version: 0.15.0 - id: check-case-conflict name: Check for case conflicts description: Check for files that would conflict in case-insensitive filesystems diff --git a/README.md b/README.md index 128c0c0..ff596ac 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,10 @@ Add this to your `.pre-commit-config.yaml` - `check-added-large-files` - Prevent giant files from being committed. - Specify what is "too large" with `args: ['--maxkb=123']` (default=500kB). - `check-ast` - Simply check whether files parse as valid python. +- `check-builtin-literals` - Require literal syntax when initializing empty or zero Python builtin types. + - Allows calling constructors with positional arguments (e.g., `list('abc')`). + - Ignore this requirement for specific builtin types with `--ignore=type1,type2,…`. + - Forbid `dict` keyword syntax with `--no-allow-dict-kwargs`. - `check-byte-order-marker` - Forbid files which have a UTF-8 byte-order marker - `check-case-conflict` - Check for files with names that would conflict on a case-insensitive filesystem like MacOS HFS+ or Windows FAT. diff --git a/hooks.yaml b/hooks.yaml index 6d66935..4552fa7 100644 --- a/hooks.yaml +++ b/hooks.yaml @@ -16,6 +16,12 @@ entry: upgrade-your-pre-commit-version files: '' minimum_pre_commit_version: 0.15.0 +- id: check-builtin-literals + language: system + name: upgrade-your-pre-commit-version + entry: upgrade-your-pre-commit-version + files: '' + minimum_pre_commit_version: 0.15.0 - id: check-byte-order-marker language: system name: upgrade-your-pre-commit-version diff --git a/pre_commit_hooks/check_builtin_literals.py b/pre_commit_hooks/check_builtin_literals.py new file mode 100644 index 0000000..1213288 --- /dev/null +++ b/pre_commit_hooks/check_builtin_literals.py @@ -0,0 +1,90 @@ +from __future__ import unicode_literals + +import argparse +import ast +import collections +import sys + + +BUILTIN_TYPES = { + 'complex': '0j', + 'dict': '{}', + 'float': '0.0', + 'int': '0', + 'list': '[]', + 'str': "''", + 'tuple': '()', +} + + +BuiltinTypeCall = collections.namedtuple('BuiltinTypeCall', ['name', 'line', 'column']) + + +class BuiltinTypeVisitor(ast.NodeVisitor): + def __init__(self, ignore=None, allow_dict_kwargs=True): + self.builtin_type_calls = [] + self.ignore = set(ignore) if ignore else set() + self.allow_dict_kwargs = allow_dict_kwargs + + def _check_dict_call(self, node): + return self.allow_dict_kwargs and (getattr(node, 'kwargs', None) or getattr(node, 'keywords', None)) + + def visit_Call(self, node): + if node.func.id not in set(BUILTIN_TYPES).difference(self.ignore): + return + if node.func.id == 'dict' and self._check_dict_call(node): + return + elif node.args: + return + self.builtin_type_calls.append( + BuiltinTypeCall(node.func.id, node.lineno, node.col_offset), + ) + + +def check_file_for_builtin_type_constructors(filename, ignore=None, allow_dict_kwargs=True): + tree = ast.parse(open(filename, 'rb').read(), filename=filename) + visitor = BuiltinTypeVisitor(ignore=ignore, allow_dict_kwargs=allow_dict_kwargs) + visitor.visit(tree) + return visitor.builtin_type_calls + + +def parse_args(argv): + def parse_ignore(value): + return set(value.split(',')) + + parser = argparse.ArgumentParser() + parser.add_argument('filenames', nargs='*') + parser.add_argument('--ignore', type=parse_ignore, default=set()) + + allow_dict_kwargs = parser.add_mutually_exclusive_group(required=False) + allow_dict_kwargs.add_argument('--allow-dict-kwargs', action='store_true') + allow_dict_kwargs.add_argument('--no-allow-dict-kwargs', dest='allow_dict_kwargs', action='store_false') + allow_dict_kwargs.set_defaults(allow_dict_kwargs=True) + + return parser.parse_args(argv) + + +def main(argv=None): + args = parse_args(argv) + rc = 0 + for filename in args.filenames: + calls = check_file_for_builtin_type_constructors( + filename, + ignore=args.ignore, + allow_dict_kwargs=args.allow_dict_kwargs, + ) + if calls: + rc = rc or 1 + for call in calls: + print( + '{filename}:{call.line}:{call.column} - Replace {call.name}() with {replacement}'.format( + filename=filename, + call=call, + replacement=BUILTIN_TYPES[call.name], + ), + ) + return rc + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/setup.py b/setup.py index d5296de..ce8de14 100644 --- a/setup.py +++ b/setup.py @@ -36,6 +36,7 @@ setup( 'autopep8-wrapper = pre_commit_hooks.autopep8_wrapper:main', 'check-added-large-files = pre_commit_hooks.check_added_large_files:main', 'check-ast = pre_commit_hooks.check_ast:check_ast', + 'check-builtin-literals = pre_commit_hooks.check_builtin_literals:main', 'check-byte-order-marker = pre_commit_hooks.check_byte_order_marker:main', 'check-case-conflict = pre_commit_hooks.check_case_conflict:main', 'check-docstring-first = pre_commit_hooks.check_docstring_first:main', diff --git a/testing/resources/builtin_constructors.py b/testing/resources/builtin_constructors.py new file mode 100644 index 0000000..3fab056 --- /dev/null +++ b/testing/resources/builtin_constructors.py @@ -0,0 +1,7 @@ +c1 = complex() +d1 = dict() +f1 = float() +i1 = int() +l1 = list() +s1 = str() +t1 = tuple() diff --git a/testing/resources/builtin_literals.py b/testing/resources/builtin_literals.py new file mode 100644 index 0000000..8513b70 --- /dev/null +++ b/testing/resources/builtin_literals.py @@ -0,0 +1,7 @@ +c1 = 0j +d1 = {} +f1 = 0.0 +i1 = 0 +l1 = [] +s1 = '' +t1 = () diff --git a/tests/check_builtin_literals_test.py b/tests/check_builtin_literals_test.py new file mode 100644 index 0000000..a38e522 --- /dev/null +++ b/tests/check_builtin_literals_test.py @@ -0,0 +1,107 @@ +import ast + +import pytest + +from pre_commit_hooks.check_builtin_literals import BuiltinTypeCall +from pre_commit_hooks.check_builtin_literals import BuiltinTypeVisitor +from pre_commit_hooks.check_builtin_literals import main +from testing.util import get_resource_path + + +@pytest.fixture +def visitor(): + return BuiltinTypeVisitor() + + +@pytest.mark.parametrize( + ('expression', 'calls'), + [ + # complex + ("0j", []), + ("complex()", [BuiltinTypeCall('complex', 1, 0)]), + ("complex(0, 0)", []), + ("complex('0+0j')", []), + # float + ("0.0", []), + ("float()", [BuiltinTypeCall('float', 1, 0)]), + ("float('0.0')", []), + # int + ("0", []), + ("int()", [BuiltinTypeCall('int', 1, 0)]), + ("int('0')", []), + # list + ("[]", []), + ("list()", [BuiltinTypeCall('list', 1, 0)]), + ("list('abc')", []), + ("list([c for c in 'abc'])", []), + ("list(c for c in 'abc')", []), + # str + ("''", []), + ("str()", [BuiltinTypeCall('str', 1, 0)]), + ("str('0')", []), + ("[]", []), + # tuple + ("()", []), + ("tuple()", [BuiltinTypeCall('tuple', 1, 0)]), + ("tuple('abc')", []), + ("tuple([c for c in 'abc'])", []), + ("tuple(c for c in 'abc')", []), + ], +) +def test_non_dict_exprs(visitor, expression, calls): + visitor.visit(ast.parse(expression)) + assert visitor.builtin_type_calls == calls + + +@pytest.mark.parametrize( + ('expression', 'calls'), + [ + ("{}", []), + ("dict()", [BuiltinTypeCall('dict', 1, 0)]), + ("dict(a=1, b=2, c=3)", []), + ("dict(**{'a': 1, 'b': 2, 'c': 3})", []), + ("dict([(k, v) for k, v in [('a', 1), ('b', 2), ('c', 3)]])", []), + ("dict((k, v) for k, v in [('a', 1), ('b', 2), ('c', 3)])", []), + ], +) +def test_dict_allow_kwargs_exprs(visitor, expression, calls): + visitor.visit(ast.parse(expression)) + assert visitor.builtin_type_calls == calls + + +@pytest.mark.parametrize( + ('expression', 'calls'), + [ + ("dict()", [BuiltinTypeCall('dict', 1, 0)]), + ("dict(a=1, b=2, c=3)", [BuiltinTypeCall('dict', 1, 0)]), + ("dict(**{'a': 1, 'b': 2, 'c': 3})", [BuiltinTypeCall('dict', 1, 0)]), + ], +) +def test_dict_no_allow_kwargs_exprs(expression, calls): + visitor = BuiltinTypeVisitor(allow_dict_kwargs=False) + visitor.visit(ast.parse(expression)) + assert visitor.builtin_type_calls == calls + + +def test_ignore_constructors(): + visitor = BuiltinTypeVisitor(ignore=('complex', 'dict', 'float', 'int', 'list', 'str', 'tuple')) + visitor.visit(ast.parse(open(get_resource_path('builtin_constructors.py'), 'rb').read(), 'builtin_constructors.py')) + assert visitor.builtin_type_calls == [] + + +def test_failing_file(): + rc = main([get_resource_path('builtin_constructors.py')]) + assert rc == 1 + + +def test_passing_file(): + rc = main([get_resource_path('builtin_literals.py')]) + assert rc == 0 + + +def test_failing_file_ignore_all(): + rc = main([ + '--ignore=complex,dict,float,int,list,str,tuple', + get_resource_path('builtin_constructors.py'), + ]) + assert rc == 0