From 53f1dc0163330633d8fb3ba8590089166cd49e47 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sun, 4 Jan 2015 13:06:21 -0800 Subject: [PATCH] Add check-docstring-first hook. --- README.md | 3 +- hooks.yaml | 10 +++- pre_commit_hooks/check_docstring_first.py | 63 +++++++++++++++++++++ setup.py | 3 +- tests/check_docstring_first_test.py | 67 +++++++++++++++++++++++ 5 files changed, 142 insertions(+), 4 deletions(-) create mode 100644 pre_commit_hooks/check_docstring_first.py create mode 100644 tests/check_docstring_first_test.py diff --git a/README.md b/README.md index 8d17026..08a3bcc 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,8 @@ Add this to your `.pre-commit-config.yaml` ### Hooks available - `autopep8-wrapper` - Runs autopep8 over python source. -' `check-added-large-files` - Prevent giant files from being committed. +- `check-added-large-files` - Prevent giant files from being committed. +- `check-docstring-first` - Checks a common error of defining a docstring after code. - `check-json` - Attempts to load all json files to verify syntax. - `check-yaml` - Attempts to load all yaml files to verify syntax. - `debug-statements` - Check for pdb / ipdb / pudb statements in code. diff --git a/hooks.yaml b/hooks.yaml index 3eb0e94..fb9cc82 100644 --- a/hooks.yaml +++ b/hooks.yaml @@ -7,11 +7,17 @@ args: [-i] - id: check-added-large-files name: Check for added large files - language: python - entry: check-added-large-files description: Prevent giant files from being committed + entry: check-added-large-files + language: python # Match all files files: '' +- id: check-docstring-first + name: Check docstring is first + description: Checks a common error of defining a docstring after code. + entry: check-docstring-first + language: python + files: \.py$ - id: check-json name: Check JSON description: This hook checks json files for parseable syntax. 
diff --git a/pre_commit_hooks/check_docstring_first.py b/pre_commit_hooks/check_docstring_first.py
new file mode 100644
index 0000000..da5425d
--- /dev/null
+++ b/pre_commit_hooks/check_docstring_first.py
@@ -0,0 +1,72 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import argparse
+import io
+import tokenize
+
+
+# Token types that are not treated as code when scanning a module.
+NON_CODE_TOKENS = frozenset((
+    tokenize.COMMENT, tokenize.ENDMARKER, tokenize.NEWLINE, tokenize.NL,
+))
+
+
+def check_docstring_first(src, filename=''):
+    """Returns nonzero if the source has what looks like a docstring that is
+    not at the beginning of the source.
+
+    A string will be considered a docstring if it is a STRING token with a
+    col offset of 0.
+
+    On the first such string that follows code or an earlier docstring, a
+    message prefixed with ``filename`` and the line number is printed and 1
+    is returned.  Returns 0 if no offending string is found.
+    """
+    found_docstring_line = None
+    found_code_line = None
+
+    tok_gen = tokenize.generate_tokens(io.StringIO(src).readline)
+    for tok_type, _, (sline, scol), _, _ in tok_gen:
+        # Looks like a docstring!
+        if tok_type == tokenize.STRING and scol == 0:
+            if found_docstring_line is not None:
+                print(
+                    '{0}:{1} Multiple module docstrings '
+                    '(first docstring on line {2}).'.format(
+                        filename, sline, found_docstring_line,
+                    )
+                )
+                return 1
+            elif found_code_line is not None:
+                print(
+                    '{0}:{1} Module docstring appears after code '
+                    '(code seen on line {2}).'.format(
+                        filename, sline, found_code_line,
+                    )
+                )
+                return 1
+            else:
+                found_docstring_line = sline
+        elif tok_type not in NON_CODE_TOKENS and found_code_line is None:
+            found_code_line = sline
+
+    return 0
+
+
+def main(argv=None):
+    """Check each filename argument; return nonzero if any file fails."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='*')
+    args = parser.parse_args(argv)
+
+    retv = 0
+
+    for filename in args.filenames:
+        # Read inside a context manager so the handle is closed promptly
+        # instead of being left open until garbage collection.
+        with io.open(filename) as f:
+            contents = f.read()
+        retv |= check_docstring_first(contents, filename=filename)
+
+    return retv
diff --git a/setup.py b/setup.py index 3681e65..26991de 100644 --- a/setup.py +++ b/setup.py @@ -37,8 +37,9 @@ setup( entry_points={ 'console_scripts': [ 'autopep8-wrapper = 
pre_commit_hooks.autopep8_wrapper:main', - 'check-json = pre_commit_hooks.check_json:check_json', 'check-added-large-files = pre_commit_hooks.check_added_large_files:main', + 'check-docstring-first = pre_commit_hooks.check_docstring_first:main', + 'check-json = pre_commit_hooks.check_json:check_json', 'check-yaml = pre_commit_hooks.check_yaml:check_yaml', 'debug-statement-hook = pre_commit_hooks.debug_statement_hook:debug_statement_hook', 'end-of-file-fixer = pre_commit_hooks.end_of_file_fixer:end_of_file_fixer', diff --git a/tests/check_docstring_first_test.py b/tests/check_docstring_first_test.py new file mode 100644 index 0000000..ecff0e3 --- /dev/null +++ b/tests/check_docstring_first_test.py @@ -0,0 +1,67 @@ +from __future__ import absolute_import +from __future__ import unicode_literals + +import io + +import pytest + +from pre_commit_hooks.check_docstring_first import check_docstring_first +from pre_commit_hooks.check_docstring_first import main + + +# Contents, expected, expected_output +TESTS = ( + # trivial + ('', 0, ''), + # Acceptable + ('"foo"', 0, ''), + # Docstring after code + ( + 'from __future__ import unicode_literals\n' + '"foo"\n', + 1, + '{filename}:2 Module docstring appears after code ' + '(code seen on line 1).\n' + ), + # Test double docstring + ( + '"The real docstring"\n' + 'from __future__ import absolute_import\n' + '"fake docstring"\n', + 1, + '{filename}:3 Multiple module docstrings ' + '(first docstring on line 1).\n' + ), + # Test multiple lines of code above + ( + 'import os\n' + 'import sys\n' + '"docstring"\n', + 1, + '{filename}:3 Module docstring appears after code ' + '(code seen on line 1).\n', + ), + # String literals in expressions are ok. 
+ ('x = "foo"\n', 0, ''), +) + + +all_tests = pytest.mark.parametrize( + ('contents', 'expected', 'expected_out'), TESTS, +) + + +@all_tests +def test_unit(capsys, contents, expected, expected_out): + assert check_docstring_first(contents) == expected + assert capsys.readouterr()[0] == expected_out.format(filename='') + + +@all_tests +def test_integration(tmpdir, capsys, contents, expected, expected_out): + tmpfilename = tmpdir.join('test.py').strpath + with io.open(tmpfilename, 'w') as tmpfile: + tmpfile.write(contents) + + assert main([tmpfilename]) == expected + assert capsys.readouterr()[0] == expected_out.format(filename=tmpfilename)