From aa2ba6f94fba94c91740bba3894b1f9e0977f8dc Mon Sep 17 00:00:00 2001
From: Anthony Sottile
Date: Fri, 13 Nov 2015 12:34:37 -0800
Subject: [PATCH] Add encoding pragma hook.

Resolves pre-commit/pre-commit#15
---
 README.md                               |   1 +
 hooks.yaml                              |   6 ++
 pre_commit_hooks/fix_encoding_pragma.py | 104 ++++++++++++++++++++++++
 setup.py                                |   1 +
 tests/fix_encoding_pragma_test.py       |  95 ++++++++++++++++++++++
 5 files changed, 207 insertions(+)
 create mode 100644 pre_commit_hooks/fix_encoding_pragma.py
 create mode 100644 tests/fix_encoding_pragma_test.py

diff --git a/README.md b/README.md
index 8148cee..9229f8f 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,7 @@ Add this to your `.pre-commit-config.yaml`
 - `double-quote-string-fixer` - This hook replaces double quoted strings
   with single quoted strings.
 - `end-of-file-fixer` - Makes sure files end in a newline and only a newline.
+- `fix-encoding-pragma` - Add # -*- coding: utf-8 -*- to the top of python files
 - `flake8` - Run flake8 on your python files.
 - `name-tests-test` - Assert that files in tests/ end in `_test.py`.
     - Use `args: ['--django']` to match `test*.py` instead.
diff --git a/hooks.yaml b/hooks.yaml
index 7817d1b..d4ef521 100644
--- a/hooks.yaml
+++ b/hooks.yaml
@@ -92,6 +92,12 @@
     entry: end-of-file-fixer
     language: python
     files: \.(c|cpp|html|erb|slim|haml|ejs|jade|js|coffee|json|rb|md|py|css|scss|less|sh|tmpl|txt|yaml|yml|pp)$
+-   id: fix-encoding-pragma
+    name: Fix python encoding pragma
+    language: python
+    entry: fix-encoding-pragma
+    description: 'Add # -*- coding: utf-8 -*- to the top of python files'
+    files: \.py$
 -   id: flake8
     name: Flake8
     description: This hook runs flake8.
diff --git a/pre_commit_hooks/fix_encoding_pragma.py b/pre_commit_hooks/fix_encoding_pragma.py
new file mode 100644
index 0000000..48fc9c7
--- /dev/null
+++ b/pre_commit_hooks/fix_encoding_pragma.py
@@ -0,0 +1,104 @@
+"""Ensure python files start with a utf-8 encoding pragma."""
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+import io
+
+expected_pragma = b'# -*- coding: utf-8 -*-\n'
+
+
+def has_coding(line):
+    """Return whether ``line`` (bytes) looks like an encoding declaration.
+
+    This is a loose heuristic: a comment line mentioning ``unicode``,
+    ``encoding``, ``coding:`` or ``coding=`` is treated as one.
+    """
+    if not line.strip():
+        return False
+    return (
+        line.lstrip()[0:1] == b'#' and (
+            b'unicode' in line or
+            b'encoding' in line or
+            b'coding:' in line or
+            b'coding=' in line
+        )
+    )
+
+
+def fix_encoding_pragma(f):
+    """Rewrite ``f`` in place so that it declares ``expected_pragma``.
+
+    ``f`` must be a seekable binary file object, open for reading and
+    writing and positioned at the start of the file.  The pragma belongs on
+    the first line, or on the second line when the first is a shebang; an
+    existing encoding declaration in that position is replaced.
+
+    Returns 0 when the file is empty or already correct, 1 when rewritten.
+    """
+    first_line = f.readline()
+    second_line = f.readline()
+    old = f.read()
+    f.seek(0)
+
+    # Ok case: the file is empty
+    if not (first_line + second_line + old).strip():
+        return 0
+
+    # Ok case: we specify pragma as the first line
+    if first_line == expected_pragma:
+        return 0
+
+    # OK case: we have a shebang as first line and pragma on second line
+    if first_line.startswith(b'#!') and second_line == expected_pragma:
+        return 0
+
+    # Otherwise we need to rewrite stuff!
+    if first_line.startswith(b'#!'):
+        # A shebang that is the unterminated last line of the file needs a
+        # newline, otherwise the pragma would be glued onto it.
+        if not first_line.endswith(b'\n'):
+            first_line += b'\n'
+        if has_coding(second_line):
+            new_contents = first_line + expected_pragma + old
+        else:
+            new_contents = first_line + expected_pragma + second_line + old
+    elif has_coding(first_line):
+        new_contents = expected_pragma + second_line + old
+    else:
+        new_contents = expected_pragma + first_line + second_line + old
+
+    f.write(new_contents)
+    # The new pragma may be shorter than the declaration it replaces; drop
+    # the stale bytes that would otherwise be left at the end of the file.
+    f.truncate()
+
+    return 1
+
+
+def main(argv=None):
+    """Fix each file named in ``argv``; return 1 if any was modified."""
+    parser = argparse.ArgumentParser(
+        description='Fixes the encoding pragma of python files',
+    )
+    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
+    args = parser.parse_args(argv)
+
+    retv = 0
+
+    for filename in args.filenames:
+        with io.open(filename, 'r+b') as f:
+            file_ret = fix_encoding_pragma(f)
+            retv |= file_ret
+            if file_ret:
+                print('Added `{0}` to {1}'.format(
+                    # Decode so python 3 does not print a b'...' repr
+                    expected_pragma.strip().decode('UTF-8'), filename,
+                ))
+
+    return retv
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())
diff --git a/setup.py b/setup.py
index 4fefeaa..7779089 100644
--- a/setup.py
+++ b/setup.py
@@ -50,6 +50,7 @@ setup(
             'detect-private-key = pre_commit_hooks.detect_private_key:detect_private_key',
             'double-quote-string-fixer = pre_commit_hooks.string_fixer:main',
             'end-of-file-fixer = pre_commit_hooks.end_of_file_fixer:end_of_file_fixer',
+            'fix-encoding-pragma = pre_commit_hooks.fix_encoding_pragma:main',
             'name-tests-test = pre_commit_hooks.tests_should_end_in_test:validate_files',
             'pretty-format-json = pre_commit_hooks.pretty_format_json:pretty_format_json',
             'requirements-txt-fixer = pre_commit_hooks.requirements_txt_fixer:fix_requirements_txt',
diff --git a/tests/fix_encoding_pragma_test.py b/tests/fix_encoding_pragma_test.py
new file mode 100644
index 0000000..e000a33
--- /dev/null
+++ b/tests/fix_encoding_pragma_test.py
@@ -0,0 +1,95 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import io
+
+import pytest
+
+from pre_commit_hooks.fix_encoding_pragma import fix_encoding_pragma
+from pre_commit_hooks.fix_encoding_pragma import main
+
+
+def test_integration_inserting_pragma(tmpdir):
+    file_path = tmpdir.join('foo.py').strpath
+
+    with open(file_path, 'wb') as file_obj:
+        file_obj.write(b'import httplib\n')
+
+    assert main([file_path]) == 1
+
+    with open(file_path, 'rb') as file_obj:
+        assert file_obj.read() == (
+            b'# -*- coding: utf-8 -*-\n'
+            b'import httplib\n'
+        )
+
+
+def test_integration_ok(tmpdir):
+    file_path = tmpdir.join('foo.py').strpath
+    with open(file_path, 'wb') as file_obj:
+        file_obj.write(b'# -*- coding: utf-8 -*-\nx = 1\n')
+    assert main([file_path]) == 0
+
+
+@pytest.mark.parametrize(
+    'input_str',
+    (
+        b'',
+        b'# -*- coding: utf-8 -*-\n',
+        (
+            b'#!/usr/bin/env python\n'
+            b'# -*- coding: utf-8 -*-\n'
+            b'foo = "bar"\n'
+        ),
+    )
+)
+def test_ok_inputs(input_str):
+    bytesio = io.BytesIO(input_str)
+    assert fix_encoding_pragma(bytesio) == 0
+    bytesio.seek(0)
+    assert bytesio.read() == input_str
+
+
+@pytest.mark.parametrize(
+    ('input_str', 'output'),
+    (
+        (
+            b'import httplib\n',
+            b'# -*- coding: utf-8 -*-\n'
+            b'import httplib\n',
+        ),
+        (
+            b'#!/usr/bin/env python\n',
+            b'#!/usr/bin/env python\n'
+            b'# -*- coding: utf-8 -*-\n'
+        ),
+        (
+            b'#coding=utf-8\n',
+            b'# -*- coding: utf-8 -*-\n'
+        ),
+        (
+            b'#!/usr/bin/env python\n'
+            b'#coding=utf8\n',
+            b'#!/usr/bin/env python\n'
+            b'# -*- coding: utf-8 -*-\n',
+        ),
+        # A longer declaration must not leave stale bytes behind
+        (
+            b'# -*- encoding: utf-8 -*-\n'
+            b'x = 1\n',
+            b'# -*- coding: utf-8 -*-\n'
+            b'x = 1\n',
+        ),
+        # A shebang with no trailing newline
+        (
+            b'#!/usr/bin/env python',
+            b'#!/usr/bin/env python\n'
+            b'# -*- coding: utf-8 -*-\n',
+        ),
+    )
+)
+def test_not_ok_inputs(input_str, output):
+    bytesio = io.BytesIO(input_str)
+    assert fix_encoding_pragma(bytesio) == 1
+    bytesio.seek(0)
+    assert bytesio.read() == output