Add a --remove option to fix-encoding-pragma

2026-04-09 04:54:16 +00:00 · 2016-04-27 11:18:14 -07:00 · 2016-04-27 11:18:14 -07:00 · 03bf17f2b0
commit 03bf17f2b0
parent 17478a0a50
3 changed files with 120 additions and 48 deletions
--- a/README.md
+++ b/README.md
@@ -45,7 +45,8 @@ Add this to your `.pre-commit-config.yaml`
 - `double-quote-string-fixer` - This hook replaces double quoted strings
  with single quoted strings.
 - `end-of-file-fixer` - Makes sure files end in a newline and only a newline.
- `fix-encoding-pragma` - Add `# -*- coding: utf-8 -*-` to the top of python files
+- `fix-encoding-pragma` - Add `# -*- coding: utf-8 -*-` to the top of python files.
    - To remove the coding pragma pass `--remove` (useful in a python3-only codebase)
 - `flake8` - Run flake8 on your python files.
 - `name-tests-test` - Assert that files in tests/ end in `_test.py`.
    - Use `args: ['--django']` to match `test*.py` instead.
--- a/pre_commit_hooks/fix_encoding_pragma.py
+++ b/pre_commit_hooks/fix_encoding_pragma.py
@@ -3,7 +3,7 @@ from __future__ import print_function
 from __future__ import unicode_literals
 import argparse
-import io
+import collections
 expected_pragma = b'# -*- coding: utf-8 -*-\n'
@@ -21,34 +21,72 @@ def has_coding(line):
    )
-def fix_encoding_pragma(f):
+class ExpectedContents(collections.namedtuple(
-    first_line = f.readline()
+        'ExpectedContents', ('shebang', 'rest', 'pragma_status'),
-    second_line = f.readline()
+)):
-    old = f.read()
+    """
-    f.seek(0)
+    pragma_status:
    - True: has exactly the coding pragma expected
    - False: missing coding pragma entirely
    - None: has a coding pragma, but it does not match
    """
    __slots__ = ()
-    # Ok case: the file is empty
+    @property
-    if not (first_line + second_line + old).strip():
+    def has_any_pragma(self):
-        return 0
+        return self.pragma_status is not False
-    # Ok case: we specify pragma as the first line
+    def is_expected_pragma(self, remove):
-    if first_line == expected_pragma:
+        expected_pragma_status = not remove
-        return 0
+        return self.pragma_status is expected_pragma_status
    # OK case: we have a shebang as first line and pragma on second line
    if first_line.startswith(b'#!') and second_line == expected_pragma:
        return 0
-    # Otherwise we need to rewrite stuff!
+def _get_expected_contents(first_line, second_line, rest):
    if first_line.startswith(b'#!'):
-        if has_coding(second_line):
+        shebang = first_line
-            f.write(first_line + expected_pragma + old)
+        potential_coding = second_line
        else:
            f.write(first_line + expected_pragma + second_line + old)
    elif has_coding(first_line):
        f.write(expected_pragma + second_line + old)
    else:
-        f.write(expected_pragma + first_line + second_line + old)
+        shebang = b''
        potential_coding = first_line
        rest = second_line + rest
    if potential_coding == expected_pragma:
        pragma_status = True
    elif has_coding(potential_coding):
        pragma_status = None
    else:
        pragma_status = False
        rest = potential_coding + rest
    return ExpectedContents(
        shebang=shebang, rest=rest, pragma_status=pragma_status,
    )
 def fix_encoding_pragma(f, remove=False):
    """Rewrite ``f`` in place so its encoding pragma matches the requested mode.

    ``f`` is a binary file object that is read from its current position and
    then rewritten via ``seek(0)`` / ``truncate()`` / ``write()``.

    When ``remove`` is false the expected pragma line is ensured (after the
    shebang, if there is one); when ``remove`` is true any coding pragma is
    dropped.  A file whose remaining content is blank is emptied entirely if
    it held a shebang or any coding pragma.

    Returns 1 if the file was rewritten and 0 if it was left untouched.
    """
    first_line = f.readline()
    second_line = f.readline()
    contents = _get_expected_contents(first_line, second_line, f.read())

    # Normal case: there is real content after the shebang / pragma lines.
    if contents.rest.strip():
        if contents.is_expected_pragma(remove):
            # Already in the desired state; nothing to rewrite.
            return 0

        f.seek(0)
        f.truncate()
        f.write(contents.shebang)
        if not remove:
            f.write(expected_pragma)
        f.write(contents.rest)
        return 1

    # Effectively empty file: nothing but whitespace follows the header lines.
    if not (contents.has_any_pragma or contents.shebang):
        return 0

    # Only a shebang and/or a coding pragma was present: empty the file.
    f.seek(0)
    f.truncate()
    f.write(b'')
    return 1
@@ -56,18 +94,25 @@ def fix_encoding_pragma(f):
 def main(argv=None):
    parser = argparse.ArgumentParser('Fixes the encoding pragma of python files')
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    parser.add_argument(
        '--remove', action='store_true',
        help='Remove the encoding pragma (Useful in a python3-only codebase)',
    )
    args = parser.parse_args(argv)
    retv = 0
    if args.remove:
        fmt = 'Removed encoding pragma from {filename}'
    else:
        fmt = 'Added `{pragma}` to {filename}'
    for filename in args.filenames:
-        with io.open(filename, 'r+b') as f:
+        with open(filename, 'r+b') as f:
-            file_ret = fix_encoding_pragma(f)
+            file_ret = fix_encoding_pragma(f, remove=args.remove)
            retv |= file_ret
            if file_ret:
-                print('Added `{0}` to {1}'.format(
+                print(fmt.format(pragma=expected_pragma, filename=filename))
                    expected_pragma.strip(), filename,
                ))
    return retv
--- a/tests/fix_encoding_pragma_test.py
+++ b/tests/fix_encoding_pragma_test.py
@@ -10,32 +10,46 @@ from pre_commit_hooks.fix_encoding_pragma import main
 def test_integration_inserting_pragma(tmpdir):
-    file_path = tmpdir.join('foo.py').strpath
+    path = tmpdir.join('foo.py')
    path.write_binary(b'import httplib\n')
-    with open(file_path, 'wb') as file_obj:
+    assert main((path.strpath,)) == 1
        file_obj.write(b'import httplib\n')
-    assert main([file_path]) == 1
+    assert path.read_binary() == (
-
+        b'# -*- coding: utf-8 -*-\n'
-    with open(file_path, 'rb') as file_obj:
+        b'import httplib\n'
-        assert file_obj.read() == (
+    )
            b'# -*- coding: utf-8 -*-\n'
            b'import httplib\n'
        )
 def test_integration_ok(tmpdir):
-    file_path = tmpdir.join('foo.py').strpath
+    path = tmpdir.join('foo.py')
-    with open(file_path, 'wb') as file_obj:
+    path.write_binary(b'# -*- coding: utf-8 -*-\nx = 1\n')
-        file_obj.write(b'# -*- coding: utf-8 -*-\nx = 1\n')
+    assert main((path.strpath,)) == 0
-    assert main([file_path]) == 0
+
 def test_integration_remove(tmpdir):
    """With ``--remove``, an existing pragma line is stripped and 1 is returned."""
    target = tmpdir.join('foo.py')
    target.write_binary(b'# -*- coding: utf-8 -*-\nx = 1\n')

    retv = main((target.strpath, '--remove'))

    assert retv == 1
    # Only the pragma line is dropped; the code line is kept as-is.
    assert target.read_binary() == b'x = 1\n'
 def test_integration_remove_ok(tmpdir):
    """With ``--remove``, a file that has no pragma yields a return value of 0."""
    target = tmpdir.join('foo.py')
    target.write_binary(b'x = 1\n')

    retv = main((target.strpath, '--remove'))

    assert retv == 0
@pytest.mark.parametrize(
    'input_str',
    (
        b'',
-        b'# -*- coding: utf-8 -*-\n',
+        (
            b'# -*- coding: utf-8 -*-\n'
            b'x = 1\n'
        ),
        (
            b'#!/usr/bin/env python\n'
            b'# -*- coding: utf-8 -*-\n'
@@ -59,20 +73,32 @@ def test_ok_inputs(input_str):
            b'import httplib\n',
        ),
        (
-            b'#!/usr/bin/env python\n',
+            b'#!/usr/bin/env python\n'
            b'x = 1\n',
            b'#!/usr/bin/env python\n'
            b'# -*- coding: utf-8 -*-\n'
            b'x = 1\n',
        ),
        (
-            b'#coding=utf-8\n',
+            b'#coding=utf-8\n'
            b'x = 1\n',
            b'# -*- coding: utf-8 -*-\n'
            b'x = 1\n',
        ),
        (
            b'#!/usr/bin/env python\n'
-            b'#coding=utf8\n',
+            b'#coding=utf8\n'
            b'x = 1\n',
            b'#!/usr/bin/env python\n'
-            b'# -*- coding: utf-8 -*-\n',
+            b'# -*- coding: utf-8 -*-\n'
            b'x = 1\n',
        ),
        # These should each get truncated
        (b'#coding: utf-8\n', b''),
        (b'# -*- coding: utf-8 -*-\n', b''),
        (b'#!/usr/bin/env python\n', b''),
        (b'#!/usr/bin/env python\n#coding: utf8\n', b''),
        (b'#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n', b''),
    )
 )
 def test_not_ok_inputs(input_str, output):