Merge branch 'master' of https://github.com/pre-commit/pre-commit-hooks into bobby-pull-from-master

2026-07-02 07:29:33 +00:00 · 2018-06-13 13:00:20 -07:00 · 2018-06-13 13:00:20 -07:00 · 110790d320
commit 110790d320
parent 61d0735ca8 a193eab99e
81 changed files with 2170 additions and 677 deletions
--- a/pre_commit_hooks/autopep8_wrapper.py
+++ b/pre_commit_hooks/autopep8_wrapper.py
@ -14,12 +14,12 @@ def main(argv=None):

    retv = 0
    for filename in args.files:
-        original_contents = io.open(filename).read()
+        original_contents = io.open(filename, encoding='UTF-8').read()
        new_contents = autopep8.fix_code(original_contents, args)
        if original_contents != new_contents:
-            print('Fixing {0}'.format(filename))
+            print('Fixing {}'.format(filename))
            retv = 1
-            with io.open(filename, 'w') as output_file:
+            with io.open(filename, 'w', encoding='UTF-8') as output_file:
                output_file.write(new_contents)

    return retv
--- a/pre_commit_hooks/check_added_large_files.py
+++ b/pre_commit_hooks/check_added_large_files.py
@ -4,6 +4,7 @@ from __future__ import print_function
 from __future__ import unicode_literals

 import argparse
+import json
 import math
 import os

@ -13,23 +14,13 @@ from pre_commit_hooks.util import cmd_output


 def lfs_files():
-    try:  # pragma: no cover (no git-lfs)
-        lines = cmd_output('git', 'lfs', 'status', '--porcelain').splitlines()
+    try:
+        # Introduced in git-lfs 2.2.0, first working in 2.2.1
+        lfs_ret = cmd_output('git', 'lfs', 'status', '--json')
    except CalledProcessError:  # pragma: no cover (with git-lfs)
-        lines = []
+        lfs_ret = '{"files":{}}'

-    modes_and_fileparts = [
-        (line[:3].strip(), line[3:].rpartition(' ')[0]) for line in lines
-    ]
-
-    def to_file_part(mode, filepart):  # pragma: no cover (no git-lfs)
-        assert mode in ('A', 'R')
-        return filepart if mode == 'A' else filepart.split(' -> ')[1]
-
-    return set(
-        to_file_part(mode, filepart) for mode, filepart in modes_and_fileparts
-        if mode in ('A', 'R')
-    )
+    return set(json.loads(lfs_ret)['files'])


 def find_large_added_files(filenames, maxkb):
@ -41,7 +32,7 @@ def find_large_added_files(filenames, maxkb):
    for filename in filenames:
        kb = int(math.ceil(os.stat(filename).st_size / 1024))
        if kb > maxkb:
-            print('{0} ({1} KB) exceeds {2} KB.'.format(filename, kb, maxkb))
+            print('{} ({} KB) exceeds {} KB.'.format(filename, kb, maxkb))
            retv = 1

    return retv
@ -51,7 +42,7 @@ def main(argv=None):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'filenames', nargs='*',
-        help='Filenames pre-commit believes are changed.'
+        help='Filenames pre-commit believes are changed.',
    )
    parser.add_argument(
        '--maxkb', type=int, default=500,
--- a/pre_commit_hooks/check_ast.py
+++ b/pre_commit_hooks/check_ast.py
@ -4,7 +4,7 @@ from __future__ import unicode_literals

 import argparse
 import ast
-import os.path
+import platform
 import sys
 import traceback

@ -14,19 +14,19 @@ def check_ast(argv=None):
    parser.add_argument('filenames', nargs='*')
    args = parser.parse_args(argv)

-    _, interpreter = os.path.split(sys.executable)
-
    retval = 0
    for filename in args.filenames:

        try:
            ast.parse(open(filename, 'rb').read(), filename=filename)
        except SyntaxError:
-            print('{0}: failed parsing with {1}:'.format(
-                filename, interpreter,
+            print('{}: failed parsing with {} {}:'.format(
+                filename,
+                platform.python_implementation(),
+                sys.version.partition(' ')[0],
            ))
-            print('\n{0}'.format(
-                '    ' + traceback.format_exc().replace('\n', '\n    ')
+            print('\n{}'.format(
+                '    ' + traceback.format_exc().replace('\n', '\n    '),
            ))
            retval = 1
    return retval
--- a/pre_commit_hooks/check_builtin_literals.py
+++ b/pre_commit_hooks/check_builtin_literals.py
@ -0,0 +1,95 @@
+from __future__ import unicode_literals
+
+import argparse
+import ast
+import collections
+import sys
+
+
+# Maps each builtin constructor name to the literal that should replace an
+# argument-less call to it (e.g. ``dict()`` -> ``{}``).
+BUILTIN_TYPES = {
+    'complex': '0j',
+    'dict': '{}',
+    'float': '0.0',
+    'int': '0',
+    'list': '[]',
+    'str': "''",
+    'tuple': '()',
+}
+
+
+# One flagged call: the constructor name plus the AST node's line and column.
+BuiltinTypeCall = collections.namedtuple('BuiltinTypeCall', ['name', 'line', 'column'])
+
+
+class BuiltinTypeVisitor(ast.NodeVisitor):
+    """Collect argument-less calls to builtin type constructors.
+
+    Every flagged call is appended to ``builtin_type_calls`` as a
+    ``BuiltinTypeCall``.  Names listed in ``ignore`` are never flagged.  When
+    ``allow_dict_kwargs`` is true, ``dict(...)`` calls that pass keyword
+    arguments are accepted.
+    """
+
+    def __init__(self, ignore=None, allow_dict_kwargs=True):
+        self.builtin_type_calls = []
+        self.ignore = set(ignore) if ignore else set()
+        self.allow_dict_kwargs = allow_dict_kwargs
+
+    def _check_dict_call(self, node):
+        """Return a truthy value when a keyword-style ``dict`` call is allowed."""
+        return self.allow_dict_kwargs and (getattr(node, 'kwargs', None) or getattr(node, 'keywords', None))
+
+    def _is_flagged(self, node):
+        """Return True when `node` is a bare call to a checked builtin type."""
+        func = node.func
+        if not isinstance(func, ast.Name):
+            # Ignore functions that are object attributes (`foo.bar()`).
+            # Assume that if the user calls `builtins.list()`, they know what
+            # they're doing.
+            return False
+        if func.id in self.ignore or func.id not in BUILTIN_TYPES:
+            return False
+        if func.id == 'dict' and self._check_dict_call(node):
+            return False
+        # Any positional argument means the call is a conversion, not a
+        # spelled-out empty literal.
+        return not node.args
+
+    def visit_Call(self, node):
+        if self._is_flagged(node):
+            self.builtin_type_calls.append(
+                BuiltinTypeCall(node.func.id, node.lineno, node.col_offset),
+            )
+        # Keep descending so calls nested inside this call's function
+        # expression or arguments (e.g. `foo(dict())`) are inspected too.
+        self.generic_visit(node)
+
+
+def check_file_for_builtin_type_constructors(filename, ignore=None, allow_dict_kwargs=True):
+    """Return the builtin constructor calls found in the file `filename`.
+
+    `ignore` and `allow_dict_kwargs` are forwarded to `BuiltinTypeVisitor`.
+    The result is the visitor's list of `BuiltinTypeCall` records.  A
+    `SyntaxError` raised by `ast.parse` is not caught here.
+    """
+    # Read the source as bytes and close the handle as soon as it is read,
+    # rather than leaving the open file to the garbage collector.
+    with open(filename, 'rb') as f:
+        tree = ast.parse(f.read(), filename=filename)
+    visitor = BuiltinTypeVisitor(ignore=ignore, allow_dict_kwargs=allow_dict_kwargs)
+    visitor.visit(tree)
+    return visitor.builtin_type_calls
+
+
+def parse_args(argv):
+    """Parse the hook's command line and return the argparse namespace.
+
+    The namespace carries `filenames` (a list), `ignore` (a set built from a
+    comma-separated value) and `allow_dict_kwargs` (a bool, default True).
+    """
+    def parse_ignore(value):
+        return set(value.split(','))
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='*')
+    parser.add_argument('--ignore', type=parse_ignore, default=set())
+
+    # Both flags write to the same destination and cannot be combined.
+    kwargs_group = parser.add_mutually_exclusive_group(required=False)
+    kwargs_group.add_argument('--allow-dict-kwargs', action='store_true')
+    kwargs_group.add_argument(
+        '--no-allow-dict-kwargs',
+        dest='allow_dict_kwargs',
+        action='store_false',
+    )
+    kwargs_group.set_defaults(allow_dict_kwargs=True)
+
+    namespace = parser.parse_args(argv)
+    return namespace
+
+
+def main(argv=None):
+    """Report builtin constructor calls in the given files.
+
+    Prints one line per flagged call together with its suggested literal.
+    Returns 1 when any file contained a flagged call, otherwise 0.
+    """
+    args = parse_args(argv)
+    message = '{filename}:{call.line}:{call.column} - Replace {call.name}() with {replacement}'
+    found_any = False
+    for filename in args.filenames:
+        calls = check_file_for_builtin_type_constructors(
+            filename,
+            ignore=args.ignore,
+            allow_dict_kwargs=args.allow_dict_kwargs,
+        )
+        for call in calls:
+            found_any = True
+            print(
+                message.format(
+                    filename=filename,
+                    call=call,
+                    replacement=BUILTIN_TYPES[call.name],
+                ),
+            )
+    return 1 if found_any else 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/pre_commit_hooks/check_byte_order_marker.py
+++ b/pre_commit_hooks/check_byte_order_marker.py
@ -16,7 +16,7 @@ def main(argv=None):
        with open(filename, 'rb') as f:
            if f.read(3) == b'\xef\xbb\xbf':
                retv = 1
-                print('{0}: Has a byte-order marker'.format(filename))
+                print('{}: Has a byte-order marker'.format(filename))

    return retv

--- a/pre_commit_hooks/check_case_conflict.py
+++ b/pre_commit_hooks/check_case_conflict.py
@ -9,7 +9,7 @@ from pre_commit_hooks.util import cmd_output


 def lower_set(iterable):
-    return set(x.lower() for x in iterable)
+    return {x.lower() for x in iterable}


 def find_conflicting_filenames(filenames):
@ -35,7 +35,7 @@ def find_conflicting_filenames(filenames):
            if x.lower() in conflicts
        ]
        for filename in sorted(conflicting_files):
-            print('Case-insensitivity conflict found: {0}'.format(filename))
+            print('Case-insensitivity conflict found: {}'.format(filename))
        retv = 1

    return retv
@ -45,7 +45,7 @@ def main(argv=None):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'filenames', nargs='*',
-        help='Filenames pre-commit believes are changed.'
+        help='Filenames pre-commit believes are changed.',
    )

    args = parser.parse_args(argv)
--- a/pre_commit_hooks/check_docstring_first.py
+++ b/pre_commit_hooks/check_docstring_first.py
@ -1,4 +1,5 @@
 from __future__ import absolute_import
+from __future__ import print_function
 from __future__ import unicode_literals

 import argparse
@ -27,18 +28,18 @@ def check_docstring_first(src, filename='<unknown>'):
        if tok_type == tokenize.STRING and scol == 0:
            if found_docstring_line is not None:
                print(
-                    '{0}:{1} Multiple module docstrings '
-                    '(first docstring on line {2}).'.format(
+                    '{}:{} Multiple module docstrings '
+                    '(first docstring on line {}).'.format(
                        filename, sline, found_docstring_line,
-                    )
+                    ),
                )
                return 1
            elif found_code_line is not None:
                print(
-                    '{0}:{1} Module docstring appears after code '
-                    '(code seen on line {2}).'.format(
+                    '{}:{} Module docstring appears after code '
+                    '(code seen on line {}).'.format(
                        filename, sline, found_code_line,
-                    )
+                    ),
                )
                return 1
            else:
@ -57,7 +58,7 @@ def main(argv=None):
    retv = 0

    for filename in args.filenames:
-        contents = io.open(filename).read()
+        contents = io.open(filename, encoding='UTF-8').read()
        retv |= check_docstring_first(contents, filename=filename)

    return retv
--- a/pre_commit_hooks/check_executables_have_shebangs.py
+++ b/pre_commit_hooks/check_executables_have_shebangs.py
@ -0,0 +1,40 @@
+"""Check that executable text files have a shebang."""
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+import pipes
+import sys
+
+
+def check_has_shebang(path):
+    """Return 0 if the file at `path` starts with ``#!``, otherwise 1.
+
+    When the shebang is missing, a hint explaining both possible remedies is
+    printed to stderr.
+    """
+    with open(path, 'rb') as f:
+        has_shebang = f.read(2) == b'#!'
+
+    if has_shebang:
+        return 0
+
+    message = (
+        '{path}: marked executable but has no (or invalid) shebang!\n'
+        "  If it isn't supposed to be executable, try: chmod -x {quoted}\n"
+        '  If it is supposed to be executable, double-check its shebang.'
+    )
+    print(message.format(path=path, quoted=pipes.quote(path)), file=sys.stderr)
+    return 1
+
+
+def main(argv=None):
+    """Check every given file for a shebang; return 1 if any lacks one."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('filenames', nargs='*')
+    args = parser.parse_args(argv)
+
+    # Check every file (no short-circuit) so each offender gets reported.
+    results = [check_has_shebang(filename) for filename in args.filenames]
+    return 1 if any(results) else 0
--- a/pre_commit_hooks/check_json.py
+++ b/pre_commit_hooks/check_json.py
@ -1,10 +1,10 @@
 from __future__ import print_function

 import argparse
+import io
+import json
 import sys

-import simplejson
-

 def check_json(argv=None):
    parser = argparse.ArgumentParser()
@ -14,9 +14,9 @@ def check_json(argv=None):
    retval = 0
    for filename in args.filenames:
        try:
-            simplejson.load(open(filename))
-        except (simplejson.JSONDecodeError, UnicodeDecodeError) as exc:
-            print('{0}: Failed to json decode ({1})'.format(filename, exc))
+            json.load(io.open(filename, encoding='UTF-8'))
+        except (ValueError, UnicodeDecodeError) as exc:
+            print('{}: Failed to json decode ({})'.format(filename, exc))
            retval = 1
    return retval

--- a/pre_commit_hooks/check_merge_conflict.py
+++ b/pre_commit_hooks/check_merge_conflict.py
@ -7,7 +7,7 @@ CONFLICT_PATTERNS = [
    b'<<<<<<< ',
    b'======= ',
    b'=======\n',
-    b'>>>>>>> '
+    b'>>>>>>> ',
 ]
 WARNING_MSG = 'Merge conflict string "{0}" found in {1}:{2}'

@ -15,7 +15,11 @@ WARNING_MSG = 'Merge conflict string "{0}" found in {1}:{2}'
 def is_in_merge():
    return (
        os.path.exists(os.path.join('.git', 'MERGE_MSG')) and
-        os.path.exists(os.path.join('.git', 'MERGE_HEAD'))
+        (
+            os.path.exists(os.path.join('.git', 'MERGE_HEAD')) or
+            os.path.exists(os.path.join('.git', 'rebase-apply')) or
+            os.path.exists(os.path.join('.git', 'rebase-merge'))
+        )
    )


--- a/pre_commit_hooks/check_symlinks.py
+++ b/pre_commit_hooks/check_symlinks.py
@ -19,7 +19,7 @@ def check_symlinks(argv=None):
                os.path.islink(filename) and
                not os.path.exists(filename)
        ):  # pragma: no cover (symlink support required)
-            print('{0}: Broken symlink'.format(filename))
+            print('{}: Broken symlink'.format(filename))
            retv = 1

    return retv
--- a/pre_commit_hooks/check_vcs_permalinks.py
+++ b/pre_commit_hooks/check_vcs_permalinks.py
@ -0,0 +1,44 @@
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+import re
+import sys
+
+
+# Matches a github blob URL that points at the `master` branch and carries a
+# `#L<number>` line anchor.  The pattern is a raw bytes literal so that `\d`
+# reaches the regex engine unchanged instead of being an invalid string
+# escape sequence.
+GITHUB_NON_PERMALINK = re.compile(
+    br'https://github.com/[^/ ]+/[^/ ]+/blob/master/[^# ]+#L\d+',
+)
+
+
+def _check_filename(filename):
+    """Report every line of `filename` that matches `GITHUB_NON_PERMALINK`.
+
+    Each matching line is echoed to stdout prefixed with ``filename:lineno:``.
+    Returns 1 if at least one line matched, otherwise 0.
+    """
+    retv = 0
+    with open(filename, 'rb') as f:
+        # Line numbers are reported 1-based.
+        for i, line in enumerate(f, 1):
+            if GITHUB_NON_PERMALINK.search(line):
+                sys.stdout.write('{}:{}:'.format(filename, i))
+                # Flush the text prefix before the raw bytes are written,
+                # presumably so the two pieces cannot appear out of order.
+                sys.stdout.flush()
+                # `line` is bytes: write it to stdout's binary `buffer` when
+                # there is one, otherwise to stdout itself.
+                getattr(sys.stdout, 'buffer', sys.stdout).write(line)
+                retv = 1
+    return retv
+
+
+def main(argv=None):
+    """Scan the given files for non-permanent github links.
+
+    Returns 1 (after printing a short explanation) when any file contains
+    such a link, otherwise 0.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='*')
+    args = parser.parse_args(argv)
+
+    # Scan every file so all offending lines are listed before the summary.
+    results = [_check_filename(filename) for filename in args.filenames]
+    retv = 1 if any(results) else 0
+
+    if retv:
+        print()
+        print('Non-permanent github link detected.')
+        print('On any page on github press [y] to load a permalink.')
+    return retv
+
+
+if __name__ == '__main__':
+    exit(main())
--- a/pre_commit_hooks/check_xml.py
+++ b/pre_commit_hooks/check_xml.py
@ -19,7 +19,7 @@ def check_xml(argv=None):
            with io.open(filename, 'rb') as xml_file:
                xml.sax.parse(xml_file, xml.sax.ContentHandler())
        except xml.sax.SAXException as exc:
-            print('{0}: Failed to xml parse ({1})'.format(filename, exc))
+            print('{}: Failed to xml parse ({})'.format(filename, exc))
            retval = 1
    return retval

--- a/pre_commit_hooks/check_yaml.py
+++ b/pre_commit_hooks/check_yaml.py
@ -1,6 +1,7 @@
 from __future__ import print_function

 import argparse
+import collections
 import sys

 import yaml
@ -11,21 +12,57 @@ except ImportError:  # pragma: no cover (no libyaml-dev / pypy)
    Loader = yaml.SafeLoader


+def _exhaust(gen):
+    """Consume the iterable `gen` completely, discarding every item."""
+    # A zero-length deque pulls every item and stores none of them.
+    collections.deque(gen, maxlen=0)
+
+
+def _parse_unsafe(*args, **kwargs):
+    """Run `yaml.parse` with the given arguments and consume all it yields."""
+    _exhaust(yaml.parse(*args, **kwargs))
+
+
+def _load_all(*args, **kwargs):
+    """Run `yaml.load_all` with the given arguments and consume all it yields."""
+    _exhaust(yaml.load_all(*args, **kwargs))
+
+
+# Lookup key for `LOAD_FNS`: the values of the --multi and --unsafe flags.
+Key = collections.namedtuple('Key', ('multi', 'unsafe'))
+# Callable used to check a file for each flag combination.  Both `unsafe`
+# entries share `_parse_unsafe`, so `multi` has no effect once `unsafe` is set.
+LOAD_FNS = {
+    Key(multi=False, unsafe=False): yaml.load,
+    Key(multi=False, unsafe=True): _parse_unsafe,
+    Key(multi=True, unsafe=False): _load_all,
+    Key(multi=True, unsafe=True): _parse_unsafe,
+}
+
+
 def check_yaml(argv=None):
    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-m', '--multi', '--allow-multiple-documents', action='store_true',
+    )
+    parser.add_argument(
+        '--unsafe', action='store_true',
+        help=(
+            'Instead of loading the files, simply parse them for syntax.  '
+            'A syntax-only check enables extensions and unsafe constructs '
+            'which would otherwise be forbidden.  Using this option removes '
+            'all guarantees of portability to other yaml implementations.  '
+            'Implies --allow-multiple-documents'
+        ),
+    )
    parser.add_argument('--ignore-tags', type=lambda s: s.split(','), default=[],
                        help='Custom tags to ignore.')
    parser.add_argument('filenames', nargs='*', help='Yaml filenames to check.')
    args = parser.parse_args(argv)

-    # Ignore custom tags by returning None
    for tag in args.ignore_tags:
        Loader.add_constructor(tag, lambda *a, **k: None)

+    load_fn = LOAD_FNS[Key(multi=args.multi, unsafe=args.unsafe)]
+
    retval = 0
    for filename in args.filenames:
        try:
-            yaml.load(open(filename), Loader=Loader)
+            load_fn(open(filename), Loader=Loader)
        except yaml.YAMLError as exc:
            print(exc)
            retval = 1
--- a/pre_commit_hooks/debug_statement_hook.py
+++ b/pre_commit_hooks/debug_statement_hook.py
@ -7,69 +7,66 @@ import collections
 import traceback


-DEBUG_STATEMENTS = set(['pdb', 'ipdb', 'pudb', 'q', 'rdb'])
+DEBUG_STATEMENTS = {'pdb', 'ipdb', 'pudb', 'q', 'rdb'}
+Debug = collections.namedtuple('Debug', ('line', 'col', 'name', 'reason'))


-DebugStatement = collections.namedtuple(
-    'DebugStatement', ['name', 'line', 'col'],
-)
-
-
-class ImportStatementParser(ast.NodeVisitor):
+class DebugStatementParser(ast.NodeVisitor):
    def __init__(self):
-        self.debug_import_statements = []
+        self.breakpoints = []

    def visit_Import(self, node):
-        for node_name in node.names:
-            if node_name.name in DEBUG_STATEMENTS:
-                self.debug_import_statements.append(
-                    DebugStatement(node_name.name, node.lineno, node.col_offset),
-                )
+        for name in node.names:
+            if name.name in DEBUG_STATEMENTS:
+                st = Debug(node.lineno, node.col_offset, name.name, 'imported')
+                self.breakpoints.append(st)

    def visit_ImportFrom(self, node):
        if node.module in DEBUG_STATEMENTS:
-            self.debug_import_statements.append(
-                DebugStatement(node.module, node.lineno, node.col_offset)
-            )
+            st = Debug(node.lineno, node.col_offset, node.module, 'imported')
+            self.breakpoints.append(st)
+
+    def visit_Call(self, node):
+        """python3.7+ breakpoint()"""
+        if isinstance(node.func, ast.Name) and node.func.id == 'breakpoint':
+            st = Debug(node.lineno, node.col_offset, node.func.id, 'called')
+            self.breakpoints.append(st)
+        self.generic_visit(node)


-def check_file_for_debug_statements(filename):
+def check_file(filename):
    try:
-        ast_obj = ast.parse(open(filename).read(), filename=filename)
+        ast_obj = ast.parse(open(filename, 'rb').read(), filename=filename)
    except SyntaxError:
-        print('{0} - Could not parse ast'.format(filename))
+        print('{} - Could not parse ast'.format(filename))
        print()
        print('\t' + traceback.format_exc().replace('\n', '\n\t'))
        print()
        return 1
-    visitor = ImportStatementParser()
+
+    visitor = DebugStatementParser()
    visitor.visit(ast_obj)
-    if visitor.debug_import_statements:
-        for debug_statement in visitor.debug_import_statements:
-            print(
-                '{0}:{1}:{2} - {3} imported'.format(
-                    filename,
-                    debug_statement.line,
-                    debug_statement.col,
-                    debug_statement.name,
-                )
-            )
-        return 1
-    else:
-        return 0
+
+    for bp in visitor.breakpoints:
+        print(
+            '{}:{}:{} - {} {}'.format(
+                filename, bp.line, bp.col, bp.name, bp.reason,
+            ),
+        )
+
+    return int(bool(visitor.breakpoints))


-def debug_statement_hook(argv=None):
+def main(argv=None):
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to run')
    args = parser.parse_args(argv)

    retv = 0
    for filename in args.filenames:
-        retv |= check_file_for_debug_statements(filename)
-
+        retv |= check_file(filename)
    return retv


 if __name__ == '__main__':
-    exit(debug_statement_hook())
+    exit(main())
--- a/pre_commit_hooks/detect_aws_credentials.py
+++ b/pre_commit_hooks/detect_aws_credentials.py
@ -12,7 +12,7 @@ def get_aws_credential_files_from_env():
    files = set()
    for env_var in (
        'AWS_CONFIG_FILE', 'AWS_CREDENTIAL_FILE', 'AWS_SHARED_CREDENTIALS_FILE',
-        'BOTO_CONFIG'
+        'BOTO_CONFIG',
    ):
        if env_var in os.environ:
            files.add(os.environ[env_var])
@ -23,7 +23,7 @@ def get_aws_secrets_from_env():
    """Extract AWS secrets from environment variables."""
    keys = set()
    for env_var in (
-        'AWS_SECRET_ACCESS_KEY', 'AWS_SECURITY_TOKEN', 'AWS_SESSION_TOKEN'
+        'AWS_SECRET_ACCESS_KEY', 'AWS_SECURITY_TOKEN', 'AWS_SESSION_TOKEN',
    ):
        if env_var in os.environ:
            keys.add(os.environ[env_var])
@ -50,10 +50,12 @@ def get_aws_secrets_from_file(credentials_file):
    for section in parser.sections():
        for var in (
            'aws_secret_access_key', 'aws_security_token',
-            'aws_session_token'
+            'aws_session_token',
        ):
            try:
-                keys.add(parser.get(section, var))
+                key = parser.get(section, var).strip()
+                if key:
+                    keys.add(key)
            except configparser.NoOptionError:
                pass
    return keys
@ -93,13 +95,13 @@ def main(argv=None):
        help=(
            'Location of additional AWS credential files from which to get '
            'secret keys from'
-        )
+        ),
    )
    parser.add_argument(
        '--allow-missing-credentials',
        dest='allow_missing_credentials',
        action='store_true',
-        help='Allow hook to pass when no credentials are detected.'
+        help='Allow hook to pass when no credentials are detected.',
    )
    args = parser.parse_args(argv)

@ -124,7 +126,7 @@ def main(argv=None):
        print(
            'No AWS keys were found in the configured credential files and '
            'environment variables.\nPlease ensure you have the correct '
-            'setting for --credentials-file'
+            'setting for --credentials-file',
        )
        return 2

--- a/pre_commit_hooks/detect_private_key.py
+++ b/pre_commit_hooks/detect_private_key.py
@ -8,6 +8,9 @@ BLACKLIST = [
    b'BEGIN DSA PRIVATE KEY',
    b'BEGIN EC PRIVATE KEY',
    b'BEGIN OPENSSH PRIVATE KEY',
+    b'BEGIN PRIVATE KEY',
+    b'PuTTY-User-Key-File-2',
+    b'BEGIN SSH2 ENCRYPTED PRIVATE KEY',
 ]


@ -26,7 +29,7 @@ def detect_private_key(argv=None):

    if private_key_files:
        for private_key_file in private_key_files:
-            print('Private key found: {0}'.format(private_key_file))
+            print('Private key found: {}'.format(private_key_file))
        return 1
    else:
        return 0
--- a/pre_commit_hooks/end_of_file_fixer.py
+++ b/pre_commit_hooks/end_of_file_fixer.py
@ -58,7 +58,7 @@ def end_of_file_fixer(argv=None):
        with open(filename, 'rb+') as file_obj:
            ret_for_file = fix_file(file_obj)
            if ret_for_file:
-                print('Fixing {0}'.format(filename))
+                print('Fixing {}'.format(filename))
            retv |= ret_for_file

    return retv
--- a/pre_commit_hooks/file_contents_sorter.py
+++ b/pre_commit_hooks/file_contents_sorter.py
@ -0,0 +1,52 @@
+"""
+A very simple pre-commit hook that, when passed one or more filenames
+as arguments, will sort the lines in those files.
+
+An example use case for this: you have a deploy-whitelist.txt file
+in a repo that contains a list of filenames that is used to specify
+files to be included in a docker container. This file has one filename
+per line. Various users are adding/removing lines from this file; using
+this hook on that file should reduce the instances of git merge
+conflicts and keep the file nicely ordered.
+"""
+from __future__ import print_function
+
+import argparse
+
+# Return codes: FAIL means the file had to be rewritten.
+PASS = 0
+FAIL = 1
+
+
+def sort_file_contents(f):
+    """Sort the lines of the binary read/write file object `f` in place.
+
+    Whitespace-only lines are dropped, the remaining lines are sorted and
+    joined with ``\\n``, and the result always ends with a newline.  Returns
+    PASS when `f` already had exactly that content; otherwise rewrites `f`
+    and returns FAIL.
+    """
+    raw_lines = f.readlines()
+
+    kept = [line.strip(b'\n\r') for line in raw_lines if line.strip()]
+    kept.sort()
+    expected = b'\n'.join(kept) + b'\n'
+
+    if b''.join(raw_lines) != expected:
+        # Overwrite from the start and cut off any leftover tail.
+        f.seek(0)
+        f.write(expected)
+        f.truncate()
+        return FAIL
+    return PASS
+
+
+def main(argv=None):
+    """Sort each given file in place; return FAIL if any file was changed."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='+', help='Files to sort')
+    args = parser.parse_args(argv)
+
+    retv = PASS
+    for path in args.filenames:
+        with open(path, 'rb+') as file_obj:
+            changed = sort_file_contents(file_obj)
+        if changed:
+            print('Sorting {}'.format(path))
+        retv |= changed
+
+    return retv
--- a/pre_commit_hooks/mixed_line_ending.py
+++ b/pre_commit_hooks/mixed_line_ending.py
@ -0,0 +1,84 @@
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+import collections
+
+
+# Byte sequences for the three recognised line endings.
+CRLF = b'\r\n'
+LF = b'\n'
+CR = b'\r'
+# Prefer LF to CRLF to CR, but detect CRLF before LF
+ALL_ENDINGS = (CR, CRLF, LF)
+# Maps each explicit `--fix` choice to the ending it writes.
+FIX_TO_LINE_ENDING = {'cr': CR, 'crlf': CRLF, 'lf': LF}
+
+
+def _fix(filename, contents, ending):
+    """Rewrite `filename` so that every line of `contents` ends in `ending`.
+
+    `contents` and `ending` are bytes.  Each line's trailing CR/LF characters
+    are removed and `ending` is appended, including on a final line that had
+    no line ending of its own.
+    """
+    normalized = [
+        line.rstrip(b'\r\n') + ending
+        for line in contents.splitlines(True)
+    ]
+    with open(filename, 'wb') as f:
+        f.write(b''.join(normalized))
+
+
+def fix_filename(filename, fix):
+    """Detect, and depending on `fix` repair, the line endings of `filename`.
+
+    `fix` is 'no', 'auto', or a key of `FIX_TO_LINE_ENDING`.
+
+    * 'no': never rewrites; returns whether the file mixes line endings.
+    * 'auto': returns a falsy value when the endings are not mixed; otherwise
+      rewrites the file with its most common ending and returns 1.
+    * explicit ending: rewrites the file when any line uses a different
+      ending and returns whether such lines existed.
+    """
+    with open(filename, 'rb') as f:
+        contents = f.read()
+
+    # Number of lines seen per ending; lines without an ending are not counted.
+    counts = collections.defaultdict(int)
+
+    for line in contents.splitlines(True):
+        for ending in ALL_ENDINGS:
+            if line.endswith(ending):
+                counts[ending] += 1
+                break
+
+    # Some amount of mixed line endings
+    mixed = sum(bool(x) for x in counts.values()) > 1
+
+    if fix == 'no' or (fix == 'auto' and not mixed):
+        return mixed
+
+    if fix == 'auto':
+        max_ending = LF
+        max_lines = 0
+        # ordering is important here such that lf > crlf > cr
+        for ending_type in ALL_ENDINGS:
+            # also important, using >= to find a max that prefers the last
+            if counts[ending_type] >= max_lines:
+                max_ending = ending_type
+                max_lines = counts[ending_type]
+
+        _fix(filename, contents, max_ending)
+        return 1
+    else:
+        target_ending = FIX_TO_LINE_ENDING[fix]
+        # find if there are lines with *other* endings
+        # It's possible there's no line endings of the target type
+        counts.pop(target_ending, None)
+        other_endings = bool(sum(counts.values()))
+        if other_endings:
+            _fix(filename, contents, target_ending)
+        return other_endings
+
+
+def main(argv=None):
+    """Check (and by default fix) line endings of the given files.
+
+    Returns 1 when `fix_filename` reported a problem or a rewrite for at
+    least one file, otherwise 0.
+    """
+    fix_choices = ('auto', 'no') + tuple(FIX_TO_LINE_ENDING)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-f', '--fix',
+        choices=fix_choices,
+        default='auto',
+        help='Replace line ending with the specified. Default is "auto"',
+    )
+    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
+    args = parser.parse_args(argv)
+
+    # Process every file; the combined status is 1 if any result was truthy.
+    results = [fix_filename(filename, args.fix) for filename in args.filenames]
+    return int(any(results))
+
+
+if __name__ == '__main__':
+    exit(main())
--- a/pre_commit_hooks/no_commit_to_branch.py
+++ b/pre_commit_hooks/no_commit_to_branch.py
@ -0,0 +1,31 @@
+from __future__ import print_function
+
+import argparse
+
+from pre_commit_hooks.util import CalledProcessError
+from pre_commit_hooks.util import cmd_output
+
+
+def is_on_branch(protected):
+    """Return True when the current HEAD branch is one of `protected`.
+
+    The branch is read with ``git symbolic-ref HEAD``.  If that command
+    fails, False is returned.
+    """
+    try:
+        ref = cmd_output('git', 'symbolic-ref', 'HEAD')
+    except CalledProcessError:
+        return False
+    # Drop the first two '/'-separated components of the ref; what remains
+    # is compared as the branch name (and may itself contain slashes).
+    branch_name = '/'.join(ref.strip().split('/')[2:])
+    return branch_name in protected
+
+
+def main(argv=None):
+    """Return 1 when HEAD is on a protected branch, otherwise 0.
+
+    Protected branches come from repeated ``-b/--branch`` options and default
+    to just ``master``.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-b', '--branch', action='append',
+        help='branch to disallow commits to, may be specified multiple times',
+    )
+    args = parser.parse_args(argv)
+
+    branches = args.branch if args.branch else ('master',)
+    return 1 if is_on_branch(set(branches)) else 0
+
+
+if __name__ == '__main__':
+    exit(main())
--- a/pre_commit_hooks/pretty_format_json.py
+++ b/pre_commit_hooks/pretty_format_json.py
@ -1,13 +1,15 @@
 from __future__ import print_function

 import argparse
+import io
+import json
 import sys
 from collections import OrderedDict

-import simplejson
+from six import text_type


-def _get_pretty_format(contents, indent, sort_keys=True, top_keys=[]):
+def _get_pretty_format(contents, indent, ensure_ascii=True, sort_keys=True, top_keys=[]):
    def pairs_first(pairs):
        before = [pair for pair in pairs if pair[0] in top_keys]
        before = sorted(before, key=lambda x: top_keys.index(x[0]))
@ -15,39 +17,28 @@ def _get_pretty_format(contents, indent, sort_keys=True, top_keys=[]):
        if sort_keys:
            after = sorted(after, key=lambda x: x[0])
        return OrderedDict(before + after)
-    return simplejson.dumps(
-        simplejson.loads(
-            contents,
-            object_pairs_hook=pairs_first,
-        ),
-        indent=indent
-    ) + "\n"  # dumps don't end with a newline
+    json_pretty = json.dumps(
+        json.loads(contents, object_pairs_hook=pairs_first),
+        indent=indent,
+        ensure_ascii=ensure_ascii,
+        separators=(',', ': '),  # Workaround for https://bugs.python.org/issue16333
+    )
+    # Ensure unicode (Py2) and add the newline that dumps does not end with.
+    return text_type(json_pretty) + '\n'


 def _autofix(filename, new_contents):
-    print("Fixing file {0}".format(filename))
-    with open(filename, 'w') as f:
+    print('Fixing file {}'.format(filename))
+    with io.open(filename, 'w', encoding='UTF-8') as f:
        f.write(new_contents)


-def parse_indent(s):
-    # type: (str) -> str
+def parse_num_to_int(s):
+    """Convert string numbers to int, leaving strings as is."""
    try:
-        int_indentation_spec = int(s)
+        return int(s)
    except ValueError:
-        if not s.strip():
-            return s
-        else:
-            raise ValueError(
-                'Non-whitespace JSON indentation delimiter supplied. ',
-            )
-    else:
-        if int_indentation_spec >= 0:
-            return int_indentation_spec * ' '
-        else:
-            raise ValueError(
-                'Negative integer supplied to construct JSON indentation delimiter. ',
-            )
+        return s


 def parse_topkeys(s):
@ -65,9 +56,19 @@ def pretty_format_json(argv=None):
    )
    parser.add_argument(
        '--indent',
-        type=parse_indent,
-        default='  ',
-        help='String used as delimiter for one indentation level',
+        type=parse_num_to_int,
+        default='2',
+        help=(
+            'The number of indent spaces or a string to be used as delimiter'
+            ' for indentation level e.g. 4 or "\t" (Default: 2)'
+        ),
+    )
+    parser.add_argument(
+        '--no-ensure-ascii',
+        action='store_true',
+        dest='no_ensure_ascii',
+        default=False,
+        help='Do NOT convert non-ASCII characters to Unicode escape sequences (\\uXXXX)',
    )
    parser.add_argument(
        '--no-sort-keys',
@ -90,27 +91,26 @@ def pretty_format_json(argv=None):
    status = 0

    for json_file in args.filenames:
-        with open(json_file) as f:
+        with io.open(json_file, encoding='UTF-8') as f:
            contents = f.read()

        try:
            pretty_contents = _get_pretty_format(
-                contents, args.indent, sort_keys=not args.no_sort_keys,
-                top_keys=args.top_keys
+                contents, args.indent, ensure_ascii=not args.no_ensure_ascii,
+                sort_keys=not args.no_sort_keys, top_keys=args.top_keys,
            )

            if contents != pretty_contents:
-                print("File {0} is not pretty-formatted".format(json_file))
+                print('File {} is not pretty-formatted'.format(json_file))

                if args.autofix:
                    _autofix(json_file, pretty_contents)

                status = 1
-
-        except simplejson.JSONDecodeError:
+        except ValueError:
            print(
-                "Input File {0} is not a valid JSON, consider using check-json"
-                .format(json_file)
+                'Input File {} is not a valid JSON, consider using check-json'
+                .format(json_file),
            )
            return 1

--- a/pre_commit_hooks/requirements_txt_fixer.py
+++ b/pre_commit_hooks/requirements_txt_fixer.py
@ -3,6 +3,10 @@ from __future__ import print_function
 import argparse


+PASS = 0
+FAIL = 1
+
+
 class Requirement(object):

    def __init__(self):
@ -30,21 +34,25 @@ class Requirement(object):

 def fix_requirements(f):
    requirements = []
-    before = []
+    before = tuple(f)
    after = []

-    for line in f:
-        before.append(line)
+    before_string = b''.join(before)

-        # If the most recent requirement object has a value, then it's time to
-        # start building the next requirement object.
+    # If the file is empty (i.e. only whitespace/newlines) exit early
+    if before_string.strip() == b'':
+        return PASS
+
+    for line in before:
+        # If the most recent requirement object has a value, then it's
+        # time to start building the next requirement object.
        if not len(requirements) or requirements[-1].value is not None:
            requirements.append(Requirement())

        requirement = requirements[-1]

-        # If we see a newline before any requirements, then this is a top of
-        # file comment.
+        # If we see a newline before any requirements, then this is a
+        # top of file comment.
        if len(requirements) == 1 and line.strip() == b'':
            if len(requirement.comments) and requirement.comments[0].startswith(b'#'):
                requirement.value = b'\n'
@ -55,21 +63,33 @@ def fix_requirements(f):
        else:
            requirement.value = line

-    for requirement in sorted(requirements):
-        for comment in requirement.comments:
-            after.append(comment)
-        after.append(requirement.value)
+    # if a file ends in a comment, preserve it at the end
+    if requirements[-1].value is None:
+        rest = requirements.pop().comments
+    else:
+        rest = []
+
+    # find and remove pkg-resources==0.0.0
+    # which is automatically added by broken pip package under Debian
+    requirements = [
+        req for req in requirements
+        if req.value != b'pkg-resources==0.0.0\n'
+    ]
+
+    for requirement in sorted(requirements):
+        after.extend(requirement.comments)
+        after.append(requirement.value)
+    after.extend(rest)

-    before_string = b''.join(before)
    after_string = b''.join(after)

    if before_string == after_string:
-        return 0
+        return PASS
    else:
        f.seek(0)
        f.write(after_string)
        f.truncate()
-        return 1
+        return FAIL


 def fix_requirements_txt(argv=None):
@ -77,14 +97,14 @@ def fix_requirements_txt(argv=None):
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

-    retv = 0
+    retv = PASS

    for arg in args.filenames:
        with open(arg, 'rb+') as file_obj:
            ret_for_file = fix_requirements(file_obj)

            if ret_for_file:
-                print('Sorting {0}'.format(arg))
+                print('Sorting {}'.format(arg))

            retv |= ret_for_file

--- a/pre_commit_hooks/sort_simple_yaml.py
+++ b/pre_commit_hooks/sort_simple_yaml.py
@ -0,0 +1,123 @@
+#!/usr/bin/env python
+"""Sort a simple YAML file, keeping blocks of comments and definitions
+together.
+
+We assume a strict subset of YAML that looks like:
+
+    # block of header comments
+    # here that should always
+    # be at the top of the file
+
+    # optional comments
+    # can go here
+    key: value
+    key: value
+
+    key: value
+
+In other words, we don't sort deeper than the top layer, and might corrupt
+complicated YAML files.
+"""
+from __future__ import print_function
+
+import argparse
+import io
+
+
+QUOTES = ["'", '"']
+
+
+def sort(lines):
+    """Return the lines of a simple YAML file with its blocks sorted.
+
+    The leading header comment block stays first; every following block is
+    ordered by its first key and blocks are separated by one empty line.
+
+    :param lines: array of strings (without newlines)
+    :return: sorted array of strings
+    """
+    # parsing consumes its input, so work on a private copy
+    remaining = list(lines)
+    result = parse_block(remaining, header=True)
+
+    blocks = parse_blocks(remaining)
+    blocks.sort(key=first_key)
+
+    for block in blocks:
+        # separate from whatever precedes (header or previous block)
+        if result:
+            result.append('')
+        result += block
+
+    return result
+
+
+def parse_block(lines, header=False):
+    """Pop one block off the front of `lines` and return it.
+
+    A block ends at the first empty line or at the end of the input.  When
+    `header` is true the block additionally ends at the first line that does
+    not start with ``#``.  The terminating line itself is left in `lines`.
+
+    :param lines: list of lines; the consumed lines are removed in place
+    :param header: whether we are parsing a header block
+    :return: list of lines that form the single block
+    """
+    taken = 0
+    for line in lines:
+        if not line:
+            break
+        if header and not line.startswith('#'):
+            break
+        taken += 1
+
+    block_lines = lines[:taken]
+    del lines[:taken]
+    return block_lines
+
+
+def parse_blocks(lines):
+    """Consume `lines` entirely and return the blocks found in it.
+
+    Empty lines between blocks are discarded; `lines` is empty afterwards.
+
+    :param lines: list of lines
+    :return: list of blocks, where each block is a list of lines
+    """
+    found = []
+
+    while lines:
+        if lines[0] != '':
+            found.append(parse_block(lines))
+            continue
+        # blank separator line: drop it and look for the next block
+        del lines[0]
+
+    return found
+
+
+def first_key(lines):
+    """Return a string representing the sort key of a block.
+
+    The sort key is the first non-comment line of the block with a single
+    leading quote character removed, so that quoted and unquoted keys sort
+    together.  A block made up only of comments has no key and sorts as the
+    empty string.
+
+    >>> first_key(['# some comment', "'foo': true"])
+    "foo': true"
+    >>> first_key(['# only a comment'])
+    ''
+
+    :param lines: list of lines that form one block
+    :return: the sort key as a string
+    """
+    for line in lines:
+        if line.startswith('#'):
+            continue
+        if any(line.startswith(quote) for quote in QUOTES):
+            return line[1:]
+        return line
+    # comment-only block: falling through with None would make sorted() raise
+    # TypeError on Python 3 when compared against the str keys of other blocks
+    return ''
+
+
+def main(argv=None):
+    """Sort each given simple YAML file in place.
+
+    Files are read and written as UTF-8, matching the other fixers, rather
+    than with the platform's locale-dependent default encoding.
+
+    :param argv: argument list to parse; argparse falls back to
+        ``sys.argv[1:]`` when this is None
+    :return: 1 if any file was rewritten, otherwise 0
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
+    args = parser.parse_args(argv)
+
+    retval = 0
+
+    for filename in args.filenames:
+        with io.open(filename, 'r+', encoding='UTF-8') as f:
+            # trailing whitespace (including the newline) is dropped here, so
+            # a rewritten file also loses trailing spaces
+            lines = [line.rstrip() for line in f]
+            new_lines = sort(lines)
+
+            if lines != new_lines:
+                print("Fixing file `{filename}`".format(filename=filename))
+                # rewrite from the start and cut off any leftover old content
+                f.seek(0)
+                f.write("\n".join(new_lines) + "\n")
+                f.truncate()
+                retval = 1
+
+    return retval
+
+
+if __name__ == '__main__':
+    # `exit` is injected by the `site` module for interactive use and is
+    # missing under `python -S`; raising SystemExit needs no import and
+    # produces the same process exit status
+    raise SystemExit(main())
--- a/pre_commit_hooks/string_fixer.py
+++ b/pre_commit_hooks/string_fixer.py
@ -32,7 +32,7 @@ def get_line_offsets_by_line_no(src):


 def fix_strings(filename):
-    contents = io.open(filename).read()
+    contents = io.open(filename, encoding='UTF-8').read()
    line_offsets = get_line_offsets_by_line_no(contents)

    # Basically a mutable string
@ -52,7 +52,7 @@ def fix_strings(filename):

    new_contents = ''.join(splitcontents)
    if contents != new_contents:
-        with io.open(filename, 'w') as write_handle:
+        with io.open(filename, 'w', encoding='UTF-8') as write_handle:
            write_handle.write(new_contents)
        return 1
    else:
@ -69,7 +69,7 @@ def main(argv=None):
    for filename in args.filenames:
        return_value = fix_strings(filename)
        if return_value != 0:
-            print('Fixing strings in {0}'.format(filename))
+            print('Fixing strings in {}'.format(filename))
        retv |= return_value

    return retv
--- a/pre_commit_hooks/tests_should_end_in_test.py
+++ b/pre_commit_hooks/tests_should_end_in_test.py
@ -11,12 +11,12 @@ def validate_files(argv=None):
    parser.add_argument('filenames', nargs='*')
    parser.add_argument(
        '--django', default=False, action='store_true',
-        help='Use Django-style test naming pattern (test*.py)'
+        help='Use Django-style test naming pattern (test*.py)',
    )
    args = parser.parse_args(argv)

    retcode = 0
-    test_name_pattern = 'test_.*.py' if args.django else '.*_test.py'
+    test_name_pattern = 'test.*.py' if args.django else '.*_test.py'
    for filename in args.filenames:
        base = basename(filename)
        if (
@ -26,9 +26,9 @@ def validate_files(argv=None):
        ):
            retcode = 1
            print(
-                '{0} does not match pattern "{1}"'.format(
-                    filename, test_name_pattern
-                )
+                '{} does not match pattern "{}"'.format(
+                    filename, test_name_pattern,
+                ),
            )

    return retcode
--- a/pre_commit_hooks/trailing_whitespace_fixer.py
+++ b/pre_commit_hooks/trailing_whitespace_fixer.py
@ -4,8 +4,6 @@ import argparse
 import os
 import sys

-from pre_commit_hooks.util import cmd_output
-

 def _fix_file(filename, is_markdown):
    with open(filename, mode='rb') as file_processed:
@ -21,14 +19,19 @@ def _fix_file(filename, is_markdown):


 def _process_line(line, is_markdown):
+    """Strip trailing whitespace from one line of bytes, keeping its line ending.
+
+    :param line: a single line as bytes, with or without a terminator
+    :param is_markdown: when true, a non-blank line ending in two spaces
+        keeps exactly two trailing spaces
+    :return: the line with trailing whitespace removed and its original
+        CRLF, LF or absent terminator re-appended
+    """
+    # remember the terminator so it can be restored after rstrip(); a final
+    # line that has no newline must not gain one
+    if line[-2:] == b'\r\n':
+        eol = b'\r\n'
+    elif line[-1:] == b'\n':
+        eol = b'\n'
+    else:
+        eol = b''
    # preserve trailing two-space for non-blank lines in markdown files
-    eol = b'\r\n' if line[-2:] == b'\r\n' else b'\n'
    if is_markdown and (not line.isspace()) and line.endswith(b'  ' + eol):
        return line.rstrip() + b'  ' + eol
    return line.rstrip() + eol


-def fix_trailing_whitespace(argv=None):
+def main(argv=None):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--no-markdown-linebreak-ext',
@ -36,7 +39,7 @@ def fix_trailing_whitespace(argv=None):
        const=[],
        default=argparse.SUPPRESS,
        dest='markdown_linebreak_ext',
-        help='Do not preserve linebreak spaces in Markdown'
+        help='Do not preserve linebreak spaces in Markdown',
    )
    parser.add_argument(
        '--markdown-linebreak-ext',
@ -45,15 +48,11 @@ def fix_trailing_whitespace(argv=None):
        default=['md,markdown'],
        metavar='*|EXT[,EXT,...]',
        nargs='?',
-        help='Markdown extensions (or *) for linebreak spaces'
+        help='Markdown extensions (or *) for linebreak spaces',
    )
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

-    bad_whitespace_files = cmd_output(
-        'grep', '-l', '[[:space:]]$', *args.filenames, retcode=None
-    ).strip().splitlines()
-
    md_args = args.markdown_linebreak_ext
    if '' in md_args:
        parser.error('--markdown-linebreak-ext requires a non-empty argument')
@ -67,20 +66,20 @@ def fix_trailing_whitespace(argv=None):
    for ext in md_exts:
        if any(c in ext[1:] for c in r'./\:'):
            parser.error(
-                "bad --markdown-linebreak-ext extension '{0}' (has . / \\ :)\n"
+                "bad --markdown-linebreak-ext extension '{}' (has . / \\ :)\n"
                "  (probably filename; use '--markdown-linebreak-ext=EXT')"
-                .format(ext)
+                .format(ext),
            )

    return_code = 0
-    for bad_whitespace_file in bad_whitespace_files:
-        _, extension = os.path.splitext(bad_whitespace_file.lower())
+    for filename in args.filenames:
+        _, extension = os.path.splitext(filename.lower())
        md = all_markdown or extension in md_exts
-        if _fix_file(bad_whitespace_file, md):
-            print('Fixing {}'.format(bad_whitespace_file))
+        if _fix_file(filename, md):
+            print('Fixing {}'.format(filename))
            return_code = 1
    return return_code


 if __name__ == '__main__':
-    sys.exit(fix_trailing_whitespace())
+    sys.exit(main())