Merge branch 'master' of https://github.com/pre-commit/pre-commit-hooks into bobby-pull-from-master

This commit is contained in:
Bobby Rullo 2018-06-13 13:00:20 -07:00
commit 110790d320
81 changed files with 2170 additions and 677 deletions

View file

@ -14,12 +14,12 @@ def main(argv=None):
retv = 0
for filename in args.files:
original_contents = io.open(filename).read()
original_contents = io.open(filename, encoding='UTF-8').read()
new_contents = autopep8.fix_code(original_contents, args)
if original_contents != new_contents:
print('Fixing {0}'.format(filename))
print('Fixing {}'.format(filename))
retv = 1
with io.open(filename, 'w') as output_file:
with io.open(filename, 'w', encoding='UTF-8') as output_file:
output_file.write(new_contents)
return retv

View file

@ -4,6 +4,7 @@ from __future__ import print_function
from __future__ import unicode_literals
import argparse
import json
import math
import os
@ -13,23 +14,13 @@ from pre_commit_hooks.util import cmd_output
def lfs_files():
try: # pragma: no cover (no git-lfs)
lines = cmd_output('git', 'lfs', 'status', '--porcelain').splitlines()
try:
# Introduced in git-lfs 2.2.0, first working in 2.2.1
lfs_ret = cmd_output('git', 'lfs', 'status', '--json')
except CalledProcessError: # pragma: no cover (with git-lfs)
lines = []
lfs_ret = '{"files":{}}'
modes_and_fileparts = [
(line[:3].strip(), line[3:].rpartition(' ')[0]) for line in lines
]
def to_file_part(mode, filepart): # pragma: no cover (no git-lfs)
assert mode in ('A', 'R')
return filepart if mode == 'A' else filepart.split(' -> ')[1]
return set(
to_file_part(mode, filepart) for mode, filepart in modes_and_fileparts
if mode in ('A', 'R')
)
return set(json.loads(lfs_ret)['files'])
def find_large_added_files(filenames, maxkb):
@ -41,7 +32,7 @@ def find_large_added_files(filenames, maxkb):
for filename in filenames:
kb = int(math.ceil(os.stat(filename).st_size / 1024))
if kb > maxkb:
print('{0} ({1} KB) exceeds {2} KB.'.format(filename, kb, maxkb))
print('{} ({} KB) exceeds {} KB.'.format(filename, kb, maxkb))
retv = 1
return retv
@ -51,7 +42,7 @@ def main(argv=None):
parser = argparse.ArgumentParser()
parser.add_argument(
'filenames', nargs='*',
help='Filenames pre-commit believes are changed.'
help='Filenames pre-commit believes are changed.',
)
parser.add_argument(
'--maxkb', type=int, default=500,

View file

@ -4,7 +4,7 @@ from __future__ import unicode_literals
import argparse
import ast
import os.path
import platform
import sys
import traceback
@ -14,19 +14,19 @@ def check_ast(argv=None):
parser.add_argument('filenames', nargs='*')
args = parser.parse_args(argv)
_, interpreter = os.path.split(sys.executable)
retval = 0
for filename in args.filenames:
try:
ast.parse(open(filename, 'rb').read(), filename=filename)
except SyntaxError:
print('{0}: failed parsing with {1}:'.format(
filename, interpreter,
print('{}: failed parsing with {} {}:'.format(
filename,
platform.python_implementation(),
sys.version.partition(' ')[0],
))
print('\n{0}'.format(
' ' + traceback.format_exc().replace('\n', '\n ')
print('\n{}'.format(
' ' + traceback.format_exc().replace('\n', '\n '),
))
retval = 1
return retval

View file

@ -0,0 +1,95 @@
from __future__ import unicode_literals
import argparse
import ast
import collections
import sys
# Maps each checked builtin type name to the literal that should replace an
# argument-less call to it (e.g. `dict()` -> `{}`).
BUILTIN_TYPES = {
    'complex': '0j',
    'dict': '{}',
    'float': '0.0',
    'int': '0',
    'list': '[]',
    'str': "''",
    'tuple': '()',
}

# One offending call: the builtin's name and the position reported by the
# `ast` node (`lineno` / `col_offset`).
BuiltinTypeCall = collections.namedtuple('BuiltinTypeCall', ['name', 'line', 'column'])


class BuiltinTypeVisitor(ast.NodeVisitor):
    """Collect calls to builtin type constructors that could be literals.

    After `visit(tree)`, `builtin_type_calls` holds one `BuiltinTypeCall` per
    call such as `list()` or `dict()` that passes no positional arguments.

    :param ignore: iterable of builtin names that must not be reported
    :param allow_dict_kwargs: when true, `dict(a=1)`-style calls (keyword
        arguments only) are accepted and not reported
    """

    def __init__(self, ignore=None, allow_dict_kwargs=True):
        self.builtin_type_calls = []
        self.ignore = set(ignore) if ignore else set()
        self.allow_dict_kwargs = allow_dict_kwargs

    def _check_dict_call(self, node):
        """Return a truthy value if `node` is a permitted keyword-style call.

        `kwargs` only exists on older `ast.Call` nodes, hence the `getattr`.
        """
        return self.allow_dict_kwargs and (getattr(node, 'kwargs', None) or getattr(node, 'keywords', None))

    def _is_replaceable(self, node):
        """Return True if the call `node` should be reported."""
        if not isinstance(node.func, ast.Name):
            # Ignore functions that are object attributes (`foo.bar()`).
            # Assume that if the user calls `builtins.list()`, they know what
            # they're doing.
            return False
        if node.func.id not in set(BUILTIN_TYPES).difference(self.ignore):
            return False
        if node.func.id == 'dict' and self._check_dict_call(node):
            return False
        # Any positional argument means the call is a conversion, not an
        # empty-literal construction.
        return not node.args

    def visit_Call(self, node):
        """Record `node` if it is replaceable, then keep walking its children."""
        if self._is_replaceable(node):
            self.builtin_type_calls.append(
                BuiltinTypeCall(node.func.id, node.lineno, node.col_offset),
            )
        # Without this, calls nested inside another call's arguments
        # (e.g. `foo(list())`) would never be visited.
        self.generic_visit(node)
def check_file_for_builtin_type_constructors(filename, ignore=None, allow_dict_kwargs=True):
    """Parse `filename` and return its replaceable builtin constructor calls.

    :param filename: path of the Python source file to inspect
    :param ignore: iterable of builtin names that must not be reported
    :param allow_dict_kwargs: passed through to `BuiltinTypeVisitor`
    :return: list of `BuiltinTypeCall` tuples found by the visitor
    :raises SyntaxError: propagated from `ast.parse` for unparsable files
    """
    # Use a context manager so the handle is closed promptly instead of
    # waiting for garbage collection.
    with open(filename, 'rb') as f:
        tree = ast.parse(f.read(), filename=filename)
    visitor = BuiltinTypeVisitor(ignore=ignore, allow_dict_kwargs=allow_dict_kwargs)
    visitor.visit(tree)
    return visitor.builtin_type_calls
def parse_args(argv):
    """Build the command-line parser and parse `argv`.

    The resulting namespace has `filenames` (list of paths), `ignore` (set of
    builtin names given as a comma-separated list) and `allow_dict_kwargs`
    (bool, true unless `--no-allow-dict-kwargs` is passed).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('filenames', nargs='*')
    cli.add_argument(
        '--ignore',
        type=lambda raw: set(raw.split(',')),
        default=set(),
    )

    # The two switches write to the same destination and cannot be combined.
    toggle = cli.add_mutually_exclusive_group(required=False)
    toggle.add_argument('--allow-dict-kwargs', action='store_true')
    toggle.add_argument(
        '--no-allow-dict-kwargs',
        dest='allow_dict_kwargs',
        action='store_false',
    )
    toggle.set_defaults(allow_dict_kwargs=True)

    return cli.parse_args(argv)
def main(argv=None):
    """Report replaceable builtin constructor calls in the given files.

    Prints one line per offending call and returns 1 if any file contained
    one, otherwise 0.
    """
    args = parse_args(argv)
    exit_code = 0
    for path in args.filenames:
        found = check_file_for_builtin_type_constructors(
            path,
            ignore=args.ignore,
            allow_dict_kwargs=args.allow_dict_kwargs,
        )
        if not found:
            continue
        exit_code = 1
        for item in found:
            message = '{filename}:{call.line}:{call.column} - Replace {call.name}() with {replacement}'.format(
                filename=path,
                call=item,
                replacement=BUILTIN_TYPES[item.name],
            )
            print(message)
    return exit_code


if __name__ == '__main__':
    sys.exit(main())

View file

@ -16,7 +16,7 @@ def main(argv=None):
with open(filename, 'rb') as f:
if f.read(3) == b'\xef\xbb\xbf':
retv = 1
print('{0}: Has a byte-order marker'.format(filename))
print('{}: Has a byte-order marker'.format(filename))
return retv

View file

@ -9,7 +9,7 @@ from pre_commit_hooks.util import cmd_output
def lower_set(iterable):
return set(x.lower() for x in iterable)
return {x.lower() for x in iterable}
def find_conflicting_filenames(filenames):
@ -35,7 +35,7 @@ def find_conflicting_filenames(filenames):
if x.lower() in conflicts
]
for filename in sorted(conflicting_files):
print('Case-insensitivity conflict found: {0}'.format(filename))
print('Case-insensitivity conflict found: {}'.format(filename))
retv = 1
return retv
@ -45,7 +45,7 @@ def main(argv=None):
parser = argparse.ArgumentParser()
parser.add_argument(
'filenames', nargs='*',
help='Filenames pre-commit believes are changed.'
help='Filenames pre-commit believes are changed.',
)
args = parser.parse_args(argv)

View file

@ -1,4 +1,5 @@
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import argparse
@ -27,18 +28,18 @@ def check_docstring_first(src, filename='<unknown>'):
if tok_type == tokenize.STRING and scol == 0:
if found_docstring_line is not None:
print(
'{0}:{1} Multiple module docstrings '
'(first docstring on line {2}).'.format(
'{}:{} Multiple module docstrings '
'(first docstring on line {}).'.format(
filename, sline, found_docstring_line,
)
),
)
return 1
elif found_code_line is not None:
print(
'{0}:{1} Module docstring appears after code '
'(code seen on line {2}).'.format(
'{}:{} Module docstring appears after code '
'(code seen on line {}).'.format(
filename, sline, found_code_line,
)
),
)
return 1
else:
@ -57,7 +58,7 @@ def main(argv=None):
retv = 0
for filename in args.filenames:
contents = io.open(filename).read()
contents = io.open(filename, encoding='UTF-8').read()
retv |= check_docstring_first(contents, filename=filename)
return retv

View file

@ -0,0 +1,40 @@
"""Check that executable text files have a shebang."""
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import pipes
import sys
def check_has_shebang(path):
    """Check that the file at `path` starts with a `#!` shebang.

    Prints a diagnostic to stderr when the shebang is missing.

    :param path: path of a file that is marked executable
    :return: 0 if the first two bytes are `#!`, otherwise 1
    """
    # `pipes` is deprecated (PEP 594) and removed in Python 3.13, so prefer
    # `shlex.quote` and only fall back to `pipes.quote` where it is missing.
    try:
        from shlex import quote
    except ImportError:  # pragma: no cover (python 2)
        from pipes import quote

    with open(path, 'rb') as f:
        first_bytes = f.read(2)

    if first_bytes != b'#!':
        print(
            '{path}: marked executable but has no (or invalid) shebang!\n'
            " If it isn't supposed to be executable, try: chmod -x {quoted}\n"
            ' If it is supposed to be executable, double-check its shebang.'.format(
                path=path,
                quoted=quote(path),
            ),
            file=sys.stderr,
        )
        return 1
    else:
        return 0
def main(argv=None):
    """Run the shebang check on every filename given on the command line.

    Returns the bitwise OR of the per-file results, so the result is non-zero
    as soon as one file fails the check.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument('filenames', nargs='*')
    options = cli.parse_args(argv)

    status = 0
    # Every file is checked (no short-circuit) so each failure is reported.
    for path in options.filenames:
        status = status | check_has_shebang(path)
    return status

View file

@ -1,10 +1,10 @@
from __future__ import print_function
import argparse
import io
import json
import sys
import simplejson
def check_json(argv=None):
parser = argparse.ArgumentParser()
@ -14,9 +14,9 @@ def check_json(argv=None):
retval = 0
for filename in args.filenames:
try:
simplejson.load(open(filename))
except (simplejson.JSONDecodeError, UnicodeDecodeError) as exc:
print('{0}: Failed to json decode ({1})'.format(filename, exc))
json.load(io.open(filename, encoding='UTF-8'))
except (ValueError, UnicodeDecodeError) as exc:
print('{}: Failed to json decode ({})'.format(filename, exc))
retval = 1
return retval

View file

@ -7,7 +7,7 @@ CONFLICT_PATTERNS = [
b'<<<<<<< ',
b'======= ',
b'=======\n',
b'>>>>>>> '
b'>>>>>>> ',
]
WARNING_MSG = 'Merge conflict string "{0}" found in {1}:{2}'
@ -15,7 +15,11 @@ WARNING_MSG = 'Merge conflict string "{0}" found in {1}:{2}'
def is_in_merge():
return (
os.path.exists(os.path.join('.git', 'MERGE_MSG')) and
os.path.exists(os.path.join('.git', 'MERGE_HEAD'))
(
os.path.exists(os.path.join('.git', 'MERGE_HEAD')) or
os.path.exists(os.path.join('.git', 'rebase-apply')) or
os.path.exists(os.path.join('.git', 'rebase-merge'))
)
)

View file

@ -19,7 +19,7 @@ def check_symlinks(argv=None):
os.path.islink(filename) and
not os.path.exists(filename)
): # pragma: no cover (symlink support required)
print('{0}: Broken symlink'.format(filename))
print('{}: Broken symlink'.format(filename))
retv = 1
return retv

View file

@ -0,0 +1,44 @@
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import re
import sys
# Matches a github "blob" URL that points at the moving `master` branch and
# carries a line anchor. A raw bytes literal is used so that `\d` reaches the
# regex engine instead of being an invalid string escape.
GITHUB_NON_PERMALINK = re.compile(
    br'https://github.com/[^/ ]+/[^/ ]+/blob/master/[^# ]+#L\d+',
)


def _check_filename(filename):
    """Print every line of `filename` that contains a non-permanent link.

    Each hit is written to stdout as `<filename>:<line number>:` followed by
    the raw line.

    :return: 1 if at least one line matched, otherwise 0
    """
    retv = 0
    with open(filename, 'rb') as f:
        for i, line in enumerate(f, 1):
            if GITHUB_NON_PERMALINK.search(line):
                sys.stdout.write('{}:{}:'.format(filename, i))
                # Flush the text prefix before writing raw bytes so the two
                # writes are not reordered by buffering.
                sys.stdout.flush()
                # Use the binary `buffer` when stdout has one; otherwise
                # write the bytes to `sys.stdout` itself.
                getattr(sys.stdout, 'buffer', sys.stdout).write(line)
                retv = 1
    return retv
def main(argv=None):
    """Check every given file for non-permanent github links.

    Prints a short explanation after the per-line hits when anything was
    found.

    :return: 1 if any file contained a non-permanent link, otherwise 0
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*')
    args = parser.parse_args(argv)

    retv = 0
    for filename in args.filenames:
        retv |= _check_filename(filename)

    if retv:
        print()
        print('Non-permanent github link detected.')
        print('On any page on github press [y] to load a permalink.')
    return retv


if __name__ == '__main__':
    # `exit` is a helper injected by the `site` module for interactive use;
    # `sys.exit` is the supported way to set the process exit status.
    sys.exit(main())

View file

@ -19,7 +19,7 @@ def check_xml(argv=None):
with io.open(filename, 'rb') as xml_file:
xml.sax.parse(xml_file, xml.sax.ContentHandler())
except xml.sax.SAXException as exc:
print('{0}: Failed to xml parse ({1})'.format(filename, exc))
print('{}: Failed to xml parse ({})'.format(filename, exc))
retval = 1
return retval

View file

@ -1,6 +1,7 @@
from __future__ import print_function
import argparse
import collections
import sys
import yaml
@ -11,21 +12,57 @@ except ImportError: # pragma: no cover (no libyaml-dev / pypy)
Loader = yaml.SafeLoader
def _exhaust(gen):
for _ in gen:
pass
def _parse_unsafe(*args, **kwargs):
_exhaust(yaml.parse(*args, **kwargs))
def _load_all(*args, **kwargs):
_exhaust(yaml.load_all(*args, **kwargs))
Key = collections.namedtuple('Key', ('multi', 'unsafe'))
LOAD_FNS = {
Key(multi=False, unsafe=False): yaml.load,
Key(multi=False, unsafe=True): _parse_unsafe,
Key(multi=True, unsafe=False): _load_all,
Key(multi=True, unsafe=True): _parse_unsafe,
}
def check_yaml(argv=None):
parser = argparse.ArgumentParser()
parser.add_argument(
'-m', '--multi', '--allow-multiple-documents', action='store_true',
)
parser.add_argument(
'--unsafe', action='store_true',
help=(
'Instead of loading the files, simply parse them for syntax. '
'A syntax-only check enables extensions and unsafe contstructs '
'which would otherwise be forbidden. Using this option removes '
'all guarantees of portability to other yaml implementations. '
'Implies --allow-multiple-documents'
),
)
parser.add_argument('--ignore-tags', type=lambda s: s.split(','), default=[],
help='Custom tags to ignore.')
parser.add_argument('filenames', nargs='*', help='Yaml filenames to check.')
args = parser.parse_args(argv)
# Ignore custom tags by returning None
for tag in args.ignore_tags:
Loader.add_constructor(tag, lambda *a, **k: None)
load_fn = LOAD_FNS[Key(multi=args.multi, unsafe=args.unsafe)]
retval = 0
for filename in args.filenames:
try:
yaml.load(open(filename), Loader=Loader)
load_fn(open(filename), Loader=Loader)
except yaml.YAMLError as exc:
print(exc)
retval = 1

View file

@ -7,69 +7,66 @@ import collections
import traceback
DEBUG_STATEMENTS = set(['pdb', 'ipdb', 'pudb', 'q', 'rdb'])
DEBUG_STATEMENTS = {'pdb', 'ipdb', 'pudb', 'q', 'rdb'}
Debug = collections.namedtuple('Debug', ('line', 'col', 'name', 'reason'))
DebugStatement = collections.namedtuple(
'DebugStatement', ['name', 'line', 'col'],
)
class ImportStatementParser(ast.NodeVisitor):
class DebugStatementParser(ast.NodeVisitor):
def __init__(self):
self.debug_import_statements = []
self.breakpoints = []
def visit_Import(self, node):
for node_name in node.names:
if node_name.name in DEBUG_STATEMENTS:
self.debug_import_statements.append(
DebugStatement(node_name.name, node.lineno, node.col_offset),
)
for name in node.names:
if name.name in DEBUG_STATEMENTS:
st = Debug(node.lineno, node.col_offset, name.name, 'imported')
self.breakpoints.append(st)
def visit_ImportFrom(self, node):
if node.module in DEBUG_STATEMENTS:
self.debug_import_statements.append(
DebugStatement(node.module, node.lineno, node.col_offset)
)
st = Debug(node.lineno, node.col_offset, node.module, 'imported')
self.breakpoints.append(st)
def visit_Call(self, node):
"""python3.7+ breakpoint()"""
if isinstance(node.func, ast.Name) and node.func.id == 'breakpoint':
st = Debug(node.lineno, node.col_offset, node.func.id, 'called')
self.breakpoints.append(st)
self.generic_visit(node)
def check_file_for_debug_statements(filename):
def check_file(filename):
try:
ast_obj = ast.parse(open(filename).read(), filename=filename)
ast_obj = ast.parse(open(filename, 'rb').read(), filename=filename)
except SyntaxError:
print('{0} - Could not parse ast'.format(filename))
print('{} - Could not parse ast'.format(filename))
print()
print('\t' + traceback.format_exc().replace('\n', '\n\t'))
print()
return 1
visitor = ImportStatementParser()
visitor = DebugStatementParser()
visitor.visit(ast_obj)
if visitor.debug_import_statements:
for debug_statement in visitor.debug_import_statements:
print(
'{0}:{1}:{2} - {3} imported'.format(
filename,
debug_statement.line,
debug_statement.col,
debug_statement.name,
)
)
return 1
else:
return 0
for bp in visitor.breakpoints:
print(
'{}:{}:{} - {} {}'.format(
filename, bp.line, bp.col, bp.name, bp.reason,
),
)
return int(bool(visitor.breakpoints))
def debug_statement_hook(argv=None):
def main(argv=None):
parser = argparse.ArgumentParser()
parser.add_argument('filenames', nargs='*', help='Filenames to run')
args = parser.parse_args(argv)
retv = 0
for filename in args.filenames:
retv |= check_file_for_debug_statements(filename)
retv |= check_file(filename)
return retv
if __name__ == '__main__':
exit(debug_statement_hook())
exit(main())

View file

@ -12,7 +12,7 @@ def get_aws_credential_files_from_env():
files = set()
for env_var in (
'AWS_CONFIG_FILE', 'AWS_CREDENTIAL_FILE', 'AWS_SHARED_CREDENTIALS_FILE',
'BOTO_CONFIG'
'BOTO_CONFIG',
):
if env_var in os.environ:
files.add(os.environ[env_var])
@ -23,7 +23,7 @@ def get_aws_secrets_from_env():
"""Extract AWS secrets from environment variables."""
keys = set()
for env_var in (
'AWS_SECRET_ACCESS_KEY', 'AWS_SECURITY_TOKEN', 'AWS_SESSION_TOKEN'
'AWS_SECRET_ACCESS_KEY', 'AWS_SECURITY_TOKEN', 'AWS_SESSION_TOKEN',
):
if env_var in os.environ:
keys.add(os.environ[env_var])
@ -50,10 +50,12 @@ def get_aws_secrets_from_file(credentials_file):
for section in parser.sections():
for var in (
'aws_secret_access_key', 'aws_security_token',
'aws_session_token'
'aws_session_token',
):
try:
keys.add(parser.get(section, var))
key = parser.get(section, var).strip()
if key:
keys.add(key)
except configparser.NoOptionError:
pass
return keys
@ -93,13 +95,13 @@ def main(argv=None):
help=(
'Location of additional AWS credential files from which to get '
'secret keys from'
)
),
)
parser.add_argument(
'--allow-missing-credentials',
dest='allow_missing_credentials',
action='store_true',
help='Allow hook to pass when no credentials are detected.'
help='Allow hook to pass when no credentials are detected.',
)
args = parser.parse_args(argv)
@ -124,7 +126,7 @@ def main(argv=None):
print(
'No AWS keys were found in the configured credential files and '
'environment variables.\nPlease ensure you have the correct '
'setting for --credentials-file'
'setting for --credentials-file',
)
return 2

View file

@ -8,6 +8,9 @@ BLACKLIST = [
b'BEGIN DSA PRIVATE KEY',
b'BEGIN EC PRIVATE KEY',
b'BEGIN OPENSSH PRIVATE KEY',
b'BEGIN PRIVATE KEY',
b'PuTTY-User-Key-File-2',
b'BEGIN SSH2 ENCRYPTED PRIVATE KEY',
]
@ -26,7 +29,7 @@ def detect_private_key(argv=None):
if private_key_files:
for private_key_file in private_key_files:
print('Private key found: {0}'.format(private_key_file))
print('Private key found: {}'.format(private_key_file))
return 1
else:
return 0

View file

@ -58,7 +58,7 @@ def end_of_file_fixer(argv=None):
with open(filename, 'rb+') as file_obj:
ret_for_file = fix_file(file_obj)
if ret_for_file:
print('Fixing {0}'.format(filename))
print('Fixing {}'.format(filename))
retv |= ret_for_file
return retv

View file

@ -0,0 +1,52 @@
"""
A very simple pre-commit hook that, when passed one or more filenames
as arguments, will sort the lines in those files.
An example use case for this: you have a deploy-whitelist.txt file
in a repo that contains a list of filenames that is used to specify
files to be included in a docker container. This file has one filename
per line. Various users are adding/removing lines from this file; using
this hook on that file should reduce the instances of git merge
conflicts and keep the file nicely ordered.
"""
from __future__ import print_function
import argparse
# Return codes shared by the functions in this hook.
PASS = 0
FAIL = 1


def sort_file_contents(f):
    """Sort the lines of the binary file object `f` in place.

    Blank lines are dropped, line endings are normalized to `\\n`, and the
    remaining lines are written back in sorted order.

    :param f: file object opened for binary reading and writing (`rb+`)
    :return: `PASS` if the file was already sorted, `FAIL` if it was rewritten
    """
    before = list(f)
    after = sorted([line.strip(b'\n\r') for line in before if line.strip()])

    before_string = b''.join(before)
    after_string = b'\n'.join(after)
    # Only terminate with a newline when there is content; otherwise an empty
    # file would be "fixed" into a file holding a single blank line.
    if after_string:
        after_string += b'\n'

    if before_string == after_string:
        return PASS
    else:
        f.seek(0)
        f.write(after_string)
        # Drop whatever is left over from the longer original content.
        f.truncate()
        return FAIL
def main(argv=None):
    """Sort the contents of every file named on the command line.

    At least one filename is required. Prints a message for each file that
    had to be rewritten and returns the OR of the per-file results.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('filenames', nargs='+', help='Files to sort')
    options = cli.parse_args(argv)

    status = PASS
    for path in options.filenames:
        with open(path, 'rb+') as handle:
            outcome = sort_file_contents(handle)

        if outcome:
            print('Sorting {}'.format(path))

        status = status | outcome

    return status

View file

@ -0,0 +1,84 @@
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import collections
CRLF = b'\r\n'
LF = b'\n'
CR = b'\r'
# Prefer LF to CRLF to CR, but detect CRLF before LF
ALL_ENDINGS = (CR, CRLF, LF)
FIX_TO_LINE_ENDING = {'cr': CR, 'crlf': CRLF, 'lf': LF}


def _fix(filename, contents, ending):
    """Rewrite `filename` so every line of `contents` ends with `ending`."""
    normalized = []
    for raw_line in contents.splitlines(True):
        normalized.append(raw_line.rstrip(b'\r\n') + ending)
    with open(filename, 'wb') as out:
        out.write(b''.join(normalized))


def fix_filename(filename, fix):
    """Detect, and depending on `fix` repair, line endings in `filename`.

    :param filename: path of the file to inspect
    :param fix: `'no'` (only report), `'auto'` (convert mixed files to their
        most common ending) or a key of `FIX_TO_LINE_ENDING`
    :return: truthy when the file has mixed endings (`'no'`) or was rewritten
    """
    with open(filename, 'rb') as src:
        data = src.read()

    # Count how many lines use each ending; a final line without any ending
    # is not counted at all.
    tally = collections.defaultdict(int)
    for raw_line in data.splitlines(True):
        matched = next(
            (candidate for candidate in ALL_ENDINGS if raw_line.endswith(candidate)),
            None,
        )
        if matched is not None:
            tally[matched] += 1

    # More than one distinct ending in use means the file is mixed.
    kinds_in_use = [kind for kind, seen in tally.items() if seen]
    mixed = len(kinds_in_use) > 1

    if fix == 'no':
        return mixed

    if fix == 'auto':
        if not mixed:
            return mixed
        # Most common ending wins; walking ALL_ENDINGS backwards makes ties
        # resolve as lf > crlf > cr because `max` keeps the first maximum.
        winner = max(reversed(ALL_ENDINGS), key=lambda kind: tally[kind])
        _fix(filename, data, winner)
        return 1

    wanted = FIX_TO_LINE_ENDING[fix]
    # Only lines with a *different* ending require a rewrite; the file may
    # contain no lines of the wanted type at all.
    tally.pop(wanted, None)
    needs_rewrite = bool(sum(tally.values()))
    if needs_rewrite:
        _fix(filename, data, wanted)
    return needs_rewrite
def main(argv=None):
    """Check (and optionally fix) line endings of the given files.

    `--fix` selects the strategy: `auto` (default), `no`, or one of the keys
    of `FIX_TO_LINE_ENDING`.

    :return: non-zero if any file was mixed or had to be rewritten
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f', '--fix',
        choices=('auto', 'no') + tuple(FIX_TO_LINE_ENDING),
        default='auto',
        help='Replace line ending with the specified. Default is "auto"',
    )
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

    retv = 0
    for filename in args.filenames:
        retv |= fix_filename(filename, args.fix)
    return retv


if __name__ == '__main__':
    # `exit` is a helper injected by the `site` module for interactive use;
    # raising SystemExit works without it and needs no extra import.
    raise SystemExit(main())

View file

@ -0,0 +1,31 @@
from __future__ import print_function
import argparse
from pre_commit_hooks.util import CalledProcessError
from pre_commit_hooks.util import cmd_output
def is_on_branch(protected):
    """Return whether the current HEAD is on one of the `protected` branches.

    :param protected: container of branch names to test membership against
    :return: False when `git symbolic-ref HEAD` fails, otherwise whether the
        branch name is in `protected`
    """
    try:
        ref = cmd_output('git', 'symbolic-ref', 'HEAD')
    except CalledProcessError:
        return False

    # Drop the first two path components of the ref (presumably
    # `refs/heads/` -- confirm) and keep the rest as the branch name, which
    # may itself contain slashes.
    segments = ref.strip().split('/')
    del segments[:2]
    branch_name = '/'.join(segments)
    return branch_name in protected
def main(argv=None):
    """Fail when the current branch is a protected one.

    `-b/--branch` may be given several times; without it only `master` is
    protected.

    :return: 1 if the current branch is protected, otherwise 0
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-b', '--branch', action='append',
        help='branch to disallow commits to, may be specified multiple times',
    )
    args = parser.parse_args(argv)

    protected = set(args.branch or ('master',))
    return int(is_on_branch(protected))


if __name__ == '__main__':
    # `exit` is a helper injected by the `site` module for interactive use;
    # raising SystemExit works without it and needs no extra import.
    raise SystemExit(main())

View file

@ -1,13 +1,15 @@
from __future__ import print_function
import argparse
import io
import json
import sys
from collections import OrderedDict
import simplejson
from six import text_type
def _get_pretty_format(contents, indent, sort_keys=True, top_keys=[]):
def _get_pretty_format(contents, indent, ensure_ascii=True, sort_keys=True, top_keys=[]):
def pairs_first(pairs):
before = [pair for pair in pairs if pair[0] in top_keys]
before = sorted(before, key=lambda x: top_keys.index(x[0]))
@ -15,39 +17,28 @@ def _get_pretty_format(contents, indent, sort_keys=True, top_keys=[]):
if sort_keys:
after = sorted(after, key=lambda x: x[0])
return OrderedDict(before + after)
return simplejson.dumps(
simplejson.loads(
contents,
object_pairs_hook=pairs_first,
),
indent=indent
) + "\n" # dumps don't end with a newline
json_pretty = json.dumps(
json.loads(contents, object_pairs_hook=pairs_first),
indent=indent,
ensure_ascii=ensure_ascii,
separators=(',', ': '), # Workaround for https://bugs.python.org/issue16333
)
# Ensure unicode (Py2) and add the newline that dumps does not end with.
return text_type(json_pretty) + '\n'
def _autofix(filename, new_contents):
print("Fixing file {0}".format(filename))
with open(filename, 'w') as f:
print('Fixing file {}'.format(filename))
with io.open(filename, 'w', encoding='UTF-8') as f:
f.write(new_contents)
def parse_indent(s):
# type: (str) -> str
def parse_num_to_int(s):
"""Convert string numbers to int, leaving strings as is."""
try:
int_indentation_spec = int(s)
return int(s)
except ValueError:
if not s.strip():
return s
else:
raise ValueError(
'Non-whitespace JSON indentation delimiter supplied. ',
)
else:
if int_indentation_spec >= 0:
return int_indentation_spec * ' '
else:
raise ValueError(
'Negative integer supplied to construct JSON indentation delimiter. ',
)
return s
def parse_topkeys(s):
@ -65,9 +56,19 @@ def pretty_format_json(argv=None):
)
parser.add_argument(
'--indent',
type=parse_indent,
default=' ',
help='String used as delimiter for one indentation level',
type=parse_num_to_int,
default='2',
help=(
'The number of indent spaces or a string to be used as delimiter'
' for indentation level e.g. 4 or "\t" (Default: 2)'
),
)
parser.add_argument(
'--no-ensure-ascii',
action='store_true',
dest='no_ensure_ascii',
default=False,
help='Do NOT convert non-ASCII characters to Unicode escape sequences (\\uXXXX)',
)
parser.add_argument(
'--no-sort-keys',
@ -90,27 +91,26 @@ def pretty_format_json(argv=None):
status = 0
for json_file in args.filenames:
with open(json_file) as f:
with io.open(json_file, encoding='UTF-8') as f:
contents = f.read()
try:
pretty_contents = _get_pretty_format(
contents, args.indent, sort_keys=not args.no_sort_keys,
top_keys=args.top_keys
contents, args.indent, ensure_ascii=not args.no_ensure_ascii,
sort_keys=not args.no_sort_keys, top_keys=args.top_keys,
)
if contents != pretty_contents:
print("File {0} is not pretty-formatted".format(json_file))
print('File {} is not pretty-formatted'.format(json_file))
if args.autofix:
_autofix(json_file, pretty_contents)
status = 1
except simplejson.JSONDecodeError:
except ValueError:
print(
"Input File {0} is not a valid JSON, consider using check-json"
.format(json_file)
'Input File {} is not a valid JSON, consider using check-json'
.format(json_file),
)
return 1

View file

@ -3,6 +3,10 @@ from __future__ import print_function
import argparse
PASS = 0
FAIL = 1
class Requirement(object):
def __init__(self):
@ -30,21 +34,25 @@ class Requirement(object):
def fix_requirements(f):
requirements = []
before = []
before = tuple(f)
after = []
for line in f:
before.append(line)
before_string = b''.join(before)
# If the most recent requirement object has a value, then it's time to
# start building the next requirement object.
# If the file is empty (i.e. only whitespace/newlines) exit early
if before_string.strip() == b'':
return PASS
for line in before:
# If the most recent requirement object has a value, then it's
# time to start building the next requirement object.
if not len(requirements) or requirements[-1].value is not None:
requirements.append(Requirement())
requirement = requirements[-1]
# If we see a newline before any requirements, then this is a top of
# file comment.
# If we see a newline before any requirements, then this is a
# top of file comment.
if len(requirements) == 1 and line.strip() == b'':
if len(requirement.comments) and requirement.comments[0].startswith(b'#'):
requirement.value = b'\n'
@ -55,21 +63,33 @@ def fix_requirements(f):
else:
requirement.value = line
for requirement in sorted(requirements):
for comment in requirement.comments:
after.append(comment)
after.append(requirement.value)
# if a file ends in a comment, preserve it at the end
if requirements[-1].value is None:
rest = requirements.pop().comments
else:
rest = []
# find and remove pkg-resources==0.0.0
# which is automatically added by broken pip package under Debian
requirements = [
req for req in requirements
if req.value != b'pkg-resources==0.0.0\n'
]
for requirement in sorted(requirements):
after.extend(requirement.comments)
after.append(requirement.value)
after.extend(rest)
before_string = b''.join(before)
after_string = b''.join(after)
if before_string == after_string:
return 0
return PASS
else:
f.seek(0)
f.write(after_string)
f.truncate()
return 1
return FAIL
def fix_requirements_txt(argv=None):
@ -77,14 +97,14 @@ def fix_requirements_txt(argv=None):
parser.add_argument('filenames', nargs='*', help='Filenames to fix')
args = parser.parse_args(argv)
retv = 0
retv = PASS
for arg in args.filenames:
with open(arg, 'rb+') as file_obj:
ret_for_file = fix_requirements(file_obj)
if ret_for_file:
print('Sorting {0}'.format(arg))
print('Sorting {}'.format(arg))
retv |= ret_for_file

View file

@ -0,0 +1,123 @@
#!/usr/bin/env python
"""Sort a simple YAML file, keeping blocks of comments and definitions
together.
We assume a strict subset of YAML that looks like:
# block of header comments
# here that should always
# be at the top of the file
# optional comments
# can go here
key: value
key: value
key: value
In other words, we don't sort deeper than the top layer, and might corrupt
complicated YAML files.
"""
from __future__ import print_function
import argparse
# Quote characters that may wrap a YAML key and are ignored when sorting.
QUOTES = ["'", '"']


def sort(lines):
    """Sort a YAML file in alphabetical order, keeping blocks together.

    The leading header comment block stays on top; the remaining blocks are
    ordered by their first key and separated by a single empty line.

    :param lines: array of strings (without newlines)
    :return: sorted array of strings
    """
    # Work on a copy: the parsing helpers consume the list they are given.
    remaining = list(lines)

    result = parse_block(remaining, header=True)
    ordered_blocks = sorted(parse_blocks(remaining), key=first_key)
    for chunk in ordered_blocks:
        # Separate blocks (and header from first block) with one blank line.
        if result:
            result.append('')
        result.extend(chunk)

    return result
def parse_block(lines, header=False):
    """Parse and return a single block, removing it from the start of `lines`.

    A header block ends at the first line that is not a comment; any other
    block ends at the first empty line. Parsing also ends when `lines` runs
    out.

    :param lines: list of lines (mutated in place)
    :param header: whether we are parsing a header block
    :return: list of lines that form the single block
    """
    size = 0
    for candidate in lines:
        if not candidate:
            break
        if header and not candidate.startswith('#'):
            break
        size += 1

    block_lines = lines[:size]
    # Consume the block from the caller's list, as callers rely on this.
    del lines[:size]
    return block_lines
def parse_blocks(lines):
    """Parse and return all blocks, consuming `lines` completely.

    Empty lines between blocks are discarded.

    :param lines: list of lines (mutated in place)
    :return: list of blocks, where each block is a list of lines
    """
    found = []
    while lines:
        if lines[0] != '':
            found.append(parse_block(lines))
            continue
        # Blank separator line: skip it.
        lines.pop(0)
    return found
def first_key(lines):
    """Returns a string representing the sort key of a block.

    The sort key is the first YAML key we encounter, ignoring comments, and
    stripping leading quotes. A block without any non-comment line gets the
    empty string as its key.

    >>> print(test)
    # some comment
    'foo': true
    >>> first_key(test)
    'foo'
    """
    for line in lines:
        if line.startswith('#'):
            continue
        if any(line.startswith(quote) for quote in QUOTES):
            return line[1:]
        return line
    # Comment-only (or empty) block: return a string rather than falling off
    # the end with None, which cannot be compared with str keys when sorting
    # on Python 3.
    return ''
def main(argv=None):
    """Sort each given simple YAML file in place.

    Trailing whitespace is stripped from every line before sorting; a file is
    rewritten (and reported) only when the sorted lines differ from the
    stripped original lines.

    :return: 1 if any file was rewritten, otherwise 0
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

    retval = 0

    for filename in args.filenames:
        with open(filename, 'r+') as f:
            lines = [line.rstrip() for line in f.readlines()]
            new_lines = sort(lines)

            if lines != new_lines:
                print("Fixing file `{filename}`".format(filename=filename))
                f.seek(0)
                f.write("\n".join(new_lines) + "\n")
                # Drop whatever is left over from the longer original content.
                f.truncate()
                retval = 1

    return retval


if __name__ == '__main__':
    # `exit` is a helper injected by the `site` module for interactive use;
    # raising SystemExit works without it and needs no extra import.
    raise SystemExit(main())

View file

@ -32,7 +32,7 @@ def get_line_offsets_by_line_no(src):
def fix_strings(filename):
contents = io.open(filename).read()
contents = io.open(filename, encoding='UTF-8').read()
line_offsets = get_line_offsets_by_line_no(contents)
# Basically a mutable string
@ -52,7 +52,7 @@ def fix_strings(filename):
new_contents = ''.join(splitcontents)
if contents != new_contents:
with io.open(filename, 'w') as write_handle:
with io.open(filename, 'w', encoding='UTF-8') as write_handle:
write_handle.write(new_contents)
return 1
else:
@ -69,7 +69,7 @@ def main(argv=None):
for filename in args.filenames:
return_value = fix_strings(filename)
if return_value != 0:
print('Fixing strings in {0}'.format(filename))
print('Fixing strings in {}'.format(filename))
retv |= return_value
return retv

View file

@ -11,12 +11,12 @@ def validate_files(argv=None):
parser.add_argument('filenames', nargs='*')
parser.add_argument(
'--django', default=False, action='store_true',
help='Use Django-style test naming pattern (test*.py)'
help='Use Django-style test naming pattern (test*.py)',
)
args = parser.parse_args(argv)
retcode = 0
test_name_pattern = 'test_.*.py' if args.django else '.*_test.py'
test_name_pattern = 'test.*.py' if args.django else '.*_test.py'
for filename in args.filenames:
base = basename(filename)
if (
@ -26,9 +26,9 @@ def validate_files(argv=None):
):
retcode = 1
print(
'{0} does not match pattern "{1}"'.format(
filename, test_name_pattern
)
'{} does not match pattern "{}"'.format(
filename, test_name_pattern,
),
)
return retcode

View file

@ -4,8 +4,6 @@ import argparse
import os
import sys
from pre_commit_hooks.util import cmd_output
def _fix_file(filename, is_markdown):
with open(filename, mode='rb') as file_processed:
@ -21,14 +19,19 @@ def _fix_file(filename, is_markdown):
def _process_line(line, is_markdown):
if line[-2:] == b'\r\n':
eol = b'\r\n'
elif line[-1:] == b'\n':
eol = b'\n'
else:
eol = b''
# preserve trailing two-space for non-blank lines in markdown files
eol = b'\r\n' if line[-2:] == b'\r\n' else b'\n'
if is_markdown and (not line.isspace()) and line.endswith(b' ' + eol):
return line.rstrip() + b' ' + eol
return line.rstrip() + eol
def fix_trailing_whitespace(argv=None):
def main(argv=None):
parser = argparse.ArgumentParser()
parser.add_argument(
'--no-markdown-linebreak-ext',
@ -36,7 +39,7 @@ def fix_trailing_whitespace(argv=None):
const=[],
default=argparse.SUPPRESS,
dest='markdown_linebreak_ext',
help='Do not preserve linebreak spaces in Markdown'
help='Do not preserve linebreak spaces in Markdown',
)
parser.add_argument(
'--markdown-linebreak-ext',
@ -45,15 +48,11 @@ def fix_trailing_whitespace(argv=None):
default=['md,markdown'],
metavar='*|EXT[,EXT,...]',
nargs='?',
help='Markdown extensions (or *) for linebreak spaces'
help='Markdown extensions (or *) for linebreak spaces',
)
parser.add_argument('filenames', nargs='*', help='Filenames to fix')
args = parser.parse_args(argv)
bad_whitespace_files = cmd_output(
'grep', '-l', '[[:space:]]$', *args.filenames, retcode=None
).strip().splitlines()
md_args = args.markdown_linebreak_ext
if '' in md_args:
parser.error('--markdown-linebreak-ext requires a non-empty argument')
@ -67,20 +66,20 @@ def fix_trailing_whitespace(argv=None):
for ext in md_exts:
if any(c in ext[1:] for c in r'./\:'):
parser.error(
"bad --markdown-linebreak-ext extension '{0}' (has . / \\ :)\n"
"bad --markdown-linebreak-ext extension '{}' (has . / \\ :)\n"
" (probably filename; use '--markdown-linebreak-ext=EXT')"
.format(ext)
.format(ext),
)
return_code = 0
for bad_whitespace_file in bad_whitespace_files:
_, extension = os.path.splitext(bad_whitespace_file.lower())
for filename in args.filenames:
_, extension = os.path.splitext(filename.lower())
md = all_markdown or extension in md_exts
if _fix_file(bad_whitespace_file, md):
print('Fixing {}'.format(bad_whitespace_file))
if _fix_file(filename, md):
print('Fixing {}'.format(filename))
return_code = 1
return return_code
if __name__ == '__main__':
sys.exit(fix_trailing_whitespace())
sys.exit(main())