diff --git a/README.md b/README.md
index 98e3ac7..8d17026 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ Add this to your `.pre-commit-config.yaml`
 ### Hooks available
 
 - `autopep8-wrapper` - Runs autopep8 over python source.
+- `check-added-large-files` - Prevent giant files from being committed.
 - `check-json` - Attempts to load all json files to verify syntax.
 - `check-yaml` - Attempts to load all yaml files to verify syntax.
 - `debug-statements` - Check for pdb / ipdb / pudb statements in code.
diff --git a/hooks.yaml b/hooks.yaml
index 5ef125c..3eb0e94 100644
--- a/hooks.yaml
+++ b/hooks.yaml
@@ -5,6 +5,13 @@
     language: python
     files: \.py$
     args: [-i]
+-   id: check-added-large-files
+    name: Check for added large files
+    language: python
+    entry: check-added-large-files
+    description: Prevent giant files from being committed
+    # Match all files
+    files: ''
 -   id: check-json
     name: Check JSON
     description: This hook checks json files for parseable syntax.
diff --git a/pre_commit_hooks/check_added_large_files.py b/pre_commit_hooks/check_added_large_files.py
new file mode 100644
index 0000000..999e9c1
--- /dev/null
+++ b/pre_commit_hooks/check_added_large_files.py
@@ -0,0 +1,64 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+import math
+import os
+import sys
+
+from plumbum import local
+
+
+def find_large_added_files(filenames, maxkb):
+    """Report newly added files whose size exceeds ``maxkb`` kilobytes.
+
+    Only files that are both staged as additions in git and present in
+    ``filenames`` are checked.  Sizes are rounded up to whole kilobytes.
+
+    :param filenames: filenames pre-commit passed to the hook
+    :param int maxkb: largest allowed size in KB (inclusive)
+    :returns: 1 if any checked file is too large, otherwise 0
+    """
+    # Find all added files that are also in the list of files pre-commit tells
+    # us about
+    filenames = set(local['git'](
+        'diff', '--staged', '--name-only', '--diff-filter', 'A',
+    ).splitlines()) & set(filenames)
+
+    retv = 0
+    # Sorted so that the report order does not depend on set iteration order
+    for filename in sorted(filenames):
+        kb = int(math.ceil(os.stat(filename).st_size / 1024))
+        if kb > maxkb:
+            print('{0} ({1} KB) exceeds {2} KB.'.format(filename, kb, maxkb))
+            retv = 1
+
+    return retv
+
+
+def main(argv=None):
+    """Parse command line arguments and run the large file check.
+
+    :param argv: argument list; defaults to ``sys.argv[1:]`` when None
+    :returns: the status returned by ``find_large_added_files``
+    """
+    argv = argv if argv is not None else sys.argv[1:]
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'filenames', nargs='*',
+        help='Filenames pre-commit believes are changed.',
+    )
+    parser.add_argument(
+        '--maxkb', type=int, default=500,
+        help='Maximum allowable KB for added files',
+    )
+
+    args = parser.parse_args(argv)
+    return find_large_added_files(args.filenames, args.maxkb)
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/setup.py b/setup.py
index 8905945..3681e65 100644
--- a/setup.py
+++ b/setup.py
@@ -38,6 +38,7 @@ setup(
         'console_scripts': [
             'autopep8-wrapper = pre_commit_hooks.autopep8_wrapper:main',
+            'check-added-large-files = pre_commit_hooks.check_added_large_files:main',
             'check-json = pre_commit_hooks.check_json:check_json',
             'check-yaml = pre_commit_hooks.check_yaml:check_yaml',
             'debug-statement-hook = pre_commit_hooks.debug_statement_hook:debug_statement_hook',
             'end-of-file-fixer = pre_commit_hooks.end_of_file_fixer:end_of_file_fixer',
diff --git a/tests/check_added_large_files_test.py b/tests/check_added_large_files_test.py
new file mode 100644
index 0000000..b0ae4cd
--- /dev/null
+++ b/tests/check_added_large_files_test.py
@@ -0,0 +1,79 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import io
+
+import pytest
+from plumbum import local
+
+from pre_commit_hooks.check_added_large_files import find_large_added_files
+from pre_commit_hooks.check_added_large_files import main
+
+
+@pytest.fixture
+def temp_git_dir(tmpdir):
+    git_dir = tmpdir.join('gits').strpath
+    local['git']('init', git_dir)
+    return git_dir
+
+
+def write_file(filename, contents):
+    """Hax because coveragepy chokes on nested context managers."""
+    with io.open(filename, 'w') as file_obj:
+        file_obj.write(contents)
+
+
+def test_nothing_added(temp_git_dir):
+    with local.cwd(temp_git_dir):
+        assert find_large_added_files(['f.py'], 0) == 0
+
+
+def test_adding_something(temp_git_dir):
+    with local.cwd(temp_git_dir):
+        write_file('f.py', "print('hello world')")
+        local['git']('add', 'f.py')
+
+        # Should fail with max size of 0
+        assert find_large_added_files(['f.py'], 0) == 1
+
+
+def test_add_something_giant(temp_git_dir):
+    with local.cwd(temp_git_dir):
+        write_file('f.py', 'a' * 10000)
+
+        # Should not fail when not added
+        assert find_large_added_files(['f.py'], 0) == 0
+
+        local['git']('add', 'f.py')
+
+        # Should fail with strict bound
+        assert find_large_added_files(['f.py'], 0) == 1
+
+        # Should also fail with actual bound
+        assert find_large_added_files(['f.py'], 9) == 1
+
+        # Should pass with higher bound
+        assert find_large_added_files(['f.py'], 10) == 0
+
+
+def test_added_file_not_in_pre_commits_list(temp_git_dir):
+    with local.cwd(temp_git_dir):
+        write_file('f.py', "print('hello world')")
+        local['git']('add', 'f.py')
+
+        # Should pass even with a size of 0
+        assert find_large_added_files(['g.py'], 0) == 0
+
+
+def test_integration(temp_git_dir):
+    with local.cwd(temp_git_dir):
+        assert main(argv=[]) == 0
+
+        write_file('f.py', 'a' * 10000)
+        local['git']('add', 'f.py')
+
+        # Should not fail with default
+        assert main(argv=['f.py']) == 0
+
+        # Should fail with --maxkb
+        assert main(argv=['--maxkb', '9', 'f.py']) == 1