From 012bb0691f4e1615c11be5860d9e427523d42985 Mon Sep 17 00:00:00 2001 From: Marcus Shawcroft Date: Wed, 16 Sep 2020 06:26:11 +0100 Subject: [PATCH] Fix #518, provide --enforce-all option to check_added_large_files The --enforce-all option, when provided, ensures that all files passed on the command line are checked against the size limit. Default behaviour remains unchanged. --- README.md | 3 +++ pre_commit_hooks/check_added_large_files.py | 25 +++++++++++++++++---- tests/check_added_large_files_test.py | 23 +++++++++++++++++++ 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3552721..a6b62ab 100644 --- a/README.md +++ b/README.md @@ -26,8 +26,11 @@ Add this to your `.pre-commit-config.yaml` #### `check-added-large-files` Prevent giant files from being committed. - Specify what is "too large" with `args: ['--maxkb=123']` (default=500kB). + - Limits checked files to those indicated as staged for addition by git. - If `git-lfs` is installed, lfs files will be skipped (requires `git-lfs>=2.2.1`) + - `--enforce-all` - Check all listed files, not just those staged for + addition. #### `check-ast` Simply check whether files parse as valid python. 
diff --git a/pre_commit_hooks/check_added_large_files.py b/pre_commit_hooks/check_added_large_files.py index 91f5754..cb646d7 100644 --- a/pre_commit_hooks/check_added_large_files.py +++ b/pre_commit_hooks/check_added_large_files.py @@ -21,11 +21,20 @@ def lfs_files() -> Set[str]: return set(json.loads(lfs_ret)['files']) -def find_large_added_files(filenames: Sequence[str], maxkb: int) -> int: +def find_large_added_files( + filenames: Sequence[str], + maxkb: int, + *, + enforce_all: bool = False, +) -> int: # Find all added files that are also in the list of files pre-commit tells # us about retv = 0 - for filename in (added_files() & set(filenames)) - lfs_files(): + filenames_filtered = set(filenames) - lfs_files() + if not enforce_all: + filenames_filtered &= added_files() + + for filename in filenames_filtered: kb = int(math.ceil(os.stat(filename).st_size / 1024)) if kb > maxkb: print(f'{filename} ({kb} KB) exceeds {maxkb} KB.') @@ -40,13 +49,21 @@ def main(argv: Optional[Sequence[str]] = None) -> int: 'filenames', nargs='*', help='Filenames pre-commit believes are changed.', ) + parser.add_argument( + '--enforce-all', action='store_true', + help='Enforce all files are checked, not just staged files.', + ) parser.add_argument( '--maxkb', type=int, default=500, help='Maxmimum allowable KB for added files', ) - args = parser.parse_args(argv) - return find_large_added_files(args.filenames, args.maxkb) + + return find_large_added_files( + args.filenames, + args.maxkb, + enforce_all=args.enforce_all, + ) if __name__ == '__main__': diff --git a/tests/check_added_large_files_test.py b/tests/check_added_large_files_test.py index 40ffd24..ff53b05 100644 --- a/tests/check_added_large_files_test.py +++ b/tests/check_added_large_files_test.py @@ -40,6 +40,17 @@ def test_add_something_giant(temp_git_dir): assert find_large_added_files(['f.py'], 10) == 0 +def test_enforce_all(temp_git_dir): + with temp_git_dir.as_cwd(): + temp_git_dir.join('f.py').write('a' * 10000) + + # 
Should fail, when not staged with enforce_all + assert find_large_added_files(['f.py'], 0, enforce_all=True) == 1 + + # Should pass, when not staged without enforce_all + assert find_large_added_files(['f.py'], 0, enforce_all=False) == 0 + + def test_added_file_not_in_pre_commits_list(temp_git_dir): with temp_git_dir.as_cwd(): temp_git_dir.join('f.py').write("print('hello world')") @@ -97,3 +108,15 @@ def test_moves_with_gitlfs(temp_git_dir, monkeypatch): # pragma: no cover # Now move it and make sure the hook still succeeds cmd_output('git', 'mv', 'a.bin', 'b.bin') assert main(('--maxkb', '9', 'b.bin')) == 0 + + +@xfailif_no_gitlfs +def test_enforce_allows_gitlfs(temp_git_dir, monkeypatch): # pragma: no cover + with temp_git_dir.as_cwd(): + monkeypatch.setenv('HOME', str(temp_git_dir)) + cmd_output('git', 'lfs', 'install') + temp_git_dir.join('f.py').write('a' * 10000) + cmd_output('git', 'lfs', 'track', 'f.py') + cmd_output('git', 'add', '--', '.') + # With --enforce-all large files on git lfs should succeed + assert main(('--enforce-all', '--maxkb', '9', 'f.py')) == 0