Fix check-added-large-files --enforce-all to correctly consider all git-lfs files.

`git lfs status` only outputs status for files that are pending some git-lfs related operation.
For usage with --enforce-all, we need the list of all files that are tracked, which can be
achieved by `git lfs ls-files`.

Fixes: https://github.com/pre-commit/pre-commit-hooks/issues/560
This commit is contained in:
Alex Martani 2021-10-21 15:29:54 -07:00 committed by Anthony Sottile
parent b8d76787ff
commit 03a65ca357
2 changed files with 35 additions and 11 deletions

View file

@ -1,24 +1,33 @@
import argparse import argparse
import json
import math import math
import os import os
import subprocess
from typing import Optional from typing import Optional
from typing import Sequence from typing import Sequence
from typing import Set from typing import Set
from pre_commit_hooks.util import added_files from pre_commit_hooks.util import added_files
from pre_commit_hooks.util import CalledProcessError from pre_commit_hooks.util import zsplit
from pre_commit_hooks.util import cmd_output
def lfs_files() -> Set[str]: def filter_lfs_files(filenames: Set[str]) -> None: # pragma: no cover (lfs)
try: """Remove files tracked by git-lfs from the set."""
# Introduced in git-lfs 2.2.0, first working in 2.2.1 if not filenames:
lfs_ret = cmd_output('git', 'lfs', 'status', '--json') return
except CalledProcessError: # pragma: no cover (with git-lfs)
lfs_ret = '{"files":{}}'
return set(json.loads(lfs_ret)['files']) check_attr = subprocess.run(
('git', 'check-attr', 'filter', '-z', '--stdin'),
stdout=subprocess.PIPE,
stderr=subprocess.DEVNULL,
encoding='utf-8',
check=True,
input='\0'.join(filenames),
)
stdout = zsplit(check_attr.stdout)
for i in range(0, len(stdout), 3):
filename, filter_tag = stdout[i], stdout[i + 2]
if filter_tag == 'lfs':
filenames.remove(filename)
def find_large_added_files( def find_large_added_files(
@ -30,7 +39,9 @@ def find_large_added_files(
# Find all added files that are also in the list of files pre-commit tells # Find all added files that are also in the list of files pre-commit tells
# us about # us about
retv = 0 retv = 0
filenames_filtered = set(filenames) - lfs_files() filenames_filtered = set(filenames)
filter_lfs_files(filenames_filtered)
if not enforce_all: if not enforce_all:
filenames_filtered &= added_files() filenames_filtered &= added_files()

View file

@ -121,3 +121,16 @@ def test_enforce_allows_gitlfs(temp_git_dir, monkeypatch): # pragma: no cover
cmd_output('git', 'add', '--', '.') cmd_output('git', 'add', '--', '.')
# With --enforce-all large files on git lfs should succeed # With --enforce-all large files on git lfs should succeed
assert main(('--enforce-all', '--maxkb', '9', 'f.py')) == 0 assert main(('--enforce-all', '--maxkb', '9', 'f.py')) == 0
@xfailif_no_gitlfs  # pragma: no cover
def test_enforce_allows_gitlfs_after_commit(temp_git_dir, monkeypatch):
    """Check that an already-committed git-lfs file passes ``--enforce-all``.

    The file is tracked with git-lfs, staged and committed before the hook
    is invoked, so the hook runs against a file that is no longer part of a
    pending change.
    """
    argv = ('--enforce-all', '--maxkb', '9', 'f.py')
    with temp_git_dir.as_cwd():
        # NOTE(review): presumably HOME is redirected so `git lfs install`
        # does not touch the real user configuration -- confirm.
        monkeypatch.setenv('HOME', str(temp_git_dir))
        cmd_output('git', 'lfs', 'install')
        large_file = temp_git_dir.join('f.py')
        # 10000 bytes is more than the 9 KB limit passed via --maxkb.
        large_file.write('a' * 10000)
        cmd_output('git', 'lfs', 'track', 'f.py')
        cmd_output('git', 'add', '--', '.')
        git_commit('-am', 'foo')
        # With --enforce-all large files on git lfs should succeed
        exit_code = main(argv)
        assert exit_code == 0