Implement attribute consistency check for LFS files

This commit is contained in:
Alexey Klimkin 2024-02-29 11:37:59 -08:00
parent aae27b8a3e
commit 622b4e12c5
5 changed files with 167 additions and 0 deletions

View file

@ -59,6 +59,12 @@
entry: pretty-format-json
language: python
types: [json]
- id: check-lfs-attributes
name: check for files with mismatching LFS attribute
description: ensures LFS pointers match the attributes set in .gitattributes.
entry: check-lfs-attributes
language: python
stages: [commit, push, manual]
- id: check-merge-conflict
name: check for merge conflicts
description: checks for files that contain merge conflict strings.

View file

@ -54,6 +54,12 @@ Checks that non-binary executables have a proper shebang.
#### `check-json`
Attempts to load all json files to verify syntax.
#### `check-lfs-attributes`
Check for files with mismatching LFS attribute.
- Reports files that `.gitattributes` marks as tracked by LFS but that were
added as regular objects.
- Reports files that were added as LFS objects but are not tracked by LFS.
#### `check-merge-conflict`
Check for files that contain merge conflict strings.
- `--assume-in-merge` - Allows running the hook when there is no ongoing merge operation

View file

@ -0,0 +1,69 @@
from __future__ import annotations
import argparse
import subprocess
from typing import Sequence
from pre_commit_hooks.util import zsplit
def select_lfs_attr_files(filenames: set[str]) -> set[str]:  # pragma: no cover (lfs)
    """Return the subset of ``filenames`` whose ``filter`` attribute is ``lfs``.

    The attribute is queried with ``git check-attr filter -z --stdin``; the
    candidate names are fed to it NUL-separated on stdin.  An empty input is
    returned unchanged without spawning git.
    """
    if not filenames:
        return filenames

    proc = subprocess.run(
        ('git', 'check-attr', 'filter', '-z', '--stdin'),
        input='\0'.join(filenames),
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        encoding='utf-8',
        check=True,
    )
    fields = zsplit(proc.stdout)

    # The output is a flat sequence of (path, attribute, value) triplets, e.g.
    # ['b.txt', 'filter', 'unspecified', 'a.bin', 'filter', 'lfs'].
    tracked = set()
    for start in range(0, len(fields), 3):
        if fields[start + 2] == 'lfs':
            tracked.add(fields[start])
    return tracked
def select_lfs_tree_files(filenames: set[str]) -> set[str]:  # pragma: no cover
    """Select the LFS files found in the tree.

    Runs ``git lfs ls-files -n`` and returns the names from ``filenames``
    that also appear in its output.  An empty input is returned unchanged
    without spawning git.

    Raises ``subprocess.CalledProcessError`` if the git command exits with a
    non-zero status.
    """
    if not filenames:
        return filenames
    output = subprocess.check_output(('git', 'lfs', 'ls-files', '-n'), text=True)
    # Split on newlines only: a bare ``split()`` would also break paths that
    # contain spaces into several bogus names.
    # NOTE(review): assumes the command prints one unquoted path per line.
    lfs_files = {line for line in output.split('\n') if line}
    return lfs_files.intersection(filenames)
def check_lfs_attributes(filenames: Sequence[str]) -> int:
    """Report files whose LFS attribute and LFS storage state disagree.

    ``filenames`` is de-duplicated, then compared two ways: the files whose
    ``filter`` attribute is ``lfs`` (``select_lfs_attr_files``) against the
    files listed as LFS objects (``select_lfs_tree_files``).  One line is
    printed per mismatching file.

    Returns 1 if any mismatch was reported, otherwise 0.
    """
    unique_filenames = set(filenames)
    lfs_attr_files = select_lfs_attr_files(unique_filenames)
    lfs_tree_files = select_lfs_tree_files(unique_filenames)

    retv = 0
    # Sets iterate in an arbitrary order; sort so the report is stable
    # from one run to the next.
    for filename in sorted(lfs_attr_files - lfs_tree_files):
        print(f'{filename} is tracked by LFS but added as a regular object')
        retv = 1
    for filename in sorted(lfs_tree_files - lfs_attr_files):
        print(f'{filename} is added as LFS object but not tracked')
        retv = 1
    return retv
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point.

    Parses the positional file names from ``argv`` (argparse falls back to
    ``sys.argv`` when it is ``None``) and returns the result of
    ``check_lfs_attributes`` for them.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        'filenames',
        nargs='*',
        help='Filenames pre-commit believes are changed.',
    )
    parsed = arg_parser.parse_args(argv)
    return check_lfs_attributes(parsed.filenames)
# When run as a script, exit with the status code returned by main().
if __name__ == '__main__':
    raise SystemExit(main())

View file

@ -38,6 +38,7 @@ console_scripts =
check-docstring-first = pre_commit_hooks.check_docstring_first:main
check-executables-have-shebangs = pre_commit_hooks.check_executables_have_shebangs:main
check-json = pre_commit_hooks.check_json:main
check-lfs-attributes = pre_commit_hooks.check_lfs_attributes:main
check-merge-conflict = pre_commit_hooks.check_merge_conflict:main
check-shebang-scripts-are-executable = pre_commit_hooks.check_shebang_scripts_are_executable:main
check-symlinks = pre_commit_hooks.check_symlinks:main

View file

@ -0,0 +1,85 @@
from __future__ import annotations
import shutil
import pytest
from pre_commit_hooks.check_lfs_attributes import check_lfs_attributes, select_lfs_attr_files, \
select_lfs_tree_files
from pre_commit_hooks.check_lfs_attributes import main
from pre_commit_hooks.util import cmd_output
from testing.util import git_commit
@pytest.fixture
def temp_git_dir_as_cwd(temp_git_dir):
    """Yield ``temp_git_dir`` while it is the current working directory."""
    cwd_switch = temp_git_dir.as_cwd()
    with cwd_switch:
        yield temp_git_dir
def has_gitlfs():
    """Return True when a ``git-lfs`` executable can be found on PATH."""
    lfs_executable = shutil.which('git-lfs')
    return lfs_executable is not None
# Marker for tests that shell out to git-lfs: they are expected to fail
# when the git-lfs executable is not installed.
xfailif_no_gitlfs = pytest.mark.xfail(
    not has_gitlfs(),
    reason='This test requires git-lfs',
)
@xfailif_no_gitlfs
def test_select_lfs_attr_files(temp_git_dir_as_cwd):  # pragma: no cover
    """Only names matching the tracked ``*.bin`` pattern are selected."""
    cmd_output('git', 'lfs', 'install', '--local')
    cmd_output('git', 'lfs', 'track', '*.bin')

    # (candidate names, expected selection)
    cases = (
        (set(), set()),
        ({'b.txt'}, set()),
        ({'a.bin', 'b.txt'}, {'a.bin'}),
    )
    for candidates, expected in cases:
        assert select_lfs_attr_files(candidates) == expected
@xfailif_no_gitlfs
def test_select_lfs_tree_files(temp_git_dir_as_cwd):  # pragma: no cover
    """Of two ``*.bin`` files on disk, only the one that was added is selected."""
    cmd_output('git', 'lfs', 'install', '--local')
    cmd_output('git', 'lfs', 'track', '*.bin')
    for name, content in (('a.bin', 'a'), ('b.bin', 'b')):
        temp_git_dir_as_cwd.join(name).write(content)
    # Only a.bin is added; b.bin exists on disk but is never staged.
    cmd_output('git', 'add', 'a.bin')

    # (candidate names, expected selection)
    cases = (
        (set(), set()),
        ({'b.bin'}, set()),
        ({'a.bin', 'b.bin'}, {'a.bin'}),
    )
    for candidates, expected in cases:
        assert select_lfs_tree_files(candidates) == expected
@xfailif_no_gitlfs
def test_nothing_added(temp_git_dir_as_cwd):
    """A name that is neither tracked nor added produces no mismatch."""
    # Use the shared cwd fixture like the other tests in this module,
    # instead of entering the directory by hand.
    assert check_lfs_attributes(['a.bin']) == 0
@xfailif_no_gitlfs
def test_add_as_lfs_object(temp_git_dir_as_cwd):  # pragma: no cover
    """A file added after its pattern is tracked passes the check."""
    binary_file = temp_git_dir_as_cwd.join('a.bin')
    binary_file.write('a')
    cmd_output('git', 'lfs', 'install', '--local')
    cmd_output('git', 'lfs', 'track', '*.bin')
    cmd_output('git', 'add', 'a.bin')

    exit_code = main(('a.bin',))
    assert exit_code == 0
@xfailif_no_gitlfs
def test_regular_object_but_tracked_by_lfs(temp_git_dir_as_cwd, capsys):  # pragma: no cover
    """A file added before its pattern is tracked is reported."""
    binary_file = temp_git_dir_as_cwd.join('a.bin')
    binary_file.write('a')
    cmd_output('git', 'lfs', 'install', '--local')
    # The file is added first and the pattern is tracked only afterwards.
    cmd_output('git', 'add', 'a.bin')
    cmd_output('git', 'lfs', 'track', '*.bin')

    exit_code = main(('a.bin',))
    assert exit_code == 1
    captured = capsys.readouterr()
    assert 'a.bin is tracked by LFS but added as a regular object' in captured.out
@xfailif_no_gitlfs
def test_lfs_object_but_not_tracked(temp_git_dir_as_cwd, capsys):  # pragma: no cover
    """A file added while tracked, whose pattern is then untracked, is reported."""
    binary_file = temp_git_dir_as_cwd.join('a.bin')
    binary_file.write('a')
    cmd_output('git', 'lfs', 'install', '--local')
    cmd_output('git', 'lfs', 'track', '*.bin')
    cmd_output('git', 'add', 'a.bin')
    # Untrack the pattern after the file has already been added.
    cmd_output('git', 'lfs', 'untrack', '*.bin')

    exit_code = main(('a.bin',))
    assert exit_code == 1
    captured = capsys.readouterr()
    assert 'a.bin is added as LFS object but not tracked' in captured.out