diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
index c0d811c..beef7cb 100644
--- a/.pre-commit-hooks.yaml
+++ b/.pre-commit-hooks.yaml
@@ -59,6 +59,12 @@
     entry: pretty-format-json
     language: python
     types: [json]
+-   id: check-lfs-attributes
+    name: check for files with mismatching LFS attribute
+    description: ensures LFS pointers are matching .gitattributes
+    entry: check-lfs-attributes
+    language: python
+    stages: [commit, push, manual]
 -   id: check-merge-conflict
     name: check for merge conflicts
     description: checks for files that contain merge conflict strings.
diff --git a/README.md b/README.md
index 9ae7ec5..5c03467 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,12 @@ Checks that non-binary executables have a proper shebang.
 #### `check-json`
 Attempts to load all json files to verify syntax.
 
+#### `check-lfs-attributes`
+Check for files with mismatching LFS attribute.
+  - Fails if a file matches an LFS rule in `.gitattributes` but was added as a
+    regular object.
+  - Fails if a file was added as an LFS object but matches no LFS rule.
+
 #### `check-merge-conflict`
 Check for files that contain merge conflict strings.
   - `--assume-in-merge` - Allows running the hook when there is no ongoing merge operation
diff --git a/pre_commit_hooks/check_lfs_attributes.py b/pre_commit_hooks/check_lfs_attributes.py
new file mode 100644
index 0000000..b629656
--- /dev/null
+++ b/pre_commit_hooks/check_lfs_attributes.py
@@ -0,0 +1,96 @@
+from __future__ import annotations
+
+import argparse
+import subprocess
+from typing import Sequence
+
+from pre_commit_hooks.util import zsplit
+
+
+def select_lfs_attr_files(filenames: set[str]) -> set[str]:  # pragma: no cover (lfs)
+    """Select files tracked by git-lfs from the set.
+
+    Queries `git check-attr` for the `filter` attribute of each filename and
+    returns those whose value is `lfs`.  An empty set is returned as-is
+    without running git.  Raises `subprocess.CalledProcessError` if the git
+    command fails.
+    """
+    if not filenames:
+        return filenames
+
+    check_attr = subprocess.run(
+        ('git', 'check-attr', 'filter', '-z', '--stdin'),
+        stdout=subprocess.PIPE,
+        stderr=subprocess.DEVNULL,
+        encoding='utf-8',
+        check=True,
+        input='\0'.join(filenames),
+    )
+    fields = zsplit(check_attr.stdout)
+    # fields are flat (path, attribute, value) triplets:
+    # ['b.txt', 'filter', 'unspecified', 'a.bin', 'filter', 'lfs']
+    # Pairing the path and value slices cannot index past the end, even if
+    # the output were ever incomplete.
+    return {
+        path
+        for path, value in zip(fields[::3], fields[2::3])
+        if value == 'lfs'
+    }
+
+
+def select_lfs_tree_files(filenames: set[str]) -> set[str]:  # pragma: no cover
+    """Select LFS files found in the tree.
+
+    Returns the given filenames that `git lfs ls-files -n` lists.  An empty
+    set is returned as-is without running git.  Raises
+    `subprocess.CalledProcessError` if the git command fails.
+    """
+    if not filenames:
+        return filenames
+
+    output = subprocess.check_output(
+        ('git', 'lfs', 'ls-files', '-n'), text=True,
+    )
+    # The listing is expected to hold one name per line: splitting on any
+    # whitespace would tear apart filenames that contain spaces.
+    lfs_files = set(output.splitlines())
+
+    return lfs_files & set(filenames)
+
+
+def check_lfs_attributes(filenames: Sequence[str]) -> int:
+    """Print every file whose LFS attribute and LFS storage disagree.
+
+    Returns 1 if at least one mismatch was printed, otherwise 0.
+    """
+    unique_filenames = set(filenames)
+
+    lfs_attr_files = select_lfs_attr_files(unique_filenames)
+    lfs_tree_files = select_lfs_tree_files(unique_filenames)
+
+    retv = 0
+    # Sets iterate in arbitrary order; sort so the report is reproducible.
+    for filename in sorted(lfs_attr_files - lfs_tree_files):
+        print(f'{filename} is tracked by LFS but added as a regular object')
+        retv = 1
+    for filename in sorted(lfs_tree_files - lfs_attr_files):
+        print(f'{filename} is added as LFS object but not tracked')
+        retv = 1
+
+    return retv
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    """Parse the command line and return the hook's exit status."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'filenames', nargs='*',
+        help='Filenames pre-commit believes are changed.',
+    )
+    args = parser.parse_args(argv)
+
+    return check_lfs_attributes(args.filenames)
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())
diff --git a/setup.cfg b/setup.cfg
index 6a4c459..d0c2029 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -38,6 +38,7 @@ console_scripts =
     check-docstring-first = pre_commit_hooks.check_docstring_first:main
     check-executables-have-shebangs = pre_commit_hooks.check_executables_have_shebangs:main
     check-json = pre_commit_hooks.check_json:main
+    check-lfs-attributes = pre_commit_hooks.check_lfs_attributes:main
     check-merge-conflict = pre_commit_hooks.check_merge_conflict:main
     check-shebang-scripts-are-executable = pre_commit_hooks.check_shebang_scripts_are_executable:main
     check-symlinks = pre_commit_hooks.check_symlinks:main
diff --git a/tests/check_lfs_attributes_test.py b/tests/check_lfs_attributes_test.py
new file mode 100644
index 0000000..e98bf80
--- /dev/null
+++ b/tests/check_lfs_attributes_test.py
@@ -0,0 +1,85 @@
+from __future__ import annotations
+
+import shutil
+
+import pytest
+
+from pre_commit_hooks.check_lfs_attributes import check_lfs_attributes, select_lfs_attr_files, \
+    select_lfs_tree_files
+from pre_commit_hooks.check_lfs_attributes import main
+from pre_commit_hooks.util import cmd_output
+from testing.util import git_commit
+
+
+@pytest.fixture
+def temp_git_dir_as_cwd(temp_git_dir):
+    with temp_git_dir.as_cwd():
+        yield temp_git_dir
+
+
+def has_gitlfs():
+    return shutil.which('git-lfs') is not None
+
+
+xfailif_no_gitlfs = pytest.mark.xfail(
+    not has_gitlfs(), reason='This test requires git-lfs',
+)
+
+
+@xfailif_no_gitlfs
+def test_select_lfs_attr_files(temp_git_dir_as_cwd):  # pragma: no cover
+    cmd_output('git', 'lfs', 'install', '--local')
+    cmd_output('git', 'lfs', 'track', '*.bin')
+    assert select_lfs_attr_files(set()) == set()
+    assert select_lfs_attr_files({'b.txt'}) == set()
+    assert select_lfs_attr_files({'a.bin', 'b.txt'}) == {'a.bin'}
+
+
+@xfailif_no_gitlfs
+def test_select_lfs_tree_files(temp_git_dir_as_cwd):  # pragma: no cover
+    cmd_output('git', 'lfs', 'install', '--local')
+    cmd_output('git', 'lfs', 'track', '*.bin')
+    temp_git_dir_as_cwd.join('a.bin').write('a')
+    temp_git_dir_as_cwd.join('b.bin').write('b')
+    cmd_output('git', 'add', 'a.bin')
+    assert select_lfs_tree_files(set()) == set()
+    assert select_lfs_tree_files({'b.bin'}) == set()
+    assert select_lfs_tree_files({'a.bin', 'b.bin'}) == {'a.bin'}
+
+
+@xfailif_no_gitlfs
+def test_nothing_added(temp_git_dir):
+    with temp_git_dir.as_cwd():
+        assert check_lfs_attributes(['a.bin']) == 0
+
+
+@xfailif_no_gitlfs
+def test_add_as_lfs_object(temp_git_dir_as_cwd):  # pragma: no cover
+    temp_git_dir_as_cwd.join('a.bin').write('a')
+    cmd_output('git', 'lfs', 'install', '--local')
+    cmd_output('git', 'lfs', 'track', '*.bin')
+    cmd_output('git', 'add', 'a.bin')
+    assert main(('a.bin',)) == 0
+
+
+@xfailif_no_gitlfs
+def test_regular_object_but_tracked_by_lfs(temp_git_dir_as_cwd, capsys):  # pragma: no cover
+    temp_git_dir_as_cwd.join('a.bin').write('a')
+    cmd_output('git', 'lfs', 'install', '--local')
+    cmd_output('git', 'add', 'a.bin')
+    cmd_output('git', 'lfs', 'track', '*.bin')
+    assert main(('a.bin',)) == 1
+    out, _ = capsys.readouterr()
+    assert 'a.bin is tracked by LFS but added as a regular object' in out
+
+
+@xfailif_no_gitlfs
+def test_lfs_object_but_not_tracked(temp_git_dir_as_cwd, capsys):  # pragma: no cover
+    temp_git_dir_as_cwd.join('a.bin').write('a')
+    cmd_output('git', 'lfs', 'install', '--local')
+    cmd_output('git', 'lfs', 'track', '*.bin')
+    cmd_output('git', 'add', 'a.bin')
+    cmd_output('git', 'lfs', 'untrack', '*.bin')
+    assert main(('a.bin',)) == 1
+    out, _ = capsys.readouterr()
+    assert 'a.bin is added as LFS object but not tracked' in out