Merge pull request #511 from m-khvoinitsky/master

New hook 'destroyed-symlinks'
This commit is contained in:
Anthony Sottile 2020-11-18 11:48:14 -08:00 committed by GitHub
commit 4b863f1272
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 204 additions and 18 deletions

View file

@ -100,6 +100,12 @@
entry: debug-statement-hook entry: debug-statement-hook
language: python language: python
types: [python] types: [python]
- id: destroyed-symlinks
name: Detect Destroyed Symlinks
description: Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to.
entry: destroyed-symlinks
language: python
types: [file]
- id: detect-aws-credentials - id: detect-aws-credentials
name: Detect AWS Credentials name: Detect AWS Credentials
description: Detects *your* aws credentials from the aws cli credentials file description: Detects *your* aws credentials from the aws cli credentials file

View file

@ -87,6 +87,12 @@ Attempts to load all yaml files to verify syntax.
#### `debug-statements` #### `debug-statements`
Check for debugger imports and py37+ `breakpoint()` calls in python source. Check for debugger imports and py37+ `breakpoint()` calls in python source.
#### `destroyed-symlinks`
Detects symlinks which are changed to regular files with a content of a path
which that symlink was pointing to.
This usually happens on Windows when a user clones a repository that has
symlinks but they do not have the permission to create symlinks.
#### `detect-aws-credentials` #### `detect-aws-credentials`
Checks for the existence of AWS secrets that you have set up with the AWS CLI. Checks for the existence of AWS secrets that you have set up with the AWS CLI.
The following arguments are available: The following arguments are available:

View file

@ -8,18 +8,11 @@ from typing import Sequence
from typing import Set from typing import Set
from pre_commit_hooks.util import cmd_output from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit
EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7')) EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7'))
def zsplit(s: str) -> List[str]:
s = s.strip('\0')
if s:
return s.split('\0')
else:
return []
def check_executables(paths: List[str]) -> int: def check_executables(paths: List[str]) -> int:
if sys.platform == 'win32': # pragma: win32 cover if sys.platform == 'win32': # pragma: win32 cover
return _check_git_filemode(paths) return _check_git_filemode(paths)

View file

@ -0,0 +1,96 @@
import argparse
import shlex
import subprocess
from typing import List
from typing import Optional
from typing import Sequence
from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit
ORDINARY_CHANGED_ENTRIES_MARKER = '1'
PERMS_LINK = '120000'
PERMS_NONEXIST = '000000'
def find_destroyed_symlinks(files: Sequence[str]) -> List[str]:
destroyed_links: List[str] = []
if not files:
return destroyed_links
for line in zsplit(
cmd_output('git', 'status', '--porcelain=v2', '-z', '--', *files),
):
splitted = line.split(' ')
if splitted and splitted[0] == ORDINARY_CHANGED_ENTRIES_MARKER:
# https://git-scm.com/docs/git-status#_changed_tracked_entries
(
_, _, _,
mode_HEAD,
mode_index,
_,
hash_HEAD,
hash_index,
*path_splitted,
) = splitted
path = ' '.join(path_splitted)
if (
mode_HEAD == PERMS_LINK and
mode_index != PERMS_LINK and
mode_index != PERMS_NONEXIST
):
if hash_HEAD == hash_index:
# if old and new hashes are equal, it's not needed to check
# anything more, we've found a destroyed symlink for sure
destroyed_links.append(path)
else:
# if old and new hashes are *not* equal, it doesn't mean
# that everything is OK - new file may be altered
# by something like trailing-whitespace and/or
# mixed-line-ending hooks so we need to go deeper
SIZE_CMD = ('git', 'cat-file', '-s')
size_index = int(cmd_output(*SIZE_CMD, hash_index).strip())
size_HEAD = int(cmd_output(*SIZE_CMD, hash_HEAD).strip())
# in the worst case new file may have CRLF added
# so check content only if new file is bigger
# not more than 2 bytes compared to the old one
if size_index <= size_HEAD + 2:
head_content = subprocess.check_output(
('git', 'cat-file', '-p', hash_HEAD),
).rstrip()
index_content = subprocess.check_output(
('git', 'cat-file', '-p', hash_index),
).rstrip()
if head_content == index_content:
destroyed_links.append(path)
return destroyed_links
def main(argv: Optional[Sequence[str]] = None) -> int:
parser = argparse.ArgumentParser()
parser.add_argument('filenames', nargs='*', help='Filenames to check.')
args = parser.parse_args(argv)
destroyed_links = find_destroyed_symlinks(files=args.filenames)
if destroyed_links:
print('Destroyed symlinks:')
for destroyed_link in destroyed_links:
print(f'- {destroyed_link}')
print('You should unstage affected files:')
print(
'\tgit reset HEAD -- {}'.format(
' '.join(shlex.quote(link) for link in destroyed_links),
),
)
print(
'And retry commit. As a long term solution '
'you may try to explicitly tell git that your '
'environment does not support symlinks:',
)
print('\tgit config core.symlinks false')
return 1
else:
return 0
if __name__ == '__main__':
exit(main())

View file

@ -1,5 +1,6 @@
import subprocess import subprocess
from typing import Any from typing import Any
from typing import List
from typing import Optional from typing import Optional
from typing import Set from typing import Set
@ -22,3 +23,11 @@ def cmd_output(*cmd: str, retcode: Optional[int] = 0, **kwargs: Any) -> str:
if retcode is not None and proc.returncode != retcode: if retcode is not None and proc.returncode != retcode:
raise CalledProcessError(cmd, retcode, proc.returncode, stdout, stderr) raise CalledProcessError(cmd, retcode, proc.returncode, stdout, stderr)
return stdout return stdout
def zsplit(s: str) -> List[str]:
s = s.strip('\0')
if s:
return s.split('\0')
else:
return []

View file

@ -44,6 +44,7 @@ console_scripts =
check-xml = pre_commit_hooks.check_xml:main check-xml = pre_commit_hooks.check_xml:main
check-yaml = pre_commit_hooks.check_yaml:main check-yaml = pre_commit_hooks.check_yaml:main
debug-statement-hook = pre_commit_hooks.debug_statement_hook:main debug-statement-hook = pre_commit_hooks.debug_statement_hook:main
destroyed-symlinks = pre_commit_hooks.destroyed_symlinks:main
detect-aws-credentials = pre_commit_hooks.detect_aws_credentials:main detect-aws-credentials = pre_commit_hooks.detect_aws_credentials:main
detect-private-key = pre_commit_hooks.detect_private_key:main detect-private-key = pre_commit_hooks.detect_private_key:main
double-quote-string-fixer = pre_commit_hooks.string_fixer:main double-quote-string-fixer = pre_commit_hooks.string_fixer:main

View file

@ -102,16 +102,6 @@ def test_check_git_filemode_failing(tmpdir):
assert check_executables_have_shebangs._check_git_filemode(files) == 1 assert check_executables_have_shebangs._check_git_filemode(files) == 1
@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
def test_check_zsplits_correctly(out):
assert check_executables_have_shebangs.zsplit(out) == ['f1', 'f2']
@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
def test_check_zsplit_returns_empty(out):
assert check_executables_have_shebangs.zsplit(out) == []
@pytest.mark.parametrize( @pytest.mark.parametrize(
('content', 'mode', 'expected'), ('content', 'mode', 'expected'),
( (

View file

@ -0,0 +1,74 @@
import os
import subprocess
import pytest
from pre_commit_hooks.destroyed_symlinks import find_destroyed_symlinks
from pre_commit_hooks.destroyed_symlinks import main
TEST_SYMLINK = 'test_symlink'
TEST_SYMLINK_TARGET = '/doesnt/really/matters'
TEST_FILE = 'test_file'
TEST_FILE_RENAMED = f'{TEST_FILE}_renamed'
@pytest.fixture
def repo_with_destroyed_symlink(tmpdir):
source_repo = tmpdir.join('src')
os.makedirs(source_repo, exist_ok=True)
test_repo = tmpdir.join('test')
with source_repo.as_cwd():
subprocess.check_call(('git', 'init'))
os.symlink(TEST_SYMLINK_TARGET, TEST_SYMLINK)
with open(TEST_FILE, 'w') as f:
print('some random content', file=f)
subprocess.check_call(('git', 'add', '.'))
subprocess.check_call(
('git', 'commit', '--no-gpg-sign', '-m', 'initial'),
)
assert b'120000 ' in subprocess.check_output(
('git', 'cat-file', '-p', 'HEAD^{tree}'),
)
subprocess.check_call(
('git', '-c', 'core.symlinks=false', 'clone', source_repo, test_repo),
)
with test_repo.as_cwd():
subprocess.check_call(
('git', 'config', '--local', 'core.symlinks', 'true'),
)
subprocess.check_call(('git', 'mv', TEST_FILE, TEST_FILE_RENAMED))
assert not os.path.islink(test_repo.join(TEST_SYMLINK))
yield test_repo
def test_find_destroyed_symlinks(repo_with_destroyed_symlink):
with repo_with_destroyed_symlink.as_cwd():
assert find_destroyed_symlinks([]) == []
assert main([]) == 0
subprocess.check_call(('git', 'add', TEST_SYMLINK))
assert find_destroyed_symlinks([TEST_SYMLINK]) == [TEST_SYMLINK]
assert find_destroyed_symlinks([]) == []
assert main([]) == 0
assert find_destroyed_symlinks([TEST_FILE_RENAMED, TEST_FILE]) == []
ALL_STAGED = [TEST_SYMLINK, TEST_FILE_RENAMED]
assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
assert main(ALL_STAGED) != 0
with open(TEST_SYMLINK, 'a') as f:
print(file=f) # add trailing newline
subprocess.check_call(['git', 'add', TEST_SYMLINK])
assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
assert main(ALL_STAGED) != 0
with open(TEST_SYMLINK, 'w') as f:
print('0' * len(TEST_SYMLINK_TARGET), file=f)
subprocess.check_call(('git', 'add', TEST_SYMLINK))
assert find_destroyed_symlinks(ALL_STAGED) == []
assert main(ALL_STAGED) == 0
with open(TEST_SYMLINK, 'w') as f:
print('0' * (len(TEST_SYMLINK_TARGET) + 3), file=f)
subprocess.check_call(('git', 'add', TEST_SYMLINK))
assert find_destroyed_symlinks(ALL_STAGED) == []
assert main(ALL_STAGED) == 0

View file

@ -2,6 +2,7 @@ import pytest
from pre_commit_hooks.util import CalledProcessError from pre_commit_hooks.util import CalledProcessError
from pre_commit_hooks.util import cmd_output from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit
def test_raises_on_error(): def test_raises_on_error():
@ -12,3 +13,13 @@ def test_raises_on_error():
def test_output(): def test_output():
ret = cmd_output('sh', '-c', 'echo hi') ret = cmd_output('sh', '-c', 'echo hi')
assert ret == 'hi\n' assert ret == 'hi\n'
@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
def test_check_zsplits_str_correctly(out):
assert zsplit(out) == ['f1', 'f2']
@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
def test_check_zsplit_returns_empty(out):
assert zsplit(out) == []