mirror of
https://github.com/pre-commit/pre-commit-hooks.git
synced 2026-03-29 18:16:52 +00:00
Fix parsing of git output with unusual characters
On Windows, all files are "executable". Therefore, to know whether a file is supposed to be executed, we check how its attributes were recorded by git: we run a `git ls-files` command in a subprocess. By default, this command outputs its information on multiple lines (files and their data, separated by newlines). When a filename contains an unusual character, git escapes that character as an octal escape sequence (such as `\303\261`) and wraps the whole filename in double quotes because of the backslashes. This breaks the current code because we try to open the filename including the double quotes, and of course no such file exists. Instead of trying to fix this special case by removing the double quotes, which would break other cases (a double quote is a valid filename character on Linux), we pass the `-z` option to tell git to terminate each item with the null character `\0` instead of a newline `\n`. With this option, git doesn't escape unusual characters as octal sequences, so the output is correct, and we parse it by splitting on `\0` instead of `\n`. Fixes #508.
This commit is contained in:
parent
5372f44b85
commit
4faed34fbc
2 changed files with 35 additions and 2 deletions
|
|
@ -12,6 +12,14 @@ from pre_commit_hooks.util import cmd_output
|
|||
EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7'))
|
||||
|
||||
|
||||
def zsplit(s: str) -> List[str]:
    """Split NUL-delimited text into its individual fields.

    Leading and trailing ``\\0`` characters are removed before splitting, so
    output such as that of ``git ls-files -z`` does not yield empty fields
    at either end.

    Returns an empty list when nothing is left after stripping.
    """
    trimmed = s.strip('\0')
    # ''.split('\0') would give [''], so the empty case is handled separately.
    return trimmed.split('\0') if trimmed else []
|
||||
|
||||
|
||||
def check_executables(paths: List[str]) -> int:
|
||||
if sys.platform == 'win32': # pragma: win32 cover
|
||||
return _check_git_filemode(paths)
|
||||
|
|
@ -26,9 +34,9 @@ def check_executables(paths: List[str]) -> int:
|
|||
|
||||
|
||||
def _check_git_filemode(paths: Sequence[str]) -> int:
|
||||
outs = cmd_output('git', 'ls-files', '--stage', '--', *paths)
|
||||
outs = cmd_output('git', 'ls-files', '-z', '--stage', '--', *paths)
|
||||
seen: Set[str] = set()
|
||||
for out in outs.splitlines():
|
||||
for out in zsplit(outs):
|
||||
metadata, path = out.split('\t')
|
||||
tagmode = metadata.split(' ', 1)[0]
|
||||
|
||||
|
|
|
|||
|
|
@ -73,6 +73,21 @@ def test_check_git_filemode_passing(tmpdir):
|
|||
assert check_executables_have_shebangs._check_git_filemode(files) == 0
|
||||
|
||||
|
||||
def test_check_git_filemode_passing_unusual_characters(tmpdir):
    """An executable file with a shebang passes even with a non-ASCII name."""
    with tmpdir.as_cwd():
        cmd_output('git', 'init', '.')

        script = tmpdir.join('mañana.txt')
        script.write('#!/usr/bin/env bash')
        script_path = str(script)

        # Mark the file executable on disk, stage it, then record the
        # executable bit in the git index as well.
        setup_commands = (
            ('chmod', '+x', script_path),
            ('git', 'add', script_path),
            ('git', 'update-index', '--chmod=+x', script_path),
        )
        for command in setup_commands:
            cmd_output(*command)

        ret = check_executables_have_shebangs._check_git_filemode(
            (script_path,),
        )
        assert ret == 0
|
||||
|
||||
|
||||
def test_check_git_filemode_failing(tmpdir):
|
||||
with tmpdir.as_cwd():
|
||||
cmd_output('git', 'init', '.')
|
||||
|
|
@ -87,6 +102,16 @@ def test_check_git_filemode_failing(tmpdir):
|
|||
assert check_executables_have_shebangs._check_git_filemode(files) == 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'out',
    (
        '\0f1\0f2\0',
        '\0f1\0f2',
        'f1\0f2\0',
    ),
)
def test_check_zsplits_correctly(out):
    """Leading/trailing NULs do not change the list of split fields."""
    expected = ['f1', 'f2']
    assert check_executables_have_shebangs.zsplit(out) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    'out',
    (
        '\0\0',
        '\0',
        '',
    ),
)
def test_check_zsplit_returns_empty(out):
    """Input made up only of NULs (or nothing at all) yields an empty list."""
    result = check_executables_have_shebangs.zsplit(out)
    assert result == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
('content', 'mode', 'expected'),
|
||||
(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue