Fix parsing of git output with unusual characters

On Windows, all files are "executable".
Therefore, to know if a file is supposed to be executed,
we check how its attributes were recorded by git:
we run a `git ls-files` command in a subprocess.

By default, this command outputs information
on multiple lines (files and their data separated by newlines).
When a filename contains an unusual character,
its bytes are escaped as octal sequences
(such as `\303\261`), and git wraps the whole filename
in double-quotes because of the backslashes.
It breaks the current code because we try to open
the filename containing the double-quotes:
it doesn't exist, of course.

Instead of trying to fix this special case by removing
the double-quotes, and breaking other cases
(a double-quote is a valid filename character on Linux),
we tell git to separate each item with the null character `\0`
instead of a new line `\n`, with the option `-z`.
With this option, git neither quotes filenames nor escapes
unusual characters with octal sequences, so the output is fixed,
and we parse it by splitting on `\0` instead of `\n`.

Fixes #508.
This commit is contained in:
Timothée Mazzucotelli 2020-07-29 09:57:24 +02:00 committed by Anthony Sottile
parent 5372f44b85
commit 4faed34fbc
2 changed files with 35 additions and 2 deletions

View file

@ -73,6 +73,21 @@ def test_check_git_filemode_passing(tmpdir):
assert check_executables_have_shebangs._check_git_filemode(files) == 0
def test_check_git_filemode_passing_unusual_characters(tmpdir):
    """An executable file with a non-ASCII name and a shebang must pass.

    The file is created inside a fresh git repository, marked executable
    both on disk and in the index, and then handed to
    ``_check_git_filemode``, which is expected to return 0.
    """
    with tmpdir.as_cwd():
        cmd_output('git', 'init', '.')

        script = tmpdir.join('mañana.txt')
        script.write('#!/usr/bin/env bash')
        script_path = str(script)

        # Order matters: the file has to be added before its mode can be
        # updated in the index.
        setup_commands = (
            ('chmod', '+x', script_path),
            ('git', 'add', script_path),
            ('git', 'update-index', '--chmod=+x', script_path),
        )
        for command in setup_commands:
            cmd_output(*command)

        retv = check_executables_have_shebangs._check_git_filemode(
            (script_path,),
        )
        assert retv == 0
def test_check_git_filemode_failing(tmpdir):
with tmpdir.as_cwd():
cmd_output('git', 'init', '.')
@ -87,6 +102,16 @@ def test_check_git_filemode_failing(tmpdir):
assert check_executables_have_shebangs._check_git_filemode(files) == 1
@pytest.mark.parametrize(
    'out',
    (
        '\0f1\0f2\0',
        '\0f1\0f2',
        'f1\0f2\0',
    ),
)
def test_check_zsplits_correctly(out):
    """NUL separators at either end of the input yield no empty entries."""
    entries = check_executables_have_shebangs.zsplit(out)
    assert entries == ['f1', 'f2']
@pytest.mark.parametrize(
    'out',
    (
        '\0\0',
        '\0',
        '',
    ),
)
def test_check_zsplit_returns_empty(out):
    """Input made only of NUL separators, or empty input, gives no entries."""
    entries = check_executables_have_shebangs.zsplit(out)
    assert entries == []
@pytest.mark.parametrize(
('content', 'mode', 'expected'),
(