mirror of
https://github.com/pre-commit/pre-commit-hooks.git
synced 2026-04-07 04:26:52 +00:00
Use the tokenizer for great success
This commit is contained in:
parent
5207d1f29a
commit
2983d4478f
2 changed files with 63 additions and 67 deletions
|
|
@ -3,34 +3,60 @@ from __future__ import print_function
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import re
|
import io
|
||||||
import tokenize
|
import tokenize
|
||||||
|
|
||||||
|
|
||||||
double_quote_starts = tuple(s for s in tokenize.single_quoted if '"' in s)
|
double_quote_starts = tuple(s for s in tokenize.single_quoted if '"' in s)
|
||||||
compiled_tokenize_string = re.compile('(?<!")' + tokenize.String + '(?!")')
|
|
||||||
|
|
||||||
|
|
||||||
def handle_match(m):
|
def handle_match(token_text):
|
||||||
string = m.group(0)
|
if '"""' in token_text or "'''" in token_text:
|
||||||
|
return token_text
|
||||||
|
|
||||||
for double_quote_start in double_quote_starts:
|
for double_quote_start in double_quote_starts:
|
||||||
if string.startswith(double_quote_start):
|
if token_text.startswith(double_quote_start):
|
||||||
meat = string[len(double_quote_start):-1]
|
meat = token_text[len(double_quote_start):-1]
|
||||||
if '"' in meat or "'" in meat:
|
if '"' in meat or "'" in meat:
|
||||||
break
|
break
|
||||||
return double_quote_start.replace('"', "'") + meat + "'"
|
return double_quote_start.replace('"', "'") + meat + "'"
|
||||||
return string
|
return token_text
|
||||||
|
|
||||||
|
|
||||||
|
def get_line_offsets_by_line_no(src):
|
||||||
|
# Padded so we can index with line number
|
||||||
|
offsets = [None, 0]
|
||||||
|
for line in src.splitlines():
|
||||||
|
offsets.append(offsets[-1] + len(line) + 1)
|
||||||
|
return offsets
|
||||||
|
|
||||||
|
|
||||||
def fix_strings(filename):
|
def fix_strings(filename):
|
||||||
contents = open(filename).read()
|
contents = io.open(filename).read()
|
||||||
new_contents = compiled_tokenize_string.sub(handle_match, contents)
|
line_offsets = get_line_offsets_by_line_no(contents)
|
||||||
retval = int(new_contents != contents)
|
|
||||||
if retval:
|
# Basically a mutable string
|
||||||
with open(filename, 'w') as write_handle:
|
splitcontents = list(contents)
|
||||||
|
|
||||||
|
# Iterate in reverse so the offsets are always correct
|
||||||
|
tokens = reversed(list(tokenize.generate_tokens(
|
||||||
|
io.StringIO(contents).readline,
|
||||||
|
)))
|
||||||
|
for token_type, token_text, (srow, scol), (erow, ecol), _ in tokens:
|
||||||
|
if token_type == tokenize.STRING:
|
||||||
|
new_text = handle_match(token_text)
|
||||||
|
splitcontents[
|
||||||
|
line_offsets[srow] + scol:
|
||||||
|
line_offsets[erow] + ecol
|
||||||
|
] = new_text
|
||||||
|
|
||||||
|
new_contents = ''.join(splitcontents)
|
||||||
|
if contents != new_contents:
|
||||||
|
with io.open(filename, 'w') as write_handle:
|
||||||
write_handle.write(new_contents)
|
write_handle.write(new_contents)
|
||||||
return retval
|
return 1
|
||||||
|
else:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def main(argv=None):
|
def main(argv=None):
|
||||||
|
|
|
||||||
|
|
@ -2,79 +2,49 @@ from __future__ import absolute_import
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import textwrap
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from pre_commit_hooks.string_fixer import main
|
from pre_commit_hooks.string_fixer import main
|
||||||
|
|
||||||
TESTS = (
|
TESTS = (
|
||||||
# Base cases
|
# Base cases
|
||||||
(
|
("''", "''", 0),
|
||||||
"''",
|
('""', "''", 1),
|
||||||
"''",
|
(r'"\'"', r'"\'"', 0),
|
||||||
0
|
(r'"\""', r'"\""', 0),
|
||||||
),
|
(r"'\"\"'", r"'\"\"'", 0),
|
||||||
(
|
|
||||||
'""',
|
|
||||||
"''",
|
|
||||||
1
|
|
||||||
),
|
|
||||||
(
|
|
||||||
r'"\'"',
|
|
||||||
r'"\'"',
|
|
||||||
0
|
|
||||||
),
|
|
||||||
(
|
|
||||||
r'"\""',
|
|
||||||
r'"\""',
|
|
||||||
0
|
|
||||||
),
|
|
||||||
(
|
|
||||||
r"'\"\"'",
|
|
||||||
r"'\"\"'",
|
|
||||||
0
|
|
||||||
),
|
|
||||||
# String somewhere in the line
|
# String somewhere in the line
|
||||||
(
|
('x = "foo"', "x = 'foo'", 1),
|
||||||
'x = "foo"',
|
|
||||||
"x = 'foo'",
|
|
||||||
1
|
|
||||||
),
|
|
||||||
# Test escaped characters
|
# Test escaped characters
|
||||||
(
|
(r'"\'"', r'"\'"', 0),
|
||||||
r'"\'"',
|
|
||||||
r'"\'"',
|
|
||||||
0
|
|
||||||
),
|
|
||||||
# Docstring
|
# Docstring
|
||||||
|
('""" Foo """', '""" Foo """', 0),
|
||||||
(
|
(
|
||||||
'""" Foo """',
|
textwrap.dedent("""
|
||||||
'""" Foo """',
|
x = " \\
|
||||||
0
|
foo \\
|
||||||
),
|
|
||||||
# Fuck it, won't even try to fix
|
|
||||||
(
|
|
||||||
"""
|
|
||||||
x = " \\n
|
|
||||||
foo \\n
|
|
||||||
"\n
|
"\n
|
||||||
""",
|
"""),
|
||||||
"""
|
textwrap.dedent("""
|
||||||
x = " \\n
|
x = ' \\
|
||||||
foo \\n
|
foo \\
|
||||||
"\n
|
'\n
|
||||||
""",
|
"""),
|
||||||
0
|
1,
|
||||||
),
|
),
|
||||||
|
('"foo""bar"', "'foo''bar'", 1),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(('input_s', 'expected_output', 'expected_retval'), TESTS)
|
@pytest.mark.parametrize(('input_s', 'output', 'expected_retval'), TESTS)
|
||||||
def test_rewrite(input_s, expected_output, expected_retval, tmpdir):
|
def test_rewrite(input_s, output, expected_retval, tmpdir):
|
||||||
tmpfile = tmpdir.join('file.txt')
|
tmpfile = tmpdir.join('file.txt')
|
||||||
|
|
||||||
with open(tmpfile.strpath, 'w') as f:
|
with open(tmpfile.strpath, 'w') as f:
|
||||||
f.write(input_s)
|
f.write(input_s)
|
||||||
|
|
||||||
retval = main([tmpfile.strpath])
|
retval = main([tmpfile.strpath])
|
||||||
assert tmpfile.read() == expected_output
|
assert tmpfile.read() == output
|
||||||
assert retval == expected_retval
|
assert retval == expected_retval
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue