Check strings in jupyter notebooks

2026-03-30 18:26:53 +00:00 · 2022-10-05 02:42:08 -07:00 · 2022-10-05 02:42:08 -07:00 · cf898e2ce2
commit cf898e2ce2
parent 6b03546fc3
7 changed files with 528 additions and 42 deletions
--- a/pre_commit_hooks/string_fixer.py
+++ b/pre_commit_hooks/string_fixer.py
@ -1,57 +1,17 @@
 from __future__ import annotations

 import argparse
-import io
-import re
-import tokenize
 from typing import Sequence

-START_QUOTE_RE = re.compile('^[a-zA-Z]*"')
-
-
-def handle_match(token_text: str) -> str:
-    if '"""' in token_text or "'''" in token_text:
-        return token_text
-
-    match = START_QUOTE_RE.match(token_text)
-    if match is not None:
-        meat = token_text[match.end():-1]
-        if '"' in meat or "'" in meat:
-            return token_text
-        else:
-            return match.group().replace('"', "'") + meat + "'"
-    else:
-        return token_text
-
-
-def get_line_offsets_by_line_no(src: str) -> list[int]:
-    # Padded so we can index with line number
-    offsets = [-1, 0]
-    for line in src.splitlines(True):
-        offsets.append(offsets[-1] + len(line))
-    return offsets
+from pre_commit_hooks.util_string_fixer import fix_strings_in_file_contents


 def fix_strings(filename: str) -> int:
    with open(filename, encoding='UTF-8', newline='') as f:
        contents = f.read()
-    line_offsets = get_line_offsets_by_line_no(contents)

-    # Basically a mutable string
-    splitcontents = list(contents)
+    new_contents = fix_strings_in_file_contents(contents)

-    # Iterate in reverse so the offsets are always correct
-    tokens_l = list(tokenize.generate_tokens(io.StringIO(contents).readline))
-    tokens = reversed(tokens_l)
-    for token_type, token_text, (srow, scol), (erow, ecol), _ in tokens:
-        if token_type == tokenize.STRING:
-            new_text = handle_match(token_text)
-            splitcontents[
-                line_offsets[srow] + scol:
-                line_offsets[erow] + ecol
-            ] = new_text
-
-    new_contents = ''.join(splitcontents)
    if contents != new_contents:
        with open(filename, 'w', encoding='UTF-8', newline='') as f:
            f.write(new_contents)