mirror of
https://github.com/pre-commit/pre-commit-hooks.git
synced 2026-07-02 15:39:35 +00:00
Add remove-em-dash hook
New fixer hook that replaces UTF-8 em-dashes (U+2014) with a plain hyphen (-), modeled on the trailing-whitespace hook.

- pre_commit_hooks/remove_em_dash.py: the fixer (binary-safe, UTF-8 only)
- tests/remove_em_dash_test.py: full coverage of fix and no-op cases
- registered in setup.cfg, .pre-commit-hooks.yaml, and README.md

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
fa6b006f0e
commit
aba8a7597a
5 changed files with 84 additions and 0 deletions
|
|
@ -190,6 +190,14 @@
|
||||||
language: python
|
language: python
|
||||||
pass_filenames: false
|
pass_filenames: false
|
||||||
always_run: true
|
always_run: true
|
||||||
|
- id: remove-em-dash
|
||||||
|
name: remove em-dash
|
||||||
|
description: replaces em-dashes with a plain hyphen.
|
||||||
|
entry: remove-em-dash
|
||||||
|
language: python
|
||||||
|
types: [text]
|
||||||
|
stages: [pre-commit, pre-push, manual]
|
||||||
|
minimum_pre_commit_version: 3.2.0
|
||||||
- id: requirements-txt-fixer
|
- id: requirements-txt-fixer
|
||||||
name: fix requirements.txt
|
name: fix requirements.txt
|
||||||
description: sorts entries in requirements.txt.
|
description: sorts entries in requirements.txt.
|
||||||
|
|
|
||||||
|
|
@ -175,6 +175,11 @@ the following commandline options:
|
||||||
- `--no-sort-keys` - when autofixing, retain the original key ordering (instead of sorting the keys)
|
- `--no-sort-keys` - when autofixing, retain the original key ordering (instead of sorting the keys)
|
||||||
- `--top-keys comma,separated,keys` - Keys to keep at the top of mappings.
|
- `--top-keys comma,separated,keys` - Keys to keep at the top of mappings.
|
||||||
|
|
||||||
|
#### `remove-em-dash`
|
||||||
|
Replaces em-dashes (Unicode `U+2014`) with a plain hyphen (`-`).
|
||||||
|
- Only the UTF-8 encoding of the em-dash is replaced; files using other
|
||||||
|
encodings are left untouched.
|
||||||
|
|
||||||
#### `requirements-txt-fixer`
|
#### `requirements-txt-fixer`
|
||||||
Sorts entries in requirements.txt and constraints.txt and removes incorrect entry for `pkg-resources==0.0.0`
|
Sorts entries in requirements.txt and constraints.txt and removes incorrect entry for `pkg-resources==0.0.0`
|
||||||
|
|
||||||
|
|
|
||||||
34
pre_commit_hooks/remove_em_dash.py
Normal file
34
pre_commit_hooks/remove_em_dash.py
Normal file
|
|
@ -0,0 +1,34 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
from collections.abc import Sequence
|
||||||
|
|
||||||
|
# UTF-8 encoding of U+2014 (EM DASH); it is matched against raw file bytes.
EM_DASH = '\N{EM DASH}'.encode()


def _fix_file(filename: str) -> bool:
    """Replace every UTF-8 em-dash in ``filename`` with ``-``.

    The file is read and compared as bytes, and it is only rewritten when
    the replacement actually changed something.

    Returns ``True`` if the file was modified, ``False`` otherwise.
    """
    with open(filename, 'rb') as src:
        original = src.read()

    fixed = original.replace(EM_DASH, b'-')
    changed = fixed != original
    if changed:
        with open(filename, 'wb') as dst:
            dst.write(fixed)
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: Sequence[str] | None = None) -> int:
    """Run the em-dash fixer over the filenames given on the command line.

    ``argv`` is passed straight to ``argparse``. A ``Fixing <filename>``
    line is printed for every file that was changed.

    Returns ``1`` if at least one file was modified, otherwise ``0``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    options = parser.parse_args(argv)

    exit_code = 0
    for filename in options.filenames:
        if not _fix_file(filename):
            continue
        print(f'Fixing {filename}')
        exit_code = 1
    return exit_code
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: exit with the status code computed by main().
if __name__ == '__main__':
    status = main()
    raise SystemExit(status)
|
||||||
|
|
@ -57,6 +57,7 @@ console_scripts =
|
||||||
no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main
|
no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main
|
||||||
pre-commit-hooks-removed = pre_commit_hooks.removed:main
|
pre-commit-hooks-removed = pre_commit_hooks.removed:main
|
||||||
pretty-format-json = pre_commit_hooks.pretty_format_json:main
|
pretty-format-json = pre_commit_hooks.pretty_format_json:main
|
||||||
|
remove-em-dash = pre_commit_hooks.remove_em_dash:main
|
||||||
requirements-txt-fixer = pre_commit_hooks.requirements_txt_fixer:main
|
requirements-txt-fixer = pre_commit_hooks.requirements_txt_fixer:main
|
||||||
sort-simple-yaml = pre_commit_hooks.sort_simple_yaml:main
|
sort-simple-yaml = pre_commit_hooks.sort_simple_yaml:main
|
||||||
trailing-whitespace-fixer = pre_commit_hooks.trailing_whitespace_fixer:main
|
trailing-whitespace-fixer = pre_commit_hooks.trailing_whitespace_fixer:main
|
||||||
|
|
|
||||||
36
tests/remove_em_dash_test.py
Normal file
36
tests/remove_em_dash_test.py
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from pre_commit_hooks.remove_em_dash import main
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    ('text', 'expected'),
    (
        # em-dash between two words
        ('foo\N{EM DASH}bar\n', b'foo-bar\n'),
        # em-dash surrounded by spaces
        ('foo \N{EM DASH} bar\n', b'foo - bar\n'),
        # several em-dashes on one line
        ('a\N{EM DASH}b\N{EM DASH}c\n', b'a-b-c\n'),
        # CRLF line endings are expected to come through unchanged
        ('x\N{EM DASH}y\r\nz\r\n', b'x-y\r\nz\r\n'),
    ),
)
def test_fixes_em_dash(text, expected, tmpdir):
    """A file containing UTF-8 em-dashes is rewritten and main() returns 1."""
    target = tmpdir.join('file.txt')
    target.write_binary(text.encode())

    ret = main((str(target),))

    assert ret == 1
    assert target.read_binary() == expected
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    'contents',
    (
        pytest.param(b'foo-bar\n', id='plain-hyphen'),
        pytest.param(b'no em dashes here\n', id='no-dash'),
        # 0x97 on its own is not the UTF-8 em-dash sequence
        pytest.param(b'<a>\x97</a>\n', id='windows-1252-em-dash'),
    ),
)
def test_noop_without_utf8_em_dash(contents, tmpdir):
    """A file without a UTF-8 em-dash is left as-is and main() returns 0."""
    target = tmpdir.join('file.txt')
    target.write_binary(contents)

    ret = main((str(target),))

    assert ret == 0
    assert target.read_binary() == contents
|
||||||
Loading…
Add table
Add a link
Reference in a new issue