file_contents_sorter: add --group-cases-together -- a better case-insensitive sort

This commit is contained in:
Jacob Lifshay 2025-08-04 20:08:36 -07:00
parent a8f86514f4
commit 1d6bd5f7da
No known key found for this signature in database
3 changed files with 69 additions and 3 deletions

View file

@ -123,7 +123,8 @@ Note that this hook WILL remove blank lines and does NOT respect any comments.
All newlines will be converted to line feeds (`\n`).
The following arguments are available:
- `--ignore-case` - fold lower case to upper case characters.
- `--ignore-case` - fold lower case to upper case characters. This retains the original order of lines that differ only in case, so you probably want `--group-cases-together` instead.
- `--group-cases-together` - group lines that differ only in case together, e.g. `c`, `b`, `a`, and `B` are sorted to `a`, `B`, `b`, and `c` instead of the case-sensitive order `B`, `a`, `b`, and `c`.
- `--unique` - ensure each line is unique.
#### `fix-byte-order-marker`

View file

@ -51,6 +51,10 @@ def sort_file_contents(
return FAIL
def group_cases_together_key(s: bytes) -> tuple[bytes, bytes]:
    """Return a sort key that groups lines differing only in case.

    The key is the pair ``(s.lower(), s)``.  Lines are compared by their
    lower-cased form first, so ``b'B'`` and ``b'b'`` end up adjacent; the
    original bytes act as a tie-breaker, which gives lines that differ only
    in case a deterministic order (upper case before lower case for ASCII)
    instead of keeping whatever order they had in the input.

    Args:
        s: A single line as raw bytes.

    Returns:
        A ``(lower-cased line, original line)`` tuple usable as ``key=`` for
        ``sorted`` / ``list.sort``.
    """
    # ``bytes.lower`` only folds ASCII letters; other byte values are
    # compared unchanged.
    return s.lower(), s
def main(argv: Sequence[str] | None = None) -> int:
parser = argparse.ArgumentParser()
parser.add_argument('filenames', nargs='+', help='Files to sort')
@ -61,22 +65,41 @@ def main(argv: Sequence[str] | None = None) -> int:
action='store_const',
const=bytes.lower,
default=None,
help='fold lower case to upper case characters',
help='fold lower case to upper case characters. this retains\n'
'the original order of lines that differ only in case,\n'
'so you probably want --group-cases-together instead.',
)
mutex.add_argument(
'--group-cases-together',
action='store_const',
const=group_cases_together_key,
default=None,
help='group lines that differ only in case together,\n'
'so e.g. `c`, `b`, `a`, and `B` are sorted to\n'
'`a`, `B`, `b`, and `c` instead of `B`, `a`, `b`, and `c`.',
)
parser.add_argument(
'--unique',
action='store_true',
help='ensure each line is unique',
)
args = parser.parse_args(argv)
# we can't just use add_mutually_exclusive_group for this since
# --unique is allowed with --group-cases-together
if args.ignore_case and args.unique:
parser.error(
'argument --ignore-case: not allowed with argument --unique',
)
key = args.ignore_case or args.group_cases_together
retv = PASS
for arg in args.filenames:
with open(arg, 'rb+') as file_obj:
ret_for_file = sort_file_contents(
file_obj, key=args.ignore_case, unique=args.unique,
file_obj, key=key, unique=args.unique,
)
if ret_for_file:

View file

@ -55,6 +55,30 @@ from pre_commit_hooks.file_contents_sorter import PASS
FAIL,
b'fee\nfee\nFie\nFoe\nfum\n',
),
(
b'a\nb\nB\nb\nc\n',
['--ignore-case'],
PASS,
b'a\nb\nB\nb\nc\n',
),
(
b'a\nb\nB\nb\nc\n',
['--group-cases-together'],
FAIL,
b'a\nB\nb\nb\nc\n',
),
(
b'fee\nFie\nFoe\nfum\n',
['--group-cases-together'],
PASS,
b'fee\nFie\nFoe\nfum\n',
),
(
b'Fie\nFoe\nfee\nfee\nfum\n',
['--group-cases-together'],
FAIL,
b'fee\nfee\nFie\nFoe\nfum\n',
),
(
b'Fie\nFoe\nfee\nfum\n',
['--unique'],
@ -67,6 +91,24 @@ from pre_commit_hooks.file_contents_sorter import PASS
FAIL,
b'Fie\nFoe\nfee\nfum\n',
),
(
b'a\nb\nB\nb\nc\n',
['--group-cases-together', '--unique'],
FAIL,
b'a\nB\nb\nc\n',
),
(
b'fee\nFie\nFoe\nfum\n',
['--group-cases-together', '--unique'],
PASS,
b'fee\nFie\nFoe\nfum\n',
),
(
b'Fie\nFoe\nfee\nfee\nfum\n',
['--group-cases-together', '--unique'],
FAIL,
b'fee\nFie\nFoe\nfum\n',
),
),
)
def test_integration(input_s, argv, expected_retval, output, tmpdir):