Add sort-simple-yaml hook (originally private hook from yelp_pre_commit_hooks)

This commit is contained in:
Daniel Gallagher 2017-06-23 16:26:00 -07:00
parent 78818b90cd
commit b6eff3d39e
7 changed files with 258 additions and 0 deletions

1
.gitignore vendored
View file

@ -2,6 +2,7 @@
*.iml
*.py[co]
.*.sw[a-z]
.cache
.coverage
.idea
.project

View file

@ -147,6 +147,12 @@
entry: requirements-txt-fixer
language: python
files: requirements.*\.txt$
- id: sort-simple-yaml
name: Sort simple YAML files
language: python
entry: sort-simple-yaml
description: Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks.
files: '^$'
- id: trailing-whitespace
name: Trim Trailing Whitespace
description: This hook trims trailing whitespace.

View file

@ -67,6 +67,7 @@ Add this to your `.pre-commit-config.yaml`
- `--no-sort-keys` - when autofixing, retain the original key ordering (instead of sorting the keys)
- `--top-keys comma,separated,keys` - Keys to keep at the top of mappings.
- `requirements-txt-fixer` - Sorts entries in requirements.txt
- `sort-simple-yaml` - Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks.
- `trailing-whitespace` - Trims trailing whitespace.
- Markdown linebreak trailing spaces preserved for `.md` and `.markdown`;
use `args: ['--markdown-linebreak-ext=txt,text']` to add other extensions,

View file

@ -147,6 +147,12 @@
entry: requirements-txt-fixer
language: python
files: requirements.*\.txt$
- id: sort-simple-yaml
name: Sort simple YAML files
language: python
entry: sort-simple-yaml
description: Sorts simple YAML files which consist only of top-level keys, preserving comments and blocks.
files: '^$'
- id: trailing-whitespace
name: Trim Trailing Whitespace
description: This hook trims trailing whitespace.

View file

@ -0,0 +1,123 @@
#!/usr/bin/env python
"""Sort a simple YAML file, keeping blocks of comments and definitions
together.
We assume a strict subset of YAML that looks like:
# block of header comments
# here that should always
# be at the top of the file
# optional comments
# can go here
key: value
key: value
key: value
In other words, we don't sort deeper than the top layer, and might corrupt
complicated YAML files.
"""
from __future__ import print_function
import argparse
# Quote characters that may open a line; first_key() drops one of these from
# the front of a line before using it as a sort key.
QUOTES = ["'", '"']
def sort(lines):
    """Sort a YAML file in alphabetical order, keeping blocks together.

    The leading run of comment lines (the header) stays at the top. The
    remaining blank-line-separated blocks are ordered by their first key and
    re-joined with exactly one empty line between consecutive blocks.

    :param lines: array of strings (without newlines)
    :return: sorted array of strings
    """
    # Work on a copy: the parse_* helpers consume their input in place.
    remaining = list(lines)
    new_lines = parse_block(remaining, header=True)

    def block_key(block):
        # first_key() yields None for a block that holds only comments.
        # None cannot be ordered against str on Python 3, so treat it as ''
        # which places such blocks ahead of every keyed block.
        return first_key(block) or ''

    for block in sorted(parse_blocks(remaining), key=block_key):
        if new_lines:
            new_lines.append('')
        new_lines.extend(block)

    return new_lines
def parse_block(lines, header=False):
    """Pop one block off the front of `lines` and return it.

    A block ends at the first empty line or at the end of the input. When
    `header` is true it also ends at the first line that does not start with
    '#'. The consumed lines are removed from `lines` in place; the line that
    ended the block is left there.

    :param lines: list of lines
    :param header: whether we are parsing a header block
    :return: list of lines that form the single block
    """
    end = 0
    for candidate in lines:
        if not candidate:
            break
        if header and not candidate.startswith('#'):
            break
        end += 1

    block_lines = lines[:end]
    # Drop the consumed prefix from the caller's list.
    del lines[:end]
    return block_lines
def parse_blocks(lines):
    """Split `lines` into blocks, consuming the whole list in place.

    Empty lines act purely as separators and are discarded; every run of
    non-empty lines becomes one block.

    :param lines: list of lines
    :return: list of blocks, where each block is a list of lines
    """
    blocks = []
    while lines:
        if lines[0] != '':
            blocks.append(parse_block(lines))
            continue
        # Blank separator between blocks: throw it away.
        del lines[0]
    return blocks
def first_key(lines):
    """Return the string a block is sorted by.

    Comment lines (those starting with '#') are skipped. The first other
    line is returned whole -- key and value alike -- minus one leading quote
    character if it starts with one, so "'foo': true" yields "foo': true".

    :param lines: list of lines forming one block
    :return: the sort string, or None when the block is empty or contains
        only comment lines
    """
    for line in lines:
        if not line.startswith('#'):
            break
    else:
        # Ran out of lines without finding anything but comments.
        return None

    if line.startswith(tuple(QUOTES)):
        return line[1:]
    return line
def main(argv=None):
    """Sort each given YAML file in place and report whether any changed.

    :param argv: argument list to parse; None makes argparse use sys.argv
    :return: 1 if at least one file was rewritten, otherwise 0
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filenames', nargs='*', help='Filenames to fix')
    args = parser.parse_args(argv)

    retval = 0

    for filename in args.filenames:
        # 'r+' so the same handle can be rewound and overwritten.
        with open(filename, 'r+') as f:
            lines = [line.rstrip() for line in f.readlines()]
            new_lines = sort(lines)

            if lines != new_lines:
                print("Fixing file `{filename}`".format(filename=filename))
                f.seek(0)
                # Terminate each line individually so that an empty result
                # writes an empty file. Joining and then appending "\n"
                # would leave a lone blank line behind, which is reported
                # as needing a fix again on every later run.
                f.write("".join(line + "\n" for line in new_lines))
                f.truncate()
                retval = 1

    return retval
if __name__ == '__main__':
    # The bare `exit` name is a helper added by the `site` module and is not
    # available when that module is skipped (e.g. `python -S`). Raising
    # SystemExit directly needs no import and exits with main()'s value.
    raise SystemExit(main())

View file

@ -55,6 +55,7 @@ setup(
'no-commit-to-branch = pre_commit_hooks.no_commit_to_branch:main',
'pretty-format-json = pre_commit_hooks.pretty_format_json:pretty_format_json',
'requirements-txt-fixer = pre_commit_hooks.requirements_txt_fixer:fix_requirements_txt',
'sort-simple-yaml = pre_commit_hooks.sort_simple_yaml:main',
'trailing-whitespace-fixer = pre_commit_hooks.trailing_whitespace_fixer:fix_trailing_whitespace',
],
},

View file

@ -0,0 +1,120 @@
from __future__ import absolute_import
from __future__ import unicode_literals
import os
import pytest
from pre_commit_hooks.sort_simple_yaml import first_key
from pre_commit_hooks.sort_simple_yaml import main
from pre_commit_hooks.sort_simple_yaml import parse_block
from pre_commit_hooks.sort_simple_yaml import parse_blocks
from pre_commit_hooks.sort_simple_yaml import sort
# Values main() is expected to return: 0 when a file is left alone, 1 when it
# had to be rewritten.
RETVAL_GOOD = 0
RETVAL_BAD = 1

# Each case is (input lines, expected lines after sorting, expected retval).
TEST_SORTS = [
    # blocks out of order, no header
    (
        ['c: true', '', 'b: 42', 'a: 19'],
        ['b: 42', 'a: 19', '', 'c: true'],
        RETVAL_BAD,
    ),
    # blocks out of order below a header, which must stay on top
    (
        ['# i am', '# a header', '', 'c: true', '', 'b: 42', 'a: 19'],
        ['# i am', '# a header', '', 'b: 42', 'a: 19', '', 'c: true'],
        RETVAL_BAD,
    ),
    # header followed by blocks that are already in order
    (
        ['# i am', '# a header', '', 'already: sorted', '', 'yup: i am'],
        ['# i am', '# a header', '', 'already: sorted', '', 'yup: i am'],
        RETVAL_GOOD,
    ),
    # nothing but a header
    (
        ['# i am', '# a header'],
        ['# i am', '# a header'],
        RETVAL_GOOD,
    ),
]
@pytest.mark.parametrize('bad_lines,good_lines,retval', TEST_SORTS)
def test_integration_good_bad_lines(tmpdir, bad_lines, good_lines, retval):
    """Run main() on a real file; check its return value and the result."""
    file_path = os.path.join(tmpdir.strpath, 'foo.yaml')
    original_text = "\n".join(bad_lines) + "\n"
    with open(file_path, 'w') as f:
        f.write(original_text)

    assert main([file_path]) == retval

    with open(file_path, 'r') as f:
        written = [line.rstrip() for line in f.readlines()]
    assert written == good_lines
def test_parse_header():
    """A header is the leading run of '#' lines, popped off in place."""
    # comment lines followed by a blank line and content
    remaining = ['# some header', '# is here', '', 'this is not a header']
    header = parse_block(remaining, header=True)
    assert header == ['# some header', '# is here']
    assert remaining == ['', 'this is not a header']

    # no comment lines at all: nothing is consumed
    remaining = ['this is not a header']
    header = parse_block(remaining, header=True)
    assert header == []
    assert remaining == ['this is not a header']
def test_parse_block():
    """parse_block() pops lines up to the first empty line."""
    # a normal block
    remaining = ['a: 42', 'b: 17', '', 'c: 19']
    block = parse_block(remaining)
    assert block == ['a: 42', 'b: 17']
    assert remaining == ['', 'c: 19']

    # a block at the end
    remaining = ['c: 19']
    block = parse_block(remaining)
    assert block == ['c: 19']
    assert remaining == []

    # no block
    remaining = []
    block = parse_block(remaining)
    assert block == []
    assert remaining == []
def test_parse_blocks():
    """parse_blocks() consumes every line and groups them into blocks."""
    # normal blocks
    remaining = ['a: 42', 'b: 17', '', 'c: 19']
    blocks = parse_blocks(remaining)
    assert blocks == [['a: 42', 'b: 17'], ['c: 19']]
    assert remaining == []

    # a single block
    remaining = ['a: 42', 'b: 17']
    blocks = parse_blocks(remaining)
    assert blocks == [['a: 42', 'b: 17']]
    assert remaining == []

    # no blocks
    remaining = []
    blocks = parse_blocks(remaining)
    assert blocks == []
    assert remaining == []
def test_first_key():
    """first_key() returns the first non-comment line, minus a leading quote."""
    # first line
    block = ['a: 42', 'b: 17', '', 'c: 19']
    assert first_key(block) == 'a: 42'

    # second line
    block = ['# some comment', 'a: 42', 'b: 17', '', 'c: 19']
    assert first_key(block) == 'a: 42'

    # second line with quotes
    block = ['# some comment', '"a": 42', 'b: 17', '', 'c: 19']
    assert first_key(block) == 'a": 42'

    # no lines
    block = []
    assert first_key(block) is None
@pytest.mark.parametrize('bad_lines,good_lines,_', TEST_SORTS)
def test_sort(bad_lines, good_lines, _):
    """sort() maps each fixture's input lines to its expected lines."""
    result = sort(bad_lines)
    assert result == good_lines