feat: add a simple HTML check pre-commit hook

This commit is contained in:
bernard 2024-07-10 05:10:02 +02:00
parent ae7cee538b
commit 1d83507cf7
10 changed files with 118 additions and 0 deletions

View file

@ -64,6 +64,12 @@
entry: pretty-format-json
language: python
types: [json]
- id: check-html
name: check html
description: checks html files for parseable syntax.
entry: check-html
language: python
types: [html]
- id: check-merge-conflict
name: check for merge conflicts
description: checks for files that contain merge conflict strings.

View file

@ -51,6 +51,9 @@ Checks for a common error of placing code before the docstring.
#### `check-executables-have-shebangs`
Checks that non-binary executables have a proper shebang.
#### `check-html`
Attempts to load all HTML files to verify syntax.
#### `check-illegal-windows-names`
Check for files that cannot be created on Windows.

View file

@ -0,0 +1,74 @@
from __future__ import annotations
import argparse
import collections
from html.parser import HTMLParser
from typing import Sequence
class ValidationException(Exception):
    """Signals that an HTML document's tags are unbalanced or mismatched."""
class ValidatingHTMLParser(HTMLParser):
    """HTML parser that verifies start and end tags are properly nested.

    Every start tag is pushed onto ``self.stack`` and must be closed by a
    matching end tag in reverse order.  A ``ValidationException`` is raised
    as soon as an unexpected end tag is seen, or from ``close()`` when
    elements are still open at end of input.

    Void elements (``<br>``, ``<img>``, ``<meta>``, ...) never take an end
    tag, so they are not tracked on the stack at all.
    """

    # Void elements as listed by the HTML standard; they cannot have content
    # or an end tag, so treating them as "open" would reject valid markup.
    _VOID_ELEMENTS = frozenset({
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'source', 'track', 'wbr',
    })

    def __init__(self) -> None:
        # Zero-argument super() runs HTMLParser.__init__, which initialises
        # the parser state that feed() depends on.  The previous
        # super(HTMLParser, self) call skipped that initialiser entirely.
        super().__init__()
        # Names of the currently open elements, outermost first.
        self.stack: collections.deque[str] = collections.deque()

    def handle_starttag(
            self, tag: str,
            attrs: list[tuple[str, str | None]],
    ) -> None:
        """Record *tag* as open unless it is a void element."""
        if tag in self._VOID_ELEMENTS:
            return
        self.stack.append(tag)

    def handle_endtag(self, tag: str) -> None:
        """Close the innermost open element, which must be *tag*.

        Raises:
            ValidationException: if nothing is open, or if the innermost
                open element is not *tag*.
        """
        if tag in self._VOID_ELEMENTS:
            # Void elements are never pushed, so a stray end tag for one
            # (e.g. ``<img></img>``) must not pop an unrelated element.
            return
        if not self.stack:
            raise ValidationException(f"no opening tag for </{tag}>")
        opening_tag = self.stack.pop()
        if opening_tag != tag:
            stack = '/'.join(self.stack)
            raise ValidationException(
                f"attempt to close {opening_tag} with {tag} at /{stack}",
            )

    def handle_startendtag(
            self, tag: str,
            attrs: list[tuple[str, str | None]],
    ) -> None:
        """Ignore self-closing tags such as ``<meta ... />``.

        They open and close in one step, which is equivalent to pushing
        and immediately popping the stack, so nothing needs to be done.
        """

    def close(self) -> None:
        """Finish parsing and verify that no element is left open.

        Raises:
            ValidationException: if at least one element is still open
                once all buffered input has been processed.
        """
        super().close()
        if self.stack:
            opening_tag = self.stack.pop()
            stack = '/'.join(self.stack)
            raise ValidationException(
                f"EOF reached while {opening_tag} at /{stack} not closed",
            )
def main(argv: Sequence[str] | None = None) -> int:
    """Check every given HTML file for balanced tags.

    Args:
        argv: command-line arguments (file names); ``None`` lets argparse
            read them from ``sys.argv``.

    Returns:
        0 when every file validated, 1 when at least one file failed.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        'filenames', nargs='*', help='HTML filenames to check.',
    )
    options = arg_parser.parse_args(argv)

    any_failed = False
    for path in options.filenames:
        try:
            with open(path, 'rb') as handle:
                checker = ValidatingHTMLParser()
                # Non-ASCII bytes are dropped before the text is parsed.
                text = handle.read().decode('ascii', 'ignore')
                checker.feed(text)
                checker.close()
        except ValidationException as err:
            print(f'{path}: Failed to parse HTML: ({err})')
            any_failed = True

    return 1 if any_failed else 0
# When run as a script, exit with the status code returned by main().
if __name__ == '__main__':
    raise SystemExit(main())

View file

@ -37,6 +37,7 @@ console_scripts =
check-case-conflict = pre_commit_hooks.check_case_conflict:main
check-docstring-first = pre_commit_hooks.check_docstring_first:main
check-executables-have-shebangs = pre_commit_hooks.check_executables_have_shebangs:main
check-html = pre_commit_hooks.check_html:main
check-json = pre_commit_hooks.check_json:main
check-merge-conflict = pre_commit_hooks.check_merge_conflict:main
check-shebang-scripts-are-executable = pre_commit_hooks.check_shebang_scripts_are_executable:main

View file

@ -0,0 +1 @@
<div>hello

View file

@ -0,0 +1 @@
<div>hello</div></div>

View file

@ -0,0 +1 @@
<div><p></ul></div>

View file

@ -0,0 +1 @@
<p><a href="https://httpbin.org/">Awesome</a></p>

View file

@ -0,0 +1,10 @@
<!doctype html>
<html lang="en-US">
<head>
<meta charset="utf-8" />
<title>My test page</title>
</head>
<body>
<p>This is my page</p>
</body>
</html>

20
tests/check_html_test.py Normal file
View file

@ -0,0 +1,20 @@
from __future__ import annotations
import pytest
from pre_commit_hooks.check_html import main
from testing.util import get_resource_path
@pytest.mark.parametrize(
    ('filename', 'expected_retval'),
    [
        ('bad_html_not_closed.html', 1),
        ('bad_html_too_many_close.html', 1),
        ('bad_html_wrong_close.html', 1),
        ('ok_html_fragment.html', 0),
        ('ok_html_page.html', 0),
    ],
)
def test_main(filename, expected_retval):
    """Run the hook on one fixture file and compare its exit status."""
    resource = get_resource_path(filename)
    assert main([resource]) == expected_retval