#!/usr/bin/env python
# Reconstructed from git patch 25a3d2ea3f8637679da19e9bbfb8e784ba796ea1
# (Chris Rowe, 2025-08-28): "Add catch_dotenv hook and corresponding tests to
# manage .env files". Target path: pre_commit_hooks/catch_dotenv.py
#
# Pre-commit hook that blocks a commit when a file named `.env` is staged.
# When it blocks, it also makes sure `.gitignore` ends with a banner + `.env`
# entry and, with --create-example, writes a `.env.example` that lists the
# variable names (never the values) found in the repository-root `.env`.
from __future__ import annotations

import argparse
import os
import re
import tempfile
from collections.abc import Iterable
from collections.abc import Sequence

# --- Defaults / Constants ---
DEFAULT_ENV_FILE = ".env"  # Canonical env file name
DEFAULT_GITIGNORE_FILE = ".gitignore"
DEFAULT_EXAMPLE_ENV_FILE = ".env.example"
GITIGNORE_BANNER = "# Added by pre-commit hook to prevent committing secrets"

# Matches `KEY=` / `export KEY=` (optional surrounding whitespace); group 1 is
# the variable name. Comment and blank lines never match.
_KEY_REGEX = re.compile(r"^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=")


def _atomic_write(path: str, data: str) -> None:
    """Write text to path atomically (best-effort).

    The data is written to a temporary file in the same directory and then
    moved over ``path`` with ``os.replace``. Because ``mkstemp`` always creates
    the file with owner-only permissions, the permission bits of an existing
    target (or the umask default for a new file) are applied to the temporary
    file before the replace so the target does not silently become 0600.

    Raises:
        OSError: if the temporary file cannot be created/written or the
            replace fails. The temporary file is removed in that case.
    """
    try:
        mode = os.stat(path).st_mode & 0o7777
    except OSError:
        # Target does not exist yet: use what a plain open(..., "w") would
        # have produced. os.umask can only be read by setting it, so restore
        # it immediately.
        umask = os.umask(0)
        os.umask(umask)
        mode = 0o666 & ~umask

    # Same directory as the target so os.replace stays on one filesystem.
    fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(path) or ".")
    try:
        with os.fdopen(fd, "w", encoding="utf-8", newline="") as tmp_f:
            tmp_f.write(data)
        try:
            os.chmod(tmp_path, mode)
        except OSError:  # pragma: no cover
            pass  # permissions are best-effort; the content write still matters
        os.replace(tmp_path, path)
    except BaseException:
        # Replace did not happen: do not leave the temporary file behind.
        try:
            os.remove(tmp_path)
        except OSError:  # pragma: no cover
            pass
        raise


def ensure_env_in_gitignore(env_file: str, gitignore_file: str, banner: str) -> bool:
    """Normalize `.gitignore` so it contains exactly one banner + env line at end.

    Strategy: read the existing lines, drop every prior occurrence of the
    banner or env_file (compared after stripping whitespace), strip the blank
    lines left at the end, then append a single blank line, the banner and
    env_file. A missing file is treated as empty. The result is idempotent.

    Args:
        env_file: ignore entry to enforce (e.g. ``.env``).
        gitignore_file: path of the `.gitignore` to normalize.
        banner: comment line placed directly above the entry.

    Returns:
        True if the file was created or its contents changed. False if it was
        already canonical, or if reading/writing failed (an ``ERROR:`` line is
        printed in the failure cases).
    """
    try:
        with open(gitignore_file, encoding="utf-8") as f:
            original_text = f.read()
    except FileNotFoundError:
        original_text = ""
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is not an OSError; without it a non-UTF-8
        # .gitignore would crash the hook with a traceback.
        print(f"ERROR: unable to read '{gitignore_file}': {exc}")
        return False

    managed = {env_file, banner}
    kept = [ln for ln in original_text.splitlines() if ln.strip() not in managed]
    # Trim AFTER filtering: removing old banner/env lines can expose blank
    # lines that used to sit between duplicated sections.
    while kept and not kept[-1].strip():
        kept.pop()

    # The blank separator is emitted even for an empty file, so the managed
    # section always looks the same.
    new_content = "\n".join([*kept, "", banner, env_file]) + "\n"
    if new_content == original_text:
        return False
    try:
        _atomic_write(gitignore_file, new_content)
    except OSError as exc:  # pragma: no cover
        print(f"ERROR: unable to write '{gitignore_file}': {exc}")
        return False
    return True


def create_example_env(src_env: str, example_file: str) -> bool:
    """Write example file containing only variable keys from real env file.

    Lines accepted: optional 'export ' prefix then KEY=...; comments, blank
    lines and anything else are ignored. Duplicate keys are written once, in
    first-seen order. Values are never copied.

    Args:
        src_env: path of the real env file to read.
        example_file: path of the example file to (over)write.

    Returns:
        True if the file was written, False on read/write error (an
        ``ERROR:`` line is printed).
    """
    try:
        # utf-8-sig also reads plain UTF-8, but strips a leading BOM that
        # would otherwise hide the first key from the regex.
        with open(src_env, encoding="utf-8-sig") as f_env:
            lines = f_env.readlines()
    except (OSError, UnicodeDecodeError) as exc:
        print(f"ERROR: unable to read '{src_env}': {exc}")
        return False

    matches = (_KEY_REGEX.match(line) for line in lines)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    keys = list(dict.fromkeys(m.group(1) for m in matches if m))

    header = [
        '# Generated by catch-dotenv hook.',
        '# Variable names only – fill in sample values as needed.',
        '',
    ]
    body = [f"{k}=" for k in keys]
    try:
        _atomic_write(example_file, "\n".join(header + body) + "\n")
    except OSError as exc:  # pragma: no cover
        print(f"ERROR: unable to write '{example_file}': {exc}")
        return False
    return True


def _has_env(filenames: Iterable[str], env_file: str) -> bool:
    """Return True if any staged path refers to a target env file by basename."""
    return any(os.path.basename(name) == env_file for name in filenames)


def _find_repo_root(start: str = '.') -> str:
    """Ascend from start until a directory containing '.git' is found.

    '.git' may be a directory (regular clone) or a file (linked worktree or
    submodule, where it holds a ``gitdir:`` pointer), so any existing entry
    named '.git' counts.

    Falls back to the absolute path of start if no parent contains '.git'.
    The hook is normally run from the repo root already; the ascent keeps
    behavior stable when it is invoked from a subdirectory (e.g. for direct
    ad-hoc testing).
    """
    origin = os.path.abspath(start)
    cur = origin
    while True:
        if os.path.exists(os.path.join(cur, '.git')):
            return cur
        parent = os.path.dirname(cur)
        if parent == cur:  # reached the filesystem root
            return origin
        cur = parent


def _print_failure(
        env_file: str,
        gitignore_file: str,
        example_created: bool,
        gitignore_modified: bool,
) -> None:
    """Print the one-line explanation shown when the commit is blocked."""
    parts: list[str] = [f"Blocked committing '{env_file}'."]
    if gitignore_modified:
        parts.append(f"Added to '{gitignore_file}'.")
    if example_created:
        parts.append("Example file generated.")
    parts.append(f"Remove '{env_file}' from the commit and commit again.")
    print(" ".join(parts))


def main(argv: Sequence[str] | None = None) -> int:
    """Main function for the pre-commit hook.

    Args:
        argv: command-line arguments; None lets argparse read ``sys.argv``.

    Returns:
        0 when no staged filename has the basename `.env`; 1 (block the
        commit) otherwise.
    """
    parser = argparse.ArgumentParser(description="Block committing environment files (.env).")
    parser.add_argument('filenames', nargs='*', help='Staged filenames (supplied by pre-commit).')
    parser.add_argument('--create-example', action='store_true', help='Generate example env file (.env.example).')
    args = parser.parse_args(argv)
    env_file = DEFAULT_ENV_FILE

    # Cheap gate first: nothing below is needed unless .env is staged.
    if not _has_env(args.filenames, env_file):
        return 0

    # Resolve repository root (directory containing .git) so writes happen there
    repo_root = _find_repo_root('.')
    gitignore_file = os.path.join(repo_root, DEFAULT_GITIGNORE_FILE)
    example_file = os.path.join(repo_root, DEFAULT_EXAMPLE_ENV_FILE)
    env_abspath = os.path.join(repo_root, env_file)

    gitignore_modified = ensure_env_in_gitignore(env_file, gitignore_file, GITIGNORE_BANNER)
    example_created = False
    # Source env is always looked up relative to repo root
    if args.create_example and os.path.exists(env_abspath):
        example_created = create_example_env(env_abspath, example_file)

    _print_failure(env_file, gitignore_file, example_created, gitignore_modified)
    return 1  # Block commit


if __name__ == '__main__':
    raise SystemExit(main())
# Reconstructed from the same git patch; target path: tests/catch_dotenv_test.py
from __future__ import annotations

import os
import re
import shutil
import threading
import time
from pathlib import Path

import pytest

from pre_commit_hooks.catch_dotenv import (
    DEFAULT_ENV_FILE,
    DEFAULT_EXAMPLE_ENV_FILE,
    DEFAULT_GITIGNORE_FILE,
    GITIGNORE_BANNER,
    ensure_env_in_gitignore,
    main,
)

# Tests cover hook behavior: detection gating, .gitignore normalization, example
# file generation parsing edge cases, idempotency, and preservation of existing
# content. Each test isolates a single behavioral contract.
#
# All file reads/writes pass encoding='utf-8' explicitly: the hook writes UTF-8
# (the example header contains a non-ASCII dash), so relying on the locale
# default would make the tests platform-dependent.

_KEY_LINE = re.compile(r'^[A-Za-z_][A-Za-z0-9_]*=$')


@pytest.fixture()
def env_file(tmp_path: Path) -> Path:
    """Copy shared resource .env into tmp workspace as the canonical .env.

    All tests rely on this baseline content (optionally appending extra lines
    for edge cases) to ensure consistent parsing behavior.
    """
    # __file__ => <repo>/tests/catch_dotenv_test.py
    # parents[0] = <repo>/tests, parents[1] = <repo>
    # Source file stored as test.env in repo (cannot commit a real .env in CI)
    resource_env = Path(__file__).resolve().parents[1] / 'testing' / 'resources' / 'test.env'
    dest = tmp_path / DEFAULT_ENV_FILE
    shutil.copyfile(resource_env, dest)
    return dest


def run_hook(tmp_path: Path, staged: list[str], create_example: bool = False) -> int:
    """Run the hook's main() with cwd set to tmp_path; restore cwd afterwards."""
    cwd = os.getcwd()
    os.chdir(tmp_path)
    try:
        args = staged[:]  # copy so the caller's list is not mutated
        if create_example:
            args.append('--create-example')
        return main(args)
    finally:
        os.chdir(cwd)


def _read_lines(path: Path) -> list[str]:
    """Return the UTF-8 decoded lines of path."""
    return path.read_text(encoding='utf-8').splitlines()


def _key_lines(path: Path) -> list[str]:
    """Return the non-blank, non-comment lines of path."""
    return [ln for ln in _read_lines(path) if ln and not ln.startswith('#')]


def test_no_env_file(tmp_path: Path, env_file: Path) -> None:
    """Hook should no-op (return 0) if .env not staged even if it exists."""
    (tmp_path / 'foo.txt').write_text('x', encoding='utf-8')
    assert run_hook(tmp_path, ['foo.txt']) == 0


def test_blocks_env_and_updates_gitignore(tmp_path: Path, env_file: Path) -> None:
    """Staging .env triggers block (exit 1) and appends banner + env entry."""
    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE])
    assert ret == 1
    gi = _read_lines(tmp_path / DEFAULT_GITIGNORE_FILE)
    assert gi[-2] == GITIGNORE_BANNER
    assert gi[-1] == DEFAULT_ENV_FILE


def test_env_present_but_not_staged(tmp_path: Path, env_file: Path) -> None:
    """Existing .env on disk but not staged should not block commit."""
    assert run_hook(tmp_path, ['unrelated.txt']) == 0


def test_idempotent_gitignore(tmp_path: Path, env_file: Path) -> None:
    """Re-running after initial normalization leaves .gitignore unchanged."""
    g = tmp_path / DEFAULT_GITIGNORE_FILE
    g.write_text(f"{GITIGNORE_BANNER}\n{DEFAULT_ENV_FILE}\n", encoding='utf-8')
    first = run_hook(tmp_path, [DEFAULT_ENV_FILE])
    assert first == 1
    content1 = g.read_text(encoding='utf-8')
    second = run_hook(tmp_path, [DEFAULT_ENV_FILE])
    assert second == 1
    assert g.read_text(encoding='utf-8') == content1  # unchanged


def test_gitignore_with_existing_content_preserved(tmp_path: Path, env_file: Path) -> None:
    """Existing entries stay intact; banner/env appended at end cleanly."""
    g = tmp_path / DEFAULT_GITIGNORE_FILE
    g.write_text('node_modules/\n# comment line\n', encoding='utf-8')  # no banner/env section yet
    run_hook(tmp_path, [DEFAULT_ENV_FILE])
    lines = _read_lines(g)
    # original content should still be at top
    assert lines[0] == 'node_modules/'
    assert '# comment line' in lines[1]
    # Last two lines should be banner + env file
    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]


def test_gitignore_duplicates_are_collapsed(tmp_path: Path, env_file: Path) -> None:
    """Multiple prior duplicate banner/env lines collapse to single pair."""
    g = tmp_path / DEFAULT_GITIGNORE_FILE
    g.write_text(
        f"other\n{GITIGNORE_BANNER}\n{DEFAULT_ENV_FILE}\n{GITIGNORE_BANNER}\n{DEFAULT_ENV_FILE}\n\n\n",
        encoding='utf-8',
    )
    run_hook(tmp_path, [DEFAULT_ENV_FILE])
    lines = _read_lines(g)
    assert lines.count(GITIGNORE_BANNER) == 1
    assert lines.count(DEFAULT_ENV_FILE) == 1
    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]


def test_create_example(tmp_path: Path, env_file: Path) -> None:
    """Example file includes discovered keys; values stripped to KEY=."""
    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
    assert ret == 1
    key_lines = _key_lines(tmp_path / DEFAULT_EXAMPLE_ENV_FILE)
    # All key lines should be KEY=
    assert all(_KEY_LINE.match(ln) for ln in key_lines)
    # Spot check a few known keys from resource file
    for k in ['BACKEND_CONTAINER_PORT=', 'ACCESS_TOKEN_SECRET=', 'SUPABASE_SERVICE_KEY=']:
        assert k in key_lines


def test_create_example_duplicate_key_variant_ignored(tmp_path: Path, env_file: Path) -> None:
    """Appending whitespace duplicate of existing key should not duplicate in example."""
    with open(env_file, 'a', encoding='utf-8') as f:
        f.write('BACKEND_CONTAINER_PORT =999 # duplicate variant\n')
    run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
    key_lines = _key_lines(tmp_path / DEFAULT_EXAMPLE_ENV_FILE)
    assert key_lines.count('BACKEND_CONTAINER_PORT=') == 1


def test_gitignore_without_trailing_newline(tmp_path: Path, env_file: Path) -> None:
    """Normalization works when original .gitignore lacks trailing newline."""
    g = tmp_path / DEFAULT_GITIGNORE_FILE
    g.write_text('existing_line', encoding='utf-8')  # no newline at EOF
    run_hook(tmp_path, [DEFAULT_ENV_FILE])
    lines = _read_lines(g)
    assert lines[0] == 'existing_line'
    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]


def test_ensure_env_in_gitignore_normalizes(tmp_path: Path, env_file: Path) -> None:
    """Direct API call collapses duplicates and produces canonical tail layout."""
    g = tmp_path / DEFAULT_GITIGNORE_FILE
    g.write_text(
        f"{GITIGNORE_BANNER}\n{DEFAULT_ENV_FILE}\n{GITIGNORE_BANNER}\n{DEFAULT_ENV_FILE}\n\n",
        encoding='utf-8',
    )
    modified = ensure_env_in_gitignore(DEFAULT_ENV_FILE, str(g), GITIGNORE_BANNER)
    assert modified is True
    lines = _read_lines(g)
    # final two lines should be banner + env
    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]
    # only one occurrence each
    assert lines.count(GITIGNORE_BANNER) == 1
    assert lines.count(DEFAULT_ENV_FILE) == 1


def test_source_env_file_not_modified(tmp_path: Path, env_file: Path) -> None:
    """Hook must not alter original .env (comments and formatting stay)."""
    original = env_file.read_bytes()
    run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
    assert env_file.read_bytes() == original


def test_large_resource_env_parsing(tmp_path: Path, env_file: Path) -> None:
    """Generate example from resource env; assert broad key coverage & format."""
    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
    assert ret == 1
    key_lines = _key_lines(tmp_path / DEFAULT_EXAMPLE_ENV_FILE)
    assert len(key_lines) > 20
    assert all(_KEY_LINE.match(ln) for ln in key_lines)
    for k in ['BACKEND_CONTAINER_PORT=', 'SUPABASE_SERVICE_KEY=', 'ACCESS_TOKEN_SECRET=']:
        assert k in key_lines


def test_failure_message_content(
        tmp_path: Path, env_file: Path, capsys: pytest.CaptureFixture[str],
) -> None:
    """Hook stdout message should contain key phrases when blocking commit."""
    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
    assert ret == 1
    out = capsys.readouterr().out.strip()
    assert "Blocked committing" in out
    assert DEFAULT_GITIGNORE_FILE in out
    assert "Example file generated" in out
    assert "Remove '.env'" in out


def test_create_example_when_env_missing(tmp_path: Path, env_file: Path) -> None:
    """--create-example with no .env staged or present should no-op (exit 0).

    Uses env_file fixture (requirement: all tests use fixture) then removes the
    copied .env to simulate absence.
    """
    env_file.unlink()
    ret = run_hook(tmp_path, ['unrelated.txt'], create_example=True)
    assert ret == 0
    assert not (tmp_path / DEFAULT_EXAMPLE_ENV_FILE).exists()


def test_gitignore_is_directory_error(
        tmp_path: Path, env_file: Path, capsys: pytest.CaptureFixture[str],
) -> None:
    """If .gitignore path is a directory, hook should print error and still block."""
    gitignore_dir = tmp_path / DEFAULT_GITIGNORE_FILE
    gitignore_dir.mkdir()
    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE])
    assert ret == 1  # still blocks commit
    out = capsys.readouterr().out
    assert "ERROR:" in out  # read failure logged


def test_env_example_overwrites_existing(tmp_path: Path, env_file: Path) -> None:
    """Pre-existing example file with junk should be overwritten with header & keys."""
    example = tmp_path / DEFAULT_EXAMPLE_ENV_FILE
    example.write_text('junk=1\nSHOULD_NOT_REMAIN=2\n', encoding='utf-8')
    run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
    content = _read_lines(example)
    assert content[0].startswith('# Generated by catch-dotenv')
    assert any(ln.startswith('BACKEND_CONTAINER_PORT=') for ln in content)
    assert 'junk=1' not in content
    assert 'SHOULD_NOT_REMAIN=2' not in content


def test_large_gitignore_normalization_performance(tmp_path: Path, env_file: Path) -> None:
    """Very large .gitignore remains normalized quickly (functional smoke)."""
    g = tmp_path / DEFAULT_GITIGNORE_FILE
    # Many ordinary lines followed by repeated banner/env pairs
    lines = [f"file_{i}" for i in range(3000)] + [GITIGNORE_BANNER, DEFAULT_ENV_FILE] * 3
    g.write_text("\n".join(lines) + "\n", encoding='utf-8')
    # perf_counter is monotonic; time.time() can jump if the wall clock is adjusted
    start = time.perf_counter()
    run_hook(tmp_path, [DEFAULT_ENV_FILE])
    elapsed = time.perf_counter() - start
    result_lines = _read_lines(g)
    assert result_lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]
    assert result_lines.count(GITIGNORE_BANNER) == 1
    assert result_lines.count(DEFAULT_ENV_FILE) == 1
    # Soft performance expectation: should finish fast (< 0.5s on typical dev machine)
    assert elapsed < 0.5


def test_concurrent_gitignore_writes(tmp_path: Path, env_file: Path) -> None:
    """Concurrent ensure_env_in_gitignore calls result in canonical final state."""
    g = tmp_path / DEFAULT_GITIGNORE_FILE
    # Seed with an existing section followed by a stray blank line
    g.write_text(f"other\n{GITIGNORE_BANNER}\n{DEFAULT_ENV_FILE}\n\n", encoding='utf-8')

    def worker() -> None:
        ensure_env_in_gitignore(DEFAULT_ENV_FILE, str(g), GITIGNORE_BANNER)

    threads = [threading.Thread(target=worker) for _ in range(5)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    lines = _read_lines(g)
    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]
    assert lines.count(GITIGNORE_BANNER) == 1
    assert lines.count(DEFAULT_ENV_FILE) == 1


def test_mixed_staged_files(tmp_path: Path, env_file: Path) -> None:
    """Staging .env with other files still blocks and normalizes gitignore."""
    other = tmp_path / 'README.md'
    other.write_text('hi', encoding='utf-8')
    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE, 'README.md'])
    assert ret == 1
    lines = _read_lines(tmp_path / DEFAULT_GITIGNORE_FILE)
    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]


def test_already_ignored_env_with_variations(tmp_path: Path, env_file: Path) -> None:
    """Pre-existing ignore lines with spacing normalize to single canonical pair."""
    g = tmp_path / DEFAULT_GITIGNORE_FILE
    g.write_text(
        f" {DEFAULT_ENV_FILE} \n{GITIGNORE_BANNER}\n {DEFAULT_ENV_FILE}\n",
        encoding='utf-8',
    )
    run_hook(tmp_path, [DEFAULT_ENV_FILE])
    lines = _read_lines(g)
    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]
    assert lines.count(DEFAULT_ENV_FILE) == 1


def test_subdirectory_invocation(tmp_path: Path, env_file: Path) -> None:
    """Running from a subdirectory still writes gitignore/example at repo root."""
    sub = tmp_path / 'subdir'
    sub.mkdir()
    # simulate repository root marker
    (tmp_path / '.git').mkdir()
    # simulate running hook from subdir with the staged path given relative to it
    ret = run_hook(sub, ['../' + DEFAULT_ENV_FILE])
    gi = _read_lines(tmp_path / DEFAULT_GITIGNORE_FILE)
    assert ret == 1
    assert gi[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]


def test_atomic_write_failure_gitignore(
        monkeypatch: pytest.MonkeyPatch,
        tmp_path: Path,
        env_file: Path,
        capsys: pytest.CaptureFixture[str],
) -> None:
    """Simulate os.replace failure during gitignore write to exercise error path."""
    def boom(*a: object, **k: object) -> None:
        raise OSError('replace-fail')

    monkeypatch.setattr('pre_commit_hooks.catch_dotenv.os.replace', boom)
    modified = ensure_env_in_gitignore(
        DEFAULT_ENV_FILE, str(tmp_path / DEFAULT_GITIGNORE_FILE), GITIGNORE_BANNER,
    )
    assert modified is False
    out = capsys.readouterr().out
    assert 'ERROR: unable to write' in out


def test_atomic_write_failure_example(
        monkeypatch: pytest.MonkeyPatch,
        tmp_path: Path,
        env_file: Path,
        capsys: pytest.CaptureFixture[str],
) -> None:
    """Simulate os.replace failure when writing example env file."""
    def boom(*a: object, **k: object) -> None:
        raise OSError('replace-fail')

    monkeypatch.setattr('pre_commit_hooks.catch_dotenv.os.replace', boom)
    # create_example_env requires source .env to exist; env_file fixture provides it in tmp_path root
    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
    # hook still blocks; but example creation failed -> message should not claim Example file generated
    assert ret == 1
    out = capsys.readouterr().out
    assert 'Example file generated' not in out
    assert 'ERROR: unable to write' in out