# pre-commit-hooks/pre_commit_hooks/detect_secrets.py
# 2025-12-12 11:20:26 +00:00
#
# 122 lines
# 3 KiB
# Python

from __future__ import annotations

import argparse
import os
import re
import subprocess
from collections.abc import Sequence
from pathlib import Path
# ---------------------------------------------------------------------------
# Built-in detection rules.
#
# Maps a rule name (shown in the report when the rule matches) to the source
# of the regular expression that is searched for in each scanned file.
# ---------------------------------------------------------------------------
DEFAULT_PATTERNS: dict[str, str] = {
    # GitLab tokens: "glpat-" / "glrt-" prefix followed by 20+ token chars.
    'gitlab_pat': r'glpat-[0-9A-Za-z_-]{20,}',
    'gitlab_runner_token': r'glrt-[0-9A-Za-z_-]{20,}',

    # GitHub tokens: "ghp_" + 36 alphanumerics, or "github_pat_" + 82 chars.
    'github_pat': r'ghp_[0-9A-Za-z]{36}',
    'github_fine_grained_pat': r'github_pat_[0-9A-Za-z_]{82}',

    # AWS: "AKIA" + 16 upper-case alphanumerics, and a quoted 40-character
    # value appearing shortly after an "aws ... secret/access key" label
    # (case-insensitive).
    'aws_access_key': r'AKIA[0-9A-Z]{16}',
    'aws_secret_key': r"(?i)aws(.{0,20})?(secret|access)[-_ ]?key(.{0,20})?['\"][0-9a-zA-Z/+]{40}['\"]",

    # Generic: a credential-like name assigned a quoted, non-empty value
    # (case-insensitive).
    'generic_secret': r"(?i)(password|passwd|pwd|secret|token|api[_-]?key)\s*=\s*['\"].+['\"]",
}
def load_custom_patterns(path: Path) -> dict[str, str]:
    """Load user-supplied regex rules from *path*, one rule per line.

    Each line is stripped of surrounding whitespace. Blank lines and lines
    starting with ``#`` are skipped. Every remaining line is stored verbatim
    (not compiled or validated here) under the name ``custom_rule_<n>``,
    where ``<n>`` is the 1-based line number in the file.

    Args:
        path: Location of the rules file.

    Returns:
        Mapping of generated rule name to regex source, in file order.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly instead of relying on the locale encoding, and use
    # "utf-8-sig" so a leading byte-order mark is dropped. Otherwise the BOM
    # sticks to the first line, and a first-line "# comment" is no longer
    # recognised as a comment and ends up registered as a regex rule.
    content = path.read_text(encoding='utf-8-sig')

    patterns: dict[str, str] = {}
    for line_number, raw_line in enumerate(content.splitlines(), start=1):
        rule = raw_line.strip()
        if rule and not rule.startswith('#'):
            patterns[f"custom_rule_{line_number}"] = rule
    return patterns
def is_binary(data: bytes) -> bool:
    """Return True when *data* contains at least one NUL (``0x00``) byte."""
    first_nul = data.find(b'\x00')
    return first_nul != -1
def git_tracked_files() -> list[Path]:
    """Return all git-tracked files in the repo.

    Runs ``git ls-files -z`` in the current working directory and turns each
    reported entry into a ``Path``. Git's exit status is not checked and its
    stderr is discarded, so a failing ``git`` invocation (for example outside
    a repository) yields an empty list rather than an exception.

    Returns:
        One ``Path`` per entry printed by ``git ls-files``.
    """
    result = subprocess.run(
        # -z makes git emit NUL-terminated, verbatim path names. Without it,
        # paths containing quotes, backslashes, control characters or (by
        # default) non-ASCII bytes are printed C-quoted, e.g. "a\"b.txt",
        # which does not name an existing file and would be silently skipped
        # by the scanner.
        ['git', 'ls-files', '-z'],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    # Output is captured as bytes and decoded per entry with the filesystem
    # encoding, so names round-trip exactly as the OS stores them.
    return [Path(os.fsdecode(entry)) for entry in result.stdout.split(b'\0') if entry]
def main(argv: Sequence[str] | None = None) -> int:
    """Scan files for secret-like strings and report every rule that matches.

    The built-in ``DEFAULT_PATTERNS`` are always applied; rules loaded from
    ``--rules`` are added on top. When no file names are given on the command
    line, every file returned by ``git_tracked_files()`` is scanned.

    Args:
        argv: Command-line arguments. ``None`` is forwarded to argparse,
            which then reads the process arguments.

    Returns:
        ``0`` when nothing matched, ``1`` when at least one rule matched a
        file, ``2`` when the rules file does not exist or contains a regular
        expression that fails to compile.
    """
    parser = argparse.ArgumentParser(description='Detect exposed secrets in repository')
    parser.add_argument(
        '--rules',
        type=Path,
        help='File containing custom regex rules (one per line)',
    )
    parser.add_argument(
        'filenames',
        nargs='*',
        help='Files to scan (if empty, scans entire repo)',
    )
    args = parser.parse_args(argv)

    # Copy so custom rules never mutate the module-level defaults.
    patterns = dict(DEFAULT_PATTERNS)
    if args.rules:
        if not args.rules.is_file():
            print(f"Rules file not found: {args.rules}")
            return 2
        patterns.update(load_custom_patterns(args.rules))

    # Compile one rule at a time so a malformed custom regex is reported by
    # name as a configuration error (exit code 2) instead of aborting the
    # hook with a traceback.
    compiled: dict[str, re.Pattern[str]] = {}
    for name, regex in patterns.items():
        try:
            compiled[name] = re.compile(regex)
        except re.error as exc:
            print(f"Invalid regex for rule {name}: {exc}")
            return 2

    files: list[Path]
    if args.filenames:
        files = [Path(f) for f in args.filenames]
    else:
        files = git_tracked_files()

    findings: list[tuple[Path, str]] = []
    for path in files:
        # Skip directories, deleted files and anything else that is not a
        # regular file.
        if not path.is_file():
            continue
        try:
            data = path.read_bytes()
        except OSError:
            # Best effort: unreadable files are skipped, not fatal.
            continue
        if is_binary(data):
            continue
        # Undecodable bytes are dropped so partly-corrupt text is still scanned.
        text = data.decode(errors='ignore')
        findings.extend(
            (path, rule)
            for rule, regex in compiled.items()
            if regex.search(text)
        )

    if findings:
        print('Potential secrets detected:')
        for path, rule in findings:
            print(f"  - {path} (matched: {rule})")
        return 1
    return 0
# Script entry point: run the scan and use main()'s return value as the
# process exit status.
if __name__ == '__main__':
    exit_status = main()
    raise SystemExit(exit_status)