from __future__ import annotations

import argparse
import re
import subprocess
from collections.abc import Sequence
from pathlib import Path

# -------------------------
# Default secret patterns
# -------------------------
# Maps a rule name (shown in the report) to the regex source for that rule.
# Patterns are compiled once per run inside main().
DEFAULT_PATTERNS: dict[str, str] = {
    # GitLab
    'gitlab_pat': r'glpat-[0-9A-Za-z_-]{20,}',
    'gitlab_runner_token': r'glrt-[0-9A-Za-z_-]{20,}',
    # GitHub
    'github_pat': r'ghp_[0-9A-Za-z]{36}',
    'github_fine_grained_pat': r'github_pat_[0-9A-Za-z_]{82}',
    # AWS
    'aws_access_key': r'AKIA[0-9A-Z]{16}',
    'aws_secret_key': r"(?i)aws(.{0,20})?(secret|access)[-_ ]?key(.{0,20})?['\"][0-9a-zA-Z/+]{40}['\"]",
    # Generic
    'generic_secret': r"(?i)(password|passwd|pwd|secret|token|api[_-]?key)\s*=\s*['\"].+['\"]",
}


def load_custom_patterns(path: Path) -> dict[str, str]:
    """Read extra regex rules from *path*, one rule per line.

    Blank lines and lines starting with ``#`` (after stripping surrounding
    whitespace) are ignored. Each remaining line becomes a rule named
    ``custom_rule_<N>``, where ``N`` is the 1-based line number in the file,
    so a reported match can be traced back to the line that defined it.

    The regexes are returned as source strings; they are not validated here.

    Raises:
        OSError: if the file cannot be read.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    patterns: dict[str, str] = {}
    # Explicit encoding keeps rule parsing independent of the platform locale.
    lines = path.read_text(encoding='utf-8').splitlines()
    for lineno, raw in enumerate(lines, start=1):
        rule = raw.strip()
        if not rule or rule.startswith('#'):
            continue
        patterns[f"custom_rule_{lineno}"] = rule
    return patterns


def is_binary(data: bytes) -> bool:
    """Return True if *data* contains a NUL byte (treated as binary content)."""
    return b'\x00' in data


def git_tracked_files() -> list[Path]:
    """Return all git-tracked files in the repo.

    Runs ``git ls-files`` in the current working directory. git's stderr is
    discarded and a non-zero exit status is not treated as an error, so the
    result is simply whatever paths git printed (possibly none).
    """
    result = subprocess.run(
        # -z: NUL-separated output with paths emitted verbatim. Without it,
        # git C-quotes paths containing unusual characters, and the quoted
        # form does not name a real file on disk.
        ['git', 'ls-files', '-z'],
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        text=True,
        check=False,
    )
    return [Path(p) for p in result.stdout.split('\0') if p]


def _scan_file(path: Path, compiled: dict[str, re.Pattern[str]]) -> list[str]:
    """Return the names of all rules in *compiled* that match *path*'s content.

    Returns an empty list when *path* is not a regular file, cannot be read,
    or is considered binary by ``is_binary``.
    """
    if not path.is_file():
        return []
    try:
        data = path.read_bytes()
    except OSError:
        # Best effort: an unreadable file is skipped, not fatal.
        return []
    if is_binary(data):
        return []
    # Undecodable bytes are dropped so a stray byte cannot abort the scan.
    text = data.decode(errors='ignore')
    return [rule for rule, regex in compiled.items() if regex.search(text)]


def main(argv: Sequence[str] | None = None) -> int:
    """Scan files for secret-like strings and report which rules matched.

    Args:
        argv: Command-line arguments (without the program name). ``None``
            lets argparse read ``sys.argv``.

    Returns:
        0 if nothing matched, 1 if at least one rule matched some file,
        2 if the rules file is missing or contains an invalid regex.
    """
    parser = argparse.ArgumentParser(description='Detect exposed secrets in repository')
    parser.add_argument(
        '--rules',
        type=Path,
        help='File containing custom regex rules (one per line)',
    )
    parser.add_argument(
        'filenames',
        nargs='*',
        help='Files to scan (if empty, scans entire repo)',
    )
    args = parser.parse_args(argv)

    # Copy so custom rules never mutate the module-level defaults.
    patterns = dict(DEFAULT_PATTERNS)
    if args.rules is not None:
        if not args.rules.is_file():
            print(f"Rules file not found: {args.rules}")
            return 2
        patterns.update(load_custom_patterns(args.rules))

    compiled: dict[str, re.Pattern[str]] = {}
    for name, regex in patterns.items():
        try:
            compiled[name] = re.compile(regex)
        except re.error as exc:
            # A malformed rule is a configuration error: report it the same
            # way as a missing rules file instead of dying with a traceback.
            print(f"Invalid regex for rule {name}: {exc}")
            return 2

    files: list[Path]
    if args.filenames:
        files = [Path(f) for f in args.filenames]
    else:
        files = git_tracked_files()

    findings: list[tuple[Path, str]] = [
        (path, rule)
        for path in files
        for rule in _scan_file(path, compiled)
    ]

    if findings:
        print('Potential secrets detected:')
        for path, rule in findings:
            print(f"  - {path} (matched: {rule})")
        return 1

    return 0


if __name__ == '__main__':
    raise SystemExit(main())