99 lines
2.6 KiB
Python
99 lines
2.6 KiB
Python
from __future__ import annotations
|
|
|
|
import re
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
# Base directory for the scan: the parent of the directory that contains
# this file.
WORKSPACE = Path(__file__).resolve().parents[1]

# Names that, when they appear as a component of a file's path, exclude that
# file from the worktree scan.
EXCLUDED_DIRS = {
    # Version control and virtual environment
    ".git",
    ".venv",
    # Tool caches
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
    # Data and third-party dependencies
    "data",
    "node_modules",
}
|
|
|
|
# Secret-detection rules as (rule name, compiled pattern) pairs. Each pattern
# is applied with ``search`` to a whole text, so none of them is anchored.
PATTERNS: list[tuple[str, re.Pattern[str]]] = [
    # PEM-style private key header, with or without an algorithm label.
    ("private_key", re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----")),
    # Four-letter AWS prefix followed by 16 uppercase letters or digits.
    ("aws_access_key", re.compile(r"AKIA[0-9A-Z]{16}")),
    (
        "generic_token",
        re.compile(
            # Case-insensitive keyword, a ':' or '=' separator, then an
            # optional opening quote.
            r"(?i)(token|secret|password)\s*[:=]\s*['\"]?"
            # The value itself must contain a letter and a digit. The
            # lookaheads walk only token characters, so a digit or letter
            # further along the line (a comment, a call argument) cannot
            # satisfy them on behalf of the value.
            r"(?=[A-Za-z0-9_\-+/=.]*[A-Za-z])"
            r"(?=[A-Za-z0-9_\-+/=.]*[0-9])"
            # At least 20 consecutive token characters.
            r"[A-Za-z0-9_\-+/=.]{20,}"
        ),
    ),
]
|
|
|
|
|
|
def _is_probably_text(path: Path) -> bool:
    """Guess whether *path* holds text by sampling its first 2048 bytes.

    Args:
        path: File to inspect.

    Returns:
        ``True`` when the sample contains no NUL byte; ``False`` when it does
        or when the file cannot be opened or read.
    """
    try:
        with path.open("rb") as stream:
            head = stream.read(2048)
    except OSError:
        return False
    contains_nul = b"\x00" in head
    return not contains_nul
|
|
|
|
|
|
def scan_worktree() -> list[str]:
    """Scan every git-tracked file under ``WORKSPACE`` for secret-like patterns.

    A tracked file is skipped when any component of its repository-relative
    path is listed in ``EXCLUDED_DIRS``, when it is not a regular file on
    disk, when ``_is_probably_text`` rejects it, or when it cannot be read.

    Returns:
        One ``worktree:<relative path>:<rule name>`` entry for each
        (file, rule) pair whose pattern matches, in ``git ls-files`` order,
        or ``["worktree_scan_failed"]`` when git cannot be started or exits
        with a non-zero status.
    """
    try:
        tracked = subprocess.run(
            # -z emits raw NUL-terminated paths. Without it git quotes and
            # octal-escapes unusual file names, and those quoted names never
            # resolve on disk, so such files would silently go unscanned.
            ["git", "-C", str(WORKSPACE), "ls-files", "-z"],
            check=False,
            capture_output=True,
            text=True,
            # Decode explicitly so the listing does not depend on the locale.
            # A name that is not valid UTF-8 is replaced and then simply
            # fails the regular-file check below.
            encoding="utf-8",
            errors="replace",
        )
    except OSError:
        # git is missing or not executable: report it like any other failure.
        return ["worktree_scan_failed"]
    if tracked.returncode != 0:
        return ["worktree_scan_failed"]

    findings: list[str] = []
    for raw_path in tracked.stdout.split("\0"):
        if not raw_path:
            continue  # empty piece after the final NUL terminator

        rel_path = Path(raw_path)
        # Compare exclusions against the repository-relative components only;
        # using the absolute path would skip every file whenever the checkout
        # itself lives below a directory with an excluded name.
        if any(part in EXCLUDED_DIRS for part in rel_path.parts):
            continue

        path = WORKSPACE / rel_path
        if not path.is_file() or not _is_probably_text(path):
            continue

        try:
            content = path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue

        findings.extend(
            f"worktree:{rel_path}:{rule_name}"
            for rule_name, pattern in PATTERNS
            if pattern.search(content)
        )
    return findings
|
|
|
|
|
|
def scan_git_history() -> list[str]:
    """Scan the patch text of every commit reachable from any ref.

    The complete ``git log --all -p`` output is held in memory and searched
    once per rule, so a finding states that a rule matched somewhere in
    history, not in which commit or file.

    Returns:
        One ``history:<rule name>`` entry per matching rule, in ``PATTERNS``
        order, or ``["history_scan_failed"]`` when git cannot be started or
        exits with a non-zero status.
    """
    cmd = ["git", "-C", str(WORKSPACE), "log", "--all", "-p", "--pretty=format:%H"]
    try:
        completed = subprocess.run(
            cmd,
            check=False,
            capture_output=True,
            text=True,
            # Historical diffs may contain bytes that are not valid in the
            # locale encoding; strict decoding would abort the whole scan
            # with UnicodeDecodeError. Decode as UTF-8 and drop undecodable
            # bytes, as the worktree scan does when reading files.
            encoding="utf-8",
            errors="ignore",
        )
    except OSError:
        # git is missing or not executable: report it like any other failure.
        return ["history_scan_failed"]
    if completed.returncode != 0:
        return ["history_scan_failed"]

    history = completed.stdout
    return [
        f"history:{rule_name}"
        for rule_name, pattern in PATTERNS
        if pattern.search(history)
    ]
|
|
|
|
|
|
def main() -> int:
    """Run the worktree and history scans and report the outcome on stdout.

    Returns:
        ``0`` when neither scan reported anything; ``1`` after listing every
        reported entry (scan-failure markers included).
    """
    results = scan_worktree() + scan_git_history()

    # Clean run: nothing to list.
    if not results:
        print("Security scan passed: no obvious secrets detected in worktree/history.")
        return 0

    print("Security scan found potential secrets:")
    for entry in results:
        print(f"- {entry}")
    print("Rotate any exposed credentials immediately.")
    return 1
|
|
|
|
|
|
# When executed as a script, run the scan and exit with the value main()
# returns.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|