Sunstead trust scoring project
1"""Static secret/SAST scan of a PR diff -> advisory machine_findings (PRD 6.12).
2
3Regex patterns ported from VouchSafe (tangled.org/ivoine.tngl.sh/hackathon, MIT) --
4the "OSV/secret-scan/SAST" external signal the README lists as skipped. Two deliberate
5adaptations for this system:
6
7- Scan only ADDED lines of the unified diff: a removed secret is not a leak, and
8 context lines aren't the contribution under review.
9- REDACT every matched secret before returning. Findings ride into a Claude prompt
10 and into the published sh.tangled.trust.score record (6.11), so the raw value must
11 never echo back out -- a scanner that leaks the secret it found is worse than none.
12
Advisory only, like the slop signal: it hints Claude (machine_findings) and adds a
line to the explanation. It never flips the gate -- only the graph and attestation do
(constraint 2). A critical leak could be made to force review by gating should_review
on it; that is left out by intent so the gate contract stays in fusion.decide().
17"""
18
19from __future__ import annotations
20
21import re
22
# (name, pattern, severity). Per-line scan over added lines, so no re.S/multiline needed.
# When a pattern has a capture group, that group is the sensitive value to mask;
# otherwise the whole match is masked.
_PATTERNS = [
    ("Exposed API Key", re.compile(r"(?:api[_-]?key|apikey|api[_-]?secret)\s*[=:]\s*['\"]([a-zA-Z0-9_\-]{20,})['\"]?", re.I), "critical"),
    ("AWS Access Key", re.compile(r"(?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}"), "critical"),
    ("Private Key", re.compile(r"-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----"), "critical"),
    ("Hardcoded Password", re.compile(r"(?:password|passwd|pwd)\s*[=:]\s*['\"]([^'\"]{8,})['\"]?", re.I), "high"),
    ("SQL Injection Risk", re.compile(r"(?:execute|query|prepare)\s*\(\s*['\"]\s*SELECT.*\+.*['\"]|(?:execute|query)\s*\(\s*['\"].*\$\{.*\}.*['\"]", re.I), "high"),
    ("eval() Usage", re.compile(r"\beval\s*\("), "high"),
    ("JWT Token", re.compile(r"eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}"), "medium"),
    ("Generic Secret", re.compile(r"(?:secret|token|bearer)\s*[=:]\s*['\"]([a-zA-Z0-9_\-]{20,})['\"]?", re.I), "medium"),
]

# Hunk header "@@ -a[,b] +c[,d] @@". A count that is omitted means 1 line.
_HUNK = re.compile(r"^@@ -\d+(?:,(?P<old_n>\d+))? \+(?P<new_start>\d+)(?:,(?P<new_n>\d+))?")

_MASK = "...[redacted]"
_KEEP_PREFIX = 4    # leading characters of a secret left visible as a hint
_SNIPPET_MAX = 120  # maximum snippet length, applied after masking


def _mask(secret: str) -> str:
    """Return the placeholder for `secret`: a short prefix hint plus the mask."""
    prefix = secret[:_KEEP_PREFIX] if len(secret) > _KEEP_PREFIX else ""
    return prefix + _MASK


def _sensitive_span(m: re.Match) -> tuple[int, int]:
    """Span of the value to hide: capture group 1 if the match has one, else the match."""
    return m.span(1) if m.lastindex else m.span(0)


def _redact(line: str, m: re.Match,
            others: list[re.Match] | tuple[re.Match, ...] = ()) -> str:
    """Mask the sensitive token (capture group if any, else the whole match) so a
    finding can travel into a public prompt/record without leaking the secret.

    `others` are further matches on the same line. Their sensitive values are masked
    too, so a snippet produced for one finding never shows a secret that belongs to
    another finding. Overlapping values are merged and masked as one, which keeps the
    tail of a longer secret from surviving next to a shorter match inside it.
    Masking happens before the 120-character truncation.
    """
    spans = sorted(
        span for span in (_sensitive_span(x) for x in (m, *others))
        if span[1] > span[0]
    )

    # Merge overlapping/adjacent spans so each region is masked exactly once.
    merged: list[list[int]] = []
    for start, end in spans:
        if merged and start <= merged[-1][1]:
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])

    # Literal values to scrub from the text between spans (e.g. the same secret
    # repeated in a trailing comment). Longest first so a contained value cannot
    # pre-empt the longer one; secondary sort key keeps the order deterministic.
    values = {line[s:e] for s, e in spans} | {line[s:e] for s, e in merged}
    ordered = sorted(values, key=lambda v: (-len(v), v))

    def scrub(gap: str) -> str:
        for value in ordered:
            gap = gap.replace(value, _mask(value))
        return gap

    parts: list[str] = []
    pos = 0
    for start, end in merged:
        parts.append(scrub(line[pos:start]))
        parts.append(_mask(line[start:end]))
        pos = end
    parts.append(scrub(line[pos:]))
    return "".join(parts).strip()[:_SNIPPET_MAX]


def scan_diff(diff: str | None) -> list[dict]:
    """Findings on ADDED lines of a unified diff: [{type, severity, line, snippet}].

    `line` is the new-file line number; `snippet` is the added line with every
    matched sensitive value on it redacted. Returns an empty list if nothing matched
    or there is no diff (so callers can `or None` it into machine_findings).

    Line classification inside a hunk follows the unified-diff prefixes: "+" added,
    "-" removed, "\\" the "No newline at end of file" marker (belongs to neither
    file, so it does not advance the counter), anything else context. The hunk
    header's line counts are used only to tell an added line whose content starts
    with "++" apart from a "+++ b/path" file header; if the counts are missing or
    wrong, the scan falls back to treating "+++" as a header.
    """
    if not diff:
        return []
    findings: list[dict] = []
    new_line = 0
    old_left = new_left = 0  # lines the current hunk still expects on each side
    for raw in diff.splitlines():
        h = _HUNK.match(raw)
        if h:
            new_line = int(h.group("new_start"))
            old_left = int(h.group("old_n") or 1)
            new_left = int(h.group("new_n") or 1)
            continue
        if raw.startswith("+"):
            if new_left <= 0 and raw.startswith("+++"):
                continue  # file header, not content
            text = raw[1:]
            hits = [(name, sev, m)
                    for name, pat, sev in _PATTERNS
                    for m in pat.finditer(text)]
            if hits:
                # One snippet per line, with every hit masked, shared by its findings.
                snippet = _redact(text, hits[0][2], [m for _, _, m in hits])
                findings.extend(
                    {"type": name, "severity": sev, "line": new_line, "snippet": snippet}
                    for name, sev, _ in hits
                )
            new_line += 1
            new_left -= 1
        elif raw.startswith("-"):
            old_left -= 1  # removed line (or "--- a/path" header): never scanned
        elif raw.startswith("\\"):
            continue  # "\ No newline at end of file": not a line of either file
        else:
            # context line advances the new-file counter
            new_line += 1
            old_left -= 1
            new_left -= 1
    return findings
71
72
def demo() -> None:
    """Smoke test: an added secret is reported and masked; a removed one is ignored."""
    added_secret_diff = (
        '@@ -1,2 +1,3 @@\n'
        ' context\n'
        '+api_key = "AKIAIOSFODNN7EXAMPLE12"\n'
        '-old = 1\n'
    )
    findings = scan_diff(added_secret_diff)
    assert findings and findings[0]["type"] == "Exposed API Key", findings
    first = findings[0]
    # Hunk starts the new file at line 1; the context line is 1, the added line is 2.
    assert first["line"] == 2, findings
    assert "AKIAIOSFODNN7EXAMPLE12" not in first["snippet"], "secret must be redacted"

    # A secret that is only REMOVED is not a leak.
    removed_only = scan_diff('@@ -1 +0,0 @@\n-password = "hunter2hunter2"\n')
    assert removed_only == []

    # No diff at all yields no findings.
    assert scan_diff("") == []

    print(f"scan_diff ok: {len(findings)} finding(s); {first['type']} @line {first['line']} :: {first['snippet']}")


if __name__ == "__main__":
    demo()