Sunstead trust scoring project
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 4.3 kB View raw
"""Static secret/SAST scan of a PR diff -> advisory machine_findings (PRD 6.12).

Regex patterns ported from VouchSafe (tangled.org/ivoine.tngl.sh/hackathon, MIT) --
the "OSV/secret-scan/SAST" external signal the README lists as skipped. Two deliberate
adaptations for this system:

- Scan only ADDED lines of the unified diff: a removed secret is not a leak, and
  context lines aren't the contribution under review.
- REDACT every matched secret before returning. Findings ride into a Claude prompt
  and into the published sh.tangled.trust.score record (6.11), so the raw value must
  never echo back out -- a scanner that leaks the secret it found is worse than none.

Advisory only, like the slop signal: it hints Claude (machine_findings) and adds a
line to the explanation. It never flips the gate -- only the graph and attestation do
(constraint 2). To make a critical leak force review, gate should_review on it; left
out by intent so the gate contract stays in fusion.decide().
"""

from __future__ import annotations

import re

# (name, pattern, severity). Per-line scan over added lines, so no re.S/multiline needed.
# Each entry is (finding name, compiled pattern, severity). When a pattern has a
# capture group, group 1 is the sensitive token that gets masked; patterns without
# a group have their whole match masked.
_PATTERNS = [
    ("Exposed API Key", re.compile(r"(?:api[_-]?key|apikey|api[_-]?secret)\s*[=:]\s*['\"]([a-zA-Z0-9_\-]{20,})['\"]?", re.I), "critical"),
    ("AWS Access Key", re.compile(r"(?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}"), "critical"),
    # OPENSSH (current ssh-keygen default) and ENCRYPTED (PKCS#8) labels included.
    ("Private Key", re.compile(r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----"), "critical"),
    ("Hardcoded Password", re.compile(r"(?:password|passwd|pwd)\s*[=:]\s*['\"]([^'\"]{8,})['\"]?", re.I), "high"),
    ("SQL Injection Risk", re.compile(r"(?:execute|query|prepare)\s*\(\s*['\"]\s*SELECT.*\+.*['\"]|(?:execute|query)\s*\(\s*['\"].*\$\{.*\}.*['\"]", re.I), "high"),
    ("eval() Usage", re.compile(r"\beval\s*\("), "high"),
    ("JWT Token", re.compile(r"eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}"), "medium"),
    ("Generic Secret", re.compile(r"(?:secret|token|bearer)\s*[=:]\s*['\"]([a-zA-Z0-9_\-]{20,})['\"]?", re.I), "medium"),
]

# Unified-diff hunk header. Group 1 is the new-file start line; group 2 is the
# new-file line count (absent when the header omits it, which means 1).
_HUNK = re.compile(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))?")

_MASK = "...[redacted]"
_KEEP_PREFIX = 4      # leading characters of a secret left visible as a hint
_SNIPPET_MAX = 120    # snippets are truncated to this many characters


def _occurrences(line: str, needle: str):
    """Yield the (start, end) span of every occurrence of `needle` in `line`,
    overlapping ones included."""
    pos = line.find(needle)
    while pos != -1:
        yield pos, pos + len(needle)
        pos = line.find(needle, pos + 1)


def _redact(line: str, m: re.Match, others: tuple[re.Match, ...] = ()) -> str:
    """Mask the sensitive token of `m` and of every match in `others` inside `line`.

    The sensitive token of a match is capture group 1 if the pattern has one, else
    the whole match. Every occurrence of every token is masked, and overlapping
    tokens are merged before masking so the tail of a longer secret cannot survive
    next to the mask of a shorter one. A masked region keeps its first 4 characters
    when it is longer than 4, followed by "...[redacted]".

    Returns the masked line, stripped and truncated to 120 characters, so a finding
    can travel into a public prompt/record without leaking the secret.
    """
    secrets = set()
    for hit in (m, *others):
        secret = (hit.group(1) if hit.lastindex else None) or hit.group(0)
        if secret:  # an empty needle would match everywhere; nothing to hide anyway
            secrets.add(secret)

    # Merge overlapping/adjacent spans so each sensitive region is masked exactly once.
    merged: list[list[int]] = []
    for start, end in sorted(span for s in secrets for span in _occurrences(line, s)):
        if merged and start <= merged[-1][1]:
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])

    parts = []
    pos = 0
    for start, end in merged:
        parts.append(line[pos:start])
        if end - start > _KEEP_PREFIX:
            parts.append(line[start:start + _KEEP_PREFIX])
        parts.append(_MASK)
        pos = end
    parts.append(line[pos:])
    return "".join(parts).strip()[:_SNIPPET_MAX]


def scan_diff(diff: str | None) -> list[dict]:
    """Findings on ADDED lines of a unified diff: [{type, severity, line, snippet}].

    `line` is the new-file line number. `snippet` is the added line with EVERY
    secret matched on that line redacted (not just the one the finding is about),
    so no finding can echo a neighbouring secret. Findings keep pattern order, then
    match order, per line.

    Returns an empty list if nothing matched or there is no diff (so callers can
    `or None` it into machine_findings).
    """
    if not diff:
        return []
    findings: list[dict] = []
    new_line = 0
    # New-file lines the current hunk header still promises. Used only to tell an
    # added line whose content starts with "++" apart from a "+++ b/file" header.
    new_left = 0
    for raw in diff.splitlines():
        h = _HUNK.match(raw)
        if h:
            new_line = int(h.group(1))
            new_left = int(h.group(2)) if h.group(2) is not None else 1
            continue
        if raw.startswith("\\"):
            # "\ No newline at end of file" is a marker, not a line of either file.
            continue
        if raw.startswith("-"):
            # Removed line or "--- a/file" header: neither exists in the new file.
            continue
        if raw.startswith("+"):
            if raw.startswith("+++") and new_left <= 0:
                continue  # file header: no hunk is expecting more new-file lines
            text = raw[1:]
            hits = [(name, sev, m)
                    for name, pat, sev in _PATTERNS
                    for m in pat.finditer(text)]
            if hits:
                # One snippet per line, with every hit masked, shared by all findings.
                snippet = _redact(text, hits[0][2], tuple(m for _, _, m in hits[1:]))
                findings.extend(
                    {"type": name, "severity": sev, "line": new_line, "snippet": snippet}
                    for name, sev, _ in hits
                )
        # Added and context lines both advance the new-file counter.
        new_line += 1
        new_left -= 1
    return findings


def demo() -> None:
    """Self-check: flags an added secret, redacts it in every finding, ignores removals."""
    leak = ('@@ -1,2 +1,3 @@\n context\n+api_key = "AKIAIOSFODNN7EXAMPLE12"\n-old = 1\n')
    f = scan_diff(leak)
    assert f and f[0]["type"] == "Exposed API Key", f
    assert f[0]["line"] == 2, f  # hunk +1, context line 1, added line 2
    assert "AKIAIOSFODNN7EXAMPLE12" not in f[0]["snippet"], "secret must be redacted"
    # overlapping findings on the same line must not leak each other's tail
    assert all(x["snippet"].endswith('[redacted]"') for x in f), f
    # a secret only REMOVED is not a leak
    assert scan_diff('@@ -1 +0,0 @@\n-password = "hunter2hunter2"\n') == []
    assert scan_diff("") == []
    print(f"scan_diff ok: {len(f)} finding(s); {f[0]['type']} @line {f[0]['line']} :: {f[0]['snippet']}")


if __name__ == "__main__":
    demo()