# Sunstead trust scoring project
1"""M4 content signal: Claude reviews ONE PR's diff + discussion (PRD 6.6).
2
3Claude judges content, never identity (constraint 2): no author handle, DID, or
4history is ever passed in. Output is forced to the schema via tool use, temp 0.
5"""
6
7from __future__ import annotations
8
9import json
10import os
11
12from .config import CFG
13
# System prompt for the content reviewer. Verbatim from PRD 6.6 — do not paraphrase.
# review_pr() passes it unchanged as the `system` argument of the request.
# The backslashes right after the opening quotes and right before the closing
# quotes are line continuations, so the string has no leading or trailing newline.
SYSTEM_PROMPT = """\
You are a code-contribution reviewer for an open-source trust system. You assess ONE
pull request's actual content for quality and safety. You do not decide whether to
merge; you produce a structured risk assessment that a separate policy layer combines
with an identity-trust signal.

Hard rules:
- Judge only the artifact in front of you: the diff, the PR title and description, and
 the discussion. You are given NO information about the author's identity, reputation,
 or history, and you must not speculate about it. Identity trust is handled elsewhere.
- Your job is to catch problems a reputation signal cannot see: code that looks correct
 but is subtly wrong, plausible-looking machine-generated filler ("slop"),
 security-sensitive changes, leaked secrets or credentials, license violations, and
 changes whose stated intent does not match what the code does.
- Prefer flagging uncertainty over approving. If the diff is large, unclear, or you
 cannot verify correctness, say so and set review_recommended. Never rubber-stamp.
- Be specific. Every flag must point to concrete lines or patterns, not vibes.
- Output ONLY the structured object specified by the tool. No prose outside it.\
"""
34
# Closed vocabularies allowed inside a single flag entry.
_FLAG_KINDS = [
    "subtle_bug", "slop", "security", "secret_leak", "license",
    "intent_mismatch", "untested", "oversized", "other",
]
_FLAG_SEVERITIES = ["low", "med", "high"]

# Fields of one element of the "flags" array; all of them are required.
_FLAG_FIELDS = {
    "type": {"type": "string", "enum": _FLAG_KINDS},
    "severity": {"type": "string", "enum": _FLAG_SEVERITIES},
    "location": {"type": "string"},
    "explanation": {"type": "string"},
}

# Top-level fields of the assessment object; all of them are required.
_ASSESSMENT_FIELDS = {
    "content_risk": {"type": "number", "description": "0.0 safe/trivial to 1.0 broken/dangerous"},
    "flags": {
        "type": "array",
        "items": {
            "type": "object",
            "properties": _FLAG_FIELDS,
            "required": list(_FLAG_FIELDS),
            "additionalProperties": False,
        },
    },
    "summary": {"type": "string"},
    "review_recommended": {"type": "boolean"},
}

# Tool definition sent with every review request; review_pr() forces the model
# to call it via tool_choice, so the reply arrives as this schema's input object.
ASSESSMENT_TOOL = {
    "name": "submit_assessment",
    "description": "Submit the structured risk assessment for this PR.",
    "input_schema": {
        "type": "object",
        "properties": _ASSESSMENT_FIELDS,
        "required": list(_ASSESSMENT_FIELDS),
        "additionalProperties": False,
    },
}
65
# Models that reject the temperature param (Opus 4.7+/Fable). Sonnet 4.6 accepts it.
# Entries are name fragments: review_pr() tests each one as a substring of the
# model id and only adds temperature=0 to the request when none of them match.
_NO_TEMPERATURE = ("opus-4-7", "opus-4-8", "fable")
68
69
def _client():
    """Build an Anthropic client, or return None when no API key is configured.

    The key is read from the environment variable named by
    ``CFG.review.api_key_env`` and handed to the client explicitly. A bare
    ``anthropic.Anthropic()`` reads ``ANTHROPIC_API_KEY`` by default, so a
    differently named configured variable would pass the check here and then
    be ignored by the client.

    Returns:
        An ``anthropic.Anthropic`` instance, or ``None`` if the configured
        variable is unset or empty.
    """
    api_key = os.environ.get(CFG.review.api_key_env)
    if not api_key:
        return None
    # Imported only on this path, so the SDK is not needed when no key is set.
    import anthropic

    return anthropic.Anthropic(api_key=api_key)
76
77
def review_pr(diff: str, title: str = "", description: str = "", discussion: str = "",
              machine_findings: dict | None = None, model: str | None = None) -> dict | None:
    """Request a structured content-risk assessment for one PR (PRD 6.6).

    Only the artifact is sent: title, description, discussion, diff and, when
    given, automated scan findings. The function takes no author handle, DID,
    or history.

    Args:
        diff: The PR diff. It is sliced to ``CFG.review.max_diff_chars``; when
            that drops content, the prompt states so explicitly.
        title: PR title.
        description: PR description.
        discussion: PR discussion text.
        machine_findings: Optional scan findings (6.12); serialised with
            ``json.dumps`` and appended as advisory evidence.
        model: Model id override; falls back to ``CFG.review.model``.

    Returns:
        The input of the ``submit_assessment`` tool call (the 6.6 schema
        object), or ``None`` if no API key is configured or the response
        contains no such tool call.
    """
    client = _client()
    if client is None:
        return None
    model = model or CFG.review.model

    shown_diff = diff[:CFG.review.max_diff_chars]
    parts = [f"PR title: {title}", f"PR description: {description}",
             f"Discussion:\n{discussion}", f"Diff:\n{shown_diff}"]
    if len(shown_diff) < len(diff):
        # A silently clipped diff would let the reviewer score an unseen tail as
        # safe. Telling it the diff is incomplete lets it apply its own
        # "flag uncertainty / set review_recommended" rule.
        parts.append(
            f"NOTE: the diff above is truncated to {len(shown_diff)} of "
            f"{len(diff)} characters; the remainder was not provided for review."
        )
    if machine_findings:  # 6.12 structured evidence, no identity
        parts.append("Automated scan findings (advisory evidence):\n"
                     + json.dumps(machine_findings, indent=2))
    user = "\n\n".join(parts)

    # tool_choice forces the reply to be a submit_assessment call, so the
    # answer arrives as structured tool input rather than free text.
    kwargs = dict(
        model=model, max_tokens=1500, system=SYSTEM_PROMPT,
        tools=[ASSESSMENT_TOOL], tool_choice={"type": "tool", "name": "submit_assessment"},
        messages=[{"role": "user", "content": user}],
    )
    # Only send temperature to models that accept the parameter.
    if not any(m in model for m in _NO_TEMPERATURE):
        kwargs["temperature"] = 0

    resp = client.messages.create(**kwargs)
    for block in resp.content:
        if block.type == "tool_use" and block.name == "submit_assessment":
            return block.input
    return None
106
107
def demo() -> None:
    """Smoke-test review_pr on a tiny diff whose code contradicts its title.

    When review_pr returns an assessment, its shape is asserted and a one-line
    summary is printed. When it returns None (e.g. no API key configured), a
    skip notice is printed instead.
    """
    sample_diff = "def add(a,b):\n return a-b # says add, does subtract"
    assessment = review_pr(sample_diff, title="Add helper", description="adds two numbers")

    if assessment is not None:
        risk = assessment["content_risk"]
        assert 0.0 <= risk <= 1.0
        flags = assessment["flags"]
        assert isinstance(flags, list) and "summary" in assessment
        print(f"content_risk={risk} flags={len(flags)} :: {assessment['summary']}")
        return

    print("no ANTHROPIC_API_KEY -> content signal skipped (gate treats as None). ok")
118
119
# Run the self-check when this file is executed as the main module. Because the
# module uses a relative import (.config), it has to be started from within its
# package (python -m <package>.<module>) rather than by bare file path.
if __name__ == "__main__":
    demo()