src/trust/review.py at main · veikka.tngl.sh/sunstead

Sunstead trust scoring project
sunstead / src / trust / review.py
at main 5.1 kB View raw
Veikka Silvekoski Initial commit: sunstead trust scoring project 21hrs ago
  1"""M4 content signal: Claude reviews ONE PR's diff + discussion (PRD 6.6).
  2
  3Claude judges content, never identity (constraint 2): no author handle, DID, or
  4history is ever passed in. Output is forced to the schema via tool use, temp 0.
  5"""
  6
  7from __future__ import annotations
  8
  9import json
 10import os
 11
 12from .config import CFG
 13
# System prompt sent with every review request (passed as `system=` in review_pr).
# Verbatim from PRD 6.6 — do not paraphrase.
# The backslash after the opening quotes and the one before the closing quotes
# suppress the leading and trailing newline, so the prompt starts with "You are"
# and ends with "outside it." exactly.
SYSTEM_PROMPT = """\
You are a code-contribution reviewer for an open-source trust system. You assess ONE
pull request's actual content for quality and safety. You do not decide whether to
merge; you produce a structured risk assessment that a separate policy layer combines
with an identity-trust signal.

Hard rules:
- Judge only the artifact in front of you: the diff, the PR title and description, and
  the discussion. You are given NO information about the author's identity, reputation,
  or history, and you must not speculate about it. Identity trust is handled elsewhere.
- Your job is to catch problems a reputation signal cannot see: code that looks correct
  but is subtly wrong, plausible-looking machine-generated filler ("slop"),
  security-sensitive changes, leaked secrets or credentials, license violations, and
  changes whose stated intent does not match what the code does.
- Prefer flagging uncertainty over approving. If the diff is large, unclear, or you
  cannot verify correctness, say so and set review_recommended. Never rubber-stamp.
- Be specific. Every flag must point to concrete lines or patterns, not vibes.
- Output ONLY the structured object specified by the tool. No prose outside it.\
"""
 34
 35ASSESSMENT_TOOL = {
 36    "name": "submit_assessment",
 37    "description": "Submit the structured risk assessment for this PR.",
 38    "input_schema": {
 39        "type": "object",
 40        "properties": {
 41            "content_risk": {"type": "number", "description": "0.0 safe/trivial to 1.0 broken/dangerous"},
 42            "flags": {
 43                "type": "array",
 44                "items": {
 45                    "type": "object",
 46                    "properties": {
 47                        "type": {"type": "string", "enum": [
 48                            "subtle_bug", "slop", "security", "secret_leak", "license",
 49                            "intent_mismatch", "untested", "oversized", "other"]},
 50                        "severity": {"type": "string", "enum": ["low", "med", "high"]},
 51                        "location": {"type": "string"},
 52                        "explanation": {"type": "string"},
 53                    },
 54                    "required": ["type", "severity", "location", "explanation"],
 55                    "additionalProperties": False,
 56                },
 57            },
 58            "summary": {"type": "string"},
 59            "review_recommended": {"type": "boolean"},
 60        },
 61        "required": ["content_risk", "flags", "summary", "review_recommended"],
 62        "additionalProperties": False,
 63    },
 64}
 65
# Models that reject the temperature param (Opus 4.7+/Fable). Sonnet 4.6 accepts it.
# Entries are name fragments: review_pr() checks each one as a substring of the
# model name and only sends temperature=0 when none of them match.
_NO_TEMPERATURE = ("opus-4-7", "opus-4-8", "fable")
 68
 69
 70def _client():
 71    if not os.environ.get(CFG.review.api_key_env):
 72        return None
 73    import anthropic
 74
 75    return anthropic.Anthropic()
 76
 77
 78def review_pr(diff: str, title: str = "", description: str = "", discussion: str = "",
 79              machine_findings: dict | None = None, model: str | None = None) -> dict | None:
 80    """Return the 6.6 schema object, or None if no API key is configured."""
 81    client = _client()
 82    if client is None:
 83        return None
 84    model = model or CFG.review.model
 85
 86    parts = [f"PR title: {title}", f"PR description: {description}",
 87             f"Discussion:\n{discussion}", f"Diff:\n{diff[:CFG.review.max_diff_chars]}"]
 88    if machine_findings:  # 6.12 structured evidence, no identity
 89        parts.append("Automated scan findings (advisory evidence):\n"
 90                     + json.dumps(machine_findings, indent=2))
 91    user = "\n\n".join(parts)
 92
 93    kwargs = dict(
 94        model=model, max_tokens=1500, system=SYSTEM_PROMPT,
 95        tools=[ASSESSMENT_TOOL], tool_choice={"type": "tool", "name": "submit_assessment"},
 96        messages=[{"role": "user", "content": user}],
 97    )
 98    if not any(m in model for m in _NO_TEMPERATURE):
 99        kwargs["temperature"] = 0
100
101    resp = client.messages.create(**kwargs)
102    for block in resp.content:
103        if block.type == "tool_use" and block.name == "submit_assessment":
104            return block.input
105    return None
106
107
def demo() -> None:
    """Smoke-test review_pr on a tiny mismatched diff and print the outcome.

    Without a configured key review_pr returns None and this only reports the
    skip; otherwise the returned object's basic shape is asserted and a
    one-line summary is printed.
    """
    sample_diff = "def add(a,b):\n    return a-b  # says add, does subtract"
    result = review_pr(sample_diff, title="Add helper", description="adds two numbers")
    if result is None:
        print("no ANTHROPIC_API_KEY -> content signal skipped (gate treats as None). ok")
        return

    risk = result["content_risk"]
    assert 0.0 <= risk <= 1.0
    flags = result["flags"]
    assert isinstance(flags, list) and "summary" in result
    print(f"content_risk={risk} flags={len(flags)} :: {result['summary']}")
118
119
# Run the self-check when this file is executed as a script (not on import).
if __name__ == "__main__":
    demo()
Configure Feed

Configure Feed