Sunstead trust scoring project
1"""Demo data so the full pipeline runs without a live Jetstream or external drive.
2
3NOT real Tangled data — a synthetic vouch graph (trusted core + sybil cluster)
4plus labelled PRs, enough to demo M3 (EigenTrust triage) and M4 (Claude + gate).
5Run real ingest (`python -m trust.ingest`) to replace this with live data.
6"""
7
8from __future__ import annotations
9
10import datetime as dt
11
12from .db import connection, ensure_schema
13
# Demo contributors as a did -> handle mapping. The maintainer is the EigenTrust
# seed: seed() inserts "did:plc:maintainer" into the seeds table.
PEOPLE = {
    "did:plc:maintainer": "lewis.tangled.sh",
    "did:plc:alice": "alice.dev",
    "did:plc:bob": "bob.codes",
    "did:plc:carol": "carol.sh",
    "did:plc:newcomer": "newcomer.xyz",  # legit but unvouched (cold start)
    # Throwaway identities forming the sybil cluster.
    "did:plc:sybil1": "throwaway1",
    "did:plc:sybil2": "throwaway2",
    "did:plc:sybil3": "throwaway3",
}
# Positive vouch edges as (voucher, subject) pairs. Sybils only vouch for each
# other, so no edge connects the trusted core to the sybil cluster, and the
# newcomer appears in no edge at all.
VOUCHES = [
    # Trusted core: the maintainer vouches for alice and bob, who both vouch for carol.
    ("did:plc:maintainer", "did:plc:alice"),
    ("did:plc:maintainer", "did:plc:bob"),
    ("did:plc:alice", "did:plc:carol"),
    ("did:plc:bob", "did:plc:carol"),
    # Sybil cluster: a closed ring (1 -> 2 -> 3 -> 1) plus one extra edge 1 -> 3.
    ("did:plc:sybil1", "did:plc:sybil2"),
    ("did:plc:sybil2", "did:plc:sybil3"),
    ("did:plc:sybil3", "did:plc:sybil1"),
    ("did:plc:sybil1", "did:plc:sybil3"),
]
36
# Diff fixture for a benign change: clamp() previously returned x unchanged and
# now bounds it to [lo, hi]. seed() stores this text as diff_text on the PRs it
# inserts for alice, bob and carol.
_CLEAN_DIFF = """--- a/util.py
+++ b/util.py
@@
-def clamp(x, lo, hi):
- return x
+def clamp(x, lo, hi):
+ return max(lo, min(x, hi))
"""
# Diff fixture for a harmful change: verify() swaps hmac.compare_digest for a
# plain == comparison. The trailing "# timing-unsafe ..." text is part of the
# diff payload itself, not a comment on this module. seed() stores this text as
# diff_text on the sybil-authored PRs.
_BUGGY_DIFF = """--- a/auth.py
+++ b/auth.py
@@
-def verify(token, secret):
- return hmac.compare_digest(sign(token), secret)
+def verify(token, secret):
+ return sign(token) == secret # timing-unsafe; intent says 'verify' but weakens it
"""
53
54
def seed(con) -> None:
    """Reset the demo tables on ``con`` and reload the synthetic dataset.

    Clears vouches, contributors, seeds, pull_requests, pr_followups and
    scores, then inserts, in this order: every entry of PEOPLE, the maintainer
    as the single seed, every edge of VOUCHES, the historical PRs, the open
    "live" PRs, and finally the repo-tier / attestation rows together with the
    two PRs that target the sensitive repo.

    repo_tiers and attestations are not cleared; their inserts use
    ``ON CONFLICT DO NOTHING`` so calling seed() again does not fail on them.
    """
    wiped_tables = ("vouches", "contributors", "seeds",
                    "pull_requests", "pr_followups", "scores")
    con.execute("; ".join(f"DELETE FROM {table}" for table in wiped_tables))

    now = dt.datetime.now(dt.timezone.utc)

    # Contributors: sybils and the newcomer get a 3-day-old DID, everyone else 400 days.
    for did, handle in PEOPLE.items():
        established = "sybil" not in did and did != "did:plc:newcomer"
        did_age_days = 400 if established else 3
        created_at = now - dt.timedelta(days=did_age_days)
        con.execute(
            "INSERT INTO contributors (did, handle, did_created_at) VALUES (?,?,?)",
            [did, handle, created_at],
        )

    con.execute("INSERT INTO seeds VALUES ('did:plc:maintainer')")

    # Every vouch is positive (polarity 1) and dated 30 days back.
    vouched_at = now - dt.timedelta(days=30)
    for voucher, subject in VOUCHES:
        con.execute(
            "INSERT INTO vouches (voucher_did, subject_did, polarity, created_at) VALUES (?,?,1,?)",
            [voucher, subject, vouched_at],
        )

    def insert_pr(pr_id, author, merged, ci, reverted, diff, *, is_open=False, age=40,
                  repo="tangled/core", add=20, dele=5, files=2):
        """Insert one pull_requests row plus its pr_followups row."""
        # Open PRs are always dated one day back; historical ones use `age` so they
        # are staggered in time and the M5 train/val split is by-time, not a tie.
        days_back = 1 if is_open else age
        opened_at = now - dt.timedelta(days=days_back)
        closed_unmerged = not (merged or is_open)
        con.execute(
            "INSERT INTO pull_requests (pr_id, author_did, repo, target, opened_at, ci_status, "
            "merged, closed_unmerged, additions, deletions, files_touched, diff_text, discussion_len) "
            "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
            [pr_id, author, repo, "main", opened_at, ci, merged, closed_unmerged,
             add, dele, files, diff, 120],
        )
        con.execute("INSERT INTO pr_followups (pr_id, reverted) VALUES (?,?)", [pr_id, reverted])

    # Historical PRs, each author's series spaced three days apart.
    for k in range(8):
        insert_pr(f"alice/{k}", "did:plc:alice", True, "passed", False, _CLEAN_DIFF,
                  age=90 - 3 * k)
    for k in range(5):
        # bob's first PR is the one revert in the history.
        insert_pr(f"bob/{k}", "did:plc:bob", True, "passed", k == 0, _CLEAN_DIFF,
                  age=85 - 3 * k)
    for k in range(6):
        insert_pr(f"carol/{k}", "did:plc:carol", True, "passed", False, _CLEAN_DIFF,
                  age=80 - 3 * k)
    for k in range(3):
        # All sybil1 PRs fail CI; only the first is marked merged.
        insert_pr(f"sybil1/{k}", "did:plc:sybil1", k == 0, "failed", False, _BUGGY_DIFF,
                  age=70 - 3 * k)

    # Two open PRs for the live demo: one clean from a trusted DID, one buggy from a sybil.
    insert_pr("live/trusted-clean", "did:plc:carol", False, "passed", False, _CLEAN_DIFF,
              is_open=True)
    insert_pr("live/sybil-buggy", "did:plc:sybil2", False, "passed", False, _BUGGY_DIFF,
              is_open=True)

    # 6.13 repo tiering: a sensitive/dual-use repo gates fast-lane on a jurisdiction attestation.
    # seed() is re-run across tests, so these two inserts must stay idempotent.
    con.execute("INSERT INTO repo_tiers VALUES ('tangled/secure-enclave', 'sensitive') "
                "ON CONFLICT DO NOTHING")
    attested_at = now - dt.timedelta(days=10)
    con.execute("INSERT INTO attestations VALUES (?,?,?,?) ON CONFLICT DO NOTHING",
                ["did:plc:carol", "FI", "signed_record", attested_at])

    # carol is attested -> her clean PR can fast-lane even on the sensitive repo.
    insert_pr("live/sensitive-attested", "did:plc:carol", False, "passed", False, _CLEAN_DIFF,
              is_open=True, repo="tangled/secure-enclave")
    # alice is highly trusted but NOT attested -> forced to needs_human on the sensitive repo.
    insert_pr("live/sensitive-blocked", "did:plc:alice", False, "passed", False, _CLEAN_DIFF,
              is_open=True, repo="tangled/secure-enclave")
110
111
def main() -> None:
    """Ensure the schema exists, load the demo data, and print a row-count summary."""
    ensure_schema()  # retries past the cross-process lock if other panes hold it
    with connection(read_only=False) as con:
        seed(con)

        def scalar(sql):
            # First column of the first row of a single-value query.
            return con.execute(sql).fetchone()[0]

        contributor_count = scalar("SELECT count(*) FROM contributors")
        vouch_count = scalar("SELECT count(*) FROM vouches")
        pr_count = scalar("SELECT count(*) FROM pull_requests")
        print(f"[seed] {contributor_count} contributors, {vouch_count} vouches, "
              f"{pr_count} PRs (DEMO DATA)")
120
121
# Seed the demo data only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()