Sunstead trust scoring project
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 5.5 kB View raw
"""Demo data so the full pipeline runs without a live Jetstream or external drive.

NOT real Tangled data — a synthetic vouch graph (trusted core + sybil cluster)
plus labelled PRs, enough to demo M3 (EigenTrust triage) and M4 (Claude + gate).
Run real ingest (`python -m trust.ingest`) to replace this with live data.
"""

from __future__ import annotations

import datetime as dt

from .db import connection, ensure_schema

# did -> handle. The maintainer is the EigenTrust seed.
PEOPLE = {
    "did:plc:maintainer": "lewis.tangled.sh",
    "did:plc:alice": "alice.dev",
    "did:plc:bob": "bob.codes",
    "did:plc:carol": "carol.sh",
    "did:plc:newcomer": "newcomer.xyz",  # legit but unvouched (cold start)
    "did:plc:sybil1": "throwaway1",
    "did:plc:sybil2": "throwaway2",
    "did:plc:sybil3": "throwaway3",
}
# voucher -> subject (positive vouches). Sybils only vouch for each other.
VOUCHES = [
    ("did:plc:maintainer", "did:plc:alice"),
    ("did:plc:maintainer", "did:plc:bob"),
    ("did:plc:alice", "did:plc:carol"),
    ("did:plc:bob", "did:plc:carol"),
    ("did:plc:sybil1", "did:plc:sybil2"),
    ("did:plc:sybil2", "did:plc:sybil3"),
    ("did:plc:sybil3", "did:plc:sybil1"),
    ("did:plc:sybil1", "did:plc:sybil3"),
]

# Diff payload for a harmless change: clamp() starts honouring its bounds.
_CLEAN_DIFF = """--- a/util.py
+++ b/util.py
@@
-def clamp(x, lo, hi):
-    return x
+def clamp(x, lo, hi):
+    return max(lo, min(x, hi))
"""
# Diff payload for a regression: the constant-time comparison is replaced by `==`.
_BUGGY_DIFF = """--- a/auth.py
+++ b/auth.py
@@
-def verify(token, secret):
-    return hmac.compare_digest(sign(token), secret)
+def verify(token, secret):
+    return sign(token) == secret  # timing-unsafe; intent says 'verify' but weakens it
"""

# Tables wiped at the start of every seed() run, in deletion order.
# repo_tiers and attestations are deliberately absent: seed() inserts into them
# with ON CONFLICT DO NOTHING instead of clearing them.
_RESET_TABLES = (
    "vouches",
    "contributors",
    "seeds",
    "pull_requests",
    "pr_followups",
    "scores",
)


def seed(con) -> None:
    """Reset the demo tables and insert the synthetic vouch graph and PRs.

    Clears every table in ``_RESET_TABLES``, then inserts the contributors from
    ``PEOPLE``, the maintainer as the single seed, the edges from ``VOUCHES``,
    a set of historical PRs, a few open "live" PRs, and one sensitive repo tier
    with one attestation.

    Args:
        con: Open, writable database connection exposing ``execute(sql, params)``
            with ``?`` placeholders (``main`` passes the object yielded by
            ``connection(read_only=False)``).
    """
    # One statement per execute(): not every driver accepts several
    # ';'-separated statements in a single call. Table names come from a
    # module constant, never from input, so interpolating them is safe.
    for table in _RESET_TABLES:
        con.execute(f"DELETE FROM {table}")

    now = dt.datetime.now(dt.timezone.utc)

    for did, handle in PEOPLE.items():
        # Sybils and the newcomer get a 3-day-old DID; everyone else 400 days.
        is_fresh = "sybil" in did or did == "did:plc:newcomer"
        age = 3 if is_fresh else 400
        con.execute(
            "INSERT INTO contributors (did, handle, did_created_at) VALUES (?,?,?)",
            [did, handle, now - dt.timedelta(days=age)],
        )

    con.execute("INSERT INTO seeds VALUES ('did:plc:maintainer')")

    for v, s in VOUCHES:
        con.execute(
            "INSERT INTO vouches (voucher_did, subject_did, polarity, created_at) VALUES (?,?,1,?)",
            [v, s, now - dt.timedelta(days=30)],
        )

    def add_pr(pr_id, did, merged, ci, reverted, diff, is_open=False, age=40, repo="tangled/core",
               add=20, dele=5, files=2):
        """Insert one pull request row plus its pr_followups row.

        Args:
            pr_id: Primary identifier of the PR.
            did: Author DID.
            merged: Whether the PR was merged.
            ci: CI status string stored in ``ci_status``.
            reverted: Whether the PR was later reverted (stored in pr_followups).
            diff: Diff text stored in ``diff_text``.
            is_open: When true the PR is dated one day ago and ``age`` is ignored.
            age: Days since the PR was opened, for non-open PRs.
            repo: Repository name.
            add: Number of added lines.
            dele: Number of deleted lines.
            files: Number of files touched.
        """
        # Historical PRs staggered in time so the M5 train/val split is by-time, not a tie.
        opened = now - dt.timedelta(days=1 if is_open else age)
        # A PR that is neither merged nor still open was closed without merging.
        closed_unmerged = (not merged) and (not is_open)
        con.execute(
            "INSERT INTO pull_requests (pr_id, author_did, repo, target, opened_at, ci_status, "
            "merged, closed_unmerged, additions, deletions, files_touched, diff_text, discussion_len) "
            "VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)",
            [pr_id, did, repo, "main", opened, ci, merged, closed_unmerged,
             add, dele, files, diff, 120],
        )
        con.execute("INSERT INTO pr_followups (pr_id, reverted) VALUES (?,?)", [pr_id, reverted])

    for i in range(8):
        add_pr(f"alice/{i}", "did:plc:alice", True, "passed", False, _CLEAN_DIFF, age=90 - i * 3)
    for i in range(5):
        # one revert
        add_pr(f"bob/{i}", "did:plc:bob", True, "passed", i == 0, _CLEAN_DIFF, age=85 - i * 3)
    for i in range(6):
        add_pr(f"carol/{i}", "did:plc:carol", True, "passed", False, _CLEAN_DIFF, age=80 - i * 3)
    for i in range(3):
        # Only the first sybil PR is marked merged; all three have failing CI.
        add_pr(f"sybil1/{i}", "did:plc:sybil1", i == 0, "failed", False, _BUGGY_DIFF, age=70 - i * 3)

    # Two open PRs for the live demo: one clean from a trusted DID, one buggy from a sybil.
    add_pr("live/trusted-clean", "did:plc:carol", False, "passed", False, _CLEAN_DIFF, is_open=True)
    add_pr("live/sybil-buggy", "did:plc:sybil2", False, "passed", False, _BUGGY_DIFF, is_open=True)

    # 6.13 repo tiering: a sensitive/dual-use repo gates fast-lane on a jurisdiction attestation.
    con.execute("INSERT INTO repo_tiers VALUES ('tangled/secure-enclave', 'sensitive') "
                "ON CONFLICT DO NOTHING")  # seed() is re-run across tests; keep it idempotent
    con.execute("INSERT INTO attestations VALUES (?,?,?,?) ON CONFLICT DO NOTHING",
                ["did:plc:carol", "FI", "signed_record", now - dt.timedelta(days=10)])
    # carol is attested -> her clean PR can fast-lane even on the sensitive repo.
    add_pr("live/sensitive-attested", "did:plc:carol", False, "passed", False, _CLEAN_DIFF,
           is_open=True, repo="tangled/secure-enclave")
    # alice is highly trusted but NOT attested -> forced to needs_human on the sensitive repo.
    add_pr("live/sensitive-blocked", "did:plc:alice", False, "passed", False, _CLEAN_DIFF,
           is_open=True, repo="tangled/secure-enclave")


def main() -> None:
    """Ensure the schema exists, seed the demo data, and print row counts."""
    ensure_schema()  # retries past the cross-process lock if other panes hold it
    with connection(read_only=False) as con:
        seed(con)
        n = con.execute("SELECT count(*) FROM contributors").fetchone()[0]
        e = con.execute("SELECT count(*) FROM vouches").fetchone()[0]
        p = con.execute("SELECT count(*) FROM pull_requests").fetchone()[0]
    print(f"[seed] {n} contributors, {e} vouches, {p} PRs (DEMO DATA)")


if __name__ == "__main__":
    main()