This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 4.3 kB View raw
"""Offline eval: held-out-seed retrieval (recall@k / nDCG).

A content-similarity recommender excludes the user's own repos from results, so we
can't hold out an owned repo and expect it *recommended*. Instead we measure the
underlying relevance signal: hold out each user's most recent repo, generate
candidates from their REMAINING repos (excluding the other seeds but NOT the
held-out target), and check where the held-out repo ranks. A good engine ranks
"what the user built next" near the top of what it would surface.

Run: `python eval/harness.py` (needs DB_CONNECTION_STRING). Establishes a
baseline BEFORE any ranking changes — no "feels better" tuning.
"""

from __future__ import annotations

import math
import sys

from app import db
from app.config import get_settings

K_VALUES = (10, 20, 50)  # cut-offs reported as recall@k
PER_SEED_K = 50  # neighbours pulled per remaining seed
MAX_USERS = 60  # sample size (keeps the run quick)
MIN_SEEDS = 3  # need enough seeds to hold one out and still have signal
# Candidate content gate, mirroring the live service (REC_MIN_README_CHARS).
# Set to 0 to reproduce the pre-gate baseline.
MIN_README_CHARS = get_settings().min_readme_chars

# Users to evaluate: owners (first path segment of repo_uri) that have between
# %(lo)s and 30 embedded READMEs, largest owners first. The owner_did tiebreak
# makes the LIMIT deterministic: without it, owners tied on `n` may be returned
# in any order, so the sampled user set (and the baseline) could change between
# runs on identical data.
_USERS_SQL = """
    select split_part(replace(repo_uri, 'at://', ''), '/', 1) as owner_did,
           count(*)::int as n
    from tangled_readmes
    where embedding is not null and repo_uri is not null
    group by 1
    having count(*) between %(lo)s and 30
    order by n desc, owner_did
    limit %(max_users)s
"""

# Owned repos for one user, with createdAt so we can hold out the most recent.
# The left join only supplies createdAt; rows without a matching tangled_repos
# record still come back, with created_at NULL. The doubled %% is presumably the
# driver's escape for a literal LIKE wildcard once parameters are bound
# (psycopg-style placeholders) — confirm against app.db.
_OWNED_SQL = """
    select r.repo_did,
           r.embedding::text as etext,
           tr.record_raw->>'createdAt' as created_at
    from tangled_readmes r
    left join tangled_repos tr
      on coalesce(tr.repo_did, tr.record_raw->>'repoDid') = r.repo_did
    where r.embedding is not null
      and r.repo_uri like 'at://' || %(did)s || '/%%'
"""


def _users() -> list[str]:
    """Return the owner DIDs of the users sampled for evaluation.

    Runs ``_USERS_SQL`` with ``lo=MIN_SEEDS`` and ``max_users=MAX_USERS``.
    """
    with db.get_pool().connection() as conn:
        rows = conn.execute(_USERS_SQL, {"lo": MIN_SEEDS, "max_users": MAX_USERS}).fetchall()
    return [r["owner_did"] for r in rows]


def _latest_per_repo(rows) -> list[dict]:
    """Collapse *rows* to one dict per ``repo_did``, keeping the latest ``created_at``.

    Rows are compared on ``created_at`` as strings, with a missing value
    treated as the empty string (the same ordering ``main`` uses to pick the
    held-out repo). First-seen order of ``repo_did`` values is preserved.
    """
    latest: dict[str, dict] = {}
    for raw in rows:
        row = dict(raw)
        kept = latest.get(row["repo_did"])
        if kept is None or (row.get("created_at") or "") > (kept.get("created_at") or ""):
            latest[row["repo_did"]] = row
    return list(latest.values())


def _owned(did: str) -> list[dict]:
    """Return the embedded repos owned by *did*, one dict per distinct repo.

    Each dict carries ``repo_did``, ``etext`` (the embedding cast to text) and
    ``created_at`` (may be None).

    The left join in ``_OWNED_SQL`` yields one row per matching tangled_repos
    record, so a repo with several such records would appear more than once.
    Duplicates would inflate the ``MIN_SEEDS`` gate in ``main`` and repeat
    identical kNN queries, hence the de-duplication here.
    """
    with db.get_pool().connection() as conn:
        rows = conn.execute(_OWNED_SQL, {"did": did}).fetchall()
    return _latest_per_repo(rows)


def _rank_of_target(seeds: list[dict], target: dict) -> int | None:
    """Generate candidates from the remaining seeds and return the 1-based rank
    of the held-out target repo (None if outside the candidate pool).

    Every seed other than the target is used as a query and is also excluded
    from the results; the target itself is deliberately NOT excluded. A
    candidate reached from several seeds keeps its smallest distance, and
    candidates are ranked by that distance, ascending.
    """
    tdid = target["repo_did"]
    rest = [s for s in seeds if s["repo_did"] != tdid]
    exclude = [s["repo_did"] for s in rest]  # exclude seeds, but allow the target
    best: dict[str, float] = {}
    for s in rest:
        for row in db.knn_repos(s["etext"], exclude, PER_SEED_K, MIN_README_CHARS):
            rd = row["repo_did"]
            d = float(row["distance"])
            if rd not in best or d < best[rd]:
                best[rd] = d
    if tdid not in best:
        # The target never surfaced among any seed's neighbours.
        return None
    ranked = sorted(best, key=best.get)
    return ranked.index(tdid) + 1


def main() -> int:
    """Run the evaluation and print recall@k and nDCG over the sampled users.

    Returns:
        Process exit code: 0 on success; 1 when the DB connection string is
        not configured or no user could be evaluated.
    """
    if not get_settings().db_connection_string:
        print("DB_CONNECTION_STRING not set", file=sys.stderr)
        return 1

    users = _users()
    evaluated = 0
    hits = {k: 0 for k in K_VALUES}
    ndcg_sum = 0.0

    for did in users:
        seeds = _owned(did)
        if len(seeds) < MIN_SEEDS:
            continue
        # hold out the most recent repo (fallback: last by repo_did for stability)
        target = max(seeds, key=lambda s: (s.get("created_at") or "", s["repo_did"]))
        rank = _rank_of_target(seeds, target)
        evaluated += 1
        if rank is not None:
            for k in K_VALUES:
                if rank <= k:
                    hits[k] += 1
            # A miss (rank is None) contributes 0 to the nDCG sum.
            ndcg_sum += 1.0 / math.log2(rank + 1)  # ideal DCG = 1 (single relevant item)

    if evaluated == 0:
        # Failure path: report on stderr, like the missing-connection case above.
        print("no users evaluated", file=sys.stderr)
        return 1

    print(f"evaluated users: {evaluated} (per-seed k={PER_SEED_K})")
    for k in K_VALUES:
        print(f" recall@{k:<3} = {hits[k] / evaluated:.3f}")
    print(f" nDCG = {ndcg_sum / evaluated:.3f}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())