This repository has no description
1"""Offline eval: held-out-seed retrieval (recall@k / nDCG).
2
3A content-similarity recommender excludes the user's own repos from results, so we
4can't hold out an owned repo and expect it *recommended*. Instead we measure the
5underlying relevance signal: hold out each user's most recent repo, generate
6candidates from their REMAINING repos (excluding the other seeds but NOT the
7held-out target), and check where the held-out repo ranks. A good engine ranks
8"what the user built next" near the top of what it would surface.
9
10Run: `python eval/harness.py` (needs DB_CONNECTION_STRING). Establishes a
11baseline BEFORE any ranking changes — no "feels better" tuning.
12"""
13
14from __future__ import annotations
15
16import math
17import sys
18
19from app import db
20from app.config import get_settings
21
# Cut-offs k at which recall@k is reported.
K_VALUES = (10, 20, 50)

# Neighbours pulled per remaining seed.
PER_SEED_K = 50

# Sample size (keeps the run quick).
MAX_USERS = 60

# A user needs enough seeds to hold one out and still have signal.
MIN_SEEDS = 3

# Candidate content gate, mirroring the live service (REC_MIN_README_CHARS).
# Set to 0 to reproduce the pre-gate baseline.
MIN_README_CHARS = get_settings().min_readme_chars
29
30_USERS_SQL = """
31 select split_part(replace(repo_uri, 'at://', ''), '/', 1) as owner_did,
32 count(*)::int as n
33 from tangled_readmes
34 where embedding is not null and repo_uri is not null
35 group by 1
36 having count(*) between %(lo)s and 30
37 order by n desc
38 limit %(max_users)s
39"""
40
41# Owned repos for one user, with createdAt so we can hold out the most recent.
42_OWNED_SQL = """
43 select r.repo_did,
44 r.embedding::text as etext,
45 tr.record_raw->>'createdAt' as created_at
46 from tangled_readmes r
47 left join tangled_repos tr
48 on coalesce(tr.repo_did, tr.record_raw->>'repoDid') = r.repo_did
49 where r.embedding is not null
50 and r.repo_uri like 'at://' || %(did)s || '/%%'
51"""
52
53
def _users() -> list[str]:
    """Return the ``owner_did`` of every row produced by ``_USERS_SQL``.

    The query is bound with ``MIN_SEEDS`` as ``lo`` and ``MAX_USERS`` as
    ``max_users``; the result keeps the query's row order.
    """
    params = {"lo": MIN_SEEDS, "max_users": MAX_USERS}
    with db.get_pool().connection() as conn:
        fetched = conn.execute(_USERS_SQL, params).fetchall()
    return [record["owner_did"] for record in fetched]
58
59
def _owned(did: str) -> list[dict]:
    """Return the rows ``_OWNED_SQL`` yields for ``did``, each copied into a plain dict."""
    with db.get_pool().connection() as conn:
        records = conn.execute(_OWNED_SQL, {"did": did}).fetchall()
    return list(map(dict, records))
64
65def _rank_of_target(seeds: list[dict], target: dict) -> int | None:
66 """Generate candidates from the remaining seeds and return the 1-based rank
67 of the held-out target repo (None if outside the candidate pool)."""
68 rest = [s for s in seeds if s["repo_did"] != target["repo_did"]]
69 exclude = [s["repo_did"] for s in rest] # exclude seeds, but allow the target
70 best: dict[str, float] = {}
71 for s in rest:
72 for row in db.knn_repos(s["etext"], exclude, PER_SEED_K, MIN_README_CHARS):
73 rd = row["repo_did"]
74 d = float(row["distance"])
75 if rd not in best or d < best[rd]:
76 best[rd] = d
77 ranked = sorted(best, key=best.get)
78 tdid = target["repo_did"]
79 return ranked.index(tdid) + 1 if tdid in ranked else None
80
81
def main() -> int:
    """Run the held-out-seed evaluation and print recall@k and nDCG.

    For each sampled user with at least ``MIN_SEEDS`` owned repos, the most
    recent repo is held out and ranked against candidates generated from the
    user's other repos. A target that never enters the candidate pool still
    counts as an evaluated user and is a miss for every metric.

    Returns:
        0 once the metrics have been printed to stdout; 1 when the database
        connection string is not configured or when no user could be
        evaluated. Both failure messages go to stderr.
    """
    if not get_settings().db_connection_string:
        print("DB_CONNECTION_STRING not set", file=sys.stderr)
        return 1

    evaluated = 0
    hits = dict.fromkeys(K_VALUES, 0)
    ndcg_sum = 0.0

    for did in _users():
        seeds = _owned(did)
        if len(seeds) < MIN_SEEDS:
            continue
        # Hold out the most recent repo. A missing createdAt compares as "",
        # and repo_did breaks ties so the choice is stable.
        target = max(seeds, key=lambda s: (s.get("created_at") or "", s["repo_did"]))
        rank = _rank_of_target(seeds, target)
        evaluated += 1
        if rank is None:
            # Target never surfaced: contributes nothing to hits or nDCG.
            continue
        for k in K_VALUES:
            if rank <= k:
                hits[k] += 1
        # Ideal DCG = 1 (single relevant item), so DCG is already normalised.
        ndcg_sum += 1.0 / math.log2(rank + 1)

    if evaluated == 0:
        # A failing exit status, so report on stderr like the check above.
        print("no users evaluated", file=sys.stderr)
        return 1

    print(f"evaluated users: {evaluated} (per-seed k={PER_SEED_K})")
    for k in K_VALUES:
        print(f" recall@{k:<3} = {hits[k] / evaluated:.3f}")
    print(f" nDCG = {ndcg_sum / evaluated:.3f}")
    return 0
115
116
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())