This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 4.1 kB View raw
// Experiment: embed a Tangled issue as a query and vector-search the README embeddings.
// Validates the matching: (a) does the issue's OWN repo rank highly? (b) are other hits topical?
import pg from "pg";
import { readFileSync } from "node:fs";

/**
 * Resolve a configuration value.
 *
 * Looks at `process.env[key]` first; if that is unset or empty, scans
 * `../.env` and then `.env` (relative to the working directory) for a
 * `KEY=value` line and returns the value with surrounding whitespace and one
 * leading/trailing quote character removed.
 *
 * @param {string} key - Variable name. It is interpolated into a RegExp
 *   unescaped, so it must not contain regex metacharacters.
 * @returns {string | undefined} The value, or `undefined` when it is not found.
 */
function fromEnv(key) {
  if (process.env[key]) return process.env[key];
  const linePattern = new RegExp(`^\\s*${key}\\s*=\\s*(.+)$`, "m");
  for (const p of ["../.env", ".env"]) {
    try {
      const m = readFileSync(p, "utf8").match(linePattern);
      if (m) return m[1].trim().replace(/^["']|["']$/g, "");
    } catch {
      // Best-effort lookup: a missing or unreadable .env file is not an error,
      // simply try the next candidate path.
    }
  }
  return undefined;
}

const CONN = fromEnv("DB_CONNECTION_STRING");
const API_KEY = fromEnv("GEMINI_API_KEY");
const MODEL = "gemini-embedding-001";
// Number of issues to test; overridable through the ISSUES environment variable.
const N = Number.parseInt(process.env.ISSUES ?? "3", 10);

// NOTE(review): rejectUnauthorized:false disables TLS certificate verification.
// Kept as in the original experiment; confirm this is acceptable for the target DB.
const pool = new pg.Pool({ connectionString: CONN, ssl: { rejectUnauthorized: false }, max: 3 });

/**
 * Embed `text` as a retrieval query with the Gemini embedding endpoint and
 * return it as a vector literal string (`[v1,v2,...]`) for a `$n::vector` cast.
 *
 * Only the first 8000 characters of `text` are sent. The returned vector is
 * L2-normalized client-side (presumably because reduced-dimensionality outputs
 * are not guaranteed to be unit length; confirm against the API docs).
 *
 * @param {string} text - Query text to embed.
 * @returns {Promise<string>} Normalized embedding serialized as `[a,b,c,...]`.
 * @throws {Error} If the API key is missing, the HTTP call fails, or the
 *   response carries no embedding values.
 */
async function embedQuery(text) {
  if (!API_KEY) {
    throw new Error("GEMINI_API_KEY is not set (checked environment, ../.env and .env)");
  }
  const resp = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${MODEL}:embedContent`, {
    method: "POST",
    headers: { "content-type": "application/json", "x-goog-api-key": API_KEY },
    body: JSON.stringify({
      model: `models/${MODEL}`,
      content: { parts: [{ text: text.slice(0, 8000) }] },
      taskType: "RETRIEVAL_QUERY",
      outputDimensionality: 1536,
    }),
  });
  if (!resp.ok) throw new Error(`embed HTTP ${resp.status}: ${(await resp.text()).slice(0, 200)}`);

  const payload = await resp.json();
  const values = payload?.embedding?.values;
  if (!Array.isArray(values) || values.length === 0) {
    throw new Error("embed response did not contain embedding.values");
  }

  // `|| 1` guards against dividing by zero for an all-zero vector.
  const norm = Math.sqrt(values.reduce((acc, x) => acc + x * x, 0)) || 1;
  return `[${values.map((x) => x / norm).join(",")}]`;
}

/**
 * Run the experiment: print corpus counts, pick up to N long-bodied issues
 * whose repo has an embedded README, and for each one print the 8 nearest
 * READMEs plus the overall rank of the issue's own repo.
 *
 * The pool is always closed before this function settles.
 *
 * @returns {Promise<void>}
 * @throws {Error} If ISSUES is not a non-negative integer, or on any database
 *   or embedding failure.
 */
async function main() {
  try {
    const total = (await pool.query(`select count(*)::int n from tangled_issues`)).rows[0].n;
    console.log(`tangled_issues total: ${total}`);
    const joinable = (await pool.query(`
      select count(*)::int n from tangled_issues i
      where exists (select 1 from tangled_readmes r where r.repo_did = i.repo_did and r.embedding is not null)`)).rows[0].n;
    console.log(`issues whose parent repo has an embedded README: ${joinable}\n`);
    if (joinable === 0) {
      console.log("No joinable issues — cannot run the own-repo sanity check.");
      return;
    }

    if (!Number.isInteger(N) || N < 0) {
      throw new Error(`ISSUES must be a non-negative integer, got ${JSON.stringify(process.env.ISSUES)}`);
    }

    // Pick a few substantive issues (decent body) whose repo is embedded.
    // The limit is bound as a parameter rather than spliced into the SQL text.
    const issues = (await pool.query(`
      select i.uri, i.repo_did, i.title, i.body,
             (select repo_name from tangled_readmes r where r.repo_did = i.repo_did limit 1) as parent_repo
      from tangled_issues i
      where i.title is not null and length(coalesce(i.body,'')) > 80
        and exists (select 1 from tangled_readmes r where r.repo_did = i.repo_did and r.embedding is not null)
      order by length(i.body) desc
      limit $1`, [N])).rows;

    for (const iss of issues) {
      const queryText = `${iss.title}\n\n${iss.body}`;
      console.log("\n" + "=".repeat(70));
      console.log(`ISSUE: ${iss.title}`);
      console.log(`parent repo: ${iss.parent_repo} (${iss.repo_did})`);
      console.log(`body: ${iss.body.replace(/\s+/g, " ").slice(0, 180)}`);
      const qvec = await embedQuery(queryText);

      // `<=>` is pgvector's cosine-distance operator: smaller means closer.
      const hits = (await pool.query(`
        select repo_name, repo_did, round((embedding <=> $1::vector)::numeric, 4) as dist,
               (repo_did = $2) as is_parent
        from tangled_readmes
        where embedding is not null
        order by embedding <=> $1::vector
        limit 8`, [qvec, iss.repo_did])).rows;
      console.log("top README matches:");
      for (const [idx, h] of hits.entries()) {
        console.log(` ${idx + 1}. ${h.is_parent ? "👉 " : " "}${h.repo_name?.padEnd(32) ?? "(no name)"} dist=${h.dist}${h.is_parent ? " <-- OWN REPO" : ""}`);
      }

      // Where does the own repo rank overall?
      // The reference distance must come from a row that actually has an
      // embedding: a NULL distance would make every comparison NULL and report
      // rank #1. If a repo has several README rows, its closest one is used.
      const rank = (await pool.query(`
        select 1 + count(*)::int as rnk
        from tangled_readmes
        where embedding is not null
          and (embedding <=> $1::vector) < (
            select embedding <=> $1::vector
            from tangled_readmes
            where repo_did = $2 and embedding is not null
            order by embedding <=> $1::vector
            limit 1)`,
        [qvec, iss.repo_did])).rows[0].rnk;
      console.log(` → own repo overall rank: #${rank} of all embedded READMEs`);
    }
  } finally {
    // Release connections on both the success and the failure path.
    await pool.end();
  }
}

main().catch((e) => { console.error("FATAL:", e); process.exit(1); });