This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 2.7 kB View raw
// Search ISSUES from a repo's README embedding (README -> issue cosine, in-DB pgvector).
// This is the "recommend issues to work on" path: given repos a user knows, surface relevant issues.
import pg from "pg";
import { readFileSync } from "node:fs";

/**
 * Look up a setting by name: the process environment wins, otherwise the first
 * `KEY = value` line found in `../.env` or `.env` (checked in that order).
 *
 * @param {string} k - Variable name to resolve.
 * @returns {string | undefined} The value with surrounding whitespace (and one
 *   pair of matching surrounding quotes) removed, or `undefined` when the name
 *   is set nowhere.
 */
function fromEnv(k) {
  if (process.env[k]) return process.env[k];

  // Escape regex metacharacters so the key is always matched literally.
  const key = k.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const assignment = new RegExp(`^\\s*${key}\\s*=\\s*(.+)$`, "m");

  for (const p of ["../.env", ".env"]) {
    let text;
    try {
      text = readFileSync(p, "utf8");
    } catch {
      // Deliberately best-effort: a missing or unreadable .env just means "try the next one".
      continue;
    }
    const m = text.match(assignment);
    if (m) {
      // `KEY="value"` and `KEY='value'` are common in .env files; the quotes are not part of the value.
      return m[1].trim().replace(/^(["'])(.*)\1$/, "$2");
    }
  }
  return undefined;
}

/**
 * Parse a non-negative integer row limit, falling back (with a warning on
 * stderr) when the raw text is missing, not a number, or negative.
 *
 * @param {string | undefined} raw - Raw text, e.g. from an environment variable.
 * @param {number} fallback - Value used when `raw` is unusable.
 * @returns {number} A non-negative integer.
 */
function parseLimit(raw, fallback) {
  if (raw == null) return fallback;
  const n = Number.parseInt(raw, 10);
  if (Number.isInteger(n) && n >= 0) return n;
  console.error(`K=${JSON.stringify(raw)} is not a non-negative integer; using ${fallback}`);
  return fallback;
}

// NOTE(review): `rejectUnauthorized: false` turns off TLS certificate verification — confirm this is intended.
// If DB_CONNECTION_STRING resolves to undefined, pg presumably falls back to its own defaults — confirm.
const pool = new pg.Pool({
  connectionString: fromEnv("DB_CONNECTION_STRING"),
  ssl: { rejectUnauthorized: false },
  max: 3,
});

// Number of nearest issues printed per seed; bound as a query parameter below.
const K = parseLimit(process.env.K, 8);

// which issue table to search
const ISSUE_TBL = process.env.ISSUE_TBL || "tangled_issues";
// Table names cannot be bound as query parameters, so the value is spliced into the SQL text.
// Accept only a plain (optionally schema-qualified) identifier to keep that splice safe.
if (!/^[A-Za-z_][A-Za-z0-9_$]*(\.[A-Za-z_][A-Za-z0-9_$]*)?$/.test(ISSUE_TBL)) {
  throw new Error(`ISSUE_TBL must be a plain table identifier, got ${JSON.stringify(ISSUE_TBL)}`);
}

/**
 * Print, for each known issue table, the row count, how many rows have an
 * embedding, and the embedding model / dimensionality reported by the query.
 * A failing query (for example a table that does not exist) is reported on its
 * own line and does not stop the run.
 *
 * @returns {Promise<void>}
 */
async function coverage() {
  for (const t of ["tangled_issues", "tangled_open_issues"]) {
    try {
      const r = (
        await pool.query(
          `select count(*)::int total, count(*) filter (where embedding is not null)::int emb, count(distinct embedding_model) models, max(embedding_model) model, max(vector_dims(embedding)) dims from ${t}`,
        )
      ).rows[0];
      console.log(`${t}: total=${r.total} embedded=${r.emb} model=${r.model} dims=${r.dims}`);
    } catch (e) {
      console.log(`${t}: ${e.message}`);
    }
  }
}

/**
 * For each seed repo name (the SEED environment variable, or a built-in list),
 * find its README embedding and print the K issues from ISSUE_TBL whose
 * embeddings are closest to it under the `<=>` operator.
 *
 * The pool is closed even when a query fails, so the error reaches the caller
 * without leaving connections open.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    await coverage();

    const seeds = process.env.SEED
      ? [process.env.SEED]
      : ["tangled-cli", "atproto-oauth", "nixpkgs", "knot-docker"];

    for (const s of seeds) {
      // Among READMEs whose name matches, take the one with the longest content.
      // The embedding is fetched as text so it can be handed back as a bound parameter.
      const seed = (
        await pool.query(
          `select repo_name, repo_did, embedding::text et from tangled_readmes
            where embedding is not null and repo_name ilike $1
            order by length(content) desc limit 1`,
          [s],
        )
      ).rows[0];

      console.log("\n" + "=".repeat(74));
      if (!seed) {
        console.log(`SEED "${s}" not found`);
        continue;
      }
      console.log(`SEED REPO README: ${seed.repo_name}`);

      const hits = (
        await pool.query(
          `select i.title, i.repo_did,
                  left(regexp_replace(coalesce(i.body,''), '\\s+', ' ', 'g'), 120) as body,
                  rd.repo_name as issue_repo,
                  round((i.embedding <=> $1::vector)::numeric, 4) as dist
             from ${ISSUE_TBL} i
             left join tangled_readmes rd on rd.repo_did = i.repo_did
            where i.embedding is not null
            order by i.embedding <=> $1::vector
            limit $2::int`,
          [seed.et, K],
        )
      ).rows;

      console.log(`top ${hits.length} matching issues:`);
      for (const [idx, h] of hits.entries()) {
        // Fall back to a DID prefix when no README row supplied a repo name.
        const repoLabel = h.issue_repo ?? h.repo_did?.slice(0, 16);
        console.log(` ${idx + 1}. [${h.dist}] "${(h.title ?? "(no title)").slice(0, 60)}" (repo: ${repoLabel})`);
        if (h.body?.trim()) console.log(` ${h.body}`);
      }
    }
  } finally {
    await pool.end();
  }
}

main().catch((e) => {
  console.error("FATAL:", e);
  process.exit(1);
});