This repository has no description
1// Search ISSUES from a repo's README embedding (README -> issue cosine, in-DB pgvector).
2// This is the "recommend issues to work on" path: given repos a user knows, surface relevant issues.
3import pg from "pg";
4import { readFileSync } from "node:fs";
/**
 * Look up a configuration value by name.
 *
 * Resolution order: a non-empty `process.env[k]` wins; otherwise the first
 * `k = value` line found in `../.env`, then `.env` is used (both paths are
 * resolved against the current working directory).
 *
 * @param {string} k - Variable name; matched literally.
 * @returns {string|undefined} The value, or `undefined` when it is not set
 *   anywhere. Values read from a file are trimmed and have one pair of
 *   matching surrounding quotes (`"…"` or `'…'`) removed.
 */
function fromEnv(k) {
  if (process.env[k]) return process.env[k];

  // Escape regex metacharacters so a key such as "A.B" cannot match "AXB".
  const escapedKey = String(k).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Only spaces/tabs are allowed around the key and "=": `\s` would also match
  // a newline and let an empty `KEY=` line capture the following line.
  const linePattern = new RegExp(`^[ \\t]*${escapedKey}[ \\t]*=[ \\t]*(.+)$`, "m");

  for (const path of ["../.env", ".env"]) {
    let text;
    try {
      text = readFileSync(path, "utf8");
    } catch {
      // Best effort: a missing or unreadable candidate file is simply skipped.
      continue;
    }
    const match = text.match(linePattern);
    if (!match) continue;

    const value = match[1].trim();
    // `KEY="value"` is common in .env files; hand back the bare value.
    const quoted = /^(["'])(.*)\1$/.exec(value);
    return quoted ? quoted[2] : value;
  }
  return undefined;
}
// Shared Postgres connection pool used by every query in this script.
// The connection string is resolved through fromEnv("DB_CONNECTION_STRING").
const pool = new pg.Pool({
  connectionString: fromEnv("DB_CONNECTION_STRING"),
  // NOTE(review): rejectUnauthorized=false turns off TLS certificate
  // verification — presumably needed for the hosted database; confirm.
  ssl: { rejectUnauthorized: false },
  // At most three pooled clients.
  max: 3,
});
/**
 * Parse the `K` setting: how many nearest issues to list per seed.
 *
 * The value is interpolated into a SQL `limit` clause, so anything that does
 * not parse to a non-negative integer is rejected here with a clear message
 * instead of surfacing later as a database syntax error.
 *
 * @param {string|undefined} raw - Raw setting, normally `process.env.K`.
 * @param {number} [fallback=8] - Value used when `raw` is unset or blank.
 * @returns {number} A non-negative integer.
 * @throws {RangeError} When `raw` is present but not a non-negative integer.
 */
function parseTopK(raw, fallback = 8) {
  if (raw == null || raw.trim() === "") return fallback;
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed) || parsed < 0) {
    throw new RangeError(`K must be a non-negative integer, got ${JSON.stringify(raw)}`);
  }
  return parsed;
}

/**
 * Validate a table reference before it is spliced into SQL text.
 *
 * Table names cannot be sent as bind parameters, so the name is restricted to
 * plain or double-quoted identifiers, optionally dot-qualified
 * (e.g. `tangled_issues`, `public.tangled_issues`, `"Mixed Case"`).
 *
 * @param {string} name - Candidate table reference.
 * @returns {string} `name`, unchanged, when it is acceptable.
 * @throws {Error} When `name` is not a valid table reference.
 */
function checkedTableName(name) {
  const part = '(?:[A-Za-z_][A-Za-z0-9_$]*|"(?:[^"]|"")+")';
  const pattern = new RegExp(`^${part}(?:\\.${part})*$`);
  if (!pattern.test(name)) {
    throw new Error(`ISSUE_TBL is not a valid table name: ${JSON.stringify(name)}`);
  }
  return name;
}

// Number of nearest issues printed per seed (env K, default 8).
const K = parseTopK(process.env.K);
// which issue table to search
const ISSUE_TBL = checkedTableName(process.env.ISSUE_TBL || "tangled_issues");
10
/**
 * Print one summary line per issue table: total rows, rows that have an
 * embedding, the max() of embedding_model, and the max vector dimension.
 *
 * Tables are queried one after another. If a query fails, the error message
 * is printed for that table and the next table is still processed.
 * The query also selects a distinct-model count (`models`) that is not printed.
 *
 * @returns {Promise<void>}
 */
async function coverage() {
  const tables = ["tangled_issues", "tangled_open_issues"];
  for (const table of tables) {
    const sql = `select count(*)::int total, count(*) filter (where embedding is not null)::int emb, count(distinct embedding_model) models, max(embedding_model) model, max(vector_dims(embedding)) dims from ${table}`;
    try {
      const result = await pool.query(sql);
      const stats = result.rows[0];
      console.log(`${table}: total=${stats.total} embedded=${stats.emb} model=${stats.model} dims=${stats.dims}`);
    } catch (err) {
      console.log(`${table}: ${err.message}`);
    }
  }
}
19
/**
 * Script body: print embedding coverage, then for each seed repo name look up
 * that repo's README embedding and print the K issues from ISSUE_TBL whose
 * embeddings are nearest to it.
 *
 * Seeds are `[process.env.SEED]` when SEED is set and non-empty, otherwise a
 * built-in list of four repo names. A seed with no matching README is
 * reported and skipped.
 *
 * The pool is closed in `finally`, so it is released even when a query throws;
 * the original error still propagates to the caller.
 *
 * @returns {Promise<void>}
 */
async function main() {
  try {
    await coverage();

    const seeds = process.env.SEED
      ? [process.env.SEED]
      : ["tangled-cli", "atproto-oauth", "nixpkgs", "knot-docker"];

    for (const s of seeds) {
      // `ilike` makes the name match case-insensitive (and treats % / _ in the
      // seed as wildcards); when several rows match, the longest README wins.
      // The embedding is fetched as text so it can be passed back as a parameter.
      const seed = (await pool.query(
        `select repo_name, repo_did, embedding::text et from tangled_readmes
         where embedding is not null and repo_name ilike $1 order by length(content) desc limit 1`,
        [s],
      )).rows[0];

      console.log("\n" + "=".repeat(74));
      if (!seed) {
        console.log(`SEED "${s}" not found`);
        continue;
      }
      console.log(`SEED REPO README: ${seed.repo_name}`);

      // Nearest issues by the pgvector `<=>` distance to the seed embedding.
      // The body is whitespace-collapsed and cut to 120 characters in SQL.
      // NOTE(review): if tangled_readmes can hold more than one row per
      // repo_did, the left join repeats an issue and the copies use up top-K
      // slots — confirm repo_did is unique there.
      const hits = (await pool.query(
        `
        select i.title, i.repo_did, left(regexp_replace(coalesce(i.body,''), '\\s+', ' ', 'g'), 120) as body,
               rd.repo_name as issue_repo,
               round((i.embedding <=> $1::vector)::numeric, 4) as dist
        from ${ISSUE_TBL} i
        left join tangled_readmes rd on rd.repo_did = i.repo_did
        where i.embedding is not null
        order by i.embedding <=> $1::vector
        limit ${K}`,
        [seed.et],
      )).rows;

      console.log(`top ${hits.length} matching issues:`);
      hits.forEach((h, idx) => {
        // Fall back to a DID prefix when no README row names the issue's repo.
        console.log(` ${idx + 1}. [${h.dist}] "${(h.title ?? "(no title)").slice(0, 60)}" (repo: ${h.issue_repo ?? h.repo_did?.slice(0, 16)})`);
        if (h.body?.trim()) console.log(` ${h.body}…`);
      });
    }
  } finally {
    await pool.end();
  }
}
// Entry point: run the script; any rejection is reported on stderr and the
// process exits with status 1.
main().catch((err) => {
  console.error("FATAL:", err);
  process.exit(1);
});