This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 5.0 kB View raw
// Full experiment: fetch real Tangled issues live, embed as queries, vector-search READMEs.
import pg from "pg";
import { readFileSync } from "node:fs";

/**
 * Look up a configuration value.
 *
 * Checks `process.env` first, then scans `../.env` and `.env` (relative to the
 * current working directory) for the first `KEY = value` line.
 *
 * @param {string} k - Variable name. It is interpolated into a RegExp unescaped,
 *   so it must not contain regex metacharacters.
 * @returns {string|undefined} The trimmed value, or `undefined` when not found.
 */
function fromEnv(k) {
  if (process.env[k]) return process.env[k];
  for (const p of ["../.env", ".env"]) {
    try {
      const m = readFileSync(p, "utf8").match(new RegExp(`^\\s*${k}\\s*=\\s*(.+)$`, "m"));
      if (m) return m[1].trim();
    } catch {
      // Missing/unreadable .env file is expected; fall through to the next candidate.
    }
  }
  return undefined;
}

const API_KEY = fromEnv("GEMINI_API_KEY");
const MODEL = "gemini-embedding-001";
// NOTE(review): rejectUnauthorized:false disables TLS certificate verification
// for the database connection — acceptable for an experiment, confirm before reuse.
const pool = new pg.Pool({
  connectionString: fromEnv("DB_CONNECTION_STRING"),
  ssl: { rejectUnauthorized: false },
  max: 4,
});

/** Prefix a bare host with `https://`; leave values that already carry a scheme untouched. */
const pdsUrl = (h) => (/^https?:\/\//.test(h) ? h : `https://${h}`);

/**
 * Embed `text` as a retrieval query and return it as a vector literal.
 *
 * Only the first 8000 characters are sent. The returned values are scaled to
 * unit L2 length (a zero vector is left as-is) and serialised as `[v1,v2,...]`
 * so the string can be passed as a `$n::vector` query parameter.
 *
 * @param {string} text - Query text.
 * @returns {Promise<string>} Bracketed, comma-separated vector literal.
 * @throws {Error} On a non-2xx response or a response without `embedding.values`.
 */
async function embedQuery(text) {
  const resp = await fetch(`https://generativelanguage.googleapis.com/v1beta/models/${MODEL}:embedContent`, {
    method: "POST",
    headers: { "content-type": "application/json", "x-goog-api-key": API_KEY },
    body: JSON.stringify({
      model: `models/${MODEL}`,
      content: { parts: [{ text: text.slice(0, 8000) }] },
      taskType: "RETRIEVAL_QUERY",
      outputDimensionality: 1536,
    }),
  });
  if (!resp.ok) throw new Error(`embed HTTP ${resp.status}`);
  const v = (await resp.json())?.embedding?.values;
  if (!Array.isArray(v)) throw new Error("embed response missing embedding.values");
  let s = 0;
  for (const x of v) s += x * x;
  // `|| 1` avoids dividing by zero for an all-zero vector.
  const n = Math.sqrt(s) || 1;
  return `[${v.map((x) => x / n).join(",")}]`;
}

/**
 * Map an issue.repo reference (bare DID or at://owner/sh.tangled.repo/rkey) -> knot repoDid in readmes.
 *
 * @param {unknown} ref - The `repo` field of an issue record (untrusted remote data).
 * @returns {Promise<string|null>} The repo DID, or `null` when the reference is
 *   empty, not a string, malformed, or has no matching `tangled_repos` row.
 */
async function resolveRepoDid(ref) {
  if (typeof ref !== "string" || !ref) return null;
  if (ref.startsWith("at://")) {
    const m = ref.match(/^at:\/\/([^/]+)\/[^/]+\/(.+)$/);
    if (!m) return null;
    const r = await pool.query(
      `select coalesce(repo_did, record_raw->>'repoDid') as rd from tangled_repos where owner_did=$1 and rkey=$2 limit 1`,
      [m[1], m[2]],
    );
    return r.rows[0]?.rd ?? null;
  }
  return ref; // bare DID == repoDid
}

/**
 * Fetch live issue records from the PDS of repo owners that have an embedded README.
 *
 * Selects up to 120 distinct (owner, PDS host) pairs from the database, then
 * drains them with 14 concurrent workers, each calling
 * `com.atproto.repo.listRecords` for the `sh.tangled.repo.issue` collection
 * (limit 30). Hosts that fail, time out (10 s), or answer non-2xx are skipped.
 *
 * @returns {Promise<object[]>} The `value` of every returned record that has a title.
 */
async function fetchIssues() {
  const rows = (await pool.query(`
    select distinct tr.owner_did, pa.pds_host
    from tangled_repos tr join tangled_pds_accounts pa on pa.did = tr.owner_did
    where exists (select 1 from tangled_readmes r where r.repo_did = coalesce(tr.repo_did, tr.record_raw->>'repoDid') and r.embedding is not null)
    limit 120`)).rows;
  const found = [];
  // Shared work queue: each worker pops until it is empty.
  const q = rows.slice();
  await Promise.all(Array.from({ length: 14 }, async () => {
    while (q.length) {
      const r = q.pop();
      const url = `${pdsUrl(r.pds_host)}/xrpc/com.atproto.repo.listRecords?repo=${encodeURIComponent(r.owner_did)}&collection=sh.tangled.repo.issue&limit=30`;
      const ctrl = new AbortController();
      const t = setTimeout(() => ctrl.abort(), 10000);
      try {
        const resp = await fetch(url, { signal: ctrl.signal });
        if (!resp.ok) continue;
        const j = await resp.json();
        for (const rec of j.records ?? []) if (rec.value?.title) found.push(rec.value);
      } catch {
        // Best effort: an unreachable, slow, or malformed PDS just contributes no issues.
      } finally {
        // Always clear the timer (also on fetch failure) so no pending timeout
        // outlives the request; clearing here also keeps the body read under the timeout.
        clearTimeout(t);
      }
    }
  }));
  return found;
}

/**
 * Run the experiment: fetch issues, pick up to four with substantial bodies
 * (preferring those whose own repo has an embedded README, then longer bodies),
 * embed each as a query, and print the eight nearest READMEs plus the rank of
 * the issue's own repo when it is embedded.
 *
 * @returns {Promise<void>}
 * @throws {Error} When GEMINI_API_KEY is not configured, or on DB / embedding failures.
 */
async function main() {
  // Fail before doing any network work if embedding can never succeed.
  if (!API_KEY) throw new Error("GEMINI_API_KEY is not set (checked process.env, ../.env, .env)");

  const issues = await fetchIssues();
  console.log(`fetched ${issues.length} live issues\n`);

  // Only issues with a substantive string body can be picked, so filter before
  // spending two DB round-trips per issue on resolution.
  const candidates = issues.filter((i) => typeof i.body === "string" && i.body.length > 60);

  // attach resolved repoDid + whether embedded; prefer substantive bodies whose repo is embedded
  for (const iss of candidates) {
    iss._repoDid = await resolveRepoDid(iss.repo);
    iss._embedded = iss._repoDid
      ? (await pool.query(`select repo_name from tangled_readmes where repo_did=$1 and embedding is not null limit 1`, [iss._repoDid])).rows[0]?.repo_name ?? null
      : null;
  }
  const pick = candidates
    .sort((a, b) => (b._embedded ? 1 : 0) - (a._embedded ? 1 : 0) || b.body.length - a.body.length)
    .slice(0, 4);

  for (const iss of pick) {
    console.log("\n" + "=".repeat(72));
    console.log(`ISSUE: ${iss.title}`);
    console.log(`own repo: ${iss._embedded ? iss._embedded + " (embedded ✓)" : "(parent README not embedded / unresolved)"}`);
    console.log(`body: ${iss.body.replace(/\s+/g, " ").slice(0, 200)}`);
    const qvec = await embedQuery(`${iss.title}\n\n${iss.body}`);
    // `<=>` is the distance operator on the vector type (cosine distance in pgvector — confirm extension).
    const hits = (await pool.query(`
      select repo_name, repo_did, round((embedding <=> $1::vector)::numeric,4) dist, (repo_did=$2) is_parent
      from tangled_readmes where embedding is not null
      order by embedding <=> $1::vector limit 8`, [qvec, iss._repoDid])).rows;
    console.log("top README matches:");
    hits.forEach((h, i) => console.log(` ${i + 1}. ${h.is_parent ? "👉" : " "} ${(h.repo_name ?? "(no name)").padEnd(34)} dist=${h.dist}${h.is_parent ? " <-- OWN REPO" : ""}`));
    if (iss._embedded) {
      // Rank = 1 + number of READMEs strictly closer than the parent. The parent
      // distance must come from a row that actually has an embedding; otherwise the
      // comparison is NULL, nothing counts as closer, and the rank is wrongly #1.
      const rnk = (await pool.query(`
        select 1 + count(*)::int rnk from tangled_readmes
        where embedding is not null and (embedding <=> $1::vector) < (
          select embedding <=> $1::vector from tangled_readmes
          where repo_did=$2 and embedding is not null
          order by embedding <=> $1::vector limit 1)`,
        [qvec, iss._repoDid])).rows[0].rnk;
      console.log(` → own repo overall rank: #${rnk} of all embedded READMEs`);
    }
  }
  await pool.end();
}

main().catch((e) => {
  console.error("FATAL:", e);
  process.exit(1);
});