This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 2.1 kB View raw
// Find owners with several embedded repos, and measure how SPREAD their repos are
// (high mean pairwise cosine distance = multi-interest user — good demo candidate).
import pg from "pg";
import { readFileSync } from "node:fs";

/**
 * Look up a configuration value: the process environment first, then the first
 * `KEY=value` line found in `../.env` or `.env` (resolved against the cwd).
 *
 * @param {string} k - Variable name to look up.
 * @returns {string|undefined} The value, or undefined when it is not set anywhere.
 */
function fromEnv(k) {
  if (process.env[k]) return process.env[k];

  // Escape the key so regex metacharacters in a name cannot alter the pattern.
  const escapedKey = k.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const linePattern = new RegExp(`^\\s*${escapedKey}\\s*=\\s*(.+)$`, "m");

  for (const path of ["../.env", ".env"]) {
    let text;
    try {
      text = readFileSync(path, "utf8");
    } catch {
      continue; // best-effort lookup: a missing/unreadable candidate file is skipped
    }
    const match = text.match(linePattern);
    if (match) {
      const value = match[1].trim();
      // .env files commonly quote values; strip one matching pair of quotes so
      // the caller does not receive a value that still contains them.
      const quoted = /^(["'])(.*)\1$/.exec(value);
      return quoted ? quoted[2] : value;
    }
  }
  return undefined;
}

/**
 * Parse a bracketed, comma-separated vector literal such as "[0.1,0.2]".
 *
 * @param {string} s - Text form of the vector (the query casts `embedding::text`).
 * @returns {number[]} The components as numbers.
 */
function parseVec(s) {
  return s.replace(/^\[|\]$/g, "").split(",").map(Number);
}

/**
 * Dot product of two vectors, iterating over the length of `a`.
 * Equals cosine similarity only when both inputs are unit-norm, which the
 * original author states is the case for the stored embeddings.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number}
 */
function cos(a, b) {
  let dot = 0;
  for (let i = 0; i < a.length; i++) dot += a[i] * b[i];
  return dot;
}

/**
 * Mean of (1 - cos) over every unordered pair of vectors.
 *
 * @param {number[][]} vecs
 * @returns {number} The mean pairwise distance; 0 when there are fewer than two vectors.
 */
function meanPairwiseDistance(vecs) {
  let sum = 0;
  let pairs = 0;
  for (let i = 0; i < vecs.length; i++) {
    for (let j = i + 1; j < vecs.length; j++) {
      sum += 1 - cos(vecs[i], vecs[j]);
      pairs++;
    }
  }
  return pairs ? sum / pairs : 0;
}

// Owner DID = first path segment of the AT URI. The same SQL expression is used
// for grouping and for the per-owner lookup so both always agree on ownership
// (a LIKE prefix match would treat "_" and "%" inside a DID as wildcards).
const OWNER_DID_SQL = "split_part(replace(repo_uri,'at://',''),'/',1)";

// NOTE(review): rejectUnauthorized: false turns off TLS certificate verification;
// confirm this is acceptable before pointing the script at anything sensitive.
// NOTE(review): if DB_CONNECTION_STRING is not found, connectionString is undefined
// and pg presumably falls back to its own defaults — verify against the pg docs.
const pool = new pg.Pool({
  connectionString: fromEnv("DB_CONNECTION_STRING"),
  ssl: { rejectUnauthorized: false },
  max: 3,
});

try {
  // Candidate owners: between 4 and 12 embedded repos, the 25 with the most repos.
  const owners = (await pool.query(`
    select ${OWNER_DID_SQL} as owner_did, count(*)::int n
    from tangled_readmes
    where embedding is not null and repo_uri is not null
    group by 1 having count(*) between 4 and 12
    order by n desc limit 25`)).rows;

  const scored = [];
  for (const o of owners) {
    const rows = (await pool.query(
      `select repo_name, embedding::text as e from tangled_readmes
       where embedding is not null and repo_uri is not null and ${OWNER_DID_SQL} = $1`,
      [o.owner_did])).rows;
    const meanDist = meanPairwiseDistance(rows.map((r) => parseVec(r.e)));
    scored.push({
      owner_did: o.owner_did,
      n: o.n,
      meanDist: +meanDist.toFixed(3),
      names: rows.map((r) => r.repo_name),
    });
  }

  scored.sort((a, b) => b.meanDist - a.meanDist);
  console.log("most multi-interest owners (high mean pairwise README distance):\n");
  for (const s of scored.slice(0, 8)) {
    console.log(`mean_dist=${s.meanDist}  n=${s.n}  ${s.owner_did}`);
    console.log(`   repos: ${s.names.join(", ")}\n`);
  }
} finally {
  // Always release the pool's connections, even when a query fails.
  await pool.end();
}