This repository has no description
// Find owners with several embedded repos, and measure how spread out their repos are
// (high mean pairwise cosine distance = multi-interest user, a good demo candidate).
3import pg from "pg";
4import { readFileSync } from "node:fs";
/**
 * Look up a configuration value by name.
 *
 * The process environment wins when it holds a non-empty value; otherwise each
 * dotenv-style file in `paths` is scanned in order for a `KEY = value` line.
 *
 * @param {string} k - Variable name to look up (matched literally).
 * @param {string[]} [paths] - Files to search, in priority order.
 * @returns {string|undefined} The value, or `undefined` when it is not found.
 */
function fromEnv(k, paths = ["../.env", ".env"]) {
  // Truthiness check on purpose: an empty environment variable falls through to the files.
  if (process.env[k]) return process.env[k];

  // Escape regex metacharacters so a key such as "A.B" cannot match "AxB".
  const key = k.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // [ \t]* rather than \s*: \s also matches newlines, which would let an empty
  // `KEY=` line capture the whole following line as its value.
  const pattern = new RegExp(`^[ \\t]*${key}[ \\t]*=[ \\t]*(.+)$`, "m");

  for (const p of paths) {
    let text;
    try {
      text = readFileSync(p, "utf8");
    } catch {
      // Best effort: a missing or unreadable file just means "try the next one".
      continue;
    }
    const m = text.match(pattern);
    if (!m) continue;
    const value = m[1].trim();
    if (value === "") continue;
    // dotenv files commonly wrap values in matching quotes; drop one such pair.
    const quoted = value.match(/^(["'])(.*)\1$/);
    return quoted ? quoted[2] : value;
  }
  return undefined;
}
// Shared Postgres connection pool for the whole script, capped at 3 clients.
// The connection string is resolved by fromEnv("DB_CONNECTION_STRING").
// NOTE(review): rejectUnauthorized: false accepts the server's TLS certificate
// without verifying it — confirm that is intended for this database.
const pool = new pg.Pool({
  max: 3,
  ssl: { rejectUnauthorized: false },
  connectionString: fromEnv("DB_CONNECTION_STRING"),
});
7
/**
 * Extract the owner segment (the part before the first "/") from a URI,
 * after removing the first "at://" occurrence.
 *
 * @param {string|null|undefined} uri
 * @returns {string|null} The first path segment, or null for a falsy input.
 */
const ownerDid = (uri) => {
  if (!uri) return null;
  const [first] = uri.replace("at://", "").split("/");
  return first;
};
/**
 * Parse a bracketed, comma-separated vector literal such as "[0.1,0.2]"
 * (the text form this script gets from `embedding::text`) into numbers.
 *
 * @param {string} s - Vector text; surrounding whitespace is tolerated.
 * @returns {number[]} One number per component; `[]` for an empty vector.
 */
function parseVec(s) {
  const inner = s.trim().replace(/^\[|\]$/g, "").trim();
  // "".split(",") yields [""] and Number("") is 0, so without this guard an
  // empty vector "[]" would come back as [0].
  if (inner === "") return [];
  return inner.split(",").map(Number);
}
/**
 * Dot product of `a` and `b`, iterating over the indices of `a`.
 * Callers rely on the inputs already being unit-norm, in which case the dot
 * product is the cosine similarity; no normalization is done here.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number}
 */
function cos(a, b) {
  let dot = 0;
  for (const [idx, x] of a.entries()) {
    dot += x * b[idx];
  }
  return dot;
}
11
// Candidate owners: group embedded READMEs by the first segment of repo_uri
// (after removing 'at://'), keep owners with 4-12 embedded repos, and take the
// 25 with the most repos.
const { rows: owners } = await pool.query(`
 select split_part(replace(repo_uri,'at://',''),'/',1) as owner_did,
 count(*)::int n, array_agg(repo_name) as names
 from tangled_readmes
 where embedding is not null and repo_uri is not null
 group by 1 having count(*) between 4 and 12
 order by n desc limit 25`);
19
// Score every candidate owner by the mean pairwise distance (1 - cos) between
// the embeddings of their READMEs. cos() is a plain dot product, so this is a
// cosine distance only under its unit-norm assumption.
const scored = [];
for (const o of owners) {
  // Select by the same owner expression the grouping query uses, compared with
  // equality. The earlier `repo_uri like 'at://<did>/%'` treated any `_` or `%`
  // inside the DID as a wildcard and could disagree with the grouped count.
  const { rows } = await pool.query(
    `select repo_name, embedding::text as e from tangled_readmes where embedding is not null and split_part(replace(repo_uri,'at://',''),'/',1) = $1`,
    [o.owner_did]);
  const vecs = rows.map((r) => parseVec(r.e));

  let sum = 0;
  let cnt = 0;
  for (let i = 0; i < vecs.length; i++) {
    for (let j = i + 1; j < vecs.length; j++) {
      sum += 1 - cos(vecs[i], vecs[j]);
      cnt++;
    }
  }
  // Fewer than two vectors means no pairs; report a distance of 0 rather than NaN.
  const meanDist = cnt ? sum / cnt : 0;

  scored.push({
    owner_did: o.owner_did,
    n: o.n,
    meanDist: +meanDist.toFixed(3), // rounded to 3 decimals, kept as a number
    names: rows.map((r) => r.repo_name),
  });
}
// Rank owners from most to least spread out, print the top 8, then close the pool.
scored.sort((x, y) => y.meanDist - x.meanDist);
console.log("most multi-interest owners (high mean pairwise README distance):\n");
scored.slice(0, 8).forEach(({ meanDist, n, owner_did, names }) => {
  console.log(`mean_dist=${meanDist} n=${n} ${owner_did}`);
  console.log(` repos: ${names.join(", ")}\n`);
});
await pool.end();