This repository has no description
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 4.7 kB View raw
import pg from "pg";
import { readFileSync } from "node:fs";

/**
 * Resolve the Postgres connection string.
 *
 * Read DB_CONNECTION_STRING from the environment first, then from the first
 * candidate .env file (relative to the current working directory) that defines
 * it. Only the DB_CONNECTION_STRING line is consulted; every other line in the
 * file (e.g. the gcloud helper line) is ignored.
 *
 * @returns {string} The connection string.
 * @throws {Error} When no non-empty DB_CONNECTION_STRING can be found.
 */
function loadConn() {
  if (process.env.DB_CONNECTION_STRING) return process.env.DB_CONNECTION_STRING;

  // [ \t]* instead of \s*: with the `m` flag, \s would cross line breaks and an
  // empty `DB_CONNECTION_STRING=` line would capture the *next* line as its value.
  const linePattern = /^[ \t]*DB_CONNECTION_STRING[ \t]*=[ \t]*(.+?)[ \t]*$/m;

  for (const candidate of ["../.env", ".env", "../../.env"]) {
    let text;
    try {
      text = readFileSync(candidate, "utf8");
    } catch {
      // Deliberate best-effort: a missing or unreadable candidate just means
      // "try the next location".
      continue;
    }
    const match = text.match(linePattern);
    if (!match) continue;

    let value = match[1].trim();
    // Accept the common .env form KEY="value" / KEY='value'.
    const first = value[0];
    if (value.length >= 2 && (first === '"' || first === "'") && value.at(-1) === first) {
      value = value.slice(1, -1).trim();
    }
    if (value) return value;
  }
  throw new Error("DB_CONNECTION_STRING not found");
}

/**
 * Read a non-negative integer setting from the environment.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset or blank.
 * @param {number} min - Smallest accepted value.
 * @returns {number} The parsed integer, or `fallback`.
 * @throws {Error} When the variable is set but is not an integer >= `min`.
 */
function readIntEnv(name, fallback, min) {
  const raw = process.env[name]?.trim();
  if (raw === undefined || raw === "") return fallback;
  const value = /^\d+$/.test(raw) ? Number.parseInt(raw, 10) : Number.NaN;
  if (!Number.isSafeInteger(value) || value < min) {
    throw new Error(`${name} must be an integer >= ${min}, got "${raw}"`);
  }
  return value;
}

// Fail fast on bad settings: an unparsable CONCURRENCY would otherwise start zero
// workers, and an unparsable TIMEOUT_MS would abort every request almost immediately.
const SAMPLE = readIntEnv("SAMPLE", 0, 0); // 0 = all
const CONCURRENCY = readIntEnv("CONCURRENCY", 30, 1);
const TIMEOUT_MS = readIntEnv("TIMEOUT_MS", 9000, 1);

const pool = new pg.Pool({
  connectionString: loadConn(),
  // NOTE(review): TLS certificate verification is disabled here — confirm this is intended.
  ssl: { rejectUnauthorized: false },
  connectionTimeoutMillis: 10_000,
  max: 4,
});

// The sample size is bound as a query parameter rather than concatenated into the SQL.
const sql = `
  select knot_hostname,
         coalesce(record_raw->>'repoDid', repo_did) as repodid,
         record_raw->>'name' as name
  from tangled_repos
  where knot_hostname is not null
    and coalesce(record_raw->>'repoDid', repo_did) is not null
  ${SAMPLE > 0 ? "order by random() limit $1" : ""}`;

let rows;
try {
  ({ rows } = await pool.query(sql, SAMPLE > 0 ? [SAMPLE] : []));
} finally {
  // Release the pool even when the query fails.
  await pool.end();
}

const totalRepos = rows.length;
console.log(`Checking README presence for ${totalRepos} repos (repoDid-addressable) ...`);
console.log(`concurrency=${CONCURRENCY} timeout=${TIMEOUT_MS}ms sample=${SAMPLE || "ALL"}\n`);

/**
 * Ask a repo's knot for its top-level tree and report whether a README exists.
 *
 * Never throws: every failure is mapped to a `status` string.
 *
 * @param {{ knot_hostname: string, repodid: string }} r - One row of the repo query.
 * @returns {Promise<{ status: string, reachable?: boolean, hasReadme?: boolean, empty?: boolean }>}
 *   `status` is "ok", "http_<code>", "bad_json", "timeout" or "neterr"; the
 *   remaining fields are present only when `status` is "ok".
 */
async function checkRepo(r) {
  // sh.tangled.repo.tree defaults to the repo's default branch when ref is omitted,
  // and returns a top-level `readme` (with `contents`) when the knot finds a README
  // under any extension (.md/.org/.rst/...). One request per repo.
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), TIMEOUT_MS);
  try {
    // Built inside the try so a malformed DID (encodeURIComponent throwing URIError)
    // is counted as a failed check instead of rejecting the worker.
    const url = `https://${r.knot_hostname}/xrpc/sh.tangled.repo.tree?repo=${encodeURIComponent(r.repodid)}&path=`;
    const resp = await fetch(url, { signal: ctrl.signal, headers: { accept: "application/json" } });
    // The body is read before the status check, so the timeout also bounds the body read.
    const txt = await resp.text();
    if (!resp.ok) return { status: "http_" + resp.status };

    let j;
    try {
      j = JSON.parse(txt);
    } catch {
      return { status: "bad_json" };
    }

    const files = Array.isArray(j?.files) ? j.files : [];
    // A README counts if the knot returned non-blank `readme.contents` ...
    const readmeObj = !!(j?.readme && typeof j.readme === "object" &&
      typeof j.readme.contents === "string" && j.readme.contents.trim().length > 0);
    // ... or if the listing has a file named "readme" / "readme.<ext>" (case-insensitive).
    const readmeFile = files.some((f) => /^readme(\.|$)/i.test(f?.name ?? ""));
    const empty = files.length === 0 && !readmeObj;
    return { status: "ok", reachable: true, hasReadme: readmeObj || readmeFile, empty };
  } catch (e) {
    return { status: e?.name === "AbortError" ? "timeout" : "neterr" };
  } finally {
    clearTimeout(timer);
  }
}

let done = 0;
const stats = { reachable: 0, hasReadme: 0, empty: 0 };
const statusCounts = {};
// knot -> {total, reachable, hasReadme}. A Map, because hostnames come from the
// database and a key such as "__proto__" must not resolve to Object.prototype.
const byKnot = new Map();

/**
 * Drain the shared queue, checking one repo at a time and updating the tallies.
 *
 * @param {Array<object>} queue - Shared work list; rows are taken from the end.
 * @returns {Promise<void>} Resolves once the queue is empty.
 */
async function worker(queue) {
  while (queue.length) {
    const r = queue.pop();
    const res = await checkRepo(r);
    statusCounts[res.status] = (statusCounts[res.status] ?? 0) + 1;

    let k = byKnot.get(r.knot_hostname);
    if (!k) {
      k = { total: 0, reachable: 0, hasReadme: 0 };
      byKnot.set(r.knot_hostname, k);
    }
    k.total++;

    if (res.status === "ok") {
      stats.reachable++;
      k.reachable++;
      if (res.hasReadme) {
        stats.hasReadme++;
        k.hasReadme++;
      }
      if (res.empty) stats.empty++;
    }
    if (++done % 100 === 0) process.stderr.write(` ...${done}/${totalRepos}\n`);
  }
}

// CONCURRENCY workers share a single queue, so at most that many requests are in flight.
const queue = rows.slice();
await Promise.all(Array.from({ length: CONCURRENCY }, () => worker(queue)));

/**
 * Format n/d as a percentage with one decimal place.
 *
 * @param {number} n - Numerator.
 * @param {number} d - Denominator.
 * @returns {string} e.g. "12.5%", or "n/a" when `d` is 0.
 */
const pct = (n, d) => (d === 0 ? "n/a" : ((100 * n) / d).toFixed(1) + "%");

console.log("\n================ README COVERAGE ================");
console.log(`repoDid-addressable repos checked : ${totalRepos}`);
console.log(`reachable (knot responded w/ tree): ${stats.reachable} (${pct(stats.reachable, totalRepos)} of checked)`);
console.log(` ├─ have a README : ${stats.hasReadme} (${pct(stats.hasReadme, stats.reachable)} of reachable)`);
console.log(` └─ empty repo (no files) : ${stats.empty}`);
console.log(`README % of ALL checked repos : ${pct(stats.hasReadme, totalRepos)}`);
console.log("\nstatus breakdown:", JSON.stringify(statusCounts));
console.log("\nper-knot (knots with >=10 repos):");
// Largest knots first.
for (const [knot, k] of [...byKnot.entries()].sort((a, b) => b[1].total - a[1].total)) {
  if (k.total >= 10) console.log(` ${knot.padEnd(26)} total=${String(k.total).padStart(4)} reachable=${String(k.reachable).padStart(4)} readme=${String(k.hasReadme).padStart(4)} (${pct(k.hasReadme, k.reachable)} of reachable)`);
}