This repository has no description
1import pg from "pg";
2import { readFileSync } from "node:fs";
3
/**
 * Resolve the Postgres connection string.
 *
 * The DB_CONNECTION_STRING environment variable wins when it is non-empty.
 * Otherwise the first `.env` file (looked up relative to the current working
 * directory, in the order "../.env", ".env", "../../.env") that contains a
 * non-empty `DB_CONNECTION_STRING=...` line is used. Other lines in the file
 * (e.g. the gcloud helper line) are ignored.
 *
 * @returns {string} The connection string, without surrounding quotes.
 * @throws {Error} When no source provides a value.
 */
function loadConn() {
  if (process.env.DB_CONNECTION_STRING) return process.env.DB_CONNECTION_STRING;

  // Only horizontal whitespace is allowed around the key, `=` and value, so an
  // empty assignment can never swallow the following line as its value.
  const assignment = /^[ \t]*DB_CONNECTION_STRING[ \t]*=[ \t]*(.+?)[ \t]*$/m;

  for (const p of ["../.env", ".env", "../../.env"]) {
    let text;
    try {
      text = readFileSync(p, "utf8");
    } catch {
      // Best-effort lookup: a missing or unreadable candidate is simply skipped.
      continue;
    }

    const m = text.match(assignment);
    if (!m) continue;

    const raw = m[1].trim();
    const quote = raw[0];
    const isQuoted =
      raw.length >= 2 && (quote === '"' || quote === "'") && raw.endsWith(quote);
    // .env files commonly wrap values in quotes; they are not part of the value.
    const value = isQuoted ? raw.slice(1, -1) : raw;
    if (value) return value;
  }
  throw new Error("DB_CONNECTION_STRING not found");
}
15
/**
 * Read an integer setting from the environment.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset or blank.
 * @param {number} min - Smallest accepted value (inclusive).
 * @returns {number} The parsed integer, or `fallback`.
 * @throws {Error} When the variable is set but is not an integer >= `min`.
 */
function readIntEnv(name, fallback, min) {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") return fallback;
  const value = Number(raw.trim());
  if (!Number.isInteger(value) || value < min) {
    // Fail fast: a NaN/zero value would otherwise silently start no workers,
    // produce a ~1ms timeout, or generate an invalid LIMIT clause.
    throw new Error(`${name} must be an integer >= ${min}, got "${raw}"`);
  }
  return value;
}

const SAMPLE = readIntEnv("SAMPLE", 0, 0); // 0 = all
const CONCURRENCY = readIntEnv("CONCURRENCY", 30, 1); // number of parallel workers
const TIMEOUT_MS = readIntEnv("TIMEOUT_MS", 9000, 1); // per-request timeout
19
// Connection pool for the single repo-listing query below.
// NOTE(review): `rejectUnauthorized: false` means the server's TLS certificate
// is not verified — confirm this is intended for the target database.
const poolOptions = {
  max: 4,
  connectionTimeoutMillis: 10_000,
  ssl: { rejectUnauthorized: false },
  connectionString: loadConn(),
};
const pool = new pg.Pool(poolOptions);
26
// When SAMPLE is non-zero, pick that many rows at random; otherwise take all rows.
const sampleClause = SAMPLE ? `order by random() limit ${SAMPLE}` : "";

// Select every repo that has both a knot hostname and a repo DID (taken from
// the record's `repoDid` field, falling back to the `repo_did` column).
const sql = `
 select knot_hostname,
 coalesce(record_raw->>'repoDid', repo_did) as repodid,
 record_raw->>'name' as name
 from tangled_repos
 where knot_hostname is not null
 and coalesce(record_raw->>'repoDid', repo_did) is not null
 ${sampleClause}`;

// The database is only needed for this one query, so the pool is closed right after.
const rows = (await pool.query(sql)).rows;
await pool.end();
38
// Announce the size of the run and the effective settings before any requests start.
const { length: totalRepos } = rows;
console.log(
  `Checking README presence for ${totalRepos} repos (repoDid-addressable) ...`,
);
console.log(
  `concurrency=${CONCURRENCY} timeout=${TIMEOUT_MS}ms sample=${SAMPLE || "ALL"}\n`,
);
42
// Decide from a parsed tree response whether the repo has a README and
// whether it is empty (no files and no readme object).
function summarizeTree(tree) {
  const files = Array.isArray(tree?.files) ? tree.files : [];
  const readme = tree?.readme;
  const readmeObj =
    !!readme &&
    typeof readme === "object" &&
    typeof readme.contents === "string" &&
    readme.contents.trim().length > 0;
  // Matches "README" and "README.<ext>" in any letter case, but not "readme-notes".
  const readmeFile = files.some((f) => /^readme(\.|$)/i.test(f?.name ?? ""));
  return { hasReadme: readmeObj || readmeFile, empty: files.length === 0 && !readmeObj };
}

/**
 * Ask a repo's knot for its top-level tree and report README presence.
 *
 * sh.tangled.repo.tree defaults to the repo's default branch when ref is omitted,
 * and returns a top-level `readme` (with `contents`) when the knot finds a README
 * under any extension (.md/.org/.rst/...). One request per repo.
 *
 * @param {{knot_hostname: string, repodid: string}} r - Row describing the repo.
 * @param {number} [timeoutMs=TIMEOUT_MS] - Abort the request after this many ms.
 * @returns {Promise<{status: string, reachable?: boolean, hasReadme?: boolean, empty?: boolean}>}
 *   `status` is "ok", "http_<code>", "bad_json", "timeout" or "neterr"; the
 *   other fields are present only when `status` is "ok". Never rejects.
 */
async function checkRepo(r, timeoutMs = TIMEOUT_MS) {
  const url = `https://${r.knot_hostname}/xrpc/sh.tangled.repo.tree?repo=${encodeURIComponent(r.repodid)}&path=`;
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(), timeoutMs);
  try {
    const resp = await fetch(url, { signal: ctrl.signal, headers: { accept: "application/json" } });
    if (!resp.ok) {
      // Classify by status before touching the body: a stalled or broken error
      // body must not turn an HTTP failure into "timeout"/"neterr". Cancelling
      // releases the unread body instead of downloading it.
      await resp.body?.cancel().catch(() => {});
      return { status: "http_" + resp.status };
    }

    const txt = await resp.text();
    let tree;
    try {
      tree = JSON.parse(txt);
    } catch {
      return { status: "bad_json" };
    }
    return { status: "ok", reachable: true, ...summarizeTree(tree) };
  } catch (e) {
    // The abort triggered by the timer surfaces as an AbortError; everything
    // else (DNS, TLS, connection reset, ...) is lumped together as "neterr".
    return { status: e?.name === "AbortError" ? "timeout" : "neterr" };
  } finally {
    clearTimeout(timer);
  }
}
67
// Shared run state, updated by every worker.
let done = 0; // repos processed so far (drives the progress output)
const stats = { reachable: 0, hasReadme: 0, empty: 0 };
// Both maps are keyed by strings that originate outside this script (status
// labels, knot hostnames from the database). Null-prototype objects keep keys
// such as "constructor" or "__proto__" from resolving to inherited members.
const statusCounts = Object.create(null); // status -> count
const byKnot = Object.create(null); // knot -> {total, reachable, hasReadme}
72
/**
 * Take repos off the shared queue one at a time until it is empty, checking
 * each and folding the outcome into the global and per-knot tallies.
 *
 * @param {Array<object>} queue - Shared work list; entries are removed from the end.
 * @returns {Promise<void>}
 */
async function worker(queue) {
  while (queue.length > 0) {
    const repo = queue.pop();
    const outcome = await checkRepo(repo);

    const seen = statusCounts[outcome.status] ?? 0;
    statusCounts[outcome.status] = seen + 1;

    if (byKnot[repo.knot_hostname] == null) {
      byKnot[repo.knot_hostname] = { total: 0, reachable: 0, hasReadme: 0 };
    }
    const knotStats = byKnot[repo.knot_hostname];
    knotStats.total += 1;

    if (outcome.status === "ok") {
      stats.reachable += 1;
      knotStats.reachable += 1;
      if (outcome.hasReadme) {
        stats.hasReadme += 1;
        knotStats.hasReadme += 1;
      }
      if (outcome.empty) {
        stats.empty += 1;
      }
    }

    // Progress goes to stderr every 100 repos.
    done += 1;
    if (done % 100 === 0) {
      process.stderr.write(` ...${done}/${totalRepos}\n`);
    }
  }
}
88
// Work on a copy of the rows and let CONCURRENCY workers drain it together.
const queue = [...rows];
const workers = Array.from({ length: CONCURRENCY }, () => worker(queue));
await Promise.all(workers);
91
// Format n out of d as a percentage with one decimal; "n/a" when d is zero.
const pct = (n, d) => {
  if (d === 0) {
    return "n/a";
  }
  const percent = (100 * n) / d;
  return `${percent.toFixed(1)}%`;
};
93
// Summary: overall totals first, then the raw status histogram.
const summaryLines = [
  "\n================ README COVERAGE ================",
  `repoDid-addressable repos checked : ${totalRepos}`,
  `reachable (knot responded w/ tree): ${stats.reachable} (${pct(stats.reachable, totalRepos)} of checked)`,
  ` ├─ have a README : ${stats.hasReadme} (${pct(stats.hasReadme, stats.reachable)} of reachable)`,
  ` └─ empty repo (no files) : ${stats.empty}`,
  `README % of ALL checked repos : ${pct(stats.hasReadme, totalRepos)}`,
];
for (const line of summaryLines) {
  console.log(line);
}
console.log("\nstatus breakdown:", JSON.stringify(statusCounts));

// Per-knot table, busiest knot first; knots with fewer than 10 repos are omitted.
console.log("\nper-knot (knots with >=10 repos):");
const knotRows = Object.entries(byKnot)
  .sort(([, a], [, b]) => b.total - a.total)
  .filter(([, k]) => k.total >= 10);
for (const [knot, k] of knotRows) {
  const total = String(k.total).padStart(4);
  const reachable = String(k.reachable).padStart(4);
  const readme = String(k.hasReadme).padStart(4);
  const share = pct(k.hasReadme, k.reachable);
  console.log(` ${knot.padEnd(26)} total=${total} reachable=${reachable} readme=${readme} (${share} of reachable)`);
}