fix: count distinct solvers and hedge when the sample is incomplete · jeremy.herve.bzh/atmot-bot@c8ec5a6

+2

src/compose.ts

··· 32 32 if (players == null || players === 0) return null; 33 33 34 34 if (solvers === 0) { 35 + // An incomplete sample can't prove nobody won — only claim it when we're sure. 36 + if (sampled) return null; 35 37 return lang === 'fr' 36 38 ? `Personne n'a trouvé le mot d'hier. Nouveau départ aujourd'hui !` 37 39 : `Nobody cracked yesterday's word. Fresh start today!`;

+2 -2

src/config.ts

··· 56 56 export interface YesterdayCounts { 57 57 /** Distinct players who recorded a result (wins + losses); null if Constellation was unreachable. */ 58 58 players: number | null; 59 - /** Players who solved, counted from the sampled records (≤ SOLVER_SAMPLE_CAP). */ 59 + /** Distinct DIDs that solved, from the sample (≤ SOLVER_SAMPLE_CAP); clamped to `players`. */ 60 60 solvers: number; 61 - /** True if the sample hit the cap (more results may exist than were counted). */ 61 + /** True when the sample is incomplete (truncated page or unresolved players); compose then hedges. */ 62 62 sampled: boolean; 63 63 }

+32 -8

src/constellation.ts

··· 52 52 return { records: data.records as BacklinkRecord[], cursor: data.cursor ?? null }; 53 53 } 54 54 55 - async function collectBacklinks(target: string, collection: string, path: string, cap: number): Promise<BacklinkRecord[]> { 55 + /** 56 + * Up to `cap` backlink records. `truncated` is true when the sample is known to 57 + * be incomplete — more than `cap` records exist, a page fetch failed mid-walk, 58 + * or pages remained when the page budget ran out — so callers can hedge. 59 + */ 60 + async function collectBacklinks( 61 + target: string, 62 + collection: string, 63 + path: string, 64 + cap: number, 65 + ): Promise<{ records: BacklinkRecord[]; truncated: boolean }> { 56 66 const out: BacklinkRecord[] = []; 57 67 let cursor: string | undefined; 58 - for (let i = 0; i < 10 && out.length < cap; i++) { 59 - const page = await getBacklinksPage(target, collection, path, { limit: Math.min(100, cap), cursor }); 60 - if (!page) break; 68 + let incomplete = false; 69 + for (let i = 0; i < 10; i++) { 70 + if (out.length > cap) break; // already have more than we need; the rest is truncated 71 + // Fetch one extra so we can distinguish "exactly cap" from "more than cap". 72 + const page = await getBacklinksPage(target, collection, path, { limit: Math.min(100, cap + 1), cursor }); 73 + if (!page) { 74 + incomplete = true; // a page fetch failed; we can't be sure we have everything 75 + break; 76 + } 61 77 out.push(...page.records); 62 - if (!page.cursor) break; 78 + if (!page.cursor) { 79 + cursor = undefined; 80 + break; 81 + } 63 82 cursor = page.cursor; 64 83 } 65 - return out.slice(0, cap); 84 + const truncated = incomplete || out.length > cap || cursor !== undefined; 85 + return { records: out.slice(0, cap), truncated }; 66 86 } 67 87 68 88 /** Distinct players who recorded a result for this puzzle (wins + losses). Null on failure. */ ··· 70 90 return countDistinctDids(puzzleTarget(lang, puzzleNumber), COLLECTION.result, RESULT_TARGET_PATH); 71 91 } 72 92 73 - /** Up to `cap` result-record backlinks for this puzzle. */ 74 - export function dailyResultBacklinks(lang: Lang, puzzleNumber: number, cap: number): Promise<BacklinkRecord[]> { 93 + /** Up to `cap` result-record backlinks for this puzzle, plus a `truncated` flag. */ 94 + export function dailyResultBacklinks( 95 + lang: Lang, 96 + puzzleNumber: number, 97 + cap: number, 98 + ): Promise<{ records: BacklinkRecord[]; truncated: boolean }> { 75 99 return collectBacklinks(puzzleTarget(lang, puzzleNumber), COLLECTION.result, RESULT_TARGET_PATH, cap); 76 100 } 77 101

+22 -7

src/counts.ts

··· 4 4 5 5 /** 6 6 * Yesterday's { players, solvers, sampled } for one language. `players` is the 7 - * cheap distinct-DID count; `solvers` is counted from the sampled records 8 - * (≤ SOLVER_SAMPLE_CAP) to stay under the free-plan subrequest budget. 7 + * cheap distinct-DID count; `solvers` is the number of *distinct* DIDs whose 8 + * sampled record (≤ SOLVER_SAMPLE_CAP) shows a win. 9 + * 10 + * Both numbers are deduped by DID so they are commensurable, and `solvers` is 11 + * clamped to `players` so the published copy can never say more people solved 12 + * than played. `sampled` is true whenever the sample is incomplete — the page 13 + * was truncated, or we resolved fewer distinct players than Constellation 14 + * counted (a dropped read or index skew) — so compose hedges instead of 15 + * publishing an untrustworthy exact "X solved, Y didn't". 9 16 */ 10 17 export async function yesterdayCounts(lang: Lang, yesterdayN: number): Promise<YesterdayCounts> { 11 - const [players, backlinks] = await Promise.all([ 18 + const [players, { records, truncated }] = await Promise.all([ 12 19 dailyPlayerCount(lang, yesterdayN), 13 20 dailyResultBacklinks(lang, yesterdayN, SOLVER_SAMPLE_CAP), 14 21 ]); 15 22 16 - let solvers = 0; 23 + const seenDids = new Set<string>(); 24 + const solverDids = new Set<string>(); 17 25 await Promise.all( 18 - backlinks.map(async (bl) => { 26 + records.map(async (bl) => { 19 27 const rec = await getRecordByUri<ResultRecord>(backlinkUri(bl)); 20 - if (rec && rec.lang === lang && rec.puzzleNumber === yesterdayN && rec.solved) solvers++; 28 + if (!rec || rec.lang !== lang || rec.puzzleNumber !== yesterdayN) return; 29 + seenDids.add(bl.did); 30 + if (rec.solved) solverDids.add(bl.did); 21 31 }), 22 32 ); 23 33 24 - return { players, solvers, sampled: backlinks.length >= SOLVER_SAMPLE_CAP }; 34 + let solvers = solverDids.size; 35 + if (players != null) solvers = Math.min(solvers, players); 36 + 37 + const sampled = truncated || players == null || (players > 0 && seenDids.size < players); 38 + 39 + return { players, solvers, sampled }; 25 40 }

+132

test/counts.test.ts

··· 1 + import { describe, it, expect, vi, beforeEach } from 'vitest'; 2 + 3 + // Mock the network-facing modules so we can drive yesterdayCounts deterministically. 4 + vi.mock('../src/constellation.js', () => ({ 5 + dailyPlayerCount: vi.fn(), 6 + dailyResultBacklinks: vi.fn(), 7 + backlinkUri: (r: { did: string; rkey: string }) => `at://${r.did}/c/${r.rkey}`, 8 + })); 9 + vi.mock('../src/identity.js', () => ({ getRecordByUri: vi.fn() })); 10 + 11 + import { yesterdayCounts } from '../src/counts.js'; 12 + import { dailyPlayerCount, dailyResultBacklinks } from '../src/constellation.js'; 13 + import { getRecordByUri } from '../src/identity.js'; 14 + 15 + interface Rec { 16 + did: string; 17 + rkey: string; 18 + lang?: string; 19 + puzzleNumber?: number; 20 + solved?: boolean; 21 + /** When set, getRecordByUri resolves to null for this backlink (a dropped read). */ 22 + drop?: boolean; 23 + } 24 + 25 + /** Wire the mocks for one scenario. Records default to (lang='en', puzzle=N, solved). */ 26 + function setup(opts: { players: number | null; records: Rec[]; truncated?: boolean }, lang = 'en', n = 5) { 27 + vi.mocked(dailyPlayerCount).mockResolvedValue(opts.players); 28 + vi.mocked(dailyResultBacklinks).mockResolvedValue({ 29 + records: opts.records.map((r) => ({ did: r.did, collection: 'c', rkey: r.rkey })), 30 + truncated: opts.truncated ?? false, 31 + }); 32 + const byUri = new Map<string, unknown>(); 33 + for (const r of opts.records) { 34 + if (r.drop) continue; 35 + byUri.set(`at://${r.did}/c/${r.rkey}`, { 36 + lang: r.lang ?? lang, 37 + puzzleNumber: r.puzzleNumber ?? n, 38 + solved: r.solved ?? true, 39 + }); 40 + } 41 + vi.mocked(getRecordByUri).mockImplementation(async (uri: string) => (byUri.get(uri) ?? null) as never); 42 + } 43 + 44 + beforeEach(() => vi.clearAllMocks()); 45 + 46 + describe('yesterdayCounts', () => { 47 + it('dedupes solvers by DID — one player with two winning records counts once', async () => { 48 + setup({ 49 + players: 3, 50 + records: [ 51 + { did: 'did:a', rkey: '1' }, 52 + { did: 'did:a', rkey: '2' }, // same player, second record 53 + { did: 'did:b', rkey: '1' }, 54 + ], 55 + }); 56 + const { solvers } = await yesterdayCounts('en', 5); 57 + expect(solvers).toBe(2); 58 + }); 59 + 60 + it('clamps solvers to players when the two endpoints disagree', async () => { 61 + setup({ 62 + players: 1, // distinct-DID count says 1... 63 + records: [ 64 + { did: 'did:a', rkey: '1' }, 65 + { did: 'did:b', rkey: '1' }, // ...but the backlinks index returned two DIDs 66 + ], 67 + }); 68 + const { players, solvers } = await yesterdayCounts('en', 5); 69 + expect(players).toBe(1); 70 + expect(solvers).toBe(1); // never report more solvers than players 71 + }); 72 + 73 + it('ignores records for the wrong lang or puzzle, and losses', async () => { 74 + setup({ 75 + players: 4, 76 + records: [ 77 + { did: 'did:a', rkey: '1', solved: true }, 78 + { did: 'did:b', rkey: '1', solved: false }, // a loss 79 + { did: 'did:c', rkey: '1', lang: 'fr' }, // wrong language 80 + { did: 'did:d', rkey: '1', puzzleNumber: 99 }, // wrong puzzle 81 + ], 82 + }); 83 + const { solvers, sampled } = await yesterdayCounts('en', 5); 84 + expect(solvers).toBe(1); // only did:a 85 + // Only did:a and did:b are valid players for this puzzle; 2 < players(4) => hedge. 86 + expect(sampled).toBe(true); 87 + }); 88 + 89 + it('is not sampled when every player was resolved and the page was complete', async () => { 90 + setup({ 91 + players: 2, 92 + records: [ 93 + { did: 'did:a', rkey: '1', solved: true }, 94 + { did: 'did:b', rkey: '1', solved: false }, 95 + ], 96 + }); 97 + const { players, solvers, sampled } = await yesterdayCounts('en', 5); 98 + expect(players).toBe(2); 99 + expect(solvers).toBe(1); 100 + expect(sampled).toBe(false); 101 + }); 102 + 103 + it('is sampled when the backlink page was truncated', async () => { 104 + setup({ 105 + players: 10, 106 + records: [{ did: 'did:a', rkey: '1' }], 107 + truncated: true, 108 + }); 109 + const { sampled } = await yesterdayCounts('en', 5); 110 + expect(sampled).toBe(true); 111 + }); 112 + 113 + it('is sampled when a record read is dropped (fewer resolved than players)', async () => { 114 + setup({ 115 + players: 2, 116 + records: [ 117 + { did: 'did:a', rkey: '1', solved: true }, 118 + { did: 'did:b', rkey: '1', drop: true }, // transient read failure 119 + ], 120 + }); 121 + const { solvers, sampled } = await yesterdayCounts('en', 5); 122 + expect(solvers).toBe(1); 123 + expect(sampled).toBe(true); 124 + }); 125 + 126 + it('propagates a null player count (Constellation unreachable) as sampled', async () => { 127 + setup({ players: null, records: [{ did: 'did:a', rkey: '1' }] }); 128 + const { players, sampled } = await yesterdayCounts('en', 5); 129 + expect(players).toBeNull(); 130 + expect(sampled).toBe(true); 131 + }); 132 + });

Configure Feed

Configure Feed