Harden read path: validate author handle before resolver fetch

The public renderer took the `@author` segment straight from the URL and
used it as a fetch host, so a non-handle-shaped value (e.g.
`evil.com/.well-known/atproto-did`, `evil.com:8080`, `evil.com?x=y`) could
smuggle a path/port/query into the outbound request and turn the worker
into a GET request proxy to arbitrary public hosts.

- Add `isValidHandleOrDid()` to auth/config: accepts only a syntactically
valid handle or DID, rejecting URLs and anything with a path, port, query or
scheme (stricter than `isValidAccountInput`).
- Gate `resolveHandleToDid()` on it before any network call, so both
reader pages reject bad input (they already 404 on null). `safeFetch`
stays the internal-host backstop; this is the syntactic gate in front.
- Regression test asserts malicious input resolves to null with zero
fetches; DIDs pass through without a network call.

Also sanitise preview.astro's `set:html` (static input today) so every
raw-HTML sink uniformly goes through `sanitizeArticleHtml`.

author

Jeremy Herve · date: 2 weeks ago (Jun 8, 2026, 6:58 PM +0200) · commit: 5979fbc9 (5979fbc94240e7e9602a31807ba8476d5196cd92) · parent: 5c09d5f3 (5c09d5f31cffa641cc2d2daf69d9230de48c9a3e)

+100 -2

5 changed files

Expand all

src

lib

auth

config.test.ts

config.ts

reader

identity.test.ts

identity.ts

pages

preview.astro

+25

src/lib/auth/config.test.ts

··· 4 4 clientMetadataUrl, 5 5 normalizeHandle, 6 6 isValidAccountInput, 7 + isValidHandleOrDid, 7 8 OAUTH_SCOPE, 8 9 } from './config'; 9 10 ··· 46 47 expect( isValidAccountInput( '' ) ).toBe( false ); 47 48 expect( isValidAccountInput( ' ' ) ).toBe( false ); 48 49 expect( isValidAccountInput( 'notahandle' ) ).toBe( false ); 50 + } ); 51 + } ); 52 + 53 + describe( 'isValidHandleOrDid', () => { 54 + it( 'accepts handles and DIDs', () => { 55 + expect( isValidHandleOrDid( 'alice.bsky.social' ) ).toBe( true ); 56 + expect( isValidHandleOrDid( '@alice.bsky.social' ) ).toBe( true ); 57 + expect( isValidHandleOrDid( 'did:plc:abc123' ) ).toBe( true ); 58 + expect( isValidHandleOrDid( 'did:web:example.com' ) ).toBe( true ); 59 + } ); 60 + 61 + it( 'rejects URLs and anything carrying a path, port, query or scheme', () => { 62 + // These would otherwise be used directly as a resolver fetch host (SSRF/proxy). 63 + expect( isValidHandleOrDid( 'https://pds.example.com' ) ).toBe( false ); 64 + expect( isValidHandleOrDid( 'evil.com/.well-known/atproto-did' ) ).toBe( false ); 65 + expect( isValidHandleOrDid( 'evil.com?x=y' ) ).toBe( false ); 66 + expect( isValidHandleOrDid( 'evil.com:8080' ) ).toBe( false ); 67 + expect( isValidHandleOrDid( 'user@evil.com' ) ).toBe( false ); 68 + } ); 69 + 70 + it( 'rejects empty input and bare words without a domain', () => { 71 + expect( isValidHandleOrDid( '' ) ).toBe( false ); 72 + expect( isValidHandleOrDid( ' ' ) ).toBe( false ); 73 + expect( isValidHandleOrDid( 'notahandle' ) ).toBe( false ); 49 74 } ); 50 75 } ); 51 76

+19

src/lib/auth/config.ts

··· 56 56 } 57 57 return HANDLE_RE.test( normalizeHandle( value ) ); 58 58 } 59 + 60 + /** 61 + * Stricter sibling of `isValidAccountInput` for the public READ path. Accepts only a 62 + * syntactic handle (`alice.bsky.social`) or DID (`did:plc:…`) — and, unlike 63 + * `isValidAccountInput`, rejects `https://…` URLs and anything carrying a path, port, 64 + * query or scheme. The renderer takes the author straight from the URL and uses it as a 65 + * resolver fetch host, so this stops a value like `evil.com/x?y=` from smuggling into the 66 + * outbound request (`safeFetch` still blocks internal hosts; this gates the syntax). 67 + */ 68 + export function isValidHandleOrDid( input: string ): boolean { 69 + const value = input.trim(); 70 + if ( ! value ) { 71 + return false; 72 + } 73 + if ( DID_RE.test( value ) ) { 74 + return true; 75 + } 76 + return HANDLE_RE.test( normalizeHandle( value ) ); 77 + }

+42

src/lib/reader/identity.test.ts

··· 1 + import { afterEach, describe, expect, it, vi } from 'vitest'; 2 + import { resolveHandleToDid } from './identity'; 3 + 4 + /** 5 + * The read path takes the author straight from the URL (`/@<author>`) and uses it as a 6 + * resolver fetch host. These tests lock in that a non-handle-shaped value (path/port/ 7 + * query/scheme smuggling) is rejected BEFORE any network request — so the worker can't be 8 + * turned into a request proxy. `safeFetch` is the internal-host backstop; this is the 9 + * syntactic gate in front of it. 10 + */ 11 + describe( 'resolveHandleToDid — read-path input validation', () => { 12 + afterEach( () => { 13 + vi.unstubAllGlobals(); 14 + } ); 15 + 16 + it( 'rejects non-handle input without making any network request', async () => { 17 + const fetchSpy = vi.fn(); 18 + vi.stubGlobal( 'fetch', fetchSpy ); 19 + 20 + for ( const bad of [ 21 + 'evil.com/.well-known/atproto-did', 22 + 'evil.com?x=y', 23 + 'evil.com:8080', 24 + 'http://evil.com', 25 + 'user@evil.com', 26 + '', 27 + ' ', 28 + 'notahandle', 29 + ] ) { 30 + expect( await resolveHandleToDid( bad ) ).toBeNull(); 31 + } 32 + expect( fetchSpy ).not.toHaveBeenCalled(); 33 + } ); 34 + 35 + it( 'returns a DID unchanged without a network request', async () => { 36 + const fetchSpy = vi.fn(); 37 + vi.stubGlobal( 'fetch', fetchSpy ); 38 + 39 + expect( await resolveHandleToDid( 'did:plc:abc123' ) ).toBe( 'did:plc:abc123' ); 40 + expect( fetchSpy ).not.toHaveBeenCalled(); 41 + } ); 42 + } );

+10 -1

src/lib/reader/identity.ts

··· 4 4 */ 5 5 import { resolvePdsUrl } from '../media/pds'; 6 6 import { safeFetch } from '../net/safe-fetch'; 7 + import { isValidHandleOrDid, normalizeHandle } from '../auth/config'; 7 8 8 9 /** Default handle resolver — a `com.atproto.identity.resolveHandle` XRPC endpoint. */ 9 10 const FALLBACK_RESOLVER = 'https://bsky.social'; ··· 45 46 * (no third party), falling back to a public resolver XRPC. 46 47 */ 47 48 export async function resolveHandleToDid( handleOrDid: string ): Promise< string | null > { 49 + // Validate before the value is used as a resolver fetch host: the read path takes it 50 + // straight from the URL (`/@<author>`), so a non-handle-shaped value could otherwise 51 + // smuggle a path/port/query into the outbound request and turn the worker into a 52 + // request proxy. safeFetch still guards internal hosts; this rejects bad syntax first. 53 + if ( ! isValidHandleOrDid( handleOrDid ) ) { 54 + return null; 55 + } 48 56 if ( handleOrDid.startsWith( 'did:' ) ) { 49 57 return handleOrDid; 50 58 } 51 - return ( await resolveViaWellKnown( handleOrDid ) ) ?? ( await resolveViaXrpc( handleOrDid ) ); 59 + const handle = normalizeHandle( handleOrDid ); 60 + return ( await resolveViaWellKnown( handle ) ) ?? ( await resolveViaXrpc( handle ) ); 52 61 } 53 62 54 63 export interface Author {

+4 -1

src/pages/preview.astro

··· 1 1 --- 2 2 import Base from '../layouts/Base.astro'; 3 3 import { renderBlocks, blocksToText } from '../lib/blocks/render'; 4 + import { sanitizeArticleHtml } from '../lib/reader/sanitize'; 4 5 import { SAMPLE_TREE } from '../lib/blocks/sample'; 5 6 6 7 // Frontend block styles only — no editor chrome, no JS. ··· 10 11 11 12 // Render path (server/edge-safe): stored content (a block tree) → HTML via the 12 13 // dependency-free renderer (Decision 0003). No @wordpress runtime on this page. 13 - const html = renderBlocks( SAMPLE_TREE ); 14 + // Sanitised even though SAMPLE_TREE is static: every `set:html` sink goes through the 15 + // sanitizer, so the invariant holds if this is ever repointed at untrusted content. 16 + const html = sanitizeArticleHtml( renderBlocks( SAMPLE_TREE ) ); 14 17 const textContent = blocksToText( SAMPLE_TREE ); 15 18 16 19 const words = textContent.split( /\s+/ ).filter( Boolean ).length;

Configure Feed

Configure Feed