Guard the read-through renderer against SSRF · jeremy.herve.bzh/skypress@a526025

+5 -2

AGENTS.md

··· 31 31 Adding a block means adding a `render.ts` case **and** a fidelity assertion. 32 32 5. **Curated block allowlist is the content model.** Add blocks deliberately; removing 33 33 one after content exists is a breaking change. (Decision 0002) 34 - 6. **Untrusted content:** stored block trees come from arbitrary PDSes. The reader must 35 - **sanitise** HTML before injecting it (tracked for SP4). 34 + 6. **Untrusted content:** stored block trees come from arbitrary PDSes. The reader 35 + **sanitises** HTML before injecting it (`src/lib/reader/sanitize.ts`). Two standing 36 + rules for the read path: (a) any server-side `fetch` to a host derived from user input 37 + (a handle, `did:web`, a PDS `serviceEndpoint`) MUST go through `src/lib/net/safe-fetch.ts` 38 + (SSRF guard); (b) never inject PDS-sourced HTML without sanitising. 36 39 7. **OAuth is a browser public client** (`@atproto/oauth-client-browser`, Decision 0004). 37 40 In **dev you must serve on `http://127.0.0.1:<port>`, not `localhost`** (atproto 38 41 loopback requirement), and the loopback `client_id` must be path-less — see

+17

docs/decisions/0007-read-through-renderer.md

··· 53 53 `skypress.blog/@<handle>/<rkey>` URL. With these + the publication `url` pointing back at 54 54 skypress.blog, no separate `.well-known` route is needed for verification. 55 55 56 + ## Security — SSRF guard 57 + 58 + The renderer fetches hostnames derived from untrusted input: the `@handle` in the URL, 59 + a `did:web` host, and the PDS `serviceEndpoint` from a DID document. Unguarded, a request 60 + like `/@169.254.169.254/x` or a `did:web` pointing at an internal host would make the 61 + server fetch loopback / cloud-metadata / private addresses (SSRF). 62 + 63 + Every such fetch goes through **`src/lib/net/safe-fetch.ts`**, which allows only `https://` 64 + to syntactically valid **public** domains — rejecting IP literals, single-label/`localhost`, 65 + and reserved/internal TLDs — and refuses to follow redirects (no pivot to an internal 66 + host). Verified: legit handles render (200) while `/@127.0.0.1/…` and `/@169.254.169.254/…` 67 + are rejected (404, no internal request made). Unit-tested in `net/safe-fetch.test.ts`. 68 + 69 + **Residual:** a public domain whose DNS resolves to a private IP (DNS rebinding) can't be 70 + closed portably without resolving DNS and inspecting the address (Node-only, fights edge 71 + portability). Close it at the network/egress layer of the chosen host in SP7. 72 + 56 73 ## Consequences 57 74 58 75 - `@astrojs/node` is a placeholder host; `sanitize-html` is Node-oriented. SP7 must

+9 -5

src/lib/media/pds.ts

··· 1 + import { safeFetch, assertSafeUrl } from '../net/safe-fetch'; 2 + 1 3 /** 2 - * Resolve a writer's PDS endpoint from their DID document (browser-only). 3 - * Needed to build `getBlob` URLs. Handles `did:plc` (via plc.directory) and `did:web`. 4 + * Resolve a writer's PDS endpoint from their DID document. Handles `did:plc` (via 5 + * plc.directory) and `did:web`. The `did:web` host and the returned `serviceEndpoint` 6 + * are untrusted, so both go through `safeFetch`/`assertSafeUrl` to prevent SSRF. 4 7 */ 5 8 export async function resolvePdsUrl( did: string ): Promise< string > { 6 9 let docUrl: string; 7 10 if ( did.startsWith( 'did:plc:' ) ) { 8 - docUrl = `https://plc.directory/${ did }`; 11 + docUrl = `https://plc.directory/${ encodeURIComponent( did ) }`; 9 12 } else if ( did.startsWith( 'did:web:' ) ) { 10 13 const host = did.slice( 'did:web:'.length ).replace( /:/g, '/' ); 11 14 docUrl = `https://${ host }/.well-known/did.json`; ··· 13 16 throw new Error( `Unsupported DID method: ${ did }` ); 14 17 } 15 18 16 - const res = await fetch( docUrl ); 19 + const res = await safeFetch( docUrl ); 17 20 if ( ! res.ok ) { 18 21 throw new Error( `Failed to resolve DID document (${ res.status })` ); 19 22 } ··· 27 30 if ( ! pds?.serviceEndpoint ) { 28 31 throw new Error( 'No atproto PDS endpoint in DID document' ); 29 32 } 30 - return pds.serviceEndpoint; 33 + // The endpoint comes from the DID doc — validate before anyone fetches it. 34 + return assertSafeUrl( pds.serviceEndpoint ).toString().replace( /\/$/, '' ); 31 35 }

+41

src/lib/net/safe-fetch.test.ts

import { describe, expect, it } from 'vitest';
import { isPublicHostname, assertSafeUrl } from './safe-fetch';

/** Assert that `isPublicHostname` returns `expected` for every host in `hosts`. */
function expectHostnames( hosts: readonly string[], expected: boolean ): void {
	for ( const host of hosts ) {
		expect( isPublicHostname( host ) ).toBe( expected );
	}
}

describe( 'isPublicHostname', () => {
	it( 'accepts real public domains', () => {
		expectHostnames(
			[ 'bsky.social', 'jeherve.com', 'chanterelle.us-west.host.bsky.network' ],
			true
		);
	} );

	it( 'rejects IP literals (incl. cloud metadata + loopback)', () => {
		// IPv4 loopback, link-local metadata, RFC 1918, then IPv6 with and without brackets.
		expectHostnames( [ '127.0.0.1', '169.254.169.254', '10.0.0.5', '[::1]', '::1' ], false );
	} );

	it( 'rejects single-label and reserved/internal names', () => {
		expectHostnames(
			[ 'localhost', 'internal', 'foo.internal', 'db.local', 'host.localhost', '' ],
			false
		);
	} );
} );

describe( 'assertSafeUrl', () => {
	it( 'returns a parsed URL for safe https targets', () => {
		const parsed = assertSafeUrl( 'https://bsky.social/xrpc/x' );
		expect( parsed.hostname ).toBe( 'bsky.social' );
	} );

	it( 'throws for non-https, IPs, ports-to-internal, and reserved hosts', () => {
		const unsafeUrls = [
			'http://bsky.social/x',
			'https://127.0.0.1/x',
			'https://localhost/x',
			'https://169.254.169.254/latest/meta-data',
			'file:///etc/passwd',
		];
		for ( const candidate of unsafeUrls ) {
			expect( () => assertSafeUrl( candidate ) ).toThrow();
		}
	} );
} );

+73

src/lib/net/safe-fetch.ts

/**
 * SSRF-guarded fetch for the read-through renderer.
 *
 * The renderer fetches hostnames derived from UNTRUSTED input — the `@handle` in the
 * URL, a `did:web` host, and the PDS `serviceEndpoint` from a DID document. Without
 * guarding, a request like `/@169.254.169.254/x` or a `did:web` pointing at an internal
 * host would make the server fetch loopback / cloud-metadata / private addresses (SSRF).
 *
 * `assertSafeUrl` allows only `https://` to syntactically-valid **public** domains —
 * rejecting IP literals, single-label/`localhost`, and reserved/internal TLDs — and
 * `safeFetch` additionally refuses to follow redirects (which could pivot to an internal
 * host).
 *
 * Residual risk: a public domain whose DNS resolves to a private IP (DNS rebinding).
 * That can't be closed portably without resolving DNS and inspecting the address; it is
 * best handled at the network/egress layer of the deploy host (tracked for SP7).
 */

/** Final labels that never denote a public, routable DNS name. */
const RESERVED_TLDS = new Set( [
	'localhost', 'local', 'internal', 'intranet', 'lan', 'home', 'corp',
	'test', 'example', 'invalid', 'arpa', 'onion', 'localdomain',
] );

/** Maximum length of a DNS name in textual form (without the trailing root dot). */
const MAX_HOSTNAME_LENGTH = 253;

/**
 * True only for a syntactically valid, public, non-IP domain name.
 *
 * @param hostname Bare host (no scheme, port, or path). One trailing root dot is tolerated.
 * @returns `false` for empty input, IP literals, single-label names, malformed labels,
 *          over-long names, and names whose last label is numeric or reserved.
 */
export function isPublicHostname( hostname: string ): boolean {
	if ( ! hostname ) {
		return false;
	}
	const host = hostname.toLowerCase().replace( /\.$/, '' );
	if ( host.length > MAX_HOSTNAME_LENGTH ) {
		return false; // longer than any resolvable DNS name
	}
	if ( host.includes( ':' ) || host.includes( '[' ) ) {
		return false; // IPv6 literal or embedded port
	}
	if ( /^\d{1,3}(\.\d{1,3}){3}$/.test( host ) ) {
		return false; // IPv4 literal
	}
	const labels = host.split( '.' );
	if ( labels.length < 2 ) {
		return false; // single-label (localhost, etc.)
	}
	for ( const label of labels ) {
		if ( ! /^[a-z0-9-]{1,63}$/.test( label ) || label.startsWith( '-' ) || label.endsWith( '-' ) ) {
			return false;
		}
	}
	const tld = labels[ labels.length - 1 ];
	// URL parsers treat a host whose last label is a decimal or `0x…` hex number as an
	// IPv4 address (e.g. `0x7f.0x1` is 127.0.0.1), so such names are never public domains.
	if ( /^\d+$/.test( tld ) || /^0x[0-9a-f]*$/.test( tld ) || RESERVED_TLDS.has( tld ) ) {
		return false;
	}
	return true;
}

/**
 * Parse + validate a URL for outbound fetching.
 *
 * @param rawUrl Absolute URL string, typically built from untrusted input.
 * @returns The parsed `URL` when it is `https:` and its host passes `isPublicHostname`.
 * @throws Error if the string is not a valid URL, is not https, or targets a non-public host.
 */
export function assertSafeUrl( rawUrl: string ): URL {
	let url: URL;
	try {
		url = new URL( rawUrl );
	} catch {
		throw new Error( `Invalid URL: ${ rawUrl }` );
	}
	if ( url.protocol !== 'https:' ) {
		throw new Error( `Refusing non-https URL (${ url.protocol })` );
	}
	if ( ! isPublicHostname( url.hostname ) ) {
		throw new Error( `Refusing non-public host: ${ url.hostname }` );
	}
	return url;
}

/**
 * `fetch` restricted to safe public https targets, without following redirects.
 *
 * Declared `async` so a validation failure surfaces as a rejected promise rather than a
 * synchronous throw; callers using `.catch()` and callers using `await` in `try` both see it.
 *
 * @param rawUrl Absolute URL string to validate and fetch.
 * @param init   Standard fetch options; `redirect` is always forced to `'manual'`.
 * @returns The fetch response. A redirect comes back as-is instead of being followed.
 */
export async function safeFetch( rawUrl: string, init: RequestInit = {} ): Promise< Response > {
	const url = assertSafeUrl( rawUrl );
	// `redirect` is spread last so a caller-supplied value cannot re-enable following.
	return fetch( url, { ...init, redirect: 'manual' } );
}

+5 -2

src/lib/reader/identity.ts

··· 3 3 * Runs server-side (Node/edge): handle → DID → PDS. 4 4 */ 5 5 import { resolvePdsUrl } from '../media/pds'; 6 + import { safeFetch } from '../net/safe-fetch'; 6 7 7 8 /** Default handle resolver — a `com.atproto.identity.resolveHandle` XRPC endpoint. */ 8 9 const FALLBACK_RESOLVER = 'https://bsky.social'; 9 10 10 11 async function resolveViaWellKnown( handle: string ): Promise< string | null > { 11 12 try { 12 - const res = await fetch( `https://${ handle }/.well-known/atproto-did` ); 13 + // safeFetch rejects non-public hosts, so an attacker-supplied handle like 14 + // `169.254.169.254` or `internal-svc` can't trigger an internal request (SSRF). 15 + const res = await safeFetch( `https://${ handle }/.well-known/atproto-did` ); 13 16 if ( ! res.ok ) { 14 17 return null; 15 18 } ··· 22 25 23 26 async function resolveViaXrpc( handle: string ): Promise< string | null > { 24 27 try { 25 - const res = await fetch( 28 + const res = await safeFetch( 26 29 `${ FALLBACK_RESOLVER }/xrpc/com.atproto.identity.resolveHandle?handle=${ encodeURIComponent( 27 30 handle 28 31 ) }`

+16 -8

src/lib/reader/records.ts

··· 1 1 /** 2 2 * Read records from a PDS via the public `com.atproto.repo` XRPC (no auth needed). 3 + * `safeFetch` guards the (DID-doc-derived) PDS host against SSRF; failures resolve to 4 + * null/empty so a bad host or unreachable PDS degrades gracefully. 3 5 */ 6 + import { safeFetch } from '../net/safe-fetch'; 4 7 5 8 export interface RepoRecord< T = Record< string, unknown > > { 6 9 uri: string; ··· 8 11 value: T; 9 12 } 10 13 11 - /** Fetch a single record, or null if it doesn't exist. */ 14 + /** Fetch a single record, or null if it doesn't exist / can't be fetched. */ 12 15 export async function getRecord< T = Record< string, unknown > >( 13 16 pdsUrl: string, 14 17 did: string, ··· 20 23 `?repo=${ encodeURIComponent( did ) }` + 21 24 `&collection=${ encodeURIComponent( collection ) }` + 22 25 `&rkey=${ encodeURIComponent( rkey ) }`; 23 - const res = await fetch( url ); 24 - if ( ! res.ok ) { 26 + try { 27 + const res = await safeFetch( url ); 28 + return res.ok ? ( ( await res.json() ) as RepoRecord< T > ) : null; 29 + } catch { 25 30 return null; 26 31 } 27 - return ( await res.json() ) as RepoRecord< T >; 28 32 } 29 33 30 34 /** List records in a collection (most recent first). */ ··· 38 42 `${ pdsUrl.replace( /\/$/, '' ) }/xrpc/com.atproto.repo.listRecords` + 39 43 `?repo=${ encodeURIComponent( did ) }` + 40 44 `&collection=${ encodeURIComponent( collection ) }&limit=${ limit }`; 41 - const res = await fetch( url ); 42 - if ( ! res.ok ) { 45 + try { 46 + const res = await safeFetch( url ); 47 + if ( ! res.ok ) { 48 + return []; 49 + } 50 + const data: { records?: RepoRecord< T >[] } = await res.json(); 51 + return data.records ?? []; 52 + } catch { 43 53 return []; 44 54 } 45 - const data: { records?: RepoRecord< T >[] } = await res.json(); 46 - return data.records ?? []; 47 55 }

Configure Feed

Configure Feed