A calm place to write long-form, and publish it to the open social web. skypress.blog/
0

Configure Feed

Select the types of activity you want to include in your feed.

at trunk 5.1 kB View raw
/**
 * Server-side syntax highlighting for the reader's code blocks (Decision: see
 * 2026-06-11 spec). Runs AFTER `renderBlocks` and BEFORE `sanitizeArticleHtml`
 * in `render-article.ts`: it wraps each block in `<code class="hljs language-…">`
 * and adds `<span class="hljs-…">` token spans — all of which the sanitiser already
 * allows, so sanitise stays the last step (AGENTS.md #6b).
 *
 * The `core/code` block stores no language, so we auto-detect among a CURATED
 * subset (fewer candidates → more accurate detection, leaner import). This module
 * is the only place `highlight.js` is imported; like `render.ts` it must never
 * import `@wordpress/*` (Decision 0003).
 */
import hljs from 'highlight.js/lib/core';
import javascript from 'highlight.js/lib/languages/javascript';
import typescript from 'highlight.js/lib/languages/typescript';
import json from 'highlight.js/lib/languages/json';
import bash from 'highlight.js/lib/languages/bash';
import php from 'highlight.js/lib/languages/php';
import python from 'highlight.js/lib/languages/python';
import xml from 'highlight.js/lib/languages/xml';
import css from 'highlight.js/lib/languages/css';
import scss from 'highlight.js/lib/languages/scss';
import sql from 'highlight.js/lib/languages/sql';
import yaml from 'highlight.js/lib/languages/yaml';
import markdown from 'highlight.js/lib/languages/markdown';
import diff from 'highlight.js/lib/languages/diff';
import go from 'highlight.js/lib/languages/go';
import rust from 'highlight.js/lib/languages/rust';
import { decodeEntities } from '../blocks/render';

// The curated auto-detection candidates, keyed by the name each grammar is
// registered under (that name is also what ends up in the `language-…` class).
const LANGUAGES: Record< string, Parameters< typeof hljs.registerLanguage >[ 1 ] > = {
	javascript,
	typescript,
	json,
	bash,
	php,
	python,
	xml,
	css,
	scss,
	sql,
	yaml,
	markdown,
	diff,
	go,
	rust,
};

// Module-level side effect: register every curated grammar once, at import time.
for ( const [ name, lang ] of Object.entries( LANGUAGES ) ) {
	hljs.registerLanguage( name, lang );
}

// render.ts emits exactly this shape; content is entity-escaped, so `</code></pre>`
// only appears as our own delimiter, never inside a block's source.
const CODE_BLOCK = /<pre class="wp-block-code"><code>([\s\S]*?)<\/code><\/pre>/g;

/**
 * Highlight the body of a single rendered code block.
 *
 * @param escapedSource The inner HTML captured between `<code>` and `</code>`:
 *   entity-escaped source text that may contain `<br>` tags as line breaks.
 * @returns A complete `<pre class="wp-block-code">` element. When a language is
 *   detected with non-zero relevance the `<code>` carries `hljs language-…`
 *   classes and token spans; otherwise (or on any highlighter error) the block
 *   is returned in its original, un-highlighted form.
 */
function highlightOne( escapedSource: string ): string {
	// The un-highlighted block, byte-identical to what render.ts produced. Used for
	// both the "nothing detected" case and the error fallback.
	const plain = `<pre class="wp-block-code"><code>${ escapedSource }</code></pre>`;

	// Stored code may use <br> for line breaks (render.ts passes it through and the
	// sanitiser keeps it as a real break). Turn those into real newlines BEFORE tokenising,
	// or the highlighter treats "<br>" as an xml tag and emits literal "<br>" text,
	// collapsing the code's line structure. Strip the breaks on the still-escaped source
	// so only genuine break tags are affected — a literal `<br>` typed in a code sample is
	// stored escaped (`&lt;br&gt;`) and must survive as text, not become a newline.
	const raw = decodeEntities( escapedSource.replace( /<br\s*\/?>/gi, '\n' ) );
	try {
		const { value, language, relevance } = hljs.highlightAuto( raw );
		// Zero relevance / no detected language → no useful tokens; keep it plain.
		if ( ! language || relevance === 0 ) {
			return plain;
		}
		return `<pre class="wp-block-code"><code class="hljs language-${ language }">${ value }</code></pre>`;
	} catch {
		// Never let a highlighter error break a page — fall back to today's output.
		return plain;
	}
}

/**
 * Replace each rendered code block's body with highlight.js token markup.
 *
 * @param html Rendered article HTML; only spans matching the exact
 *   `<pre class="wp-block-code"><code>…</code></pre>` shape are rewritten.
 * @returns The same HTML with every matching block passed through the highlighter.
 */
export function highlightCodeBlocks( html: string ): string {
	return html.replace( CODE_BLOCK, ( _full, inner: string ) => highlightOne( inner ) );
}

/** HTML-escape for the fallback path (hljs already escapes its own token output). */
function escapeHtml( source: string ): string {
	return source
		.replace( /&/g, '&amp;' )
		.replace( /</g, '&lt;' )
		.replace( />/g, '&gt;' );
}

/**
 * Highlight a known-language source string into hljs token HTML, for static pages
 * (e.g. the lexicon's JSON schema dump) that know their language up front and so
 * skip the reader's auto-detection. Returns highlight.js's already-escaped token
 * HTML, safe to inject; on any error (unregistered language, etc.) it falls back to
 * plain entity-escaped text — the source still renders, never as raw markup. Keeping
 * this here preserves the rule that highlight.js is imported in this module only.
 */
export function highlightSource( source: string, language: string ): string {
	try {
		return hljs.highlight( source, { language } ).value;
	} catch {
		return escapeHtml( source );
	}
}

/**
 * Highlight a JSON string as a standalone `<code class="hljs language-json">…</code>`
 * fragment (no `<pre>` wrapper — the caller owns the chrome). Used by the reader's
 * record-JSON viewer. `highlightSource` HTML-escapes the source on both the highlight
 * and fallback paths, so untrusted record values can't inject markup; `record-json.ts`
 * sanitises the result as the final gate.
 */
export function highlightJson( source: string ): string {
	return `<code class="hljs language-json">${ highlightSource( source, 'json' ) }</code>`;
}