A calm place to write long-form, and publish it to the open social web.
skypress.blog/
/**
 * Server-side syntax highlighting for the reader's code blocks (Decision: see
 * 2026-06-11 spec). Runs AFTER `renderBlocks` and BEFORE `sanitizeArticleHtml`
 * in `render-article.ts`: it wraps each block in `<code class="hljs language-…">`
 * and adds `<span class="hljs-…">` token spans — all of which the sanitiser already
 * allows, so sanitise stays the last step (AGENTS.md #6b).
 *
 * The `core/code` block stores no language, so we auto-detect among a CURATED
 * subset (fewer candidates → more accurate detection, leaner import). This module
 * is the only place `highlight.js` is imported; like `render.ts` it must never
 * import `@wordpress/*` (Decision 0003).
 */
13import hljs from 'highlight.js/lib/core';
14import javascript from 'highlight.js/lib/languages/javascript';
15import typescript from 'highlight.js/lib/languages/typescript';
16import json from 'highlight.js/lib/languages/json';
17import bash from 'highlight.js/lib/languages/bash';
18import php from 'highlight.js/lib/languages/php';
19import python from 'highlight.js/lib/languages/python';
20import xml from 'highlight.js/lib/languages/xml';
21import css from 'highlight.js/lib/languages/css';
22import scss from 'highlight.js/lib/languages/scss';
23import sql from 'highlight.js/lib/languages/sql';
24import yaml from 'highlight.js/lib/languages/yaml';
25import markdown from 'highlight.js/lib/languages/markdown';
26import diff from 'highlight.js/lib/languages/diff';
27import go from 'highlight.js/lib/languages/go';
28import rust from 'highlight.js/lib/languages/rust';
29import { decodeEntities } from '../blocks/render';
30
/** A grammar definition exactly as `hljs.registerLanguage` accepts it. */
type LanguageDefinition = Parameters< typeof hljs.registerLanguage >[ 1 ];

/**
 * The curated grammar set, keyed by the name each grammar is registered under.
 *
 * Entries are registered in the order they are listed here, so keep that order
 * stable. NOTE(review): highlight.js appears to break equal-relevance ties in
 * favour of the grammar registered first — confirm before reordering.
 */
const LANGUAGES: Record< string, LanguageDefinition > = {
	javascript,
	typescript,
	json,
	bash,
	php,
	python,
	xml,
	css,
	scss,
	sql,
	yaml,
	markdown,
	diff,
	go,
	rust,
};

// Register every curated grammar once, at module load.
Object.entries( LANGUAGES ).forEach( ( [ id, grammar ] ) => {
	hljs.registerLanguage( id, grammar );
} );
52
// Matches one rendered code block in exactly the shape render.ts emits, capturing
// the (still entity-escaped) body lazily. Because the body is escaped, a literal
// `</code></pre>` can only be our own closing delimiter, never part of the block's
// source. The `g` flag lets a single `replace` call visit every block in the page.
const CODE_BLOCK = /<pre class="wp-block-code"><code>([\s\S]*?)<\/code><\/pre>/g;
56
/**
 * Highlight the body of a single rendered code block.
 *
 * @param escapedSource The entity-escaped inner HTML captured between
 *                      `<code>` and `</code>`.
 * @returns A full `<pre class="wp-block-code">` element: token markup with an
 *          `hljs language-…` class when a language was detected, otherwise the
 *          block exactly as it came in.
 */
function highlightOne( escapedSource: string ): string {
	// The unhighlighted form, used whenever highlighting yields nothing useful.
	const plainBlock = `<pre class="wp-block-code"><code>${ escapedSource }</code></pre>`;

	// Stored code may carry real <br> tags as line breaks. Convert them to newlines
	// while the source is STILL escaped: at this point only genuine break tags match,
	// whereas a `<br>` the author typed inside the sample is stored as `&lt;br&gt;`
	// and must survive as text. Left in place, the highlighter would tokenise the
	// tag as xml and the code's line structure would collapse.
	const withNewlines = escapedSource.replace( /<br\s*\/?>/gi, '\n' );
	const raw = decodeEntities( withNewlines );

	try {
		const detection = hljs.highlightAuto( raw );
		if ( detection.language && detection.relevance !== 0 ) {
			return `<pre class="wp-block-code"><code class="hljs language-${ detection.language }">${ detection.value }</code></pre>`;
		}
		// No detected language, or zero relevance: no useful tokens, keep it plain.
		return plainBlock;
	} catch {
		// A highlighter failure must never break the page; emit the untouched block.
		return plainBlock;
	}
}
77
/**
 * Run every rendered code block in `html` through the highlighter.
 *
 * @param html Rendered article HTML.
 * @returns The same HTML with each `CODE_BLOCK` match replaced by the output of
 *          `highlightOne`; text outside the matches is returned unchanged.
 */
export function highlightCodeBlocks( html: string ): string {
	// A function replacer (rather than a replacement string) keeps `$` sequences in
	// the highlighted output from being read as substitution patterns.
	const rewriteBlock = ( _wholeMatch: string, body: string ): string => highlightOne( body );
	return html.replace( CODE_BLOCK, rewriteBlock );
}
82
/**
 * HTML-escape text for the fallback path (hljs already escapes its own token output).
 *
 * Replaces the three characters that are significant in HTML element content —
 * `&`, `<` and `>` — with their entities, so the returned string renders as text.
 * Quotes are left alone, so the result is meant for element content only, not
 * for attribute values.
 *
 * @param source Raw, unescaped text.
 * @returns `source` with `&`, `<` and `>` replaced by `&amp;`, `&lt;` and `&gt;`.
 */
function escapeHtml( source: string ): string {
	// `&` must go first; otherwise the ampersands introduced by the `<` and `>`
	// replacements would themselves be escaped a second time.
	return source
		.replace( /&/g, '&amp;' )
		.replace( /</g, '&lt;' )
		.replace( />/g, '&gt;' );
}
90
/**
 * Highlight source text whose language is already known, skipping the reader's
 * auto-detection. Intended for static pages (e.g. the lexicon's JSON schema dump).
 * It lives here so that highlight.js stays imported in this module only.
 *
 * @param source   Raw source text to highlight.
 * @param language Name of the grammar to highlight with.
 * @returns The token HTML produced by `hljs.highlight`. If that call throws
 *          (an unregistered language, for example), the result of
 *          `escapeHtml( source )` instead, so the source still renders.
 */
export function highlightSource( source: string, language: string ): string {
	let markup: string;
	try {
		markup = hljs.highlight( source, { language } ).value;
	} catch {
		// Highlighting failed; fall back to the escaped plain text.
		markup = escapeHtml( source );
	}
	return markup;
}
106
/**
 * Render a JSON string as a standalone `<code class="hljs language-json">…</code>`
 * fragment. No `<pre>` wrapper is added — the caller owns the surrounding chrome.
 * Used by the reader's record-JSON viewer.
 *
 * The body comes from `highlightSource( source, 'json' )`, which is responsible
 * for escaping the source on both its highlight and fallback paths; the caller
 * (`record-json.ts`) is expected to sanitise the result as the final gate.
 *
 * @param source The JSON text to display.
 * @returns The `<code>` fragment as an HTML string.
 */
export function highlightJson( source: string ): string {
	const tokenMarkup = highlightSource( source, 'json' );
	return '<code class="hljs language-json">' + tokenMarkup + '</code>';
}