Monorepo for Tangled
tangled.org
1package markup
2
3import (
4 "maps"
5 "regexp"
6 "slices"
7 "strings"
8
9 "github.com/alecthomas/chroma/v2"
10 "github.com/microcosm-cc/bluemonday"
11)
12
13// shared policies built once at init; safe for concurrent use per bluemonday docs
14var (
15 sharedDefaultPolicy *bluemonday.Policy
16 sharedDescriptionPolicy *bluemonday.Policy
17 sharedLogsPolicy *bluemonday.Policy
18)
19
20func init() {
21 sharedDefaultPolicy = buildDefaultPolicy()
22 sharedDescriptionPolicy = buildDescriptionPolicy()
23 sharedLogsPolicy = buildLogsPolicy()
24}
25
26type Sanitizer struct {
27 defaultPolicy *bluemonday.Policy
28 descriptionPolicy *bluemonday.Policy
29 logsPolicy *bluemonday.Policy
30}
31
32func NewSanitizer() Sanitizer {
33 return Sanitizer{
34 defaultPolicy: sharedDefaultPolicy,
35 descriptionPolicy: sharedDescriptionPolicy,
36 logsPolicy: sharedLogsPolicy,
37 }
38}
39
40func (s *Sanitizer) SanitizeDefault(html string) string {
41 return s.defaultPolicy.Sanitize(html)
42}
43func (s *Sanitizer) SanitizeDescription(html string) string {
44 return s.descriptionPolicy.Sanitize(html)
45}
46func (s *Sanitizer) SanitizeLogs(html string) string {
47 return s.logsPolicy.Sanitize(html)
48}
49
50func buildDefaultPolicy() *bluemonday.Policy {
51 policy := bluemonday.UGCPolicy()
52
53 // Allow generally safe attributes
54 generalSafeAttrs := []string{
55 "abbr", "accept", "accept-charset",
56 "accesskey", "action", "align", "alt",
57 "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby",
58 "axis", "border", "cellpadding", "cellspacing", "char",
59 "charoff", "charset", "checked",
60 "clear", "cols", "colspan", "color",
61 "compact", "coords", "datetime", "dir",
62 "disabled", "enctype", "for", "frame",
63 "headers", "height", "hreflang",
64 "hspace", "ismap", "label", "lang",
65 "maxlength", "media", "method",
66 "multiple", "name", "nohref", "noshade",
67 "nowrap", "open", "prompt", "readonly", "rel", "rev",
68 "rows", "rowspan", "rules", "scope",
69 "selected", "shape", "size", "span",
70 "start", "summary", "tabindex", "target",
71 "title", "type", "usemap", "valign", "value",
72 "vspace", "width", "itemprop",
73 }
74
75 generalSafeElements := []string{
76 "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt",
77 "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label",
78 "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary",
79 "details", "caption", "figure", "figcaption",
80 "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr",
81 "picture", "source",
82 }
83
84 policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
85
86 // image
87 policy.AllowAttrs("onerror").OnElements("img")
88
89 // video
90 policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
91
92 // picture/source for modern image formats (avif, webp, etc.)
93 policy.AllowAttrs("srcset", "type", "media").OnElements("source")
94
95 // checkboxes
96 policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
97 policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
98
99 // for code blocks
100 policy.AllowAttrs("class").Matching(regexp.MustCompile(`chroma|mermaid`)).OnElements("pre")
101 policy.AllowAttrs("class").Matching(regexp.MustCompile(`anchor|footnote-ref|footnote-backref`)).OnElements("a")
102 policy.AllowAttrs("class").Matching(regexp.MustCompile(`heading`)).OnElements("h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8")
103 policy.AllowAttrs("class").Matching(regexp.MustCompile(strings.Join(slices.Collect(maps.Values(chroma.StandardTypes)), "|"))).OnElements("span")
104
105 // at-mentions
106 policy.AllowAttrs("class").Matching(regexp.MustCompile(`mention`)).OnElements("a")
107
108 // centering content
109 policy.AllowElements("center")
110
111 policy.AllowAttrs("align", "style", "width", "height").Globally()
112 policy.AllowStyles(
113 "margin",
114 "padding",
115 "text-align",
116 "font-weight",
117 "text-decoration",
118 "padding-left",
119 "padding-right",
120 "padding-top",
121 "padding-bottom",
122 "margin-left",
123 "margin-right",
124 "margin-top",
125 "margin-bottom",
126 )
127
128 // math
129 mathAttrs := []string{
130 "accent", "columnalign", "columnlines", "columnspan", "dir", "display",
131 "displaystyle", "encoding", "fence", "form", "largeop", "linebreak",
132 "linethickness", "lspace", "mathcolor", "mathsize", "mathvariant", "minsize",
133 "movablelimits", "notation", "rowalign", "rspace", "rowspacing", "rowspan",
134 "scriptlevel", "stretchy", "symmetric", "title", "voffset", "width",
135 }
136 mathElements := []string{
137 "annotation", "math", "menclose", "merror", "mfrac", "mi", "mmultiscripts",
138 "mn", "mo", "mover", "mpadded", "mprescripts", "mroot", "mrow", "mspace",
139 "msqrt", "mstyle", "msub", "msubsup", "msup", "mtable", "mtd", "mtext",
140 "mtr", "munder", "munderover", "semantics",
141 }
142 policy.AllowNoAttrs().OnElements(mathElements...)
143 policy.AllowAttrs(mathAttrs...).OnElements(mathElements...)
144
145 // goldmark-callout
146 policy.AllowAttrs("data-callout").OnElements("details")
147
148 return policy
149}
150
151func buildDescriptionPolicy() *bluemonday.Policy {
152 policy := bluemonday.NewPolicy()
153 policy.AllowStandardURLs()
154
155 // allow italics and bold.
156 policy.AllowElements("i", "b", "em", "strong")
157
158 // allow code.
159 policy.AllowElements("code")
160
161 // allow links
162 policy.AllowAttrs("href", "target", "rel").OnElements("a")
163
164 return policy
165}
166
167func buildLogsPolicy() *bluemonday.Policy {
168 policy := bluemonday.NewPolicy()
169
170 policy.AllowElements("p", "span")
171
172 // allow italics and bold
173 policy.AllowElements("i", "b", "em", "strong")
174
175 // allow fg/bg classes from terminal-to-html
176 policy.AllowAttrs("class").Matching(regexp.MustCompile(`term-*`)).OnElements("span")
177
178 return policy
179}