Monorepo for Tangled
tangled.org
1package markup
2
3import (
4 "maps"
5 "regexp"
6 "slices"
7 "strings"
8
9 "github.com/alecthomas/chroma/v2"
10 "github.com/microcosm-cc/bluemonday"
11)
12
13// shared policies built once at init; safe for concurrent use per bluemonday docs
14var (
15 sharedDefaultPolicy *bluemonday.Policy
16 sharedDescriptionPolicy *bluemonday.Policy
17 sharedLogsPolicy *bluemonday.Policy
18)
19
20func init() {
21 sharedDefaultPolicy = buildDefaultPolicy()
22 sharedDescriptionPolicy = buildDescriptionPolicy()
23 sharedLogsPolicy = buildLogsPolicy()
24}
25
26type Sanitizer struct {
27 defaultPolicy *bluemonday.Policy
28 descriptionPolicy *bluemonday.Policy
29 logsPolicy *bluemonday.Policy
30}
31
32func NewSanitizer() Sanitizer {
33 return Sanitizer{
34 defaultPolicy: sharedDefaultPolicy,
35 descriptionPolicy: sharedDescriptionPolicy,
36 logsPolicy: sharedLogsPolicy,
37 }
38}
39
40func (s *Sanitizer) SanitizeDefault(html string) string {
41 return s.defaultPolicy.Sanitize(html)
42}
43func (s *Sanitizer) SanitizeDescription(html string) string {
44 return s.descriptionPolicy.Sanitize(html)
45}
46func (s *Sanitizer) SanitizeLogs(html string) string {
47 return s.logsPolicy.Sanitize(html)
48}
49
50func buildDefaultPolicy() *bluemonday.Policy {
51 policy := bluemonday.UGCPolicy()
52
53 // Allow generally safe attributes
54 generalSafeAttrs := []string{
55 "abbr", "accept", "accept-charset",
56 "accesskey", "action", "align", "alt",
57 "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby",
58 "axis", "border", "cellpadding", "cellspacing", "char",
59 "charoff", "charset", "checked",
60 "clear", "cols", "colspan", "color",
61 "compact", "coords", "datetime", "dir",
62 "disabled", "enctype", "for", "frame",
63 "headers", "height", "hreflang",
64 "hspace", "ismap", "label", "lang",
65 "maxlength", "media", "method",
66 "multiple", "name", "nohref", "noshade",
67 "nowrap", "open", "prompt", "readonly", "rel", "rev",
68 "rows", "rowspan", "rules", "scope",
69 "selected", "shape", "size", "span",
70 "start", "summary", "tabindex", "target",
71 "title", "type", "usemap", "valign", "value",
72 "vspace", "width", "itemprop",
73 }
74
75 generalSafeElements := []string{
76 "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt",
77 "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label",
78 "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary",
79 "details", "caption", "figure", "figcaption",
80 "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr",
81 "picture", "source",
82 }
83
84 policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
85
86 // video
87 policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
88
89 // picture/source for modern image formats (avif, webp, etc.)
90 policy.AllowAttrs("srcset", "type", "media").OnElements("source")
91
92 // checkboxes
93 policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
94 policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
95
96 // for code blocks
97 policy.AllowAttrs("class").Matching(regexp.MustCompile(`chroma|mermaid`)).OnElements("pre")
98 policy.AllowAttrs("class").Matching(regexp.MustCompile(`anchor|footnote-ref|footnote-backref`)).OnElements("a")
99 policy.AllowAttrs("class").Matching(regexp.MustCompile(`heading`)).OnElements("h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8")
100 policy.AllowAttrs("class").Matching(regexp.MustCompile(strings.Join(slices.Collect(maps.Values(chroma.StandardTypes)), "|"))).OnElements("span")
101
102 // at-mentions
103 policy.AllowAttrs("class").Matching(regexp.MustCompile(`mention`)).OnElements("a")
104
105 // centering content
106 policy.AllowElements("center")
107
108 policy.AllowAttrs("align", "style", "width", "height").Globally()
109 policy.AllowStyles(
110 "margin",
111 "padding",
112 "text-align",
113 "font-weight",
114 "text-decoration",
115 "padding-left",
116 "padding-right",
117 "padding-top",
118 "padding-bottom",
119 "margin-left",
120 "margin-right",
121 "margin-top",
122 "margin-bottom",
123 )
124
125 // math
126 mathAttrs := []string{
127 "accent", "columnalign", "columnlines", "columnspan", "dir", "display",
128 "displaystyle", "encoding", "fence", "form", "largeop", "linebreak",
129 "linethickness", "lspace", "mathcolor", "mathsize", "mathvariant", "minsize",
130 "movablelimits", "notation", "rowalign", "rspace", "rowspacing", "rowspan",
131 "scriptlevel", "stretchy", "symmetric", "title", "voffset", "width",
132 }
133 mathElements := []string{
134 "annotation", "math", "menclose", "merror", "mfrac", "mi", "mmultiscripts",
135 "mn", "mo", "mover", "mpadded", "mprescripts", "mroot", "mrow", "mspace",
136 "msqrt", "mstyle", "msub", "msubsup", "msup", "mtable", "mtd", "mtext",
137 "mtr", "munder", "munderover", "semantics",
138 }
139 policy.AllowNoAttrs().OnElements(mathElements...)
140 policy.AllowAttrs(mathAttrs...).OnElements(mathElements...)
141
142 // goldmark-callout
143 policy.AllowAttrs("data-callout").OnElements("details")
144
145 return policy
146}
147
148func buildDescriptionPolicy() *bluemonday.Policy {
149 policy := bluemonday.NewPolicy()
150 policy.AllowStandardURLs()
151
152 // allow italics and bold.
153 policy.AllowElements("i", "b", "em", "strong")
154
155 // allow code.
156 policy.AllowElements("code")
157
158 // allow links
159 policy.AllowAttrs("href", "target", "rel").OnElements("a")
160
161 return policy
162}
163
164func buildLogsPolicy() *bluemonday.Policy {
165 policy := bluemonday.NewPolicy()
166
167 policy.AllowElements("p", "span")
168
169 // allow italics and bold
170 policy.AllowElements("i", "b", "em", "strong")
171
172 // allow fg/bg classes from terminal-to-html
173 policy.AllowAttrs("class").Matching(regexp.MustCompile(`term-*`)).OnElements("span")
174
175 return policy
176}