forked from
tangled.org/core
Monorepo for Tangled
1package markup
2
3import (
4 "maps"
5 "regexp"
6 "slices"
7 "strings"
8
9 "github.com/alecthomas/chroma/v2"
10 "github.com/microcosm-cc/bluemonday"
11)
12
13// shared policies built once at init; safe for concurrent use per bluemonday docs
14var (
15 sharedDefaultPolicy *bluemonday.Policy
16 sharedDescriptionPolicy *bluemonday.Policy
17)
18
19func init() {
20 sharedDefaultPolicy = buildDefaultPolicy()
21 sharedDescriptionPolicy = buildDescriptionPolicy()
22}
23
24type Sanitizer struct {
25 defaultPolicy *bluemonday.Policy
26 descriptionPolicy *bluemonday.Policy
27}
28
29func NewSanitizer() Sanitizer {
30 return Sanitizer{
31 defaultPolicy: sharedDefaultPolicy,
32 descriptionPolicy: sharedDescriptionPolicy,
33 }
34}
35
36func (s *Sanitizer) SanitizeDefault(html string) string {
37 return s.defaultPolicy.Sanitize(html)
38}
39func (s *Sanitizer) SanitizeDescription(html string) string {
40 return s.descriptionPolicy.Sanitize(html)
41}
42
43func buildDefaultPolicy() *bluemonday.Policy {
44 policy := bluemonday.UGCPolicy()
45
46 // Allow generally safe attributes
47 generalSafeAttrs := []string{
48 "abbr", "accept", "accept-charset",
49 "accesskey", "action", "align", "alt",
50 "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby",
51 "axis", "border", "cellpadding", "cellspacing", "char",
52 "charoff", "charset", "checked",
53 "clear", "cols", "colspan", "color",
54 "compact", "coords", "datetime", "dir",
55 "disabled", "enctype", "for", "frame",
56 "headers", "height", "hreflang",
57 "hspace", "ismap", "label", "lang",
58 "maxlength", "media", "method",
59 "multiple", "name", "nohref", "noshade",
60 "nowrap", "open", "prompt", "readonly", "rel", "rev",
61 "rows", "rowspan", "rules", "scope",
62 "selected", "shape", "size", "span",
63 "start", "summary", "tabindex", "target",
64 "title", "type", "usemap", "valign", "value",
65 "vspace", "width", "itemprop",
66 }
67
68 generalSafeElements := []string{
69 "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt",
70 "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label",
71 "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary",
72 "details", "caption", "figure", "figcaption",
73 "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr",
74 "picture", "source",
75 }
76
77 policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
78
79 // video
80 policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
81
82 // picture/source for modern image formats (avif, webp, etc.)
83 policy.AllowAttrs("srcset", "type", "media").OnElements("source")
84
85 // checkboxes
86 policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
87 policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
88
89 // for code blocks
90 policy.AllowAttrs("class").Matching(regexp.MustCompile(`chroma|mermaid`)).OnElements("pre")
91 policy.AllowAttrs("class").Matching(regexp.MustCompile(`anchor|footnote-ref|footnote-backref`)).OnElements("a")
92 policy.AllowAttrs("class").Matching(regexp.MustCompile(`heading`)).OnElements("h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8")
93 policy.AllowAttrs("class").Matching(regexp.MustCompile(strings.Join(slices.Collect(maps.Values(chroma.StandardTypes)), "|"))).OnElements("span")
94
95 // at-mentions
96 policy.AllowAttrs("class").Matching(regexp.MustCompile(`mention`)).OnElements("a")
97
98 // centering content
99 policy.AllowElements("center")
100
101 policy.AllowAttrs("align", "style", "width", "height").Globally()
102 policy.AllowStyles(
103 "margin",
104 "padding",
105 "text-align",
106 "font-weight",
107 "text-decoration",
108 "padding-left",
109 "padding-right",
110 "padding-top",
111 "padding-bottom",
112 "margin-left",
113 "margin-right",
114 "margin-top",
115 "margin-bottom",
116 )
117
118 // math
119 mathAttrs := []string{
120 "accent", "columnalign", "columnlines", "columnspan", "dir", "display",
121 "displaystyle", "encoding", "fence", "form", "largeop", "linebreak",
122 "linethickness", "lspace", "mathcolor", "mathsize", "mathvariant", "minsize",
123 "movablelimits", "notation", "rowalign", "rspace", "rowspacing", "rowspan",
124 "scriptlevel", "stretchy", "symmetric", "title", "voffset", "width",
125 }
126 mathElements := []string{
127 "annotation", "math", "menclose", "merror", "mfrac", "mi", "mmultiscripts",
128 "mn", "mo", "mover", "mpadded", "mprescripts", "mroot", "mrow", "mspace",
129 "msqrt", "mstyle", "msub", "msubsup", "msup", "mtable", "mtd", "mtext",
130 "mtr", "munder", "munderover", "semantics",
131 }
132 policy.AllowNoAttrs().OnElements(mathElements...)
133 policy.AllowAttrs(mathAttrs...).OnElements(mathElements...)
134
135 // goldmark-callout
136 policy.AllowAttrs("data-callout").OnElements("details")
137
138 return policy
139}
140
141func buildDescriptionPolicy() *bluemonday.Policy {
142 policy := bluemonday.NewPolicy()
143 policy.AllowStandardURLs()
144
145 // allow italics and bold.
146 policy.AllowElements("i", "b", "em", "strong")
147
148 // allow code.
149 policy.AllowElements("code")
150
151 // allow links
152 policy.AllowAttrs("href", "target", "rel").OnElements("a")
153
154 return policy
155}