2017-04-13 10:52:24 +08:00
// Copyright 2017 The Gitea Authors. All rights reserved.
// Copyright 2017 The Gogs Authors. All rights reserved.
2022-11-28 02:20:29 +08:00
// SPDX-License-Identifier: MIT
2017-04-13 10:52:24 +08:00
2017-09-17 01:17:57 +08:00
package markup
2017-04-13 10:52:24 +08:00
import (
2019-12-31 09:53:28 +08:00
"io"
2023-07-18 23:18:37 +08:00
"net/url"
2017-04-13 10:52:24 +08:00
"regexp"
"sync"
"code.gitea.io/gitea/modules/setting"
"github.com/microcosm-cc/bluemonday"
)
// Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow
// any modification to the underlying policies once it's been created.
type Sanitizer struct {
2023-11-24 00:34:25 +08:00
defaultPolicy * bluemonday . Policy
descriptionPolicy * bluemonday . Policy
rendererPolicies map [ string ] * bluemonday . Policy
init sync . Once
2017-04-13 10:52:24 +08:00
}
2023-05-19 23:17:07 +08:00
var (
	// sanitizer is the single Sanitizer instance shared by the functions in
	// this file; InitializeSanitizer fills in its policies.
	sanitizer = new(Sanitizer)

	// allowAllRegex matches any input containing at least one character
	// other than a newline. createDefaultPolicy uses it as the URL scheme
	// pattern when no custom schemes are configured.
	allowAllRegex = regexp.MustCompile(".+")
)
2017-04-13 10:52:24 +08:00
// NewSanitizer initializes sanitizer with allowed attributes based on settings.
// Multiple calls to this function will only create one instance of Sanitizer during
// entire application lifecycle.
func NewSanitizer ( ) {
sanitizer . init . Do ( func ( ) {
2021-06-24 05:09:51 +08:00
InitializeSanitizer ( )
2019-10-15 09:31:09 +08:00
} )
}
2017-04-13 10:52:24 +08:00
2021-06-24 05:09:51 +08:00
// InitializeSanitizer (re)initializes the current sanitizer to account for changes in settings
func InitializeSanitizer ( ) {
sanitizer . rendererPolicies = map [ string ] * bluemonday . Policy { }
sanitizer . defaultPolicy = createDefaultPolicy ( )
2023-11-24 00:34:25 +08:00
sanitizer . descriptionPolicy = createRepoDescriptionPolicy ( )
2021-06-24 05:09:51 +08:00
for name , renderer := range renderers {
sanitizerRules := renderer . SanitizerRules ( )
if len ( sanitizerRules ) > 0 {
policy := createDefaultPolicy ( )
addSanitizerRules ( policy , sanitizerRules )
sanitizer . rendererPolicies [ name ] = policy
}
}
}
// createDefaultPolicy builds the policy used for general markup output.
// It starts from bluemonday.UGCPolicy() and then allows additional
// class/attribute/style combinations on specific elements, configures URL
// schemes from setting.Markdown.CustomURLSchemes, and finally applies
// setting.ExternalSanitizerRules via addSanitizerRules. A new policy is
// constructed on every call.
// NOTE(review): the bare date lines and the commit-message text inside this
// function are not Go code — presumably residue from a blame view; confirm
// and strip them before compiling.
func createDefaultPolicy ( ) * bluemonday . Policy {
policy := bluemonday . UGCPolicy ( )
2021-11-16 16:16:05 +08:00
// For JS code copy and Mermaid loading state
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^code-block( is-loading)?$ ` ) ) . OnElements ( "pre" )
2024-04-03 01:48:27 +08:00
// For code preview
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^code-preview-[-\w]+( file-content)?$ ` ) ) . Globally ( )
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^lines-num$ ` ) ) . OnElements ( "td" )
policy . AllowAttrs ( "data-line-number" ) . OnElements ( "span" )
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^lines-code chroma$ ` ) ) . OnElements ( "td" )
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^code-inner$ ` ) ) . OnElements ( "code" )
// For code preview (unicode escape)
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^file-view( unicode-escaped)?$ ` ) ) . OnElements ( "table" )
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^lines-escape$ ` ) ) . OnElements ( "td" )
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^toggle-escape-button btn interact-bg$ ` ) ) . OnElements ( "a" ) // don't use button, button might submit a form
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^(ambiguous-code-point|escaped-code-point|broken-code-point)$ ` ) ) . OnElements ( "span" )
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^char$ ` ) ) . OnElements ( "span" )
policy . AllowAttrs ( "data-tooltip-content" , "data-escaped" ) . OnElements ( "span" )
2022-10-21 20:00:53 +08:00
// For color preview
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^color-preview$ ` ) ) . OnElements ( "span" )
2022-11-09 08:11:26 +08:00
// For attention
2024-03-16 19:34:38 +08:00
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^attention-header attention-\w+$ ` ) ) . OnElements ( "blockquote" )
2022-11-09 08:11:26 +08:00
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^attention-\w+$ ` ) ) . OnElements ( "strong" )
2024-03-16 19:34:38 +08:00
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^attention-icon attention-\w+ svg octicon-[\w-]+$ ` ) ) . OnElements ( "svg" )
2022-11-09 08:11:26 +08:00
policy . AllowAttrs ( "viewBox" , "width" , "height" , "aria-hidden" ) . OnElements ( "svg" )
policy . AllowAttrs ( "fill-rule" , "d" ) . OnElements ( "path" )
2020-07-01 05:34:03 +08:00
// For Chroma markdown plugin
2022-09-14 00:33:37 +08:00
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^(chroma )?language-[\w-]+( display)?( is-loading)?$ ` ) ) . OnElements ( "code" )
2017-04-13 10:52:24 +08:00
2019-10-15 09:31:09 +08:00
// Checkboxes
2021-06-24 05:09:51 +08:00
policy . AllowAttrs ( "type" ) . Matching ( regexp . MustCompile ( ` ^checkbox$ ` ) ) . OnElements ( "input" )
policy . AllowAttrs ( "checked" , "disabled" , "data-source-position" ) . OnElements ( "input" )
2019-10-14 06:29:10 +08:00
2019-10-15 09:31:09 +08:00
// Custom URL-Schemes
// When schemes are configured they are allowed explicitly; otherwise any
// scheme matching allowAllRegex is accepted, except the ones rejected below.
2021-04-06 05:38:31 +08:00
if len ( setting . Markdown . CustomURLSchemes ) > 0 {
2021-06-24 05:09:51 +08:00
policy . AllowURLSchemes ( setting . Markdown . CustomURLSchemes ... )
2023-05-19 23:17:07 +08:00
} else {
policy . AllowURLSchemesMatching ( allowAllRegex )
2023-07-18 23:18:37 +08:00
// Even if every scheme is allowed, these three are blocked for security reasons
// disallowScheme reports false for every URL it is given.
disallowScheme := func ( * url . URL ) bool {
return false
}
policy . AllowURLSchemeWithCustomPolicy ( "javascript" , disallowScheme )
policy . AllowURLSchemeWithCustomPolicy ( "vbscript" , disallowScheme )
policy . AllowURLSchemeWithCustomPolicy ( "data" , disallowScheme )
2021-04-06 05:38:31 +08:00
}
2019-10-15 09:31:09 +08:00
2020-01-20 12:39:21 +08:00
// Allow classes for anchors
// NOTE(review): unlike most class patterns in this function, this one has no
// ^/$ anchors, so it would match any value containing the text — confirm
// that this is intended.
2021-09-15 16:45:27 +08:00
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ref-issue( ref-external-issue)? ` ) ) . OnElements ( "a" )
2020-01-20 12:39:21 +08:00
2020-03-09 03:17:03 +08:00
// Allow classes for task lists
// NOTE(review): pattern is unanchored (no ^/$) — confirm that this is intended.
2021-06-24 05:09:51 +08:00
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` task-list-item ` ) ) . OnElements ( "li" )
2020-03-09 03:17:03 +08:00
2023-07-27 22:15:31 +08:00
// Allow classes for org mode list item status.
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^(unchecked|checked|indeterminate)$ ` ) ) . OnElements ( "li" )
2020-04-24 21:22:36 +08:00
// Allow icons
2021-06-24 05:09:51 +08:00
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^icon(\s+[\p { L}\p { N}_-]+)+$ ` ) ) . OnElements ( "i" )
2020-04-26 13:09:08 +08:00
2020-04-29 02:05:39 +08:00
// Allow classes for emojis
// NOTE(review): pattern is unanchored (no ^/$) — confirm that this is intended.
2021-06-24 05:09:51 +08:00
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` emoji ` ) ) . OnElements ( "img" )
2020-04-29 02:05:39 +08:00
2021-06-16 09:02:03 +08:00
// Allow icons, emojis, chroma syntax and keyword markup on span
2022-09-14 00:33:37 +08:00
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^((icon(\s+[\p { L}\p { N}_-]+)+)|(emoji)|(language-math display)|(language-math inline))$|^([a-z][a-z0-9] { 0,2})$|^ ` + keywordClass + ` $ ` ) ) . OnElements ( "span" )
2021-03-30 04:44:28 +08:00
2022-10-21 20:00:53 +08:00
// Allow 'color' and 'background-color' properties for the style attribute on text elements.
policy . AllowStyles ( "color" , "background-color" ) . OnElements ( "span" , "p" )
2022-07-15 14:38:10 +08:00
2020-02-29 04:05:12 +08:00
// Allow generally safe attributes
// Every attribute in generalSafeAttrs is allowed on every element in
// generalSafeElements by the single AllowAttrs call that follows the two lists.
2022-01-21 01:46:10 +08:00
generalSafeAttrs := [ ] string {
"abbr" , "accept" , "accept-charset" ,
2020-02-29 04:05:12 +08:00
"accesskey" , "action" , "align" , "alt" ,
"aria-describedby" , "aria-hidden" , "aria-label" , "aria-labelledby" ,
"axis" , "border" , "cellpadding" , "cellspacing" , "char" ,
"charoff" , "charset" , "checked" ,
"clear" , "cols" , "colspan" , "color" ,
"compact" , "coords" , "datetime" , "dir" ,
"disabled" , "enctype" , "for" , "frame" ,
"headers" , "height" , "hreflang" ,
"hspace" , "ismap" , "label" , "lang" ,
"maxlength" , "media" , "method" ,
"multiple" , "name" , "nohref" , "noshade" ,
"nowrap" , "open" , "prompt" , "readonly" , "rel" , "rev" ,
"rows" , "rowspan" , "rules" , "scope" ,
"selected" , "shape" , "size" , "span" ,
"start" , "summary" , "tabindex" , "target" ,
"title" , "type" , "usemap" , "valign" , "value" ,
"vspace" , "width" , "itemprop" ,
}
// NOTE(review): "h7" and "h8" are not standard HTML heading elements
// (HTML defines h1 through h6) — confirm whether they are needed.
generalSafeElements := [ ] string {
"h1" , "h2" , "h3" , "h4" , "h5" , "h6" , "h7" , "h8" , "br" , "b" , "i" , "strong" , "em" , "a" , "pre" , "code" , "img" , "tt" ,
2024-03-16 19:34:38 +08:00
"div" , "ins" , "del" , "sup" , "sub" , "p" , "ol" , "ul" , "table" , "thead" , "tbody" , "tfoot" , "blockquote" , "label" ,
2020-02-29 04:05:12 +08:00
"dl" , "dt" , "dd" , "kbd" , "q" , "samp" , "var" , "hr" , "ruby" , "rt" , "rp" , "li" , "tr" , "td" , "th" , "s" , "strike" , "summary" ,
"details" , "caption" , "figure" , "figcaption" ,
2023-05-22 05:19:37 +08:00
"abbr" , "bdo" , "cite" , "dfn" , "mark" , "small" , "span" , "time" , "video" , "wbr" ,
2020-02-29 04:05:12 +08:00
}
2021-06-24 05:09:51 +08:00
policy . AllowAttrs ( generalSafeAttrs ... ) . OnElements ( generalSafeElements ... )
2020-02-29 04:05:12 +08:00
// NOTE(review): the text from here down to the next dated line is a commit
// message, not Go code — presumably blame-view residue; confirm and remove.
Allow `<video>` in MarkDown (#22892)
As you can imagine, for the Blender development process it is rather
nice to be able to include videos in issues, pull requests, etc.
This PR allows the `<video>` HTML tag to be used in MarkDown, with the
`src`, `autoplay`, and `controls` attributes.
## Help Needed
To have this fully functional, personally I feel the following things
are still missing, and would appreciate some help from the Gitea team.
### Styling
Some CSS is needed, but I couldn't figure out which of the LESS files
would work. I tried `web_src/less/markup/content.less` and
`web_src/less/_base.less`, but after running `make` the changes weren't
seen in the frontend.
This I would consider a minimal set of CSS rules to be applied:
```css
video {
max-width: 100%;
max-height: 100vh;
}
```
### Default Attributes
It would be fantastic if Gitea could add some default attributes to the
`<video>` tag. Basically `controls` should always be there, as there is
no point in disallowing scrolling through videos, looping them, etc.
### Integration with the attachments system
Another thing that could be added, but probably should be done in a
separate PR, is the integration with the attachments system. Dragging in
a video should attach it, then generate the appropriate MarkDown/HTML.
2023-03-02 05:30:51 +08:00
policy . AllowAttrs ( "src" , "autoplay" , "controls" ) . OnElements ( "video" )
2021-06-24 05:09:51 +08:00
policy . AllowAttrs ( "itemscope" , "itemtype" ) . OnElements ( "div" )
2020-02-29 04:05:12 +08:00
// FIXME: Need to handle longdesc in img but there is no easy way to do it
2019-12-08 03:49:04 +08:00
// Custom keyword markup
// The externally configured rules are applied last, on top of everything above.
2021-06-24 05:09:51 +08:00
addSanitizerRules ( policy , setting . ExternalSanitizerRules )
return policy
}
2023-11-24 00:34:25 +08:00
// createRepoDescriptionPolicy returns a minimal, stricter policy that is used
// for repository descriptions. It is built from bluemonday.NewPolicy() rather
// than from bluemonday.UGCPolicy(), and adds only the elements and attributes
// listed in the body below.
func createRepoDescriptionPolicy ( ) * bluemonday . Policy {
policy := bluemonday . NewPolicy ( )
// Allow italics and bold.
policy . AllowElements ( "i" , "b" , "em" , "strong" )
// Allow code.
policy . AllowElements ( "code" )
// Allow links
policy . AllowAttrs ( "href" , "target" , "rel" ) . OnElements ( "a" )
// Allow classes for emojis
policy . AllowAttrs ( "class" ) . Matching ( regexp . MustCompile ( ` ^emoji$ ` ) ) . OnElements ( "img" , "span" )
policy . AllowAttrs ( "aria-label" ) . OnElements ( "span" )
return policy
}
2021-06-24 05:09:51 +08:00
func addSanitizerRules ( policy * bluemonday . Policy , rules [ ] setting . MarkupSanitizerRule ) {
for _ , rule := range rules {
if rule . AllowDataURIImages {
policy . AllowDataURIImages ( )
}
if rule . Element != "" {
if rule . Regexp != nil {
policy . AllowAttrs ( rule . AllowAttr ) . Matching ( rule . Regexp ) . OnElements ( rule . Element )
} else {
policy . AllowAttrs ( rule . AllowAttr ) . OnElements ( rule . Element )
}
2019-12-08 03:49:04 +08:00
}
}
2017-04-13 10:52:24 +08:00
}
2023-11-24 00:34:25 +08:00
// SanitizeDescription runs s, the HTML generated for a repository
// description, through the description policy and returns the result.
// The package sanitizer is initialized first if that has not happened yet.
func SanitizeDescription(s string) string {
	NewSanitizer()

	policy := sanitizer.descriptionPolicy
	return policy.Sanitize(s)
}
2017-04-13 10:52:24 +08:00
// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
func Sanitize ( s string ) string {
2017-04-19 19:16:36 +08:00
NewSanitizer ( )
2021-06-24 05:09:51 +08:00
return sanitizer . defaultPolicy . Sanitize ( s )
2017-04-13 10:52:24 +08:00
}
2019-12-31 09:53:28 +08:00
// SanitizeReader sanitizes a Reader
2021-11-19 18:46:47 +08:00
func SanitizeReader ( r io . Reader , renderer string , w io . Writer ) error {
2019-12-31 09:53:28 +08:00
NewSanitizer ( )
2021-06-24 05:09:51 +08:00
policy , exist := sanitizer . rendererPolicies [ renderer ]
if ! exist {
policy = sanitizer . defaultPolicy
}
2021-11-19 18:46:47 +08:00
return policy . SanitizeReaderToWriter ( r , w )
2019-12-31 09:53:28 +08:00
}