Add sanitizer rules per renderer (#16110)

* Added sanitizer rules per renderer.

* Updated documentation.

Co-authored-by: techknowlogick <techknowlogick@gitea.io>
This commit is contained in:
KN4CK3R 2021-06-23 23:09:51 +02:00 committed by GitHub
parent eb324a9402
commit c9c7afda1a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 215 additions and 113 deletions

View file

@ -10,6 +10,7 @@ import (
"html"
"io"
"io/ioutil"
"regexp"
"strconv"
"code.gitea.io/gitea/modules/csv"
@ -38,6 +39,15 @@ func (Renderer) Extensions() []string {
return []string{".csv", ".tsv"}
}
// SanitizerRules implements markup.Renderer
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
return []setting.MarkupSanitizerRule{
{Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)},
{Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
{Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
}
}
func writeField(w io.Writer, element, class, field string) error {
if _, err := io.WriteString(w, "<"); err != nil {
return err

View file

@ -30,7 +30,7 @@ func RegisterRenderers() {
// Renderer implements markup.Renderer for external tools
type Renderer struct {
setting.MarkupRenderer
*setting.MarkupRenderer
}
// Name returns the external tool name
@ -48,6 +48,11 @@ func (p *Renderer) Extensions() []string {
return p.FileExtensions
}
// SanitizerRules implements markup.Renderer
func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
return p.MarkupSanitizerRules
}
func envMark(envName string) string {
if runtime.GOOS == "windows" {
return "%" + envName + "%"

View file

@ -112,7 +112,7 @@ func TestRender_links(t *testing.T) {
defaultCustom := setting.Markdown.CustomURLSchemes
setting.Markdown.CustomURLSchemes = []string{"ftp", "magnet"}
ReplaceSanitizer()
InitializeSanitizer()
CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes)
test(
@ -192,7 +192,7 @@ func TestRender_links(t *testing.T) {
// Restore previous settings
setting.Markdown.CustomURLSchemes = defaultCustom
ReplaceSanitizer()
InitializeSanitizer()
CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes)
}

View file

@ -199,7 +199,7 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
}
_ = lw.Close()
}()
buf := markup.SanitizeReader(rd)
buf := markup.SanitizeReader(rd, "")
_, err := io.Copy(output, buf)
return err
}
@ -215,7 +215,7 @@ func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error
if log.IsDebug() {
log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
}
ret := markup.SanitizeReader(input)
ret := markup.SanitizeReader(input, "")
_, err = io.Copy(output, ret)
if err != nil {
log.Error("SanitizeReader failed: %v", err)
@ -249,6 +249,11 @@ func (Renderer) Extensions() []string {
return setting.Markdown.FileExtensions
}
// SanitizerRules implements markup.Renderer
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
return []setting.MarkupSanitizerRule{}
}
// Render implements markup.Renderer
func (Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
return render(ctx, input, output)

View file

@ -13,6 +13,7 @@ import (
"code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/alecthomas/chroma"
@ -41,6 +42,11 @@ func (Renderer) Extensions() []string {
return []string{".org"}
}
// SanitizerRules implements markup.Renderer
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
return []setting.MarkupSanitizerRule{}
}
// Render renders orgmode rawbytes to HTML
func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
htmlWriter := org.NewHTMLWriter()

View file

@ -81,6 +81,7 @@ type Renderer interface {
Name() string // markup format name
Extensions() []string
NeedPostProcess() bool
SanitizerRules() []setting.MarkupSanitizerRule
Render(ctx *RenderContext, input io.Reader, output io.Writer) error
}
@ -136,37 +137,32 @@ func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Wr
_ = pw.Close()
}()
if renderer.NeedPostProcess() {
pr2, pw2 := io.Pipe()
defer func() {
_ = pr2.Close()
_ = pw2.Close()
}()
pr2, pw2 := io.Pipe()
defer func() {
_ = pr2.Close()
_ = pw2.Close()
}()
wg.Add(1)
go func() {
buf := SanitizeReader(pr2)
_, err = io.Copy(output, buf)
_ = pr2.Close()
wg.Done()
}()
wg.Add(1)
go func() {
buf := SanitizeReader(pr2, renderer.Name())
_, err = io.Copy(output, buf)
_ = pr2.Close()
wg.Done()
}()
wg.Add(1)
go func() {
wg.Add(1)
go func() {
if renderer.NeedPostProcess() {
err = PostProcess(ctx, pr, pw2)
_ = pr.Close()
_ = pw2.Close()
wg.Done()
}()
} else {
wg.Add(1)
go func() {
buf := SanitizeReader(pr)
_, err = io.Copy(output, buf)
_ = pr.Close()
wg.Done()
}()
}
} else {
_, err = io.Copy(pw2, pr)
}
_ = pr.Close()
_ = pw2.Close()
wg.Done()
}()
if err1 := renderer.Render(ctx, input, pw); err1 != nil {
return err1
}

View file

@ -19,8 +19,9 @@ import (
// Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow
// any modification to the underlying policies once it's been created.
type Sanitizer struct {
policy *bluemonday.Policy
init sync.Once
defaultPolicy *bluemonday.Policy
rendererPolicies map[string]*bluemonday.Policy
init sync.Once
}
var sanitizer = &Sanitizer{}
@ -30,47 +31,57 @@ var sanitizer = &Sanitizer{}
// entire application lifecycle.
func NewSanitizer() {
sanitizer.init.Do(func() {
ReplaceSanitizer()
InitializeSanitizer()
})
}
// ReplaceSanitizer replaces the current sanitizer to account for changes in settings
func ReplaceSanitizer() {
sanitizer.policy = bluemonday.UGCPolicy()
// InitializeSanitizer (re)initializes the current sanitizer to account for changes in settings
func InitializeSanitizer() {
sanitizer.rendererPolicies = map[string]*bluemonday.Policy{}
sanitizer.defaultPolicy = createDefaultPolicy()
for name, renderer := range renderers {
sanitizerRules := renderer.SanitizerRules()
if len(sanitizerRules) > 0 {
policy := createDefaultPolicy()
addSanitizerRules(policy, sanitizerRules)
sanitizer.rendererPolicies[name] = policy
}
}
}
func createDefaultPolicy() *bluemonday.Policy {
policy := bluemonday.UGCPolicy()
// For Chroma markdown plugin
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^is-loading$`)).OnElements("pre")
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^is-loading$`)).OnElements("pre")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code")
// Checkboxes
sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
sanitizer.policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
// Custom URL-Schemes
if len(setting.Markdown.CustomURLSchemes) > 0 {
sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
}
// Allow classes for anchors
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue`)).OnElements("a")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue`)).OnElements("a")
// Allow classes for task lists
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li")
// Allow icons
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i")
// Allow unlabelled labels
sanitizer.policy.AllowNoAttrs().OnElements("label")
policy.AllowNoAttrs().OnElements("label")
// Allow classes for emojis
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
// Allow icons, emojis, chroma syntax and keyword markup on span
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span")
// Allow data tables
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`data-table`)).OnElements("table")
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`line-num`)).OnElements("th", "td")
policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span")
// Allow generally safe attributes
generalSafeAttrs := []string{"abbr", "accept", "accept-charset",
@ -101,18 +112,29 @@ func ReplaceSanitizer() {
"abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "wbr",
}
sanitizer.policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
sanitizer.policy.AllowAttrs("itemscope", "itemtype").OnElements("div")
policy.AllowAttrs("itemscope", "itemtype").OnElements("div")
// FIXME: Need to handle longdesc in img but there is no easy way to do it
// Custom keyword markup
for _, rule := range setting.ExternalSanitizerRules {
if rule.Regexp != nil {
sanitizer.policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
} else {
sanitizer.policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
addSanitizerRules(policy, setting.ExternalSanitizerRules)
return policy
}
func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) {
for _, rule := range rules {
if rule.AllowDataURIImages {
policy.AllowDataURIImages()
}
if rule.Element != "" {
if rule.Regexp != nil {
policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
} else {
policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
}
}
}
}
@ -120,11 +142,15 @@ func ReplaceSanitizer() {
// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
func Sanitize(s string) string {
NewSanitizer()
return sanitizer.policy.Sanitize(s)
return sanitizer.defaultPolicy.Sanitize(s)
}
// SanitizeReader sanitizes a Reader
func SanitizeReader(r io.Reader) *bytes.Buffer {
func SanitizeReader(r io.Reader, renderer string) *bytes.Buffer {
NewSanitizer()
return sanitizer.policy.SanitizeReader(r)
policy, exist := sanitizer.rendererPolicies[renderer]
if !exist {
policy = sanitizer.defaultPolicy
}
return policy.SanitizeReader(r)
}