mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2025-05-25 11:22:16 +00:00
Change markdown rendering from blackfriday to goldmark (#9533)
* Move to goldmark Markdown rendering moved from blackfriday to the goldmark. Multiple subtle changes required to the goldmark extensions to keep current rendering and defaults. Can go further with goldmark linkify and have this work within markdown rendering making the link processor unnecessary. Need to think about how to go about allowing extensions - at present it seems that these would be hard to do without recompilation. * linter fixes Co-authored-by: Lauris BH <lauris@nix.lv>
This commit is contained in:
parent
0c07f1de5b
commit
27757714d0
83 changed files with 13838 additions and 6297 deletions
|
@ -6,33 +6,86 @@ package mdstripper
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"sync"
|
||||
|
||||
"io"
|
||||
|
||||
"github.com/russross/blackfriday/v2"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/markup/common"
|
||||
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/extension"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/renderer"
|
||||
"github.com/yuin/goldmark/renderer/html"
|
||||
"github.com/yuin/goldmark/text"
|
||||
)
|
||||
|
||||
// MarkdownStripper extends blackfriday.Renderer
|
||||
type MarkdownStripper struct {
|
||||
links []string
|
||||
coallesce bool
|
||||
empty bool
|
||||
type stripRenderer struct {
|
||||
links []string
|
||||
empty bool
|
||||
}
|
||||
|
||||
const (
|
||||
blackfridayExtensions = 0 |
|
||||
blackfriday.NoIntraEmphasis |
|
||||
blackfriday.Tables |
|
||||
blackfriday.FencedCode |
|
||||
blackfriday.Strikethrough |
|
||||
blackfriday.NoEmptyLineBeforeBlock |
|
||||
blackfriday.DefinitionLists |
|
||||
blackfriday.Footnotes |
|
||||
blackfriday.HeadingIDs |
|
||||
blackfriday.AutoHeadingIDs |
|
||||
// Not included in modules/markup/markdown/markdown.go;
|
||||
// required here to process inline links
|
||||
blackfriday.Autolink
|
||||
)
|
||||
func (r *stripRenderer) Render(w io.Writer, source []byte, doc ast.Node) error {
|
||||
return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if !entering {
|
||||
return ast.WalkContinue, nil
|
||||
}
|
||||
switch v := n.(type) {
|
||||
case *ast.Text:
|
||||
if !v.IsRaw() {
|
||||
_, prevSibIsText := n.PreviousSibling().(*ast.Text)
|
||||
coalesce := prevSibIsText
|
||||
r.processString(
|
||||
w,
|
||||
v.Text(source),
|
||||
coalesce)
|
||||
if v.SoftLineBreak() {
|
||||
r.doubleSpace(w)
|
||||
}
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
case *ast.Link:
|
||||
r.processLink(w, v.Destination)
|
||||
return ast.WalkSkipChildren, nil
|
||||
case *ast.AutoLink:
|
||||
r.processLink(w, v.URL(source))
|
||||
return ast.WalkSkipChildren, nil
|
||||
}
|
||||
return ast.WalkContinue, nil
|
||||
})
|
||||
}
|
||||
|
||||
func (r *stripRenderer) doubleSpace(w io.Writer) {
|
||||
if !r.empty {
|
||||
_, _ = w.Write([]byte{'\n'})
|
||||
}
|
||||
}
|
||||
|
||||
func (r *stripRenderer) processString(w io.Writer, text []byte, coalesce bool) {
|
||||
// Always break-up words
|
||||
if !coalesce {
|
||||
r.doubleSpace(w)
|
||||
}
|
||||
_, _ = w.Write(text)
|
||||
r.empty = false
|
||||
}
|
||||
|
||||
func (r *stripRenderer) processLink(w io.Writer, link []byte) {
|
||||
// Links are processed out of band
|
||||
r.links = append(r.links, string(link))
|
||||
}
|
||||
|
||||
// GetLinks returns the list of link data collected while parsing
|
||||
func (r *stripRenderer) GetLinks() []string {
|
||||
return r.links
|
||||
}
|
||||
|
||||
// AddOptions adds given option to this renderer.
|
||||
func (r *stripRenderer) AddOptions(...renderer.Option) {
|
||||
// no-op
|
||||
}
|
||||
|
||||
// StripMarkdown parses markdown content by removing all markup and code blocks
|
||||
// in order to extract links and other references
|
||||
|
@ -41,78 +94,40 @@ func StripMarkdown(rawBytes []byte) (string, []string) {
|
|||
return string(buf), links
|
||||
}
|
||||
|
||||
var stripParser parser.Parser
|
||||
var once = sync.Once{}
|
||||
|
||||
// StripMarkdownBytes parses markdown content by removing all markup and code blocks
|
||||
// in order to extract links and other references
|
||||
func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) {
|
||||
stripper := &MarkdownStripper{
|
||||
once.Do(func() {
|
||||
gdMarkdown := goldmark.New(
|
||||
goldmark.WithExtensions(extension.Table,
|
||||
extension.Strikethrough,
|
||||
extension.TaskList,
|
||||
extension.DefinitionList,
|
||||
common.FootnoteExtension,
|
||||
common.Linkify,
|
||||
),
|
||||
goldmark.WithParserOptions(
|
||||
parser.WithAttribute(),
|
||||
parser.WithAutoHeadingID(),
|
||||
),
|
||||
goldmark.WithRendererOptions(
|
||||
html.WithUnsafe(),
|
||||
),
|
||||
)
|
||||
stripParser = gdMarkdown.Parser()
|
||||
})
|
||||
stripper := &stripRenderer{
|
||||
links: make([]string, 0, 10),
|
||||
empty: true,
|
||||
}
|
||||
|
||||
parser := blackfriday.New(blackfriday.WithRenderer(stripper), blackfriday.WithExtensions(blackfridayExtensions))
|
||||
ast := parser.Parse(rawBytes)
|
||||
reader := text.NewReader(rawBytes)
|
||||
doc := stripParser.Parse(reader)
|
||||
var buf bytes.Buffer
|
||||
stripper.RenderHeader(&buf, ast)
|
||||
ast.Walk(func(node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
|
||||
return stripper.RenderNode(&buf, node, entering)
|
||||
})
|
||||
stripper.RenderFooter(&buf, ast)
|
||||
if err := stripper.Render(&buf, rawBytes, doc); err != nil {
|
||||
log.Error("Unable to strip: %v", err)
|
||||
}
|
||||
return buf.Bytes(), stripper.GetLinks()
|
||||
}
|
||||
|
||||
// RenderNode is the main rendering method. It will be called once for
|
||||
// every leaf node and twice for every non-leaf node (first with
|
||||
// entering=true, then with entering=false). The method should write its
|
||||
// rendition of the node to the supplied writer w.
|
||||
func (r *MarkdownStripper) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus {
|
||||
if !entering {
|
||||
return blackfriday.GoToNext
|
||||
}
|
||||
switch node.Type {
|
||||
case blackfriday.Text:
|
||||
r.processString(w, node.Literal, node.Parent == nil)
|
||||
return blackfriday.GoToNext
|
||||
case blackfriday.Link:
|
||||
r.processLink(w, node.LinkData.Destination)
|
||||
r.coallesce = false
|
||||
return blackfriday.SkipChildren
|
||||
}
|
||||
r.coallesce = false
|
||||
return blackfriday.GoToNext
|
||||
}
|
||||
|
||||
// RenderHeader is a method that allows the renderer to produce some
|
||||
// content preceding the main body of the output document.
|
||||
func (r *MarkdownStripper) RenderHeader(w io.Writer, ast *blackfriday.Node) {
|
||||
}
|
||||
|
||||
// RenderFooter is a symmetric counterpart of RenderHeader.
|
||||
func (r *MarkdownStripper) RenderFooter(w io.Writer, ast *blackfriday.Node) {
|
||||
}
|
||||
|
||||
func (r *MarkdownStripper) doubleSpace(w io.Writer) {
|
||||
if !r.empty {
|
||||
_, _ = w.Write([]byte{'\n'})
|
||||
}
|
||||
}
|
||||
|
||||
func (r *MarkdownStripper) processString(w io.Writer, text []byte, coallesce bool) {
|
||||
// Always break-up words
|
||||
if !coallesce || !r.coallesce {
|
||||
r.doubleSpace(w)
|
||||
}
|
||||
_, _ = w.Write(text)
|
||||
r.coallesce = coallesce
|
||||
r.empty = false
|
||||
}
|
||||
|
||||
func (r *MarkdownStripper) processLink(w io.Writer, link []byte) {
|
||||
// Links are processed out of band
|
||||
r.links = append(r.links, string(link))
|
||||
r.coallesce = false
|
||||
}
|
||||
|
||||
// GetLinks returns the list of link data collected while parsing
|
||||
func (r *MarkdownStripper) GetLinks() []string {
|
||||
return r.links
|
||||
}
|
||||
|
|
|
@ -53,6 +53,20 @@ A HIDDEN ` + "`" + `GHOST` + "`" + ` IN THIS LINE.
|
|||
[]string{
|
||||
"link",
|
||||
}},
|
||||
{
|
||||
"Simply closes: #29 yes",
|
||||
[]string{
|
||||
"Simply closes: #29 yes",
|
||||
},
|
||||
[]string{},
|
||||
},
|
||||
{
|
||||
"Simply closes: !29 yes",
|
||||
[]string{
|
||||
"Simply closes: !29 yes",
|
||||
},
|
||||
[]string{},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range list {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue