Refactor markup render to fix various path problems (#34114)

* Fix #33972 * Use consistent path resolution for links and media. * The markup renderers no longer need to resolve paths themselves; instead, all paths are correctly resolved in the "post process" step. * Fix #33274 * Since 1.23, all paths starting with "/" are relative to the current render context (for example: the current repo branch). * Introduce `/:root/path-relative-to-root`: such a path is rendered relative to "ROOT_URL".
2025-06-29 12:39:54 +00:00 · 2025-04-04 23:45:23 +08:00 · 2025-04-04 23:45:23 +08:00 · 6cee3bfa96
commit 6cee3bfa96
parent e8b54d9e44
28 changed files with 239 additions and 286 deletions
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@ -32,7 +32,6 @@ type globalVarsType struct {
 	comparePattern          *regexp.Regexp
 	fullURLPattern          *regexp.Regexp
 	emailRegex              *regexp.Regexp
-	blackfridayExtRegex     *regexp.Regexp
 	emojiShortCodeRegex     *regexp.Regexp
 	issueFullPattern        *regexp.Regexp
 	filesChangedFullPattern *regexp.Regexp
@ -74,9 +73,6 @@ var globalVars = sync.OnceValue(func() *globalVarsType {
 	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
 	v.emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))")

-	// blackfridayExtRegex is for blackfriday extensions create IDs like fn:user-content-footnote
-	v.blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)
-
 	// emojiShortCodeRegex find emoji by alias like :smile:
 	v.emojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)

@ -94,17 +90,12 @@ var globalVars = sync.OnceValue(func() *globalVarsType {
 	return v
 })

-// IsFullURLBytes reports whether link fits valid format.
-func IsFullURLBytes(link []byte) bool {
-	return globalVars().fullURLPattern.Match(link)
-}
-
 func IsFullURLString(link string) bool {
 	return globalVars().fullURLPattern.MatchString(link)
 }

 func IsNonEmptyRelativePath(link string) bool {
-	return link != "" && !IsFullURLString(link) && link[0] != '/' && link[0] != '?' && link[0] != '#'
+	return link != "" && !IsFullURLString(link) && link[0] != '?' && link[0] != '#'
 }

 // CustomLinkURLSchemes allows for additional schemes to be detected when parsing links within text
@ -316,44 +307,38 @@ func isEmojiNode(node *html.Node) bool {
 }

 func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Node {
-	// Add user-content- to IDs and "#" links if they don't already have them
-	for idx, attr := range node.Attr {
-		val := strings.TrimPrefix(attr.Val, "#")
-		notHasPrefix := !(strings.HasPrefix(val, "user-content-") || globalVars().blackfridayExtRegex.MatchString(val))
-
-		if attr.Key == "id" && notHasPrefix {
-			node.Attr[idx].Val = "user-content-" + attr.Val
-		}
-
-		if attr.Key == "href" && strings.HasPrefix(attr.Val, "#") && notHasPrefix {
-			node.Attr[idx].Val = "#user-content-" + val
-		}
-	}
-
-	switch node.Type {
-	case html.TextNode:
+	if node.Type == html.TextNode {
 		for _, proc := range procs {
 			proc(ctx, node) // it might add siblings
 		}
+		return node.NextSibling
+	}
+	if node.Type != html.ElementNode {
+		return node.NextSibling
+	}

-	case html.ElementNode:
-		if isEmojiNode(node) {
-			// TextNode emoji will be converted to `<span class="emoji">`, then the next iteration will visit the "span"
-			// if we don't stop it, it will go into the TextNode again and create an infinite recursion
-			return node.NextSibling
-		} else if node.Data == "code" || node.Data == "pre" {
-			return node.NextSibling // ignore code and pre nodes
-		} else if node.Data == "img" {
-			return visitNodeImg(ctx, node)
-		} else if node.Data == "video" {
-			return visitNodeVideo(ctx, node)
-		} else if node.Data == "a" {
-			procs = emojiProcessors // Restrict text in links to emojis
-		}
-		for n := node.FirstChild; n != nil; {
-			n = visitNode(ctx, procs, n)
-		}
-	default:
+	processNodeAttrID(node)
+
+	if isEmojiNode(node) {
+		// TextNode emoji will be converted to `<span class="emoji">`, then the next iteration will visit the "span"
+		// if we don't stop it, it will go into the TextNode again and create an infinite recursion
+		return node.NextSibling
+	} else if node.Data == "code" || node.Data == "pre" {
+		return node.NextSibling // ignore code and pre nodes
+	} else if node.Data == "img" {
+		return visitNodeImg(ctx, node)
+	} else if node.Data == "video" {
+		return visitNodeVideo(ctx, node)
+	}
+
+	if node.Data == "a" {
+		processNodeA(ctx, node)
+		// only use emoji processors for the content in the "A" tag,
+		// because the content there is not processable, for example: the content is a commit id or a full URL.
+		procs = emojiProcessors
+	}
+	for n := node.FirstChild; n != nil; {
+		n = visitNode(ctx, procs, n)
 	}
 	return node.NextSibling
 }