feat: process HTML on comment text

This commit is contained in:
httpjamesm 2024-03-09 12:05:00 -05:00
parent 9d77743865
commit 81c4568564
No known key found for this signature in database
3 changed files with 12 additions and 11 deletions

View file

@ -180,7 +180,7 @@ func extractQuestionData(doc *goquery.Document, domain string) (question types.F
if err != nil {
return question, err
}
question.Body = template.HTML(processHTMLBody(questionBodyParentHTML))
question.Body = template.HTML(utils.ProcessHTMLBody(questionBodyParentHTML))
// Extract the shortened body description.
shortenedBody := strings.TrimSpace(questionBodyParent.Text())
@ -245,7 +245,7 @@ func extractAnswersData(doc *goquery.Document, domain string) ([]types.FilteredA
answerBodyHTML, _ := answerBody.Html()
// Process code blocks within the answer.
processedAnswerBody := processHTMLBody(answerBodyHTML)
processedAnswerBody := utils.ProcessHTMLBody(answerBodyHTML)
answer.Body = template.HTML(html.UnescapeString(processedAnswerBody))
// Extract author information and timestamp.
@ -257,14 +257,6 @@ func extractAnswersData(doc *goquery.Document, domain string) ([]types.FilteredA
return answers, nil
}
// processHTMLBody prepares a fragment of post HTML for rendering by chaining
// three helpers from the utils package, innermost call first:
// HighlightCodeBlocks, then ReplaceImgTags, then ReplaceStackOverflowLinks.
// The output of the final helper is returned unchanged.
func processHTMLBody(bodyHTML string) string {
	return utils.ReplaceStackOverflowLinks(
		utils.ReplaceImgTags(
			utils.HighlightCodeBlocks(bodyHTML),
		),
	)
}
// extractAnswerAuthorInfo extracts the author name, URL, and timestamp from an answer block.
// It directly mutates the answer.
func extractAnswerAuthorInfo(selection *goquery.Selection, answer *types.FilteredAnswer, domain string) {

View file

@ -50,7 +50,7 @@ func FindAndReturnComments(inHtml, domain string, postLayout *goquery.Selection)
commentTimestamp := commentBody.Find("span.relativetime-clean").Text()
newFilteredComment := types.FilteredComment{
Text: template.HTML(commentCopy),
Text: template.HTML(ProcessHTMLBody(commentCopy)),
Timestamp: commentTimestamp,
AuthorName: commentAuthor.Text(),
AuthorURL: commentAuthorURL,

9
src/utils/process.go Normal file
View file

@ -0,0 +1,9 @@
package utils
// ProcessHTMLBody feeds bodyHTML through the package's HTML preparation steps
// and returns the final result. The steps run in a fixed order, each one
// consuming the previous step's output:
//
//  1. HighlightCodeBlocks
//  2. ReplaceImgTags
//  3. ReplaceStackOverflowLinks
func ProcessHTMLBody(bodyHTML string) string {
	highlighted := HighlightCodeBlocks(bodyHTML)
	withImagesReplaced := ReplaceImgTags(highlighted)
	return ReplaceStackOverflowLinks(withImagesReplaced)
}