From 596ffaa02a50e8f54cc40c0df263f619186a0aec Mon Sep 17 00:00:00 2001 From: httpjamesm Date: Sat, 9 Mar 2024 11:51:59 -0500 Subject: [PATCH] feat: replace stackoverflow and exchange links --- go.mod | 4 ++++ go.sum | 2 ++ src/utils/links.go | 45 +++++++++++++++++++++++++++++++++++++ src/utils/links_test.go | 49 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 100 insertions(+) create mode 100644 src/utils/links.go create mode 100644 src/utils/links_test.go diff --git a/go.mod b/go.mod index 2545ecc..823e3de 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( require ( github.com/andybalholm/cascadia v1.3.1 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/dlclark/regexp2 v1.7.0 // indirect github.com/gin-contrib/sse v0.1.0 // indirect github.com/go-playground/locales v0.14.0 // indirect @@ -25,6 +26,8 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pelletier/go-toml/v2 v2.0.6 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/stretchr/testify v1.9.0 // indirect github.com/ugorji/go/codec v1.2.8 // indirect golang.org/x/crypto v0.4.0 // indirect golang.org/x/net v0.7.0 // indirect @@ -32,4 +35,5 @@ require ( golang.org/x/text v0.7.0 // indirect google.golang.org/protobuf v1.28.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 017319e..61fbcb1 100644 --- a/go.sum +++ b/go.sum @@ -72,6 +72,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 
// stackOverflowLinkQualifierRegex matches the opening tag of an anchor element
// up to and including its double-quoted href attribute, provided that:
//   - the element is an <a> tag (other tags carrying an href are ignored),
//   - the href path begins with /q/ or /questions/,
//   - if the href has a host, it is stackoverflow.com, stackexchange.com or a
//     subdomain of either.
//
// Capture group 1 is the raw href value.
var stackOverflowLinkQualifierRegex = regexp.MustCompile(`<a\s(?:[^>]*\s)?href="((?:https?://(?:www\.)?(?:\w+\.)*(?:stackoverflow|stackexchange)\.com)?/(?:q|questions)/[^"]*)"`)

// ReplaceStackOverflowLinks rewrites question links found in anchor elements of
// html so that they are host-relative:
//
//   - links that are already relative are kept,
//   - links to stackoverflow.com (with or without "www.") lose their scheme and
//     host, e.g. https://stackoverflow.com/questions/1/x -> /questions/1/x,
//   - links to a subdomain such as unix.stackexchange.com are moved under
//     /exchange/<subdomain>, e.g. /exchange/unix/questions/1/x.
//
// Query strings and fragments are preserved. Everything outside the href
// values is copied through unchanged; html without matching anchors is
// returned as is.
func ReplaceStackOverflowLinks(html string) string {
	matches := stackOverflowLinkQualifierRegex.FindAllStringSubmatchIndex(html, -1)
	if len(matches) == 0 {
		return html
	}

	var out strings.Builder
	out.Grow(len(html))

	prev := 0
	for _, m := range matches {
		// m[2]:m[3] is capture group 1, the href value. Splicing by index
		// guarantees only the href is touched, even when another attribute
		// of the same tag happens to contain identical text.
		hrefStart, hrefEnd := m[2], m[3]
		out.WriteString(html[prev:hrefStart])
		out.WriteString(rewriteStackOverflowHref(html[hrefStart:hrefEnd]))
		prev = hrefEnd
	}
	out.WriteString(html[prev:])

	return out.String()
}

// rewriteStackOverflowHref converts one matched href value into its
// host-relative form. An href that cannot be parsed is returned unchanged.
func rewriteStackOverflowHref(href string) string {
	u, err := url.Parse(href)
	if err != nil {
		return href
	}

	// "www." is an alias of the bare domain, not a Stack Exchange site name.
	host := strings.TrimPrefix(u.Host, "www.")
	labels := strings.Split(host, ".")

	switch {
	case len(labels) > 2:
		// A subdomain is present: prepend it to the path.
		prefix := "/exchange/" + labels[0]
		u.Path = prefix + u.Path
		if u.RawPath != "" {
			// Keep the author's original escaping valid for the new path.
			u.RawPath = prefix + u.RawPath
		}
		u.Scheme, u.Host = "", ""
	case host == "stackoverflow.com":
		u.Scheme, u.Host = "", ""
	}

	// URL.String re-escapes the path and re-inserts the "?" and "#"
	// separators, so the result is always safe inside the quoted attribute.
	return u.String()
}
new file mode 100644 index 0000000..439970c --- /dev/null +++ b/src/utils/links_test.go @@ -0,0 +1,49 @@ +package utils + +import ( + "fmt" + "github.com/stretchr/testify/assert" + "strings" + "testing" +) + +var sampleInput = `
+
+
+
+
+ This question already has answers here: + +
+
+
+
+ + + +
Closed 4 years ago.
+
` + +func TestReplaceStackOverflowLinks(t *testing.T) { + replacedLinks := ReplaceStackOverflowLinks(sampleInput) + + fmt.Println(replacedLinks) + + assert.False(t, strings.Contains(replacedLinks, "stackoverflow.com")) + assert.False(t, strings.Contains(replacedLinks, "stackexchange.com")) +}