mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2025-06-21 16:40:52 +00:00
An attempt at solving #7956. This (and rebuilding the index) seems enough to ensure the issue *appears* among the results. However, I couldn't figure out from [bleve docs](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md) how to affect the scoring based on specific fields, or whether that is possible at all. Disclaimer: I've never written Go before, sorry 😅 take it as a quick PoC more than anything. ### Tests - I added test coverage for Go changes... - [x] in their respective `*_test.go` for unit tests. - [ ] in the `tests/integration` directory if it involves interactions with a live Forgejo server. - I added test coverage for JavaScript changes... - [ ] in `web_src/js/*.test.js` if it can be unit tested. - [ ] in `tests/e2e/*.test.e2e.js` if it requires interactions with a live Forgejo server (see also the [developer guide for JavaScript testing](https://codeberg.org/forgejo/forgejo/src/branch/forgejo/tests/e2e/README.md#end-to-end-tests)). ### Documentation - [ ] I created a pull request [to the documentation](https://codeberg.org/forgejo/docs) to explain to Forgejo users how to use this change. - [x] I did not document these changes and I do not expect someone else to do it. ### Release notes - [ ] I do not want this change to show in the release notes. - [x] I want the title to show in the release notes with a link to this pull request. - [ ] I want the content of the `release-notes/<pull request number>.md` to be used for the release notes instead of the title. 
<!--start release-notes-assistant--> ## Release notes <!--URL:https://codeberg.org/forgejo/forgejo--> - Features - [PR](https://codeberg.org/forgejo/forgejo/pulls/7968): <!--number 7968 --><!--line 0 --><!--description QWRkIGlzc3VlIG51bWJlciB0byB0aGUgc2VhcmNoIGluZGV4LCByYW5rIG51bWJlciBhbmQgdGl0bGUgbWF0Y2hlcyBoaWdoZXIgKCM3OTU2KQ==-->Add issue number to the search index, rank number and title matches higher (#7956)<!--description--> <!--end release-notes-assistant--> Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/7968 Reviewed-by: Shiny Nematoda <snematoda@noreply.codeberg.org> Co-authored-by: Danko Aleksejevs <danko@very.lv> Co-committed-by: Danko Aleksejevs <danko@very.lv>
This commit is contained in:
parent
2529923dea
commit
905a5748a8
9 changed files with 100 additions and 14 deletions
|
@ -260,11 +260,11 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
|
||||||
if opts.Mode == internal.CodeSearchModeUnion {
|
if opts.Mode == internal.CodeSearchModeUnion {
|
||||||
query := bleve.NewDisjunctionQuery()
|
query := bleve.NewDisjunctionQuery()
|
||||||
for _, field := range strings.Fields(opts.Keyword) {
|
for _, field := range strings.Fields(opts.Keyword) {
|
||||||
query.AddQuery(inner_bleve.MatchPhraseQuery(field, "Content", repoIndexerAnalyzer, false))
|
query.AddQuery(inner_bleve.MatchPhraseQuery(field, "Content", repoIndexerAnalyzer, false, 1.0))
|
||||||
}
|
}
|
||||||
keywordQuery = query
|
keywordQuery = query
|
||||||
} else {
|
} else {
|
||||||
keywordQuery = inner_bleve.MatchPhraseQuery(opts.Keyword, "Content", repoIndexerAnalyzer, false)
|
keywordQuery = inner_bleve.MatchPhraseQuery(opts.Keyword, "Content", repoIndexerAnalyzer, false, 1.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(opts.RepoIDs) > 0 {
|
if len(opts.RepoIDs) > 0 {
|
||||||
|
|
|
@ -29,11 +29,12 @@ func MatchQuery(matchTerm, field, analyzer string, fuzziness int) *query.MatchQu
|
||||||
}
|
}
|
||||||
|
|
||||||
// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
|
// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer.
// autoFuzzy is forwarded to SetAutoFuzziness and boost is forwarded to SetBoost on the
// returned query; the call sites in this change pass 1.0 for fields that get no extra weight.
|
||||||
func MatchPhraseQuery(matchPhrase, field, analyzer string, autoFuzzy bool) *query.MatchPhraseQuery {
|
func MatchPhraseQuery(matchPhrase, field, analyzer string, autoFuzzy bool, boost float64) *query.MatchPhraseQuery {
|
||||||
q := bleve.NewMatchPhraseQuery(matchPhrase)
|
q := bleve.NewMatchPhraseQuery(matchPhrase)
|
||||||
q.FieldVal = field
|
q.FieldVal = field
|
||||||
q.Analyzer = analyzer
|
q.Analyzer = analyzer
|
||||||
q.SetAutoFuzziness(autoFuzzy)
|
q.SetAutoFuzziness(autoFuzzy)
|
||||||

q.SetBoost(boost)
|
||||||
return q
|
return q
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -23,7 +23,7 @@ import (
|
||||||
const (
|
const (
|
||||||
issueIndexerAnalyzer = "issueIndexer"
|
issueIndexerAnalyzer = "issueIndexer"
|
||||||
issueIndexerDocType = "issueIndexerDocType"
|
issueIndexerDocType = "issueIndexerDocType"
|
||||||
issueIndexerLatestVersion = 4
|
issueIndexerLatestVersion = 5
|
||||||
)
|
)
|
||||||
|
|
||||||
const unicodeNormalizeName = "unicodeNormalize"
|
const unicodeNormalizeName = "unicodeNormalize"
|
||||||
|
@ -69,6 +69,7 @@ func generateIssueIndexMapping() (mapping.IndexMapping, error) {
|
||||||
|
|
||||||
docMapping.AddFieldMappingsAt("is_public", boolFieldMapping)
|
docMapping.AddFieldMappingsAt("is_public", boolFieldMapping)
|
||||||
|
|
||||||
|
docMapping.AddFieldMappingsAt("index", numberFieldMapping)
|
||||||
docMapping.AddFieldMappingsAt("title", textFieldMapping)
|
docMapping.AddFieldMappingsAt("title", textFieldMapping)
|
||||||
docMapping.AddFieldMappingsAt("content", textFieldMapping)
|
docMapping.AddFieldMappingsAt("content", textFieldMapping)
|
||||||
docMapping.AddFieldMappingsAt("comments", textFieldMapping)
|
docMapping.AddFieldMappingsAt("comments", textFieldMapping)
|
||||||
|
@ -163,9 +164,15 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
||||||
q := bleve.NewBooleanQuery()
|
q := bleve.NewBooleanQuery()
|
||||||
for _, token := range tokens {
|
for _, token := range tokens {
|
||||||
innerQ := bleve.NewDisjunctionQuery(
|
innerQ := bleve.NewDisjunctionQuery(
|
||||||
inner_bleve.MatchPhraseQuery(token.Term, "title", issueIndexerAnalyzer, token.Fuzzy),
|
inner_bleve.MatchPhraseQuery(token.Term, "title", issueIndexerAnalyzer, token.Fuzzy, 2.0),
|
||||||
inner_bleve.MatchPhraseQuery(token.Term, "content", issueIndexerAnalyzer, token.Fuzzy),
|
inner_bleve.MatchPhraseQuery(token.Term, "content", issueIndexerAnalyzer, token.Fuzzy, 1.0),
|
||||||
inner_bleve.MatchPhraseQuery(token.Term, "comments", issueIndexerAnalyzer, token.Fuzzy))
|
inner_bleve.MatchPhraseQuery(token.Term, "comments", issueIndexerAnalyzer, token.Fuzzy, 1.0))
|
||||||
|
|
||||||
|
if issueID, err := token.ParseIssueReference(); err == nil {
|
||||||
|
idQuery := inner_bleve.NumericEqualityQuery(issueID, "index")
|
||||||
|
idQuery.SetBoost(5.0)
|
||||||
|
innerQ.AddQuery(idQuery)
|
||||||
|
}
|
||||||
|
|
||||||
switch token.Kind {
|
switch token.Kind {
|
||||||
case internal.BoolOptMust:
|
case internal.BoolOptMust:
|
||||||
|
|
|
@ -5,6 +5,7 @@ package db
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
"forgejo.org/models/db"
|
"forgejo.org/models/db"
|
||||||
issue_model "forgejo.org/models/issues"
|
issue_model "forgejo.org/models/issues"
|
||||||
|
@ -71,6 +72,17 @@ func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
||||||
)),
|
)),
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
term := options.Keyword
|
||||||
|
if term[0] == '#' || term[0] == '!' {
|
||||||
|
term = term[1:]
|
||||||
|
}
|
||||||
|
if issueID, err := strconv.ParseInt(term, 10, 64); err == nil {
|
||||||
|
cond = builder.Or(
|
||||||
|
builder.Eq{"`index`": issueID},
|
||||||
|
cond,
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
opt, err := ToDBOptions(ctx, options)
|
opt, err := ToDBOptions(ctx, options)
|
||||||
|
|
|
@ -18,7 +18,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
issueIndexerLatestVersion = 1
|
issueIndexerLatestVersion = 2
|
||||||
// multi-match-types, currently only 2 types are used
|
// multi-match-types, currently only 2 types are used
|
||||||
// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
|
// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
|
||||||
esMultiMatchTypeBestFields = "best_fields"
|
esMultiMatchTypeBestFields = "best_fields"
|
||||||
|
@ -56,7 +56,8 @@ const (
|
||||||
"repo_id": { "type": "long", "index": true },
|
"repo_id": { "type": "long", "index": true },
|
||||||
"is_public": { "type": "boolean", "index": true },
|
"is_public": { "type": "boolean", "index": true },
|
||||||
|
|
||||||
"title": { "type": "text", "index": true },
|
"index": { "type": "long", "index": true },
|
||||||
|
"title": { "type": "text", "index": true },
|
||||||
"content": { "type": "text", "index": true },
|
"content": { "type": "text", "index": true },
|
||||||
"comments": { "type" : "text", "index": true },
|
"comments": { "type" : "text", "index": true },
|
||||||
|
|
||||||
|
@ -155,21 +156,25 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
for _, token := range tokens {
|
for _, token := range tokens {
|
||||||
innerQ := elastic.NewMultiMatchQuery(token.Term, "title", "content", "comments")
|
innerQ := elastic.NewMultiMatchQuery(token.Term, "content", "comments").FieldWithBoost("title", 2.0).TieBreaker(0.5)
|
||||||
if token.Fuzzy {
|
if token.Fuzzy {
|
||||||
// If the term is not a phrase use fuzziness set to AUTO
|
// If the term is not a phrase use fuzziness set to AUTO
|
||||||
innerQ = innerQ.Type(esMultiMatchTypeBestFields).Fuzziness(esFuzzyAuto)
|
innerQ = innerQ.Type(esMultiMatchTypeBestFields).Fuzziness(esFuzzyAuto)
|
||||||
} else {
|
} else {
|
||||||
innerQ = innerQ.Type(esMultiMatchTypePhrasePrefix)
|
innerQ = innerQ.Type(esMultiMatchTypePhrasePrefix)
|
||||||
}
|
}
|
||||||
|
var eitherQ elastic.Query = innerQ
|
||||||
|
if issueID, err := token.ParseIssueReference(); err == nil {
|
||||||
|
indexQ := elastic.NewTermQuery("index", issueID).Boost(15.0)
|
||||||
|
eitherQ = elastic.NewDisMaxQuery().Query(indexQ).Query(innerQ).TieBreaker(0.5)
|
||||||
|
}
|
||||||
switch token.Kind {
|
switch token.Kind {
|
||||||
case internal.BoolOptMust:
|
case internal.BoolOptMust:
|
||||||
q.Must(innerQ)
|
q.Must(eitherQ)
|
||||||
case internal.BoolOptShould:
|
case internal.BoolOptShould:
|
||||||
q.Should(innerQ)
|
q.Should(eitherQ)
|
||||||
case internal.BoolOptNot:
|
case internal.BoolOptNot:
|
||||||
q.MustNot(innerQ)
|
q.MustNot(eitherQ)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
query.Must(q)
|
query.Must(q)
|
||||||
|
|
|
@ -14,6 +14,7 @@ type IndexerData struct {
|
||||||
ID int64 `json:"id"`
|
ID int64 `json:"id"`
|
||||||
RepoID int64 `json:"repo_id"`
|
RepoID int64 `json:"repo_id"`
|
||||||
IsPublic bool `json:"is_public"` // If the repo is public
|
IsPublic bool `json:"is_public"` // If the repo is public
|
||||||
|
Index int64 `json:"index"`
|
||||||
|
|
||||||
// Fields used for keyword searching
|
// Fields used for keyword searching
|
||||||
Title string `json:"title"`
|
Title string `json:"title"`
|
||||||
|
|
|
@ -5,6 +5,7 @@ package internal
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"io"
|
"io"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -22,6 +23,14 @@ type Token struct {
|
||||||
Fuzzy bool
|
Fuzzy bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (tk *Token) ParseIssueReference() (int64, error) {
|
||||||
|
term := tk.Term
|
||||||
|
if term[0] == '#' || term[0] == '!' {
|
||||||
|
term = term[1:]
|
||||||
|
}
|
||||||
|
return strconv.ParseInt(term, 10, 64)
|
||||||
|
}
|
||||||
|
|
||||||
type Tokenizer struct {
|
type Tokenizer struct {
|
||||||
in *strings.Reader
|
in *strings.Reader
|
||||||
}
|
}
|
||||||
|
|
|
@ -549,6 +549,55 @@ var cases = []*testIndexerCase{
|
||||||
}), result.Total)
|
}), result.Total)
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Name: "Index",
|
||||||
|
SearchOptions: &internal.SearchOptions{
|
||||||
|
Keyword: "13",
|
||||||
|
SortBy: internal.SortByScore,
|
||||||
|
RepoIDs: []int64{5},
|
||||||
|
},
|
||||||
|
ExpectedIDs: []int64{93}, // 93 = #13 in repo 5
|
||||||
|
ExpectedTotal: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "Index with prefix",
|
||||||
|
SearchOptions: &internal.SearchOptions{
|
||||||
|
Keyword: "#13",
|
||||||
|
SortBy: internal.SortByScore,
|
||||||
|
RepoIDs: []int64{5},
|
||||||
|
},
|
||||||
|
ExpectedIDs: []int64{93},
|
||||||
|
ExpectedTotal: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "Index and title boost",
|
||||||
|
ExtraData: []*internal.IndexerData{
|
||||||
|
{ID: 1001, Title: "re #13", RepoID: 5},
|
||||||
|
{ID: 1002, Title: "re #1001", Content: "leave 13 alone. - 13", RepoID: 5},
|
||||||
|
},
|
||||||
|
SearchOptions: &internal.SearchOptions{
|
||||||
|
Keyword: "!13",
|
||||||
|
SortBy: internal.SortByScore,
|
||||||
|
RepoIDs: []int64{5},
|
||||||
|
},
|
||||||
|
ExpectedIDs: []int64{93, 1001, 1002},
|
||||||
|
ExpectedTotal: 3,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Name: "Index exclude",
|
||||||
|
ExtraData: []*internal.IndexerData{
|
||||||
|
{ID: 1001, Index: 101, Title: "Brrr", RepoID: 5},
|
||||||
|
{ID: 1002, Index: 102, Title: "Brrr", Content: "Brrr", RepoID: 5},
|
||||||
|
{ID: 1003, Index: 103, Title: "Brrr", RepoID: 5},
|
||||||
|
{ID: 1004, Index: 104, Title: "Brrr", RepoID: 5},
|
||||||
|
},
|
||||||
|
SearchOptions: &internal.SearchOptions{
|
||||||
|
Keyword: "Brrr -101 -103",
|
||||||
|
SortBy: internal.SortByScore,
|
||||||
|
},
|
||||||
|
ExpectedIDs: []int64{1002, 1004},
|
||||||
|
ExpectedTotal: 2,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
Name: "SortByCreatedDesc",
|
Name: "SortByCreatedDesc",
|
||||||
SearchOptions: &internal.SearchOptions{
|
SearchOptions: &internal.SearchOptions{
|
||||||
|
@ -741,6 +790,7 @@ func generateDefaultIndexerData() []*internal.IndexerData {
|
||||||
|
|
||||||
data = append(data, &internal.IndexerData{
|
data = append(data, &internal.IndexerData{
|
||||||
ID: id,
|
ID: id,
|
||||||
|
Index: issueIndex,
|
||||||
RepoID: repoID,
|
RepoID: repoID,
|
||||||
IsPublic: repoID%2 == 0,
|
IsPublic: repoID%2 == 0,
|
||||||
Title: fmt.Sprintf("issue%d of repo%d", issueIndex, repoID),
|
Title: fmt.Sprintf("issue%d of repo%d", issueIndex, repoID),
|
||||||
|
|
|
@ -95,6 +95,7 @@ func getIssueIndexerData(ctx context.Context, issueID int64) (*internal.IndexerD
|
||||||
return &internal.IndexerData{
|
return &internal.IndexerData{
|
||||||
ID: issue.ID,
|
ID: issue.ID,
|
||||||
RepoID: issue.RepoID,
|
RepoID: issue.RepoID,
|
||||||
|
Index: issue.Index,
|
||||||
IsPublic: !issue.Repo.IsPrivate,
|
IsPublic: !issue.Repo.IsPrivate,
|
||||||
Title: issue.Title,
|
Title: issue.Title,
|
||||||
Content: issue.Content,
|
Content: issue.Content,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue