diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go
index eb003baec7..c53b7a2e6d 100644
--- a/modules/indexer/code/bleve/bleve.go
+++ b/modules/indexer/code/bleve/bleve.go
@@ -260,11 +260,11 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
 	if opts.Mode == internal.CodeSearchModeUnion {
 		query := bleve.NewDisjunctionQuery()
 		for _, field := range strings.Fields(opts.Keyword) {
-			query.AddQuery(inner_bleve.MatchPhraseQuery(field, "Content", repoIndexerAnalyzer, false))
+			query.AddQuery(inner_bleve.MatchPhraseQuery(field, "Content", repoIndexerAnalyzer, false, 1.0))
 		}
 		keywordQuery = query
 	} else {
-		keywordQuery = inner_bleve.MatchPhraseQuery(opts.Keyword, "Content", repoIndexerAnalyzer, false)
+		keywordQuery = inner_bleve.MatchPhraseQuery(opts.Keyword, "Content", repoIndexerAnalyzer, false, 1.0)
 	}
 
 	if len(opts.RepoIDs) > 0 {
diff --git a/modules/indexer/internal/bleve/query.go b/modules/indexer/internal/bleve/query.go
index 7f411b516b..e043023671 100644
--- a/modules/indexer/internal/bleve/query.go
+++ b/modules/indexer/internal/bleve/query.go
@@ -29,11 +29,12 @@ func MatchQuery(matchTerm, field, analyzer string, fuzziness int) *query.MatchQu
 }
 
 // MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
-func MatchPhraseQuery(matchPhrase, field, analyzer string, autoFuzzy bool) *query.MatchPhraseQuery {
+func MatchPhraseQuery(matchPhrase, field, analyzer string, autoFuzzy bool, boost float64) *query.MatchPhraseQuery {
 	q := bleve.NewMatchPhraseQuery(matchPhrase)
 	q.FieldVal = field
 	q.Analyzer = analyzer
 	q.SetAutoFuzziness(autoFuzzy)
+	q.SetBoost(boost)
 	return q
 }
 
diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go
index 64d3c8122e..573d63a446 100644
--- a/modules/indexer/issues/bleve/bleve.go
+++ b/modules/indexer/issues/bleve/bleve.go
@@ -23,7 +23,7 @@ import (
 const (
 	issueIndexerAnalyzer      = "issueIndexer"
 	issueIndexerDocType       = "issueIndexerDocType"
-	issueIndexerLatestVersion = 4
+	issueIndexerLatestVersion = 5
 )
 
 const unicodeNormalizeName = "unicodeNormalize"
@@ -69,6 +69,7 @@ func generateIssueIndexMapping() (mapping.IndexMapping, error) {
 
 	docMapping.AddFieldMappingsAt("is_public", boolFieldMapping)
 
+	docMapping.AddFieldMappingsAt("index", numberFieldMapping)
 	docMapping.AddFieldMappingsAt("title", textFieldMapping)
 	docMapping.AddFieldMappingsAt("content", textFieldMapping)
 	docMapping.AddFieldMappingsAt("comments", textFieldMapping)
@@ -163,9 +164,15 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
 		q := bleve.NewBooleanQuery()
 		for _, token := range tokens {
 			innerQ := bleve.NewDisjunctionQuery(
-				inner_bleve.MatchPhraseQuery(token.Term, "title", issueIndexerAnalyzer, token.Fuzzy),
-				inner_bleve.MatchPhraseQuery(token.Term, "content", issueIndexerAnalyzer, token.Fuzzy),
-				inner_bleve.MatchPhraseQuery(token.Term, "comments", issueIndexerAnalyzer, token.Fuzzy))
+				inner_bleve.MatchPhraseQuery(token.Term, "title", issueIndexerAnalyzer, token.Fuzzy, 2.0),
+				inner_bleve.MatchPhraseQuery(token.Term, "content", issueIndexerAnalyzer, token.Fuzzy, 1.0),
+				inner_bleve.MatchPhraseQuery(token.Term, "comments", issueIndexerAnalyzer, token.Fuzzy, 1.0))
+
+			if issueID, err := token.ParseIssueReference(); err == nil {
+				idQuery := inner_bleve.NumericEqualityQuery(issueID, "index")
+				idQuery.SetBoost(5.0)
+				innerQ.AddQuery(idQuery)
+			}
 
 			switch token.Kind {
 			case internal.BoolOptMust:
diff --git a/modules/indexer/issues/db/db.go b/modules/indexer/issues/db/db.go
index 9dd026e74f..397daa3265 100644
--- a/modules/indexer/issues/db/db.go
+++ b/modules/indexer/issues/db/db.go
@@ -5,6 +5,7 @@ package db
 
 import (
 	"context"
+	"strconv"
 
 	"forgejo.org/models/db"
 	issue_model "forgejo.org/models/issues"
@@ -71,6 +72,18 @@ func (i *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
 				)),
 			),
 		)
+
+		// Also match on the issue index when the keyword is a number, optionally prefixed with '#' or '!'.
+		term := options.Keyword
+		if len(term) > 0 && (term[0] == '#' || term[0] == '!') {
+			term = term[1:]
+		}
+		if issueID, err := strconv.ParseInt(term, 10, 64); err == nil {
+			cond = builder.Or(
+				builder.Eq{"`index`": issueID},
+				cond,
+			)
+		}
 	}
 
 	opt, err := ToDBOptions(ctx, options)
diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go
index 1bf0145796..9d2786e101 100644
--- a/modules/indexer/issues/elasticsearch/elasticsearch.go
+++ b/modules/indexer/issues/elasticsearch/elasticsearch.go
@@ -18,7 +18,7 @@ import (
 )
 
 const (
-	issueIndexerLatestVersion = 1
+	issueIndexerLatestVersion = 2
 	// multi-match-types, currently only 2 types are used
 	// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
 	esMultiMatchTypeBestFields   = "best_fields"
@@ -56,7 +56,8 @@ const (
 			"repo_id": { "type": "long", "index": true },
 			"is_public": { "type": "boolean", "index": true },
 
-			"title": {  "type": "text", "index": true },
+			"index": { "type": "long", "index": true },
+			"title": { "type": "text", "index": true },
 			"content": { "type": "text", "index": true },
 			"comments": { "type" : "text", "index": true },
 
@@ -155,21 +156,25 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
 			return nil, err
 		}
 		for _, token := range tokens {
-			innerQ := elastic.NewMultiMatchQuery(token.Term, "title", "content", "comments")
+			innerQ := elastic.NewMultiMatchQuery(token.Term, "content", "comments").FieldWithBoost("title", 2.0).TieBreaker(0.5)
 			if token.Fuzzy {
 				// If the term is not a phrase use fuzziness set to AUTO
 				innerQ = innerQ.Type(esMultiMatchTypeBestFields).Fuzziness(esFuzzyAuto)
 			} else {
 				innerQ = innerQ.Type(esMultiMatchTypePhrasePrefix)
 			}
-
+			var eitherQ elastic.Query = innerQ
+			if issueID, err := token.ParseIssueReference(); err == nil {
+				indexQ := elastic.NewTermQuery("index", issueID).Boost(15.0)
+				eitherQ = elastic.NewDisMaxQuery().Query(indexQ).Query(innerQ).TieBreaker(0.5)
+			}
 			switch token.Kind {
 			case internal.BoolOptMust:
-				q.Must(innerQ)
+				q.Must(eitherQ)
 			case internal.BoolOptShould:
-				q.Should(innerQ)
+				q.Should(eitherQ)
 			case internal.BoolOptNot:
-				q.MustNot(innerQ)
+				q.MustNot(eitherQ)
 			}
 		}
 		query.Must(q)
diff --git a/modules/indexer/issues/internal/model.go b/modules/indexer/issues/internal/model.go
index 03f5595a5b..6c55405179 100644
--- a/modules/indexer/issues/internal/model.go
+++ b/modules/indexer/issues/internal/model.go
@@ -14,6 +14,7 @@ type IndexerData struct {
 	ID       int64 `json:"id"`
 	RepoID   int64 `json:"repo_id"`
 	IsPublic bool  `json:"is_public"` // If the repo is public
+	Index    int64 `json:"index"`
 
 	// Fields used for keyword searching
 	Title    string `json:"title"`
diff --git a/modules/indexer/issues/internal/qstring.go b/modules/indexer/issues/internal/qstring.go
index fdb89b09e9..8115fc904f 100644
--- a/modules/indexer/issues/internal/qstring.go
+++ b/modules/indexer/issues/internal/qstring.go
@@ -5,6 +5,7 @@ package internal
 
 import (
 	"io"
+	"strconv"
 	"strings"
 )
 
@@ -22,6 +23,19 @@ type Token struct {
 	Fuzzy bool
 }
 
+// ParseIssueReference parses the token's term as an issue index, accepting an
+// optional leading '#' or '!' (as in "#13" or "!13"). It returns the error
+// from strconv.ParseInt when the remainder is not a base-10 integer, which
+// includes an empty term.
+func (tk *Token) ParseIssueReference() (int64, error) {
+	term := tk.Term
+	// Check the length first: indexing an empty term would panic.
+	if len(term) > 0 && (term[0] == '#' || term[0] == '!') {
+		term = term[1:]
+	}
+	return strconv.ParseInt(term, 10, 64)
+}
+
 type Tokenizer struct {
 	in *strings.Reader
 }
diff --git a/modules/indexer/issues/internal/tests/tests.go b/modules/indexer/issues/internal/tests/tests.go
index 1e871c4646..ef75955a14 100644
--- a/modules/indexer/issues/internal/tests/tests.go
+++ b/modules/indexer/issues/internal/tests/tests.go
@@ -549,6 +549,55 @@ var cases = []*testIndexerCase{
 			}), result.Total)
 		},
 	},
+	{
+		Name: "Index",
+		SearchOptions: &internal.SearchOptions{
+			Keyword: "13",
+			SortBy:  internal.SortByScore,
+			RepoIDs: []int64{5},
+		},
+		ExpectedIDs:   []int64{93}, // 93 = #13 in repo 5
+		ExpectedTotal: 1,
+	},
+	{
+		Name: "Index with prefix",
+		SearchOptions: &internal.SearchOptions{
+			Keyword: "#13",
+			SortBy:  internal.SortByScore,
+			RepoIDs: []int64{5},
+		},
+		ExpectedIDs:   []int64{93},
+		ExpectedTotal: 1,
+	},
+	{
+		Name: "Index and title boost",
+		ExtraData: []*internal.IndexerData{
+			{ID: 1001, Title: "re #13", RepoID: 5},
+			{ID: 1002, Title: "re #1001", Content: "leave 13 alone. - 13", RepoID: 5},
+		},
+		SearchOptions: &internal.SearchOptions{
+			Keyword: "!13",
+			SortBy:  internal.SortByScore,
+			RepoIDs: []int64{5},
+		},
+		ExpectedIDs:   []int64{93, 1001, 1002},
+		ExpectedTotal: 3,
+	},
+	{
+		Name: "Index exclude",
+		ExtraData: []*internal.IndexerData{
+			{ID: 1001, Index: 101, Title: "Brrr", RepoID: 5},
+			{ID: 1002, Index: 102, Title: "Brrr", Content: "Brrr", RepoID: 5},
+			{ID: 1003, Index: 103, Title: "Brrr", RepoID: 5},
+			{ID: 1004, Index: 104, Title: "Brrr", RepoID: 5},
+		},
+		SearchOptions: &internal.SearchOptions{
+			Keyword: "Brrr -101 -103",
+			SortBy:  internal.SortByScore,
+		},
+		ExpectedIDs:   []int64{1002, 1004},
+		ExpectedTotal: 2,
+	},
 	{
 		Name: "SortByCreatedDesc",
 		SearchOptions: &internal.SearchOptions{
@@ -741,6 +790,7 @@ func generateDefaultIndexerData() []*internal.IndexerData {
 
 			data = append(data, &internal.IndexerData{
 				ID:       id,
+				Index:    issueIndex,
 				RepoID:   repoID,
 				IsPublic: repoID%2 == 0,
 				Title:    fmt.Sprintf("issue%d of repo%d", issueIndex, repoID),
diff --git a/modules/indexer/issues/util.go b/modules/indexer/issues/util.go
index 3e6c8babe4..909e840ae5 100644
--- a/modules/indexer/issues/util.go
+++ b/modules/indexer/issues/util.go
@@ -95,6 +95,7 @@ func getIssueIndexerData(ctx context.Context, issueID int64) (*internal.IndexerD
 	return &internal.IndexerData{
 		ID:       issue.ID,
 		RepoID:   issue.RepoID,
+		Index:    issue.Index,
 		IsPublic: !issue.Repo.IsPrivate,
 		Title:    issue.Title,
 		Content:  issue.Content,