mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2025-05-16 15:02:43 +00:00
feat(code search): replace fuzzy search with union search for indexer (#6947)
Some checks are pending
/ release (push) Waiting to run
testing / backend-checks (push) Has been skipped
testing / frontend-checks (push) Has been skipped
testing / test-remote-cacher (redis) (push) Has been skipped
testing / test-remote-cacher (valkey) (push) Has been skipped
testing / test-remote-cacher (garnet) (push) Has been skipped
testing / test-remote-cacher (redict) (push) Has been skipped
testing / test-unit (push) Has been skipped
testing / test-e2e (push) Has been skipped
testing / test-mysql (push) Has been skipped
testing / test-pgsql (push) Has been skipped
testing / test-sqlite (push) Has been skipped
testing / security-check (push) Has been skipped
Some checks are pending
/ release (push) Waiting to run
testing / backend-checks (push) Has been skipped
testing / frontend-checks (push) Has been skipped
testing / test-remote-cacher (redis) (push) Has been skipped
testing / test-remote-cacher (valkey) (push) Has been skipped
testing / test-remote-cacher (garnet) (push) Has been skipped
testing / test-remote-cacher (redict) (push) Has been skipped
testing / test-unit (push) Has been skipped
testing / test-e2e (push) Has been skipped
testing / test-mysql (push) Has been skipped
testing / test-pgsql (push) Has been skipped
testing / test-sqlite (push) Has been skipped
testing / security-check (push) Has been skipped
Fuzzy searching for code is known to be problematic (see #5264) and, in my personal opinion, isn't very useful.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/6947
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
This commit is contained in:
parent
cb46a036aa
commit
3816db68aa
10 changed files with 105 additions and 86 deletions
|
@ -40,10 +40,6 @@ import (
|
|||
const (
|
||||
unicodeNormalizeName = "unicodeNormalize"
|
||||
maxBatchSize = 16
|
||||
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
|
||||
fuzzyDenominator = 4
|
||||
// see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311
|
||||
maxFuzziness = 2
|
||||
)
|
||||
|
||||
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
|
||||
|
@ -260,12 +256,14 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
|
|||
keywordQuery query.Query
|
||||
)
|
||||
|
||||
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
|
||||
phraseQuery.FieldVal = "Content"
|
||||
phraseQuery.Analyzer = repoIndexerAnalyzer
|
||||
keywordQuery = phraseQuery
|
||||
if opts.IsKeywordFuzzy {
|
||||
phraseQuery.Fuzziness = min(maxFuzziness, len(opts.Keyword)/fuzzyDenominator)
|
||||
if opts.Mode == internal.CodeSearchModeUnion {
|
||||
query := bleve.NewDisjunctionQuery()
|
||||
for _, field := range strings.Fields(opts.Keyword) {
|
||||
query.AddQuery(inner_bleve.MatchPhraseQuery(field, "Content", repoIndexerAnalyzer, 0))
|
||||
}
|
||||
keywordQuery = query
|
||||
} else {
|
||||
keywordQuery = inner_bleve.MatchPhraseQuery(opts.Keyword, "Content", repoIndexerAnalyzer, 0)
|
||||
}
|
||||
|
||||
if len(opts.RepoIDs) > 0 {
|
||||
|
@ -325,13 +323,16 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
|
|||
for i, hit := range result.Hits {
|
||||
startIndex, endIndex := -1, -1
|
||||
for _, locations := range hit.Locations["Content"] {
|
||||
if startIndex != -1 && endIndex != -1 {
|
||||
break
|
||||
}
|
||||
location := locations[0]
|
||||
locationStart := int(location.Start)
|
||||
locationEnd := int(location.End)
|
||||
if startIndex < 0 || locationStart < startIndex {
|
||||
startIndex = locationStart
|
||||
}
|
||||
if endIndex < 0 || locationEnd > endIndex {
|
||||
if endIndex < 0 && locationEnd > endIndex {
|
||||
endIndex = locationEnd
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue