Convert files to utf-8 for indexing (#7814)

* Convert files to utf-8 for indexing

* Move utf8 functions to modules/base

* Bump repoIndexerLatestVersion to 3

* Add tests for base/encoding.go

* Changes to pass gosimple

* Move UTF8 funcs into new modules/charset package
This commit is contained in:
guillep2k 2019-08-15 09:07:28 -03:00 committed by Lunny Xiao
parent c2c35d169c
commit 5a44be627c
13 changed files with 371 additions and 166 deletions

View file

@@ -10,6 +10,7 @@ import (
"strings"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/log"
@@ -207,6 +208,7 @@ func addUpdate(update fileUpdate, repo *Repository, batch rupture.FlushingBatch)
if err != nil {
return err
} else if !base.IsTextFile(fileContents) {
// FIXME: UTF-16 files will probably fail here
return nil
}
indexerUpdate := indexer.RepoIndexerUpdate{
@@ -214,7 +216,7 @@ func addUpdate(update fileUpdate, repo *Repository, batch rupture.FlushingBatch)
Op: indexer.RepoIndexerOpUpdate,
Data: &indexer.RepoIndexerData{
RepoID: repo.ID,
-Content: string(fileContents),
+Content: string(charset.ToUTF8DropErrors(fileContents)),
},
}
return indexerUpdate.AddToFlushingBatch(batch)