Language statistics bar for repositories (#8037)

* Implementation for calculating language statistics

Implement saving code language statistics to database

Implement rendering language stats

Add primary language to show in repository list

Implement repository stats indexer queue

Add indexer test

Refactor to use queue module

* Do not timeout for queues
This commit is contained in:
Lauris BH 2020-02-11 11:34:17 +02:00 committed by GitHub
parent 37892be635
commit ad2642a8aa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
89 changed files with 182950 additions and 57 deletions

View file

@ -267,7 +267,7 @@ func (b *BleveIndexer) Index(repoID int64) error {
if err = batch.Flush(); err != nil {
return err
}
return repo.UpdateIndexerStatus(sha)
return repo.UpdateIndexerStatus(models.RepoIndexerTypeCode, sha)
}
// Delete deletes indexes by ids

View file

@ -35,11 +35,12 @@ func getDefaultBranchSha(repo *models.Repository) (string, error) {
// getRepoChanges returns changes to repo since last indexer update
func getRepoChanges(repo *models.Repository, revision string) (*repoChanges, error) {
if err := repo.GetIndexerStatus(); err != nil {
status, err := repo.GetIndexerStatus(models.RepoIndexerTypeCode)
if err != nil {
return nil, err
}
if len(repo.IndexerStatus.CommitSha) == 0 {
if len(status.CommitSha) == 0 {
return genesisChanges(repo, revision)
}
return nonGenesisChanges(repo, revision)
@ -98,7 +99,7 @@ func genesisChanges(repo *models.Repository, revision string) (*repoChanges, err
// nonGenesisChanges get changes since the previous indexer update
func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges, error) {
diffCmd := git.NewCommand("diff", "--name-status",
repo.IndexerStatus.CommitSha, revision)
repo.CodeIndexerStatus.CommitSha, revision)
stdout, err := diffCmd.RunInDir(repo.RepoPath())
if err != nil {
// previous commit sha may have been removed by a force push, so

View file

@ -109,7 +109,7 @@ func populateRepoIndexer() {
return
default:
}
ids, err := models.GetUnindexedRepos(maxRepoID, 0, 50)
ids, err := models.GetUnindexedRepos(models.RepoIndexerTypeCode, maxRepoID, 0, 50)
if err != nil {
log.Error("populateRepoIndexer: %v", err)
return

View file

@ -0,0 +1,54 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package stats
import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/git"
)
// DBIndexer is an Indexer implementation whose Index method computes a
// repository's language statistics and hands them to the repository model
// for saving. The type itself carries no state.
type DBIndexer struct{}
// Index recalculates the language statistics of the repository with the given
// ID and passes them, along with the commit they were computed for, to
// repo.UpdateLanguageStats.
//
// The commit inspected is the one the repository's default branch currently
// resolves to. When the stats indexer status already records that commit,
// nothing is recomputed and nil is returned. Errors from loading the
// repository, reading the indexer status, opening the git repository,
// resolving the branch, or computing/saving the statistics are returned as-is.
func (db *DBIndexer) Index(id int64) error {
	repo, err := models.GetRepositoryByID(id)
	if err != nil {
		return err
	}

	indexed, err := repo.GetIndexerStatus(models.RepoIndexerTypeStats)
	if err != nil {
		return err
	}

	gitRepo, err := git.OpenRepository(repo.RepoPath())
	if err != nil {
		return err
	}
	defer gitRepo.Close()

	// Resolve the commit the default branch points at right now.
	head, err := gitRepo.GetBranchCommitID(repo.DefaultBranch)
	if err != nil {
		return err
	}

	// Statistics already recorded for this exact commit need no refresh.
	if head == indexed.CommitSha {
		return nil
	}

	langStats, err := gitRepo.GetLanguageStats(head)
	if err != nil {
		return err
	}
	return repo.UpdateLanguageStats(head, langStats)
}
// Close does nothing: DBIndexer has no fields and therefore nothing to
// release. The method exists so that DBIndexer satisfies Indexer.
func (db *DBIndexer) Close() {}

View file

@ -0,0 +1,85 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package stats
import (
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log"
)
// Indexer is the contract a repository stats indexer backend has to fulfil.
type Indexer interface {
	// Close gives the implementation a chance to release whatever it holds.
	Close()
	// Index refreshes the statistics of the repository with the given ID and
	// reports any failure as an error.
	Index(id int64) error
}
// indexer is the package-wide Indexer instance. Init assigns it, and the
// stats queue handler calls its Index method for every queued repository ID.
var indexer Indexer
// Init installs the database-backed indexer, creates the stats queue and, if
// that succeeded, starts a background goroutine that queues the existing
// repositories. A queue creation error is returned and no goroutine is
// started in that case.
func Init() error {
	indexer = &DBIndexer{}

	err := initStatsQueue()
	if err == nil {
		go populateRepoIndexer()
	}
	return err
}
// populateRepoIndexer pushes the IDs of already existing repositories onto
// the stats queue. Init runs it in its own goroutine.
//
// IDs are requested from models.GetUnindexedRepos for the stats indexer type
// (the trailing 0 and 50 are presumably page and page size - confirm against
// the models package). After every ID the bound passed to the next request is
// lowered to that ID minus one, until no IDs come back or the bound reaches
// zero. The function returns early when the graceful manager signals shutdown
// or when fetching IDs fails; a failed push is only logged and the remaining
// IDs are still processed.
func populateRepoIndexer() {
	log.Info("Populating the repo stats indexer with existing repositories")

	shutdown := graceful.GetManager().IsShutdown()
	// stopping is a non-blocking check of the shutdown channel; it logs the
	// interruption itself so both call sites only have to return.
	stopping := func() bool {
		select {
		case <-shutdown:
			log.Info("Repository Stats Indexer population shutdown before completion")
			return true
		default:
			return false
		}
	}

	hasRepos, err := models.IsTableNotEmpty("repository")
	switch {
	case err != nil:
		log.Fatal("System error: %v", err)
	case !hasRepos:
		return
	}

	upperBound, err := models.GetMaxID("repository")
	if err != nil {
		log.Fatal("System error: %v", err)
	}

	// Walk downwards from the highest ID that existed at this point, so that
	// repositories created after startup are not picked up here; those are
	// expected to reach the indexer on their own.
	for upperBound > 0 {
		if stopping() {
			return
		}

		batch, err := models.GetUnindexedRepos(models.RepoIndexerTypeStats, upperBound, 0, 50)
		if err != nil {
			log.Error("populateRepoIndexer: %v", err)
			return
		}
		if len(batch) == 0 {
			break
		}

		for _, repoID := range batch {
			if stopping() {
				return
			}
			if pushErr := statsQueue.Push(repoID); pushErr != nil {
				log.Error("statsQueue.Push: %v", pushErr)
			}
			upperBound = repoID - 1
		}
	}
	log.Info("Done (re)populating the repo stats indexer with existing repositories")
}

View file

@ -0,0 +1,42 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package stats
import (
"path/filepath"
"testing"
"time"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/setting"
"gopkg.in/ini.v1"
"github.com/stretchr/testify/assert"
)
// TestMain hands the test run over to models.MainTest, passing the path three
// directories above this package (presumably the repository root - confirm).
func TestMain(m *testing.M) {
	root := filepath.Join("..", "..", "..")
	models.MainTest(m, root)
}
// TestRepoStatsIndex initialises the stats indexer against the test database,
// waits for the queue to work through the fixtures and then checks the
// language statistics stored for repository 1.
//
// testify's assert functions report a failure but let the test continue, so
// every step whose result is used afterwards is guarded by its boolean
// return value. Without the guards a failed lookup or an empty result would
// panic on a nil repo or on langs[0] rather than fail with a clear message.
func TestRepoStatsIndex(t *testing.T) {
	if !assert.NoError(t, models.PrepareTestDatabase()) {
		return
	}

	setting.Cfg = ini.Empty()
	setting.NewQueueService()

	if !assert.NoError(t, Init()) {
		return
	}

	// Init populates the queue from a background goroutine; give it and the
	// queue workers time to process the fixture repositories.
	time.Sleep(5 * time.Second)

	repo, err := models.GetRepositoryByID(1)
	if !assert.NoError(t, err) {
		return
	}

	langs, err := repo.GetTopLanguageStats(5)
	if !assert.NoError(t, err) {
		return
	}

	// Stop on a length mismatch: indexing langs[0] below would panic.
	if !assert.Len(t, langs, 1) {
		return
	}
	assert.Equal(t, "other", langs[0].Language)
	assert.Equal(t, float32(100), langs[0].Percentage)
}

View file

@ -0,0 +1,43 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package stats
import (
"fmt"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/queue"
)
// statsQueue is the queue of repository IDs whose stats need updating. It is
// created by initStatsQueue and its items are processed by handle.
var statsQueue queue.Queue
// handle is the worker function of the stats queue. Every datum is expected
// to be a repository ID (int64, matching the exemplar given when the queue is
// created) and is passed to indexer.Index.
//
// A datum of any other type is logged and skipped, so it cannot panic the
// worker. Indexing errors are logged and do not stop the remaining items
// from being processed.
func handle(data ...queue.Data) {
	for _, datum := range data {
		repoID, ok := datum.(int64)
		if !ok {
			log.Error("stats queue received unexpected data of type %T, expected int64", datum)
			continue
		}
		if err := indexer.Index(repoID); err != nil {
			log.Error("stats queue idexer.Index(%d) failed: %v", repoID, err)
		}
	}
}
func initStatsQueue() error {
statsQueue = queue.CreateQueue("repo_stats_update", handle, int64(0)).(queue.Queue)
if statsQueue == nil {
return fmt.Errorf("Unable to create repo_stats_update Queue")
}
go graceful.GetManager().RunWithShutdownFns(statsQueue.Run)
return nil
}
// UpdateRepoIndexer queues the given repository's ID on the stats queue, from
// where the queue handler passes it to the indexer. The error from the push,
// if any, is returned.
func UpdateRepoIndexer(repo *models.Repository) error {
	repoID := repo.ID
	return statsQueue.Push(repoID)
}