Language statistics bar for repositories (#8037)

* Implementation for calculating language statistics

Implement saving code language statistics to database

Implement rendering language stats

Add primary language to show in repository list

Implement repository stats indexer queue

Add indexer test

Refactor to use queue module

* Do not timeout for queues
This commit is contained in:
Lauris BH 2020-02-11 11:34:17 +02:00 committed by GitHub
parent 37892be635
commit ad2642a8aa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
89 changed files with 182950 additions and 57 deletions

View file

@ -186,6 +186,8 @@ var migrations = []Migration{
NewMigration("Add some columns on review for migration", addReviewMigrateInfo),
// v126 -> v127
NewMigration("Fix topic repository count", fixTopicRepositoryCount),
// v127 -> v128
NewMigration("add repository code language statistics", addLanguageStats),
}
// Migrate database to current version

45
models/migrations/v127.go Normal file
View file

@ -0,0 +1,45 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package migrations
import (
"fmt"
"code.gitea.io/gitea/modules/timeutil"
"xorm.io/xorm"
)
// addLanguageStats is the migration registered as "add repository code
// language statistics". It synchronises the database schema with two
// snapshot structs declared locally: LanguageStat and RepoIndexerStatus
// (the latter including the indexer_type column).
func addLanguageStats(x *xorm.Engine) error {
	// RepoIndexerType is the integer type of RepoIndexerStatus.IndexerType.
	type RepoIndexerType int

	// LanguageStat see models/repo_language_stats.go
	type LanguageStat struct {
		ID          int64 `xorm:"pk autoincr"`
		RepoID      int64 `xorm:"UNIQUE(s) INDEX NOT NULL"`
		CommitID    string
		IsPrimary   bool
		Language    string             `xorm:"VARCHAR(30) UNIQUE(s) INDEX NOT NULL"`
		Percentage  float32            `xorm:"NUMERIC(5,2) NOT NULL DEFAULT 0"`
		Color       string             `xorm:"-"`
		CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
	}

	// RepoIndexerStatus see models/repo_stats_indexer.go
	type RepoIndexerStatus struct {
		ID          int64           `xorm:"pk autoincr"`
		RepoID      int64           `xorm:"INDEX(s)"`
		CommitSha   string          `xorm:"VARCHAR(40)"`
		IndexerType RepoIndexerType `xorm:"INDEX(s) NOT NULL DEFAULT 0"`
	}

	// Sync the tables one at a time, in declaration order, and stop at the
	// first failure.
	for _, bean := range []interface{}{new(LanguageStat), new(RepoIndexerStatus)} {
		if err := x.Sync2(bean); err != nil {
			return fmt.Errorf("Sync2: %v", err)
		}
	}
	return nil
}

View file

@ -116,6 +116,7 @@ func init() {
new(OAuth2AuthorizationCode),
new(OAuth2Grant),
new(Task),
new(LanguageStat),
)
gonicNames := []string{"SSL", "UID"}

View file

@ -175,8 +175,9 @@ type Repository struct {
*Mirror `xorm:"-"`
Status RepositoryStatus `xorm:"NOT NULL DEFAULT 0"`
RenderingMetas map[string]string `xorm:"-"`
Units []*RepoUnit `xorm:"-"`
RenderingMetas map[string]string `xorm:"-"`
Units []*RepoUnit `xorm:"-"`
PrimaryLanguage *LanguageStat `xorm:"-"`
IsFork bool `xorm:"INDEX NOT NULL DEFAULT false"`
ForkID int64 `xorm:"INDEX"`
@ -185,7 +186,8 @@ type Repository struct {
TemplateID int64 `xorm:"INDEX"`
TemplateRepo *Repository `xorm:"-"`
Size int64 `xorm:"NOT NULL DEFAULT 0"`
IndexerStatus *RepoIndexerStatus `xorm:"-"`
CodeIndexerStatus *RepoIndexerStatus `xorm:"-"`
StatsIndexerStatus *RepoIndexerStatus `xorm:"-"`
IsFsckEnabled bool `xorm:"NOT NULL DEFAULT true"`
CloseIssuesViaCommitInAnyBranch bool `xorm:"NOT NULL DEFAULT false"`
Topics []string `xorm:"TEXT JSON"`
@ -1504,6 +1506,7 @@ func DeleteRepository(doer *User, uid, repoID int64) error {
&Notification{RepoID: repoID},
&CommitStatus{RepoID: repoID},
&RepoIndexerStatus{RepoID: repoID},
&LanguageStat{RepoID: repoID},
&Comment{RefRepoID: repoID},
&Task{RepoID: repoID},
); err != nil {

View file

@ -10,21 +10,32 @@ import (
"xorm.io/builder"
)
// RepoIndexerType specifies the repository indexer type.
// The value is stored in RepoIndexerStatus.IndexerType (column indexer_type,
// default 0), so one repository can carry a separate status row per indexer.
type RepoIndexerType int
const (
// RepoIndexerTypeCode code indexer
RepoIndexerTypeCode RepoIndexerType = iota // 0
// RepoIndexerTypeStats repository stats indexer
RepoIndexerTypeStats // 1
)
// RepoIndexerStatus status of a repo's entry in the repo indexer
// For now, implicitly refers to default branch
type RepoIndexerStatus struct {
ID int64 `xorm:"pk autoincr"`
RepoID int64 `xorm:"INDEX"`
CommitSha string `xorm:"VARCHAR(40)"`
ID int64 `xorm:"pk autoincr"`
RepoID int64 `xorm:"INDEX(s)"`
CommitSha string `xorm:"VARCHAR(40)"`
IndexerType RepoIndexerType `xorm:"INDEX(s) NOT NULL DEFAULT 0"`
}
// GetUnindexedRepos returns repos which do not have an indexer status
func GetUnindexedRepos(maxRepoID int64, page, pageSize int) ([]int64, error) {
func GetUnindexedRepos(indexerType RepoIndexerType, maxRepoID int64, page, pageSize int) ([]int64, error) {
ids := make([]int64, 0, 50)
cond := builder.Cond(builder.IsNull{
"repo_indexer_status.id",
})
sess := x.Table("repository").Join("LEFT OUTER", "repo_indexer_status", "repository.id = repo_indexer_status.repo_id")
sess := x.Table("repository").Join("LEFT OUTER", "repo_indexer_status", "repository.id = repo_indexer_status.repo_id AND repo_indexer_status.indexer_type = ?", indexerType)
if maxRepoID > 0 {
cond = builder.And(cond, builder.Lte{
"repository.id": maxRepoID,
@ -43,40 +54,64 @@ func GetUnindexedRepos(maxRepoID int64, page, pageSize int) ([]int64, error) {
return ids, err
}
// GetIndexerStatus loads the repo's code indexer status
func (repo *Repository) GetIndexerStatus() error {
if repo.IndexerStatus != nil {
return nil
// getIndexerStatus loads the repo's status for the given indexer type
func (repo *Repository) getIndexerStatus(e Engine, indexerType RepoIndexerType) (*RepoIndexerStatus, error) {
switch indexerType {
case RepoIndexerTypeCode:
if repo.CodeIndexerStatus != nil {
return repo.CodeIndexerStatus, nil
}
case RepoIndexerTypeStats:
if repo.StatsIndexerStatus != nil {
return repo.StatsIndexerStatus, nil
}
}
status := &RepoIndexerStatus{RepoID: repo.ID}
has, err := x.Get(status)
status := &RepoIndexerStatus{RepoID: repo.ID, IndexerType: indexerType}
has, err := e.Get(status)
if err != nil {
return err
return nil, err
} else if !has {
status.CommitSha = ""
}
repo.IndexerStatus = status
return nil
switch indexerType {
case RepoIndexerTypeCode:
repo.CodeIndexerStatus = status
case RepoIndexerTypeStats:
repo.StatsIndexerStatus = status
}
return status, nil
}
// UpdateIndexerStatus updates indexer status
func (repo *Repository) UpdateIndexerStatus(sha string) error {
if err := repo.GetIndexerStatus(); err != nil {
// GetIndexerStatus returns the repo's status for the given indexer type.
// It delegates to getIndexerStatus using the package-level engine x.
func (repo *Repository) GetIndexerStatus(indexerType RepoIndexerType) (*RepoIndexerStatus, error) {
return repo.getIndexerStatus(x, indexerType)
}
// updateIndexerStatus updates indexer status
func (repo *Repository) updateIndexerStatus(e Engine, indexerType RepoIndexerType, sha string) error {
status, err := repo.getIndexerStatus(e, indexerType)
if err != nil {
return fmt.Errorf("UpdateIndexerStatus: Unable to getIndexerStatus for repo: %s Error: %v", repo.FullName(), err)
}
if len(repo.IndexerStatus.CommitSha) == 0 {
repo.IndexerStatus.CommitSha = sha
_, err := x.Insert(repo.IndexerStatus)
if len(status.CommitSha) == 0 {
status.CommitSha = sha
_, err := e.Insert(status)
if err != nil {
return fmt.Errorf("UpdateIndexerStatus: Unable to insert repoIndexerStatus for repo: %s Sha: %s Error: %v", repo.FullName(), sha, err)
}
return nil
}
repo.IndexerStatus.CommitSha = sha
_, err := x.ID(repo.IndexerStatus.ID).Cols("commit_sha").
Update(repo.IndexerStatus)
status.CommitSha = sha
_, err = e.ID(status.ID).Cols("commit_sha").
Update(status)
if err != nil {
return fmt.Errorf("UpdateIndexerStatus: Unable to update repoIndexerStatus for repo: %s Sha: %s Error: %v", repo.FullName(), sha, err)
}
return nil
}
// UpdateIndexerStatus records sha as the commit last processed by the given
// indexer type for this repo.
// It delegates to updateIndexerStatus using the package-level engine x.
func (repo *Repository) UpdateIndexerStatus(indexerType RepoIndexerType, sha string) error {
return repo.updateIndexerStatus(x, indexerType, sha)
}

View file

@ -0,0 +1,137 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package models
import (
"math"
"strings"
"code.gitea.io/gitea/modules/timeutil"
"github.com/src-d/enry/v2"
)
// LanguageStat describes language statistics of a repository.
// Each row holds the share of one language within one repository; the
// (RepoID, Language) pair is covered by the unique index "s".
type LanguageStat struct {
// ID is the auto-incremented primary key.
ID int64 `xorm:"pk autoincr"`
// RepoID is the ID of the repository the statistic belongs to.
RepoID int64 `xorm:"UNIQUE(s) INDEX NOT NULL"`
// CommitID is the commit ID passed to UpdateLanguageStats when the row was
// last written.
CommitID string
// IsPrimary is set by UpdateLanguageStats on the language with the highest
// percentage.
IsPrimary bool
// Language is the language name (at most 30 characters in the database).
Language string `xorm:"VARCHAR(30) UNIQUE(s) INDEX NOT NULL"`
// Percentage is the share of the language, stored as NUMERIC(5,2).
Percentage float32 `xorm:"NUMERIC(5,2) NOT NULL DEFAULT 0"`
// Color is not persisted; loadAttributes fills it from enry.GetColor.
Color string `xorm:"-"`
// CreatedUnix is the creation timestamp, set by xorm via the CREATED tag.
CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
}
// LanguageStatList defines a list of language statistics
type LanguageStatList []*LanguageStat

// loadAttributes fills in the non-persisted Color field of every entry by
// looking up its language name with enry.GetColor.
func (stats LanguageStatList) loadAttributes() {
	for _, stat := range stats {
		stat.Color = enry.GetColor(stat.Language)
	}
}
// getLanguageStats loads all language statistics rows of the repository
// through e, ordered by descending percentage, and fills in their colors.
func (repo *Repository) getLanguageStats(e Engine) (LanguageStatList, error) {
	stats := make(LanguageStatList, 0, 6)
	query := e.Where("`repo_id` = ?", repo.ID).Desc("`percentage`")
	err := query.Find(&stats)
	if err != nil {
		return nil, err
	}
	stats.loadAttributes()
	return stats, nil
}

// GetLanguageStats returns the language statistics for a repository
func (repo *Repository) GetLanguageStats() (LanguageStatList, error) {
	return repo.getLanguageStats(x)
}
// GetTopLanguageStats returns the top language statistics for a repository.
//
// At most limit entries are kept, in the order getLanguageStats yields them.
// Entries named "other" and every entry beyond the limit are summed into a
// single trailing "other" entry (rounded to one decimal place), which is
// appended only when that sum is greater than zero.
func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error) {
	all, err := repo.getLanguageStats(x)
	if err != nil {
		return nil, err
	}

	top := make(LanguageStatList, 0, limit)
	var remainder float32
	for _, st := range all {
		if st.Language != "other" && len(top) < limit {
			top = append(top, st)
			continue
		}
		remainder += st.Percentage
	}

	if remainder > 0 {
		rounded := math.Round(float64(remainder)*10) / 10
		top = append(top, &LanguageStat{
			RepoID:     repo.ID,
			Language:   "other",
			Color:      "#cccccc",
			Percentage: float32(rounded),
		})
	}
	return top, nil
}
// UpdateLanguageStats updates the language statistics for repository.
//
// stats maps a language name to its percentage as computed for commitID.
// Within a single transaction the function:
//   - updates rows whose language matches case-insensitively,
//   - inserts rows for languages not stored yet,
//   - deletes rows that still carry a different commit ID,
//   - flags the language with the highest percentage as primary,
//   - advances the repository's stats indexer status to commitID.
//
// Any error is returned unchanged and the transaction is not committed.
func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]float32) error {
	sess := x.NewSession()
	// Register Close before Begin so the session is released even when the
	// transaction cannot be started.
	defer sess.Close()
	if err := sess.Begin(); err != nil {
		return err
	}

	oldstats, err := repo.getLanguageStats(sess)
	if err != nil {
		return err
	}

	// Pick the primary language: the highest percentage wins. Ties are broken
	// by the lower-cased name so the outcome does not depend on Go's random
	// map iteration order.
	var topLang string
	var p float32
	for lang, perc := range stats {
		llang := strings.ToLower(lang)
		if perc > p || (perc == p && topLang != "" && llang < topLang) {
			p = perc
			topLang = llang
		}
	}

	for lang, perc := range stats {
		upd := false
		llang := strings.ToLower(lang)
		for _, s := range oldstats {
			// Update already existing language
			if strings.ToLower(s.Language) == llang {
				s.CommitID = commitID
				s.IsPrimary = llang == topLang
				s.Percentage = perc
				if _, err := sess.ID(s.ID).Cols("`commit_id`", "`percentage`", "`is_primary`").Update(s); err != nil {
					return err
				}
				upd = true
				break
			}
		}
		// Insert new language
		if !upd {
			if _, err := sess.Insert(&LanguageStat{
				RepoID:     repo.ID,
				CommitID:   commitID,
				IsPrimary:  llang == topLang,
				Language:   lang,
				Percentage: perc,
			}); err != nil {
				return err
			}
		}
	}

	// Delete old languages. Rows updated above now carry commitID in memory,
	// so whatever still has another commit ID is stale. The IDs are collected
	// here instead of using a subquery on language_stat, because MySQL refuses
	// to delete from a table that the same statement selects from.
	staleIDs := make([]int64, 0, len(oldstats))
	for _, s := range oldstats {
		if s.CommitID != commitID {
			staleIDs = append(staleIDs, s.ID)
		}
	}
	if len(staleIDs) > 0 {
		if _, err := sess.In("`id`", staleIDs).Delete(&LanguageStat{}); err != nil {
			return err
		}
	}

	if err = repo.updateIndexerStatus(sess, RepoIndexerTypeStats, commitID); err != nil {
		return err
	}

	return sess.Commit()
}

View file

@ -46,11 +46,14 @@ func (repos RepositoryList) loadAttributes(e Engine) error {
return nil
}
// Load owners.
set := make(map[int64]struct{})
repoIDs := make([]int64, len(repos))
for i := range repos {
set[repos[i].OwnerID] = struct{}{}
repoIDs[i] = repos[i].ID
}
// Load owners.
users := make(map[int64]*User, len(set))
if err := e.
Where("id > 0").
@ -61,6 +64,25 @@ func (repos RepositoryList) loadAttributes(e Engine) error {
for i := range repos {
repos[i].Owner = users[repos[i].OwnerID]
}
// Load primary language.
stats := make(LanguageStatList, 0, len(repos))
if err := e.
Where("`is_primary` = ? AND `language` != ?", true, "other").
In("`repo_id`", repoIDs).
Find(&stats); err != nil {
return fmt.Errorf("find primary languages: %v", err)
}
stats.loadAttributes()
for i := range repos {
for _, st := range stats {
if st.RepoID == repos[i].ID {
repos[i].PrimaryLanguage = st
break
}
}
}
return nil
}
@ -119,7 +141,6 @@ type SearchRepoOptions struct {
OrderBy SearchOrderBy
Private bool // Include private repositories in results
StarredByID int64
IsProfile bool
AllPublic bool // Include also all public repositories of users and public organisations
AllLimited bool // Include also all public repositories of limited organisations
// None -> include collaborative AND non-collaborative
@ -306,10 +327,8 @@ func SearchRepository(opts *SearchRepoOptions) (RepositoryList, int64, error) {
return nil, 0, fmt.Errorf("Repo: %v", err)
}
if !opts.IsProfile {
if err = repos.loadAttributes(sess); err != nil {
return nil, 0, fmt.Errorf("LoadAttributes: %v", err)
}
if err = repos.loadAttributes(sess); err != nil {
return nil, 0, fmt.Errorf("LoadAttributes: %v", err)
}
return repos, count, nil