Performance improvements for pull request list API (#30490)

Fix #30483

---------

Co-authored-by: yp05327 <576951401@qq.com>
Co-authored-by: Giteabot <teabot@gitea.io>
(cherry picked from commit 352a2cae247afa254241f113c5c22b9351f116b9)
This commit is contained in:
Lunny Xiao 2024-05-31 20:10:11 +08:00 committed by Earl Warren
parent 3e5f85ccf3
commit 47a2102694
No known key found for this signature in database
GPG key ID: 0579CB2928A78A00
12 changed files with 243 additions and 130 deletions

View file

@ -9,8 +9,10 @@ import (
"code.gitea.io/gitea/models/db"
access_model "code.gitea.io/gitea/models/perm/access"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/unit"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/util"
@ -129,7 +131,7 @@ func GetPullRequestIDsByCheckStatus(ctx context.Context, status PullRequestStatu
}
// PullRequests returns all pull requests for a base Repo by the given conditions
func PullRequests(ctx context.Context, baseRepoID int64, opts *PullRequestsOptions) ([]*PullRequest, int64, error) {
func PullRequests(ctx context.Context, baseRepoID int64, opts *PullRequestsOptions) (PullRequestList, int64, error) {
if opts.Page <= 0 {
opts.Page = 1
}
@ -159,50 +161,93 @@ func PullRequests(ctx context.Context, baseRepoID int64, opts *PullRequestsOptio
// PullRequestList defines a list of pull requests
type PullRequestList []*PullRequest
func (prs PullRequestList) LoadAttributes(ctx context.Context) error {
if len(prs) == 0 {
return nil
func (prs PullRequestList) getRepositoryIDs() []int64 {
repoIDs := make(container.Set[int64])
for _, pr := range prs {
if pr.BaseRepo == nil && pr.BaseRepoID > 0 {
repoIDs.Add(pr.BaseRepoID)
}
if pr.HeadRepo == nil && pr.HeadRepoID > 0 {
repoIDs.Add(pr.HeadRepoID)
}
}
return repoIDs.Values()
}
// Load issues.
issueIDs := prs.GetIssueIDs()
issues := make([]*Issue, 0, len(issueIDs))
func (prs PullRequestList) LoadRepositories(ctx context.Context) error {
repoIDs := prs.getRepositoryIDs()
reposMap := make(map[int64]*repo_model.Repository, len(repoIDs))
if err := db.GetEngine(ctx).
Where("id > 0").
In("id", issueIDs).
Find(&issues); err != nil {
return fmt.Errorf("find issues: %w", err)
}
set := make(map[int64]*Issue)
for i := range issues {
set[issues[i].ID] = issues[i]
In("id", repoIDs).
Find(&reposMap); err != nil {
return fmt.Errorf("find repos: %w", err)
}
for _, pr := range prs {
pr.Issue = set[pr.IssueID]
/*
Old code:
pr.Issue.PullRequest = pr // panic here means issueIDs and prs are not in sync
It's worth panic because it's almost impossible to happen under normal use.
But in integration testing, an asynchronous task could read a database that has been reset.
So returning an error would make more sense, let the caller has a choice to ignore it.
*/
if pr.Issue == nil {
return fmt.Errorf("issues and prs may be not in sync: cannot find issue %v for pr %v: %w", pr.IssueID, pr.ID, util.ErrNotExist)
if pr.BaseRepo == nil {
pr.BaseRepo = reposMap[pr.BaseRepoID]
}
if pr.HeadRepo == nil {
pr.HeadRepo = reposMap[pr.HeadRepoID]
pr.isHeadRepoLoaded = true
}
pr.Issue.PullRequest = pr
}
return nil
}
func (prs PullRequestList) LoadAttributes(ctx context.Context) error {
if _, err := prs.LoadIssues(ctx); err != nil {
return err
}
return nil
}
func (prs PullRequestList) LoadIssues(ctx context.Context) (IssueList, error) {
if len(prs) == 0 {
return nil, nil
}
// Load issues.
issueIDs := prs.GetIssueIDs()
issues := make(map[int64]*Issue, len(issueIDs))
if err := db.GetEngine(ctx).
In("id", issueIDs).
Find(&issues); err != nil {
return nil, fmt.Errorf("find issues: %w", err)
}
issueList := make(IssueList, 0, len(prs))
for _, pr := range prs {
if pr.Issue == nil {
pr.Issue = issues[pr.IssueID]
/*
Old code:
pr.Issue.PullRequest = pr // panic here means issueIDs and prs are not in sync
It's worth panic because it's almost impossible to happen under normal use.
But in integration testing, an asynchronous task could read a database that has been reset.
So returning an error would make more sense, let the caller has a choice to ignore it.
*/
if pr.Issue == nil {
return nil, fmt.Errorf("issues and prs may be not in sync: cannot find issue %v for pr %v: %w", pr.IssueID, pr.ID, util.ErrNotExist)
}
}
pr.Issue.PullRequest = pr
if pr.Issue.Repo == nil {
pr.Issue.Repo = pr.BaseRepo
}
issueList = append(issueList, pr.Issue)
}
return issueList, nil
}
// GetIssueIDs returns all issue ids
func (prs PullRequestList) GetIssueIDs() []int64 {
issueIDs := make([]int64, 0, len(prs))
for i := range prs {
issueIDs = append(issueIDs, prs[i].IssueID)
}
return issueIDs
return container.FilterSlice(prs, func(pr *PullRequest) (int64, bool) {
if pr.Issue == nil {
return pr.IssueID, pr.IssueID > 0
}
return 0, false
})
}
// HasMergedPullRequestInRepo returns whether the user(poster) has merged pull-request in the repo