forgejo/services/repository/files/cherry_pick.go

141 lines
3.8 KiB
Go
Raw Normal View History

// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package files
import (
"context"
"fmt"
"strings"
"forgejo.org/models"
repo_model "forgejo.org/models/repo"
user_model "forgejo.org/models/user"
"forgejo.org/modules/git"
"forgejo.org/modules/log"
"forgejo.org/modules/structs"
"forgejo.org/services/pull"
)
// CherryPick cherrypicks or reverts a commit to the given repository
func CherryPick(ctx context.Context, repo *repo_model.Repository, doer *user_model.User, revert bool, opts *ApplyDiffPatchOptions) (*structs.FileResponse, error) {
if err := opts.Validate(ctx, repo, doer); err != nil {
return nil, err
}
message := strings.TrimSpace(opts.Message)
author, committer := GetAuthorAndCommitterUsers(opts.Author, opts.Committer, doer)
t, err := NewTemporaryUploadRepository(ctx, repo)
if err != nil {
log.Error("NewTemporaryUploadRepository failed: %v", err)
}
defer t.Close()
Fix reverting a merge commit failing (#28794) Fixes #22236 --- Error occurring currently while trying to revert commit using read-tree -m approach: > 2022/12/26 16:04:43 ...rvices/pull/patch.go:240:AttemptThreeWayMerge() [E] [63a9c61a] Unable to run read-tree -m! Error: exit status 128 - fatal: this operation must be run in a work tree > - fatal: this operation must be run in a work tree We need to clone a non-bare repository for `git read-tree -m` to work. https://github.com/go-gitea/gitea/commit/bb371aee6ecf5e570cdf7b5f7f0d6f47a607a325 adds support to create a non-bare cloned temporary upload repository. After cloning a non-bare temporary upload repository, we [set default index](https://github.com/go-gitea/gitea/blob/main/services/repository/files/cherry_pick.go#L37) (`git read-tree HEAD`). This operation ends up resetting the git index file (see investigation details below), due to which, we need to call `git update-index --refresh` afterward. Here's the diff of the index file before and after we execute SetDefaultIndex: https://www.diffchecker.com/hyOP3eJy/ Notice the **ctime**, **mtime** are set to 0 after SetDefaultIndex. You can reproduce the same behavior using these steps: ```bash $ git clone https://try.gitea.io/me-heer/test.git -s -b main $ cd test $ git read-tree HEAD $ git read-tree -m 1f085d7ed8 1f085d7ed8 9933caed00 error: Entry '1' not uptodate. Cannot merge. ``` After which, we can fix like this: ``` $ git update-index --refresh $ git read-tree -m 1f085d7ed8 1f085d7ed8 9933caed00 ```
2024-01-16 20:36:51 +05:30
if err := t.Clone(opts.OldBranch, false); err != nil {
return nil, err
}
if err := t.SetDefaultIndex(); err != nil {
return nil, err
}
Fix reverting a merge commit failing (#28794) Fixes #22236 --- Error occurring currently while trying to revert commit using read-tree -m approach: > 2022/12/26 16:04:43 ...rvices/pull/patch.go:240:AttemptThreeWayMerge() [E] [63a9c61a] Unable to run read-tree -m! Error: exit status 128 - fatal: this operation must be run in a work tree > - fatal: this operation must be run in a work tree We need to clone a non-bare repository for `git read-tree -m` to work. https://github.com/go-gitea/gitea/commit/bb371aee6ecf5e570cdf7b5f7f0d6f47a607a325 adds support to create a non-bare cloned temporary upload repository. After cloning a non-bare temporary upload repository, we [set default index](https://github.com/go-gitea/gitea/blob/main/services/repository/files/cherry_pick.go#L37) (`git read-tree HEAD`). This operation ends up resetting the git index file (see investigation details below), due to which, we need to call `git update-index --refresh` afterward. Here's the diff of the index file before and after we execute SetDefaultIndex: https://www.diffchecker.com/hyOP3eJy/ Notice the **ctime**, **mtime** are set to 0 after SetDefaultIndex. You can reproduce the same behavior using these steps: ```bash $ git clone https://try.gitea.io/me-heer/test.git -s -b main $ cd test $ git read-tree HEAD $ git read-tree -m 1f085d7ed8 1f085d7ed8 9933caed00 error: Entry '1' not uptodate. Cannot merge. ``` After which, we can fix like this: ``` $ git update-index --refresh $ git read-tree -m 1f085d7ed8 1f085d7ed8 9933caed00 ```
2024-01-16 20:36:51 +05:30
if err := t.RefreshIndex(); err != nil {
return nil, err
}
// Get the commit of the original branch
commit, err := t.GetBranchCommit(opts.OldBranch)
if err != nil {
return nil, err // Couldn't get a commit for the branch
}
// Assigned LastCommitID in opts if it hasn't been set
if opts.LastCommitID == "" {
opts.LastCommitID = commit.ID.String()
} else {
lastCommitID, err := t.gitRepo.ConvertToGitID(opts.LastCommitID)
if err != nil {
return nil, fmt.Errorf("CherryPick: Invalid last commit ID: %w", err)
}
opts.LastCommitID = lastCommitID.String()
if commit.ID.String() != opts.LastCommitID {
return nil, models.ErrCommitIDDoesNotMatch{
GivenCommitID: opts.LastCommitID,
CurrentCommitID: opts.LastCommitID,
}
}
}
commit, err = t.GetCommit(strings.TrimSpace(opts.Content))
if err != nil {
return nil, err
}
parent, err := commit.ParentID(0)
if err != nil {
parent = git.ObjectFormatFromName(repo.ObjectFormatName).EmptyTree()
}
base, right := parent.String(), commit.ID.String()
if revert {
right, base = base, right
}
feat: improved performances when checking for conflicts on pull requests (#7727) - `testPatch` is a function that is called to test a pull request and determine the state of the pull request. Checking for merge conflicts, check if the diff is empty and if the pull request modifies any protected files. - The checking for merge conflict and if the diff is empty used git commands that relied on a working tree to correctly functions. Forgejo store repositories in a bare format which do not contain a working tree. This means that a temporary copy was created every time a pull request had to be re-checked and for large repositories involving quite some I/O interaction. - This patch adjusts those codepaths to instead use newer Git plumbing commands that work without requiring a work tree and can thus be used directly on the bare repository. The merge conflict is now done via [`git-merge-tree(1)`](https://git-scm.com/docs/git-merge-tree/) and checking if the diff is empty is done via [`git-diff-tree(1)`](https://git-scm.com/docs/git-diff-tree). - If the function is called to test a patch where the head and base repository are not the same, then [Git alternate](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefalternateobjectdatabaseaalternateobjectdatabase) is used to make the head commit available in the base repository, this done on a per git command basis via the `GIT_ALTERNATE_OBJECT_DIRECTORIES` environment. - As far as I can understand the documentation and the existing code, there's no edge case that the new code cannot handle. It also results in a cleaner codepath, as the existing code did a lot of checking and merging in a more traditional approach that required a lot of (parsing) code, while the new code offloads this to git and has a trivial parser of the output. - Resolves forgejo/forgejo#7701 - Added exhaustive integration testing. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/7727 Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org> Reviewed-by: Otto <otto@codeberg.org> Co-authored-by: Gusted <postmaster@gusted.xyz> Co-committed-by: Gusted <postmaster@gusted.xyz>
2025-05-16 12:40:38 +00:00
var treeHash string
if git.SupportGitMergeTree {
var conflict bool
treeHash, conflict, _, err = pull.MergeTree(ctx, t.gitRepo, base, opts.LastCommitID, right, nil)
if err != nil {
return nil, fmt.Errorf("failed to three-way merge %s onto %s: %w", right, opts.OldBranch, err)
}
feat: improved performances when checking for conflicts on pull requests (#7727) - `testPatch` is a function that is called to test a pull request and determine the state of the pull request. Checking for merge conflicts, check if the diff is empty and if the pull request modifies any protected files. - The checking for merge conflict and if the diff is empty used git commands that relied on a working tree to correctly functions. Forgejo store repositories in a bare format which do not contain a working tree. This means that a temporary copy was created every time a pull request had to be re-checked and for large repositories involving quite some I/O interaction. - This patch adjusts those codepaths to instead use newer Git plumbing commands that work without requiring a work tree and can thus be used directly on the bare repository. The merge conflict is now done via [`git-merge-tree(1)`](https://git-scm.com/docs/git-merge-tree/) and checking if the diff is empty is done via [`git-diff-tree(1)`](https://git-scm.com/docs/git-diff-tree). - If the function is called to test a patch where the head and base repository are not the same, then [Git alternate](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefalternateobjectdatabaseaalternateobjectdatabase) is used to make the head commit available in the base repository, this done on a per git command basis via the `GIT_ALTERNATE_OBJECT_DIRECTORIES` environment. - As far as I can understand the documentation and the existing code, there's no edge case that the new code cannot handle. It also results in a cleaner codepath, as the existing code did a lot of checking and merging in a more traditional approach that required a lot of (parsing) code, while the new code offloads this to git and has a trivial parser of the output. - Resolves forgejo/forgejo#7701 - Added exhaustive integration testing. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/7727 Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org> Reviewed-by: Otto <otto@codeberg.org> Co-authored-by: Gusted <postmaster@gusted.xyz> Co-committed-by: Gusted <postmaster@gusted.xyz>
2025-05-16 12:40:38 +00:00
if conflict {
return nil, fmt.Errorf("failed to merge due to conflicts")
}
} else {
description := fmt.Sprintf("CherryPick %s onto %s", right, opts.OldBranch)
conflict, _, err := pull.AttemptThreeWayMerge(ctx, t.gitRepo, base, opts.LastCommitID, right, description)
if err != nil {
return nil, fmt.Errorf("failed to three-way merge %s onto %s: %w", right, opts.OldBranch, err)
}
feat: improved performances when checking for conflicts on pull requests (#7727) - `testPatch` is a function that is called to test a pull request and determine the state of the pull request. Checking for merge conflicts, check if the diff is empty and if the pull request modifies any protected files. - The checking for merge conflict and if the diff is empty used git commands that relied on a working tree to correctly functions. Forgejo store repositories in a bare format which do not contain a working tree. This means that a temporary copy was created every time a pull request had to be re-checked and for large repositories involving quite some I/O interaction. - This patch adjusts those codepaths to instead use newer Git plumbing commands that work without requiring a work tree and can thus be used directly on the bare repository. The merge conflict is now done via [`git-merge-tree(1)`](https://git-scm.com/docs/git-merge-tree/) and checking if the diff is empty is done via [`git-diff-tree(1)`](https://git-scm.com/docs/git-diff-tree). - If the function is called to test a patch where the head and base repository are not the same, then [Git alternate](https://git-scm.com/docs/gitglossary#Documentation/gitglossary.txt-aiddefalternateobjectdatabaseaalternateobjectdatabase) is used to make the head commit available in the base repository, this done on a per git command basis via the `GIT_ALTERNATE_OBJECT_DIRECTORIES` environment. - As far as I can understand the documentation and the existing code, there's no edge case that the new code cannot handle. It also results in a cleaner codepath, as the existing code did a lot of checking and merging in a more traditional approach that required a lot of (parsing) code, while the new code offloads this to git and has a trivial parser of the output. - Resolves forgejo/forgejo#7701 - Added exhaustive integration testing. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/7727 Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org> Reviewed-by: Otto <otto@codeberg.org> Co-authored-by: Gusted <postmaster@gusted.xyz> Co-committed-by: Gusted <postmaster@gusted.xyz>
2025-05-16 12:40:38 +00:00
if conflict {
return nil, fmt.Errorf("failed to merge due to conflicts")
}
treeHash, err = t.WriteTree()
if err != nil {
// likely non-sensical tree due to merge conflicts...
return nil, err
}
}
// Now commit the tree
var commitHash string
if opts.Dates != nil {
commitHash, err = t.CommitTreeWithDate("HEAD", author, committer, treeHash, message, opts.Signoff, opts.Dates.Author, opts.Dates.Committer)
} else {
commitHash, err = t.CommitTree("HEAD", author, committer, treeHash, message, opts.Signoff)
}
if err != nil {
return nil, err
}
// Then push this tree to NewBranch
if err := t.Push(doer, commitHash, opts.NewBranch); err != nil {
return nil, err
}
commit, err = t.GetCommit(commitHash)
if err != nil {
return nil, err
}
fileCommitResponse, _ := GetFileCommitResponse(repo, commit) // ok if fails, then will be nil
Add context cache as a request level cache (#22294) To avoid duplicated load of the same data in an HTTP request, we can set a context cache to do that. i.e. Some pages may load a user from a database with the same id in different areas on the same page. But the code is hidden in two different deep logic. How should we share the user? As a result of this PR, now if both entry functions accept `context.Context` as the first parameter and we just need to refactor `GetUserByID` to reuse the user from the context cache. Then it will not be loaded twice on an HTTP request. But of course, sometimes we would like to reload an object from the database, that's why `RemoveContextData` is also exposed. The core context cache is here. It defines a new context ```go type cacheContext struct { ctx context.Context data map[any]map[any]any lock sync.RWMutex } var cacheContextKey = struct{}{} func WithCacheContext(ctx context.Context) context.Context { return context.WithValue(ctx, cacheContextKey, &cacheContext{ ctx: ctx, data: make(map[any]map[any]any), }) } ``` Then you can use the below 4 methods to read/write/del the data within the same context. ```go func GetContextData(ctx context.Context, tp, key any) any func SetContextData(ctx context.Context, tp, key, value any) func RemoveContextData(ctx context.Context, tp, key any) func GetWithContextCache[T any](ctx context.Context, cacheGroupKey string, cacheTargetID any, f func() (T, error)) (T, error) ``` Then let's take a look at how `system.GetString` implement it. ```go func GetSetting(ctx context.Context, key string) (string, error) { return cache.GetWithContextCache(ctx, contextCacheKey, key, func() (string, error) { return cache.GetString(genSettingCacheKey(key), func() (string, error) { res, err := GetSettingNoCache(ctx, key) if err != nil { return "", err } return res.SettingValue, nil }) }) } ``` First, it will check if context data include the setting object with the key. If not, it will query from the global cache which may be memory or a Redis cache. If not, it will get the object from the database. In the end, if the object gets from the global cache or database, it will be set into the context cache. An object stored in the context cache will only be destroyed after the context disappeared.
2023-02-15 21:37:34 +08:00
verification := GetPayloadCommitVerification(ctx, commit)
fileResponse := &structs.FileResponse{
Commit: fileCommitResponse,
Verification: verification,
}
return fileResponse, nil
}