Add cron method to gc LFS MetaObjects (#22385)

This PR adds a task to the cron service to allow garbage collection of
LFS meta objects. As repositories may have a large number of
LFSMetaObjects, an updated column is added to this table and it is used
to perform a generational GC to attempt to reduce the amount of work.
(There may need to be a bit more work here but this is probably enough
for the moment.)

Fix #7045

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
zeripath 2023-01-16 19:50:53 +00:00 committed by GitHub
parent 04c97aa364
commit 2cc3a6381c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 255 additions and 35 deletions

View file

@ -6,6 +6,7 @@ package doctor
import (
"context"
"fmt"
"time"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
@ -29,7 +30,20 @@ func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool
return fmt.Errorf("LFS support is disabled")
}
if err := repository.GarbageCollectLFSMetaObjects(ctx, logger, autofix); err != nil {
if err := repository.GarbageCollectLFSMetaObjects(ctx, repository.GarbageCollectLFSMetaObjectsOptions{
Logger: logger,
AutoFix: autofix,
// Only attempt to garbage collect lfs meta objects older than a week as the order of git lfs upload
// and git object upload is not necessarily guaranteed. It's possible to imagine a situation whereby
// an LFS object is uploaded but the git branch is not uploaded immediately, or there are some rapid
// changes in new branches that might lead to lfs objects becoming temporarily unassociated with git
// objects.
//
// It is likely that a week is potentially excessive but it should definitely be enough that any
// unassociated LFS object is genuinely unassociated.
OlderThan: time.Now().Add(-24 * time.Hour * 7),
// We don't set the UpdatedLessRecentlyThan because we want to do a full GC
}); err != nil {
return err
}