Add API to serve blob or LFS file content (#19689)

* Add LFS API

* Update routers/api/v1/repo/file.go

Co-authored-by: Gusted <williamzijl7@hotmail.com>

* Apply suggestions

* Apply suggestions

* Update routers/api/v1/repo/file.go

Co-authored-by: Gusted <williamzijl7@hotmail.com>

* Report errors

* ADd test

* Use own repo for test

* Use different repo name

* Improve handling

* Slight restructures

1. Avoid reading the blob data multiple times
2. Ensure that caching is only checked when about to serve the blob/lfs
3. Avoid nesting by returning early
4. Make log message a bit more clear
5. Ensure that the dataRc is closed by defer when passed to ServeData

Signed-off-by: Andrew Thornton <art27@cantab.net>

Co-authored-by: Gusted <williamzijl7@hotmail.com>
Co-authored-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
This commit is contained in:
qwerty287 2022-06-04 15:17:53 +02:00 committed by GitHub
parent 14d96ff7ac
commit df9612bb53
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 245 additions and 0 deletions

View file

@ -6,8 +6,10 @@
package repo
import (
"bytes"
"encoding/base64"
"fmt"
"io"
"net/http"
"path"
"time"
@ -18,7 +20,11 @@ import (
"code.gitea.io/gitea/modules/cache"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/httpcache"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/storage"
api "code.gitea.io/gitea/modules/structs"
"code.gitea.io/gitea/modules/web"
"code.gitea.io/gitea/routers/common"
@ -75,6 +81,142 @@ func GetRawFile(ctx *context.APIContext) {
}
}
// GetRawFileOrLFS get a file by repo's path, redirecting to LFS if necessary.
func GetRawFileOrLFS(ctx *context.APIContext) {
// swagger:operation GET /repos/{owner}/{repo}/media/{filepath} repository repoGetRawFileOrLFS
// ---
// summary: Get a file or it's LFS object from a repository
// parameters:
// - name: owner
// in: path
// description: owner of the repo
// type: string
// required: true
// - name: repo
// in: path
// description: name of the repo
// type: string
// required: true
// - name: filepath
// in: path
// description: filepath of the file to get
// type: string
// required: true
// - name: ref
// in: query
// description: "The name of the commit/branch/tag. Default the repositorys default branch (usually master)"
// type: string
// required: false
// responses:
// 200:
// description: Returns raw file content.
// "404":
// "$ref": "#/responses/notFound"
if ctx.Repo.Repository.IsEmpty {
ctx.NotFound()
return
}
blob, lastModified := getBlobForEntry(ctx)
if ctx.Written() {
return
}
// LFS Pointer files are at most 1024 bytes - so any blob greater than 1024 bytes cannot be an LFS file
if blob.Size() > 1024 {
// First handle caching for the blob
if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) {
return
}
// OK not cached - serve!
if err := common.ServeBlob(ctx.Context, blob, lastModified); err != nil {
ctx.ServerError("ServeBlob", err)
}
return
}
// OK, now the blob is known to have at most 1024 bytes we can simply read this in in one go (This saves reading it twice)
dataRc, err := blob.DataAsync()
if err != nil {
ctx.ServerError("DataAsync", err)
return
}
buf, err := io.ReadAll(dataRc)
if err != nil {
_ = dataRc.Close()
ctx.ServerError("DataAsync", err)
return
}
if err := dataRc.Close(); err != nil {
log.Error("Error whilst closing blob %s reader in %-v. Error: %v", blob.ID, ctx.Context.Repo.Repository, err)
}
// Check if the blob represents a pointer
pointer, _ := lfs.ReadPointer(bytes.NewReader(buf))
// if its not a pointer just serve the data directly
if !pointer.IsValid() {
// First handle caching for the blob
if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) {
return
}
// OK not cached - serve!
if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, blob.Size(), bytes.NewReader(buf)); err != nil {
ctx.ServerError("ServeBlob", err)
}
return
}
// Now check if there is a meta object for this pointer
meta, err := models.GetLFSMetaObjectByOid(ctx.Repo.Repository.ID, pointer.Oid)
// If there isn't one just serve the data directly
if err == models.ErrLFSObjectNotExist {
// Handle caching for the blob SHA (not the LFS object OID)
if httpcache.HandleGenericETagTimeCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`, lastModified) {
return
}
if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, blob.Size(), bytes.NewReader(buf)); err != nil {
ctx.ServerError("ServeBlob", err)
}
return
} else if err != nil {
ctx.ServerError("GetLFSMetaObjectByOid", err)
return
}
// Handle caching for the LFS object OID
if httpcache.HandleGenericETagCache(ctx.Req, ctx.Resp, `"`+pointer.Oid+`"`) {
return
}
if setting.LFS.ServeDirect {
// If we have a signed url (S3, object storage), redirect to this directly.
u, err := storage.LFS.URL(pointer.RelativePath(), blob.Name())
if u != nil && err == nil {
ctx.Redirect(u.String())
return
}
}
lfsDataRc, err := lfs.ReadMetaObject(meta.Pointer)
if err != nil {
ctx.ServerError("ReadMetaObject", err)
return
}
defer lfsDataRc.Close()
if err := common.ServeData(ctx.Context, ctx.Repo.TreePath, meta.Size, lfsDataRc); err != nil {
ctx.ServerError("ServeData", err)
}
}
func getBlobForEntry(ctx *context.APIContext) (blob *git.Blob, lastModified time.Time) {
entry, err := ctx.Repo.Commit.GetTreeEntryByPath(ctx.Repo.TreePath)
if err != nil {