forgejo/models/forgejo_migrations/v32.go

415 lines
13 KiB
Go
Raw Normal View History

fix: maven use groupId:artifactId for package name concatenation (#6352) Second part of #6327 to fix the Maven package naming. This pull request includes: * Changing the group and artifact IDs from being separated by `-` to `:` as suggested by [Maven](https://maven.apache.org/pom.html#Maven_Coordinates). * Making Maven package names case-sensitive * Migrating the database to: * Handle collisions of package names (e.g., groupId: foo- with artifactId: bar and groupId: foo with artifactId: -bar) by moving them into their own packages. * Fix the missing group ID issue (#6329). * Update lower_name to match the name value for maven pkgs to make it case-sensetive. ## Checklist The [contributor guide](https://forgejo.org/docs/next/contributor/) contains information that will be helpful to first time contributors. There also are a few [conditions for merging Pull Requests in Forgejo repositories](https://codeberg.org/forgejo/governance/src/branch/main/PullRequestsAgreement.md). You are also welcome to join the [Forgejo development chatroom](https://matrix.to/#/#forgejo-development:matrix.org). ### Tests - I added test coverage for Go changes... - [x] in their respective `*_test.go` for unit tests. - [x] in the `tests/integration` directory if it involves interactions with a live Forgejo server. - I added test coverage for JavaScript changes... - [ ] in `web_src/js/*.test.js` if it can be unit tested. - [ ] in `tests/e2e/*.test.e2e.js` if it requires interactions with a live Forgejo server (see also the [developer guide for JavaScript testing](https://codeberg.org/forgejo/forgejo/src/branch/forgejo/tests/e2e/README.md#end-to-end-tests)). ### Documentation - [ ] I created a pull request [to the documentation](https://codeberg.org/forgejo/docs) to explain to Forgejo users how to use this change. - [x] I did not document these changes and I do not expect someone else to do it. ### Release notes - [ ] I do not want this change to show in the release notes. - [x] I want the title to show in the release notes with a link to this pull request. - [ ] I want the content of the `release-notes/<pull request number>.md` to be be used for the release notes instead of the title. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/6352 Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org> Co-authored-by: Julian Schlarb <julian.schlarb@denktmit.de> Co-committed-by: Julian Schlarb <julian.schlarb@denktmit.de>
2025-06-01 09:02:29 +02:00
// Copyright 2025 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: GPL-3.0-or-later
package forgejo_migrations //nolint:revive
import (
"encoding/xml"
"fmt"
"regexp"
"slices"
"sort"
"strconv"
"strings"
"forgejo.org/models/packages"
"forgejo.org/modules/json"
"forgejo.org/modules/log"
"forgejo.org/modules/packages/maven"
packages_service "forgejo.org/services/packages"
"golang.org/x/net/context"
"xorm.io/xorm"
)
var getPackage = packages_service.GetPackageFileStream
type Snapshot struct {
baseVersion string
date string
time string
build int
}
type Metadata struct {
XMLName xml.Name `xml:"metadata"`
ModelVersion string `xml:"modelVersion,attr"`
GroupID string `xml:"groupId"`
ArtifactID string `xml:"artifactId"`
Version string `xml:"version"`
}
type mavenPackageResult struct {
PackageFile *packages.PackageFile `xorm:"extends"`
PackageVersion *packages.PackageVersion `xorm:"extends"`
Package *packages.Package `xorm:"extends"`
PackageName string `xorm:"-"`
Snapshot *Snapshot `xorm:"-"`
GroupID string `xorm:"-"`
ArtifactID string `xorm:"-"`
}
// ChangeMavenArtifactConcatenation resolves old dash-concatenated Maven coordinates and regenerates metadata.
// Note: runs per-owner in a single transaction; failures roll back all owners.
func ChangeMavenArtifactConcatenation(x *xorm.Engine) error {
sess := x.NewSession()
defer sess.Close()
if err := sess.Begin(); err != nil {
return err
}
// get unique owner IDs of Maven packages
var ownerIDs []*int64
if err := sess.
Table("package").
Select("package.owner_id").
Where("package.type = 'maven'").
GroupBy("package.owner_id").
OrderBy("package.owner_id DESC").
Find(&ownerIDs); err != nil {
return err
}
for _, id := range ownerIDs {
if err := fixMavenArtifactPerOwner(sess, id); err != nil {
log.Error("owner %d migration failed: %v", id, err)
return err // rollback all
}
}
return sess.Commit()
}
func fixMavenArtifactPerOwner(sess *xorm.Session, ownerID *int64) error {
results, err := getMavenPackageResultsToUpdate(sess, ownerID)
if err != nil {
return err
}
if err = resolvePackageCollisions(results, sess); err != nil {
return err
}
if err = processPackageVersions(results, sess); err != nil {
return err
}
return processPackageFiles(results, sess)
}
// processPackageFiles updates Maven package files and versions in the database
// Returns an error if any database or processing operation fails.
func processPackageFiles(results []*mavenPackageResult, sess *xorm.Session) error {
processedVersion := make(map[string][]*mavenPackageResult)
for _, r := range results {
if r.Snapshot != nil {
key := fmt.Sprintf("%s:%s", r.PackageName, r.PackageVersion.LowerVersion)
processedVersion[key] = append(processedVersion[key], r)
}
// Only update version_id when it differs
if r.PackageVersion.ID != r.PackageFile.VersionID {
pattern := strings.TrimSuffix(r.PackageFile.Name, ".pom") + "%"
// Per routers/api/packages/maven/maven.go:338, POM files already have the `IsLead`, so no update needed for this prop
if _, err := sess.Exec("UPDATE package_file SET version_id = ? WHERE version_id = ? and name like ?", r.PackageVersion.ID, r.PackageFile.VersionID, pattern); err != nil {
return err
}
}
}
// If maven-metadata.xml is missing (snapshot path collision), skip regeneration
// Without this metadata, Maven cannot resolve snapshot details
for _, packageResults := range processedVersion {
sort.Slice(packageResults, func(i, j int) bool {
return packageResults[i].Snapshot.build > packageResults[j].Snapshot.build
})
rs := packageResults[0]
pf, md, err := parseMetadata(sess, rs)
if err != nil {
return err
}
if pf != nil && md != nil && md.GroupID == rs.GroupID && md.ArtifactID == rs.ArtifactID {
if pf.VersionID != rs.PackageFile.VersionID {
if _, err := sess.ID(pf.ID).Cols("version_id").Update(pf); err != nil {
return err
}
}
continue
}
log.Warn("no maven-metadata.xml found for (id: %d) [%s:%s]", rs.PackageVersion.ID, rs.PackageName, rs.PackageVersion.Version)
}
return nil
}
// parseMetadata retrieves metadata for a Maven package file from the database and decodes it into a Metadata object.
// Returns the associated PackageFile, Metadata, and any error encountered during processing.
func parseMetadata(sess *xorm.Session, snapshot *mavenPackageResult) (*packages.PackageFile, *Metadata, error) {
ctx := context.Background()
var pf packages.PackageFile
found, err := sess.Table(pf).
Where("version_id = ?", snapshot.PackageFile.VersionID). // still the old id
And("lower_name = ?", "maven-metadata.xml").
Get(&pf)
if err != nil {
return nil, nil, err
}
if !found {
return nil, nil, nil
}
s, _, _, err := getPackage(ctx, &pf)
if err != nil {
return nil, nil, err
}
defer s.Close()
dec := xml.NewDecoder(s)
var m Metadata
if err := dec.Decode(&m); err != nil {
return nil, nil, err
}
return &pf, &m, nil
}
// processPackageVersions processes Maven package versions by updating metadata or inserting new records as necessary.
// It avoids redundant updates by tracking already processed versions using a map. Returns an error on failure.
func processPackageVersions(results []*mavenPackageResult, sess *xorm.Session) error {
processedVersion := make(map[string]int64)
for _, r := range results {
key := fmt.Sprintf("%s:%s", r.PackageName, r.PackageVersion.Version)
if id, ok := processedVersion[key]; ok {
r.PackageVersion.ID = id
continue
}
// for non collisions, just update the metadata
if r.PackageVersion.PackageID == r.Package.ID {
if _, err := sess.ID(r.PackageVersion.ID).Cols("metadata_json").Update(r.PackageVersion); err != nil {
return err
}
} else {
log.Info("Create new maven package version for %s:%s", r.PackageName, r.PackageVersion.Version)
r.PackageVersion.ID = 0
r.PackageVersion.PackageID = r.Package.ID
if _, err := sess.Insert(r.PackageVersion); err != nil {
return err
}
}
processedVersion[key] = r.PackageVersion.ID
}
return nil
}
// getMavenPackageResultsToUpdate retrieves Maven package results that need updates based on the owner ID.
// It processes POM metadata, fixes package inconsistencies, and filters corrupted package versions.
func getMavenPackageResultsToUpdate(sess *xorm.Session, ownerID *int64) ([]*mavenPackageResult, error) {
ctx := context.Background()
var candidates []*mavenPackageResult
if err := sess.
Table("package_file").
Select("package_file.*, package_version.*, package.*").
Join("INNER", "package_version", "package_version.id = package_file.version_id").
Join("INNER", "package", "package.id = package_version.package_id").
Where("package_file.lower_name LIKE ?", "%.pom").
And("package.type = ?", "maven").
And("package.owner_id = ?", ownerID).
OrderBy("package_version.id DESC, package_file.id DESC").
Find(&candidates); err != nil {
return nil, err
}
var results []*mavenPackageResult
var corruptedVersionIDs []int64
// fetch actual metadata from blob as all packages needs to be fixed following the new string concatenation
for _, r := range candidates {
if err := processPomMetadata(ctx, r); err != nil {
// Skip corrupted versions; admin intervention may be needed to repair these files.
log.Warn("Failed to process package file [id: %d] ignoring package version[%d]: %v", r.PackageFile.ID, r.PackageVersion.ID, err)
corruptedVersionIDs = append(corruptedVersionIDs, r.PackageVersion.ID)
continue
}
results = append(results, r)
log.Debug("Resolved id [%d] from [%s:%s] to [%s:%s] [Snapshot: %v]", r.Package.ID, r.Package.Name, r.PackageVersion.Version, r.PackageName, r.PackageVersion.Version, r.Snapshot)
}
for _, corruptedVersionID := range corruptedVersionIDs {
for i := 0; i < len(results); {
if corruptedVersionID == results[i].PackageVersion.ID {
results = append(results[:i], results[i+1:]...)
} else {
i++
}
}
}
return results, nil
}
// resolvePackageCollisions handles name collisions by keeping the first existing record and inserting new Package records for subsequent collisions.
// Returns a map from PackageName to its resolved Package.ID.
func resolvePackageCollisions(results []*mavenPackageResult, sess *xorm.Session) error {
// Group new names by lowerName
collisions := make(map[string][]string)
for _, r := range results {
names := collisions[r.Package.LowerName]
if !slices.Contains(names, r.PackageName) {
collisions[r.Package.LowerName] = append(names, r.PackageName)
}
}
pkgIDByName := make(map[string]int64)
var err error
for _, r := range results {
list := collisions[r.Package.LowerName]
// update to the upcoming package name which is colon separated
r.Package.Name = r.PackageName
r.Package.LowerName = r.PackageName
// exiting entry
if id, ok := pkgIDByName[r.PackageName]; ok {
r.Package.ID = id
// first package kept the current id
} else if list[0] == r.PackageName {
pkgIDByName[r.PackageName] = r.Package.ID
if _, err = sess.ID(r.Package.ID).Cols("name", "lower_name").Update(r.Package); err != nil {
return err
}
// create a new entry
} else {
log.Info("Create new maven package for %s", r.Package.Name)
r.Package.ID = 0
if _, err = sess.Insert(r.Package); err != nil {
return err
}
pkgIDByName[r.PackageName] = r.Package.ID
}
}
return nil
}
// processPomMetadata processes a Maven package file, parses its POM metadata, and updates PackageVersion information.
func processPomMetadata(ctx context.Context, mpr *mavenPackageResult) error {
s, _, _, err := getPackage(ctx, mpr.PackageFile)
if err != nil {
return fmt.Errorf("unable to get package stream: %v", err)
}
defer s.Close()
actualPom, err := maven.ParsePackageMetaData(s)
if err != nil {
return fmt.Errorf("failed to parse POM metadata: %v", err)
}
raw, err := json.Marshal(actualPom)
if err != nil {
return fmt.Errorf("failed to marshal metadata: %v", err)
}
var currentPom *maven.Metadata
if err = json.Unmarshal([]byte(mpr.PackageVersion.MetadataJSON), &currentPom); err != nil {
return fmt.Errorf("failed to unmarshal metadata: %v", err)
}
// since the rest api can also be (ab)used to upload artifacts wrong, just ignore them
if isInvalidMatch(currentPom, actualPom) {
return fmt.Errorf("artifact mismatch: actual [%s] expected [%s]", actualPom.ArtifactID, currentPom.ArtifactID)
}
// this will also fix packages that missed its groupID
// Ref: https://codeberg.org/forgejo/forgejo/pulls/6329
mpr.PackageVersion.MetadataJSON = string(raw)
// Since Maven packages are case-sensitive, avoid potential clashes and clean-ups
// by enforcing consistent case handling similar to RPM packages.
mpr.PackageName = fmt.Sprintf("%s:%s", actualPom.GroupID, actualPom.ArtifactID)
mpr.GroupID = actualPom.GroupID
mpr.ArtifactID = actualPom.ArtifactID
if strings.HasSuffix(mpr.PackageVersion.Version, "-SNAPSHOT") {
snap, err := extraSnapshotDetails(currentPom, actualPom, mpr)
if err != nil {
return err
}
mpr.Snapshot = snap
} else {
// only snapshots are affected but kept in case of not complete fixtures
expectedFileName := fmt.Sprintf("%s-%s.pom", actualPom.ArtifactID, mpr.PackageVersion.Version)
if mpr.PackageFile.Name != expectedFileName {
log.Warn("invalid package file name - this is a collision which needs to be resolved expected [%s], actual [%s]", expectedFileName, mpr.PackageFile.Name)
}
}
return nil
}
// extraSnapshotDetails extracts detailed snapshot information
// Returns a Snapshot object encapsulating the extracted details or an error if the filename is invalid or parsing fails.
func extraSnapshotDetails(currentPom, actualPom *maven.Metadata, mpr *mavenPackageResult) (*Snapshot, error) {
pattern := `^%s-` +
`(?P<baseVersion>[\d\.]+)-` +
`(?P<date>\d{8})\.` +
`(?P<time>\d{6})-` +
`(?P<build>\d+)\.pom$`
re := regexp.MustCompile(fmt.Sprintf(pattern, regexp.QuoteMeta(currentPom.ArtifactID)))
if re.FindStringSubmatch(mpr.PackageFile.Name) == nil {
log.Warn("invalid package file name - this is a collision which needs to be resolved %s", mpr.PackageFile.Name)
}
re = regexp.MustCompile(fmt.Sprintf(pattern, regexp.QuoteMeta(actualPom.ArtifactID)))
match := re.FindStringSubmatch(mpr.PackageFile.Name)
if match == nil {
return nil, fmt.Errorf("invalid snapshot filename: %s", mpr.PackageFile.Name)
}
baseIdx := re.SubexpIndex("baseVersion")
dateIdx := re.SubexpIndex("date")
timeIdx := re.SubexpIndex("time")
buildIdx := re.SubexpIndex("build")
buildNum, _ := strconv.Atoi(match[buildIdx])
return &Snapshot{
baseVersion: match[baseIdx],
date: match[dateIdx],
time: match[timeIdx],
build: buildNum,
}, nil
}
// isInvalidMatch returns true if the stored metadatas groupID:artifactID
// differs from actual values—accounting for an earlier bug that sometimes omitted the groupID.
func isInvalidMatch(current, actual *maven.Metadata) bool {
bare := fmt.Sprintf("-%s", actual.ArtifactID)
full := fmt.Sprintf("%s-%s", actual.GroupID, actual.ArtifactID)
currentID := fmt.Sprintf("%s-%s", current.GroupID, current.ArtifactID)
return currentID != full && currentID != bare
}