forgejo/models/forgejo_migrations/v32.go
Julian Schlarb 21151ea5ce
Some checks are pending
/ release (push) Waiting to run
testing / test-unit (push) Blocked by required conditions
testing / test-e2e (push) Blocked by required conditions
testing / backend-checks (push) Waiting to run
testing / frontend-checks (push) Waiting to run
testing / test-remote-cacher (redis) (push) Blocked by required conditions
testing / test-remote-cacher (valkey) (push) Blocked by required conditions
testing / test-remote-cacher (garnet) (push) Blocked by required conditions
testing / test-remote-cacher (redict) (push) Blocked by required conditions
testing / test-mysql (push) Blocked by required conditions
testing / test-pgsql (push) Blocked by required conditions
testing / test-sqlite (push) Blocked by required conditions
testing / security-check (push) Blocked by required conditions
fix: maven use groupId:artifactId for package name concatenation (#6352)
Second part of #6327 to fix the Maven package naming. This pull request includes:
* Changing the group and artifact IDs from being separated by `-` to `:` as suggested by [Maven](https://maven.apache.org/pom.html#Maven_Coordinates).
* Making Maven package names case-sensitive
* Migrating the database to:
  * Handle collisions of package names (e.g., groupId: foo- with artifactId: bar and groupId: foo with artifactId: -bar) by moving them into their own packages.
  * Fix the missing group ID issue (#6329).
  * Update lower_name to match the name value for Maven packages to make it case-sensitive.

## Checklist

The [contributor guide](https://forgejo.org/docs/next/contributor/) contains information that will be helpful to first time contributors. There also are a few [conditions for merging Pull Requests in Forgejo repositories](https://codeberg.org/forgejo/governance/src/branch/main/PullRequestsAgreement.md). You are also welcome to join the [Forgejo development chatroom](https://matrix.to/#/#forgejo-development:matrix.org).

### Tests

- I added test coverage for Go changes...
  - [x] in their respective `*_test.go` for unit tests.
  - [x] in the `tests/integration` directory if it involves interactions with a live Forgejo server.
- I added test coverage for JavaScript changes...
  - [ ] in `web_src/js/*.test.js` if it can be unit tested.
  - [ ] in `tests/e2e/*.test.e2e.js` if it requires interactions with a live Forgejo server (see also the [developer guide for JavaScript testing](https://codeberg.org/forgejo/forgejo/src/branch/forgejo/tests/e2e/README.md#end-to-end-tests)).

### Documentation

- [ ] I created a pull request [to the documentation](https://codeberg.org/forgejo/docs) to explain to Forgejo users how to use this change.
- [x] I did not document these changes and I do not expect someone else to do it.

### Release notes

- [ ] I do not want this change to show in the release notes.
- [x] I want the title to show in the release notes with a link to this pull request.
- [ ] I want the content of the `release-notes/<pull request number>.md` to be used for the release notes instead of the title.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/6352
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Co-authored-by: Julian Schlarb <julian.schlarb@denktmit.de>
Co-committed-by: Julian Schlarb <julian.schlarb@denktmit.de>
2025-06-01 09:02:29 +02:00

414 lines
13 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2025 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: GPL-3.0-or-later
package forgejo_migrations //nolint:revive
import (
"encoding/xml"
"fmt"
"regexp"
"slices"
"sort"
"strconv"
"strings"
"forgejo.org/models/packages"
"forgejo.org/modules/json"
"forgejo.org/modules/log"
"forgejo.org/modules/packages/maven"
packages_service "forgejo.org/services/packages"
"golang.org/x/net/context"
"xorm.io/xorm"
)
// getPackage opens the stored content of a package file. Every blob read in
// this migration goes through this variable instead of calling
// packages_service.GetPackageFileStream directly.
// NOTE(review): presumably kept as a variable so tests can substitute a fake
// blob source — confirm against the accompanying *_test.go.
var getPackage = packages_service.GetPackageFileStream
// Snapshot holds the parts of a timestamped Maven snapshot POM file name of
// the form "<artifactId>-<baseVersion>-<date>.<time>-<build>.pom".
type Snapshot struct {
	// baseVersion is the dotted numeric version preceding the timestamp,
	// date the eight-digit and time the six-digit part of the timestamp.
	baseVersion, date, time string
	// build is the trailing build number; the snapshot with the highest
	// build number is treated as the most recent one.
	build int
}
// Metadata is the subset of a maven-metadata.xml document that this migration
// decodes: the root <metadata> element with its coordinates.
type Metadata struct {
// XMLName binds the struct to the <metadata> root element.
XMLName xml.Name `xml:"metadata"`
// ModelVersion is read from the modelVersion attribute of the root element.
ModelVersion string `xml:"modelVersion,attr"`
// GroupID is read from the <groupId> child element.
GroupID string `xml:"groupId"`
// ArtifactID is read from the <artifactId> child element.
ArtifactID string `xml:"artifactId"`
// Version is read from the <version> child element.
Version string `xml:"version"`
}
// mavenPackageResult is one row of the package_file / package_version /
// package join performed for each POM file, plus values derived from the POM
// content while the migration runs.
type mavenPackageResult struct {
// The three embedded records below are filled from the joined query.
PackageFile *packages.PackageFile `xorm:"extends"`
PackageVersion *packages.PackageVersion `xorm:"extends"`
Package *packages.Package `xorm:"extends"`
// PackageName is the new "groupId:artifactId" name computed from the POM;
// it is not mapped to a column.
PackageName string `xorm:"-"`
// Snapshot is set only for versions ending in "-SNAPSHOT"; nil otherwise.
Snapshot *Snapshot `xorm:"-"`
// GroupID and ArtifactID are copied from the parsed POM.
GroupID string `xorm:"-"`
ArtifactID string `xorm:"-"`
}
// ChangeMavenArtifactConcatenation migrates Maven packages from the old
// dash-concatenated "groupId-artifactId" naming to "groupId:artifactId".
//
// Every owner that has at least one Maven package is processed in turn, all
// inside a single session transaction that is committed only after the last
// owner succeeded. On the first failure the function returns without
// committing, so no owner's changes are persisted.
// NOTE(review): this relies on the deferred sess.Close() discarding the
// uncommitted transaction — confirm against xorm.
func ChangeMavenArtifactConcatenation(x *xorm.Engine) error {
	sess := x.NewSession()
	defer sess.Close()

	if err := sess.Begin(); err != nil {
		return err
	}

	// get unique owner IDs of Maven packages
	var ownerIDs []*int64
	if err := sess.
		Table("package").
		Select("package.owner_id").
		Where("package.type = 'maven'").
		GroupBy("package.owner_id").
		OrderBy("package.owner_id DESC").
		Find(&ownerIDs); err != nil {
		return err
	}

	for _, id := range ownerIDs {
		if err := fixMavenArtifactPerOwner(sess, id); err != nil {
			// id is a pointer: formatting it with %d would print a memory
			// address, so dereference it to log the actual owner ID.
			if id != nil {
				log.Error("owner %d migration failed: %v", *id, err)
			} else {
				log.Error("owner <nil> migration failed: %v", err)
			}
			return err // rollback all
		}
	}

	return sess.Commit()
}
// fixMavenArtifactPerOwner runs the migration steps for a single owner:
// it loads the POM-backed results, resolves package name collisions, fixes up
// the package versions and finally re-links the package files. The first
// failing step aborts the sequence and its error is returned unchanged.
func fixMavenArtifactPerOwner(sess *xorm.Session, ownerID *int64) error {
	pending, loadErr := getMavenPackageResultsToUpdate(sess, ownerID)
	if loadErr != nil {
		return loadErr
	}

	if collisionErr := resolvePackageCollisions(pending, sess); collisionErr != nil {
		return collisionErr
	}

	if versionErr := processPackageVersions(pending, sess); versionErr != nil {
		return versionErr
	}

	return processPackageFiles(pending, sess)
}
// processPackageFiles re-links package files to the version row each result
// now points at and then inspects the maven-metadata.xml of every snapshot
// version. It returns the first database or stream error encountered.
func processPackageFiles(results []*mavenPackageResult, sess *xorm.Session) error {
	// Snapshot results grouped by "<package name>:<lower version>".
	snapshotsByVersion := make(map[string][]*mavenPackageResult)

	for _, res := range results {
		if res.Snapshot != nil {
			versionKey := fmt.Sprintf("%s:%s", res.PackageName, res.PackageVersion.LowerVersion)
			snapshotsByVersion[versionKey] = append(snapshotsByVersion[versionKey], res)
		}

		// Nothing to re-link when the file already belongs to the target version.
		targetVersionID, currentVersionID := res.PackageVersion.ID, res.PackageFile.VersionID
		if targetVersionID == currentVersionID {
			continue
		}

		// Move every file that shares the POM's base name (jar, checksums, ...).
		// Per routers/api/packages/maven/maven.go:338, POM files already have the `IsLead`, so no update needed for this prop
		namePattern := strings.TrimSuffix(res.PackageFile.Name, ".pom") + "%"
		if _, err := sess.Exec("UPDATE package_file SET version_id = ? WHERE version_id = ? and name like ?", targetVersionID, currentVersionID, namePattern); err != nil {
			return err
		}
	}

	// If maven-metadata.xml is missing (snapshot path collision), skip regeneration
	// Without this metadata, Maven cannot resolve snapshot details
	for _, group := range snapshotsByVersion {
		// Highest build number first; that entry represents the group.
		sort.Slice(group, func(a, b int) bool {
			return group[a].Snapshot.build > group[b].Snapshot.build
		})
		latest := group[0]

		metaFile, meta, err := parseMetadata(sess, latest)
		if err != nil {
			return err
		}

		coordinatesMatch := metaFile != nil && meta != nil &&
			meta.GroupID == latest.GroupID && meta.ArtifactID == latest.ArtifactID
		if !coordinatesMatch {
			log.Warn("no maven-metadata.xml found for (id: %d) [%s:%s]", latest.PackageVersion.ID, latest.PackageName, latest.PackageVersion.Version)
			continue
		}

		// NOTE(review): parseMetadata selects the file by
		// latest.PackageFile.VersionID, so this condition looks like it can
		// never be true as written — confirm the intended comparison.
		if metaFile.VersionID != latest.PackageFile.VersionID {
			if _, err := sess.ID(metaFile.ID).Cols("version_id").Update(metaFile); err != nil {
				return err
			}
		}
	}

	return nil
}
// parseMetadata looks up the maven-metadata.xml file stored under the version
// ID currently recorded on snapshot's package file, opens its content and
// decodes it as XML.
//
// It returns (nil, nil, nil) when no such file exists, and a non-nil error
// when the lookup, opening the content, or decoding fails.
func parseMetadata(sess *xorm.Session, snapshot *mavenPackageResult) (*packages.PackageFile, *Metadata, error) {
	var metaFile packages.PackageFile

	exists, err := sess.Table(metaFile).
		Where("version_id = ?", snapshot.PackageFile.VersionID). // still the old id
		And("lower_name = ?", "maven-metadata.xml").
		Get(&metaFile)
	switch {
	case err != nil:
		return nil, nil, err
	case !exists:
		return nil, nil, nil
	}

	stream, _, _, err := getPackage(context.Background(), &metaFile)
	if err != nil {
		return nil, nil, err
	}
	defer stream.Close()

	var decoded Metadata
	if err := xml.NewDecoder(stream).Decode(&decoded); err != nil {
		return nil, nil, err
	}

	return &metaFile, &decoded, nil
}
// processPackageVersions makes sure every result points at a package version
// row that belongs to its (possibly newly created) package.
//
// The first result seen for a "<package name>:<version>" key either has its
// metadata_json column updated (the version already belongs to the package)
// or is inserted as a new version row under the package. Later results with
// the same key simply adopt the ID recorded for the first one.
func processPackageVersions(results []*mavenPackageResult, sess *xorm.Session) error {
	versionIDByKey := make(map[string]int64)

	for _, res := range results {
		version := res.PackageVersion
		key := fmt.Sprintf("%s:%s", res.PackageName, version.Version)

		if knownID, seen := versionIDByKey[key]; seen {
			version.ID = knownID
			continue
		}

		if version.PackageID != res.Package.ID {
			// The version was attached to a different package: create a copy
			// under the resolved package. ID is zeroed before the insert and
			// read back afterwards.
			log.Info("Create new maven package version for %s:%s", res.PackageName, version.Version)
			version.ID = 0
			version.PackageID = res.Package.ID
			if _, err := sess.Insert(version); err != nil {
				return err
			}
		} else {
			// for non collisions, just update the metadata
			if _, err := sess.ID(version.ID).Cols("metadata_json").Update(version); err != nil {
				return err
			}
		}

		versionIDByKey[key] = version.ID
	}

	return nil
}
// getMavenPackageResultsToUpdate loads every POM file of the given owner's
// Maven packages together with its version and package rows, and enriches
// each row with the coordinates parsed from the POM content.
//
// Rows whose POM cannot be processed are logged and dropped. Because one
// version can have several POM files, every other row that belongs to the
// same package version is dropped as well, so a version is either migrated
// completely or left untouched.
func getMavenPackageResultsToUpdate(sess *xorm.Session, ownerID *int64) ([]*mavenPackageResult, error) {
	ctx := context.Background()

	var candidates []*mavenPackageResult
	if err := sess.
		Table("package_file").
		Select("package_file.*, package_version.*, package.*").
		Join("INNER", "package_version", "package_version.id = package_file.version_id").
		Join("INNER", "package", "package.id = package_version.package_id").
		Where("package_file.lower_name LIKE ?", "%.pom").
		And("package.type = ?", "maven").
		And("package.owner_id = ?", ownerID).
		OrderBy("package_version.id DESC, package_file.id DESC").
		Find(&candidates); err != nil {
		return nil, err
	}

	var results []*mavenPackageResult
	// Set of version IDs that had at least one unprocessable POM.
	corruptedVersionIDs := make(map[int64]struct{})

	// fetch actual metadata from blob as all packages needs to be fixed following the new string concatenation
	for _, r := range candidates {
		if err := processPomMetadata(ctx, r); err != nil {
			// Skip corrupted versions; admin intervention may be needed to repair these files.
			log.Warn("Failed to process package file [id: %d] ignoring package version[%d]: %v", r.PackageFile.ID, r.PackageVersion.ID, err)
			corruptedVersionIDs[r.PackageVersion.ID] = struct{}{}
			continue
		}

		results = append(results, r)
		log.Debug("Resolved id [%d] from [%s:%s] to [%s:%s] [Snapshot: %v]", r.Package.ID, r.Package.Name, r.PackageVersion.Version, r.PackageName, r.PackageVersion.Version, r.Snapshot)
	}

	// A corrupted POM may be discovered after sibling rows of the same
	// version were already accepted, so filter in a second pass. A single
	// set-based pass replaces the previous nested loop.
	if len(corruptedVersionIDs) > 0 {
		results = slices.DeleteFunc(results, func(r *mavenPackageResult) bool {
			_, corrupted := corruptedVersionIDs[r.PackageVersion.ID]
			return corrupted
		})
	}

	return results, nil
}
// resolvePackageCollisions renames packages to their new colon-separated name
// and splits packages whose old name was shared by several new names.
//
// For each old lower_name, the first new name encountered keeps the existing
// package row (its name and lower_name are updated in place); every other new
// name gets a freshly inserted package row. Each result's Package.ID is set
// to the ID of the row its new name resolved to. Only an error is returned.
func resolvePackageCollisions(results []*mavenPackageResult, sess *xorm.Session) error {
	// New names seen per old lower_name, in order of first appearance.
	newNamesByOldName := make(map[string][]string)
	for _, res := range results {
		oldName := res.Package.LowerName
		if seen := newNamesByOldName[oldName]; !slices.Contains(seen, res.PackageName) {
			newNamesByOldName[oldName] = append(seen, res.PackageName)
		}
	}

	resolvedIDs := make(map[string]int64)
	for _, res := range results {
		pkg := res.Package
		// Must be read before LowerName is overwritten below.
		newNames := newNamesByOldName[pkg.LowerName]

		// update to the upcoming package name which is colon separated
		pkg.Name = res.PackageName
		pkg.LowerName = res.PackageName

		resolvedID, alreadyResolved := resolvedIDs[res.PackageName]
		switch {
		case alreadyResolved:
			// existing entry
			pkg.ID = resolvedID
		case newNames[0] == res.PackageName:
			// first package kept the current id
			resolvedIDs[res.PackageName] = pkg.ID
			if _, err := sess.ID(pkg.ID).Cols("name", "lower_name").Update(pkg); err != nil {
				return err
			}
		default:
			// create a new entry
			log.Info("Create new maven package for %s", pkg.Name)
			pkg.ID = 0
			if _, err := sess.Insert(pkg); err != nil {
				return err
			}
			resolvedIDs[res.PackageName] = pkg.ID
		}
	}

	return nil
}
// processPomMetadata reads the POM behind mpr.PackageFile, parses it and
// fills in the derived fields of mpr: the refreshed PackageVersion.MetadataJSON,
// PackageName ("groupId:artifactId"), GroupID, ArtifactID and, for versions
// ending in "-SNAPSHOT", Snapshot.
//
// An error is returned when the content cannot be opened or parsed, when the
// stored version metadata cannot be decoded (including a JSON null), when the
// stored and actual coordinates do not match, or when a snapshot file name is
// invalid. Underlying errors are wrapped so callers can inspect them.
func processPomMetadata(ctx context.Context, mpr *mavenPackageResult) error {
	s, _, _, err := getPackage(ctx, mpr.PackageFile)
	if err != nil {
		return fmt.Errorf("unable to get package stream: %w", err)
	}
	defer s.Close()

	actualPom, err := maven.ParsePackageMetaData(s)
	if err != nil {
		return fmt.Errorf("failed to parse POM metadata: %w", err)
	}

	raw, err := json.Marshal(actualPom)
	if err != nil {
		return fmt.Errorf("failed to marshal metadata: %w", err)
	}

	var currentPom *maven.Metadata
	if err = json.Unmarshal([]byte(mpr.PackageVersion.MetadataJSON), &currentPom); err != nil {
		return fmt.Errorf("failed to unmarshal metadata: %w", err)
	}
	// A stored JSON null decodes without error but leaves the pointer nil;
	// report it instead of dereferencing nil below.
	if currentPom == nil {
		return fmt.Errorf("failed to unmarshal metadata: stored metadata is null")
	}

	// since the rest api can also be (ab)used to upload artifacts wrong, just ignore them
	if isInvalidMatch(currentPom, actualPom) {
		return fmt.Errorf("artifact mismatch: actual [%s] expected [%s]", actualPom.ArtifactID, currentPom.ArtifactID)
	}

	// this will also fix packages that missed its groupID
	// Ref: https://codeberg.org/forgejo/forgejo/pulls/6329
	mpr.PackageVersion.MetadataJSON = string(raw)

	// Since Maven packages are case-sensitive, avoid potential clashes and clean-ups
	// by enforcing consistent case handling similar to RPM packages.
	mpr.PackageName = fmt.Sprintf("%s:%s", actualPom.GroupID, actualPom.ArtifactID)
	mpr.GroupID = actualPom.GroupID
	mpr.ArtifactID = actualPom.ArtifactID

	if strings.HasSuffix(mpr.PackageVersion.Version, "-SNAPSHOT") {
		snap, err := extraSnapshotDetails(currentPom, actualPom, mpr)
		if err != nil {
			return err
		}
		mpr.Snapshot = snap
		return nil
	}

	// only snapshots are affected but kept in case of not complete fixtures
	expectedFileName := fmt.Sprintf("%s-%s.pom", actualPom.ArtifactID, mpr.PackageVersion.Version)
	if mpr.PackageFile.Name != expectedFileName {
		log.Warn("invalid package file name - this is a collision which needs to be resolved expected [%s], actual [%s]", expectedFileName, mpr.PackageFile.Name)
	}

	return nil
}
// extraSnapshotDetails parses a timestamped snapshot POM file name of the form
// "<artifactId>-<baseVersion>-<date>.<time>-<build>.pom" into a Snapshot.
//
// The name is first checked against the artifact ID from the stored metadata
// (currentPom); a mismatch is only logged as a warning. It is then matched
// against the artifact ID from the POM itself (actualPom). An error is
// returned when that match fails or when the build number does not fit an int.
func extraSnapshotDetails(currentPom, actualPom *maven.Metadata, mpr *mavenPackageResult) (*Snapshot, error) {
	const pattern = `^%s-` +
		`(?P<baseVersion>[\d\.]+)-` +
		`(?P<date>\d{8})\.` +
		`(?P<time>\d{6})-` +
		`(?P<build>\d+)\.pom$`

	fileName := mpr.PackageFile.Name

	// The artifact IDs are quoted, so compiling the pattern cannot fail.
	storedRe := regexp.MustCompile(fmt.Sprintf(pattern, regexp.QuoteMeta(currentPom.ArtifactID)))
	if !storedRe.MatchString(fileName) {
		log.Warn("invalid package file name - this is a collision which needs to be resolved %s", fileName)
	}

	re := regexp.MustCompile(fmt.Sprintf(pattern, regexp.QuoteMeta(actualPom.ArtifactID)))
	match := re.FindStringSubmatch(fileName)
	if match == nil {
		return nil, fmt.Errorf("invalid snapshot filename: %s", fileName)
	}

	// The pattern guarantees digits only, but an arbitrarily long run can
	// still overflow int; surface that instead of using a clamped value.
	buildNum, err := strconv.Atoi(match[re.SubexpIndex("build")])
	if err != nil {
		return nil, fmt.Errorf("invalid snapshot build number in %s: %w", fileName, err)
	}

	return &Snapshot{
		baseVersion: match[re.SubexpIndex("baseVersion")],
		date:        match[re.SubexpIndex("date")],
		time:        match[re.SubexpIndex("time")],
		build:       buildNum,
	}, nil
}
// isInvalidMatch reports whether the stored metadata (current) disagrees with
// the coordinates parsed from the POM (actual).
//
// Both sides are compared in their dash-joined "groupId-artifactId" form. The
// stored value is also accepted when it equals "-artifactId", which accounts
// for an earlier bug that sometimes omitted the groupID.
func isInvalidMatch(current, actual *maven.Metadata) bool {
	stored := current.GroupID + "-" + current.ArtifactID

	switch stored {
	case actual.GroupID + "-" + actual.ArtifactID, // full coordinates
		"-" + actual.ArtifactID: // groupID missing
		return false
	}
	return true
}