diff --git a/models/forgejo_migrations/migrate.go b/models/forgejo_migrations/migrate.go index 41197c434d..21a2077d06 100644 --- a/models/forgejo_migrations/migrate.go +++ b/models/forgejo_migrations/migrate.go @@ -101,6 +101,8 @@ var migrations = []*Migration{ NewMigration("Migrate `User.NormalizedFederatedURI` column to extract port & schema into FederatedHost", MigrateNormalizedFederatedURI), // v30 -> v31 NewMigration("Normalize repository.topics to empty slice instead of null", SetTopicsAsEmptySlice), + // v31 -> v32 + NewMigration("Migrate maven package name concatenation", ChangeMavenArtifactConcatenation), } // GetCurrentDBVersion returns the current Forgejo database version. diff --git a/models/forgejo_migrations/v32.go b/models/forgejo_migrations/v32.go new file mode 100644 index 0000000000..bed335ab6b --- /dev/null +++ b/models/forgejo_migrations/v32.go @@ -0,0 +1,414 @@ +// Copyright 2025 The Forgejo Authors. All rights reserved. +// SPDX-License-Identifier: GPL-3.0-or-later + +package forgejo_migrations //nolint:revive + +import ( + "encoding/xml" + "fmt" + "regexp" + "slices" + "sort" + "strconv" + "strings" + + "forgejo.org/models/packages" + "forgejo.org/modules/json" + "forgejo.org/modules/log" + "forgejo.org/modules/packages/maven" + packages_service "forgejo.org/services/packages" + + "golang.org/x/net/context" + "xorm.io/xorm" +) + +var getPackage = packages_service.GetPackageFileStream + +type Snapshot struct { + baseVersion string + date string + time string + build int +} + +type Metadata struct { + XMLName xml.Name `xml:"metadata"` + ModelVersion string `xml:"modelVersion,attr"` + GroupID string `xml:"groupId"` + ArtifactID string `xml:"artifactId"` + Version string `xml:"version"` +} + +type mavenPackageResult struct { + PackageFile *packages.PackageFile `xorm:"extends"` + PackageVersion *packages.PackageVersion `xorm:"extends"` + Package *packages.Package `xorm:"extends"` + PackageName string `xorm:"-"` + Snapshot *Snapshot `xorm:"-"` + 
GroupID string `xorm:"-"`
+	ArtifactID string `xorm:"-"`
+}
+
+// ChangeMavenArtifactConcatenation resolves old dash-concatenated Maven coordinates and regenerates metadata.
+// Note: runs per-owner in a single transaction; failures roll back all owners.
+//
+// It selects the distinct owner IDs that have packages of type 'maven' and calls
+// fixMavenArtifactPerOwner for each of them on one shared session. The first
+// error aborts the loop and is returned without committing; Commit is only
+// called after every owner succeeded.
+func ChangeMavenArtifactConcatenation(x *xorm.Engine) error {
+	sess := x.NewSession()
+	defer sess.Close()
+
+	if err := sess.Begin(); err != nil {
+		return err
+	}
+
+	// get unique owner IDs of Maven packages
+	var ownerIDs []*int64
+	if err := sess.
+		Table("package").
+		Select("package.owner_id").
+		Where("package.type = 'maven'").
+		GroupBy("package.owner_id").
+		OrderBy("package.owner_id DESC").
+		Find(&ownerIDs); err != nil {
+		return err
+	}
+
+	for _, id := range ownerIDs {
+		// A nil ID cannot match any row in an `owner_id = ?` filter, so there is nothing to migrate for it.
+		if id == nil {
+			continue
+		}
+
+		if err := fixMavenArtifactPerOwner(sess, id); err != nil {
+			// id is a *int64: dereference it so %d prints the owner ID and not the pointer address
+			log.Error("owner %d migration failed: %v", *id, err)
+			return err // rollback all
+		}
+	}
+
+	return sess.Commit()
+}
+
+// fixMavenArtifactPerOwner migrates the Maven packages of a single owner.
+// It loads the results to update, resolves package name collisions, processes
+// the package versions and finally the package files, in that order, and
+// returns the first error encountered.
+func fixMavenArtifactPerOwner(sess *xorm.Session, ownerID *int64) error {
+	results, err := getMavenPackageResultsToUpdate(sess, ownerID)
+	if err != nil {
+		return err
+	}
+
+	if err = resolvePackageCollisions(results, sess); err != nil {
+		return err
+	}
+
+	if err = processPackageVersions(results, sess); err != nil {
+		return err
+	}
+
+	return processPackageFiles(results, sess)
+}
+
+// processPackageFiles updates Maven package files and versions in the database
+// Returns an error if any database or processing operation fails.
+func processPackageFiles(results []*mavenPackageResult, sess *xorm.Session) error {
+	// Snapshot results grouped by "<package name>:<lower version>"
+	processedVersion := make(map[string][]*mavenPackageResult)
+
+	for _, r := range results {
+		if r.Snapshot != nil {
+			key := fmt.Sprintf("%s:%s", r.PackageName, r.PackageVersion.LowerVersion)
+			processedVersion[key] = append(processedVersion[key], r)
+		}
+
+		// Only update version_id when it differs
+		if r.PackageVersion.ID != r.PackageFile.VersionID {
+			pattern := strings.TrimSuffix(r.PackageFile.Name, ".pom") + "%"
+			// Per routers/api/packages/maven/maven.go:338, POM files already have the `IsLead`, so no update needed for this prop
+			if _, err := sess.Exec("UPDATE package_file SET version_id = ? WHERE version_id = ? and name like ?", r.PackageVersion.ID, r.PackageFile.VersionID, pattern); err != nil {
+				return err
+			}
+		}
+	}
+
+	// If maven-metadata.xml is missing (snapshot path collision), skip regeneration
+	// Without this metadata, Maven cannot resolve snapshot details
+	for _, packageResults := range processedVersion {
+		// The result with the highest snapshot build number is used as reference
+		sort.Slice(packageResults, func(i, j int) bool {
+			return packageResults[i].Snapshot.build > packageResults[j].Snapshot.build
+		})
+
+		rs := packageResults[0]
+
+		pf, md, err := parseMetadata(sess, rs)
+		if err != nil {
+			return err
+		}
+
+		// Missing metadata, or metadata that describes a different group/artifact, is only reported
+		if pf == nil || md == nil || md.GroupID != rs.GroupID || md.ArtifactID != rs.ArtifactID {
+			log.Warn("no maven-metadata.xml found for (id: %d) [%s:%s]", rs.PackageVersion.ID, rs.PackageName, rs.PackageVersion.Version)
+			continue
+		}
+
+		// parseMetadata looks the file up by the old version id (rs.PackageFile.VersionID), so
+		// pf.VersionID always equals that value. The file has to follow the artifact to the
+		// resolved version instead: compare against, and store, rs.PackageVersion.ID.
+		if pf.VersionID != rs.PackageVersion.ID {
+			pf.VersionID = rs.PackageVersion.ID
+			if _, err := sess.ID(pf.ID).Cols("version_id").Update(pf); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
+// parseMetadata retrieves metadata for a Maven package file from the database and decodes it into a Metadata object.
+// Returns the associated PackageFile, Metadata, and any error encountered during processing.
+func parseMetadata(sess *xorm.Session, snapshot *mavenPackageResult) (*packages.PackageFile, *Metadata, error) {
+	// Look for the maven-metadata.xml file attached to the version the POM file currently points at.
+	var metadataFile packages.PackageFile
+	exists, err := sess.Table(metadataFile).
+		Where("version_id = ?", snapshot.PackageFile.VersionID). // still the old id
+		And("lower_name = ?", "maven-metadata.xml").
+		Get(&metadataFile)
+	switch {
+	case err != nil:
+		return nil, nil, err
+	case !exists:
+		// no metadata file: all three results are nil
+		return nil, nil, nil
+	}
+
+	stream, _, _, err := getPackage(context.Background(), &metadataFile)
+	if err != nil {
+		return nil, nil, err
+	}
+	defer stream.Close()
+
+	var parsed Metadata
+	if err := xml.NewDecoder(stream).Decode(&parsed); err != nil {
+		return nil, nil, err
+	}
+
+	return &metadataFile, &parsed, nil
+}
+
+// processPackageVersions walks the results and brings the package_version rows in line with the resolved packages.
+// The first result of every "<package name>:<version>" key either gets its metadata_json column updated (when its
+// version already belongs to the resolved package) or is inserted as a new version of that package; later results
+// with the same key only take over the ID recorded for the first one. Returns the first database error.
+func processPackageVersions(results []*mavenPackageResult, sess *xorm.Session) error {
+	versionIDByKey := make(map[string]int64)
+
+	for _, res := range results {
+		pv := res.PackageVersion
+		key := fmt.Sprintf("%s:%s", res.PackageName, pv.Version)
+
+		if knownID, seen := versionIDByKey[key]; seen {
+			pv.ID = knownID
+			continue
+		}
+
+		if pv.PackageID != res.Package.ID {
+			// the version still points at another package: create a new version below the resolved package
+			log.Info("Create new maven package version for %s:%s", res.PackageName, pv.Version)
+			pv.ID = 0
+			pv.PackageID = res.Package.ID
+			if _, err := sess.Insert(pv); err != nil {
+				return err
+			}
+		} else {
+			// for non collisions, just update the metadata
+			if _, err := sess.ID(pv.ID).Cols("metadata_json").Update(pv); err != nil {
+				return err
+			}
+		}
+
+		versionIDByKey[key] = pv.ID
+	}
+
+	return nil
+}
+
+// getMavenPackageResultsToUpdate retrieves Maven package results that need updates based on the owner ID.
+// It processes POM metadata, fixes package inconsistencies, and filters corrupted package versions.
+//
+// Every *.pom file of the owner's Maven packages is passed to processPomMetadata. When that fails
+// for a file, a warning is logged and the whole package version the file belongs to is excluded
+// from the returned slice, including results of that version that were processed successfully.
+func getMavenPackageResultsToUpdate(sess *xorm.Session, ownerID *int64) ([]*mavenPackageResult, error) {
+	ctx := context.Background()
+	var candidates []*mavenPackageResult
+	if err := sess.
+		Table("package_file").
+		Select("package_file.*, package_version.*, package.*").
+		Join("INNER", "package_version", "package_version.id = package_file.version_id").
+		Join("INNER", "package", "package.id = package_version.package_id").
+		Where("package_file.lower_name LIKE ?", "%.pom").
+		And("package.type = ?", "maven").
+		And("package.owner_id = ?", ownerID).
+		OrderBy("package_version.id DESC, package_file.id DESC").
+		Find(&candidates); err != nil {
+		return nil, err
+	}
+
+	var results []*mavenPackageResult
+	// IDs of package versions for which at least one POM file could not be processed
+	corruptedVersionIDs := make(map[int64]struct{})
+
+	// fetch actual metadata from blob as all packages needs to be fixed following the new string concatenation
+	for _, r := range candidates {
+		if err := processPomMetadata(ctx, r); err != nil {
+			// Skip corrupted versions; admin intervention may be needed to repair these files.
+			log.Warn("Failed to process package file [id: %d] ignoring package version[%d]: %v", r.PackageFile.ID, r.PackageVersion.ID, err)
+
+			corruptedVersionIDs[r.PackageVersion.ID] = struct{}{}
+
+			continue
+		}
+
+		results = append(results, r)
+		log.Debug("Resolved id [%d] from [%s:%s] to [%s:%s] [Snapshot: %v]", r.Package.ID, r.Package.Name, r.PackageVersion.Version, r.PackageName, r.PackageVersion.Version, r.Snapshot)
+	}
+
+	// Drop all results of a corrupted version in one pass, keeping the order of the remaining ones
+	if len(corruptedVersionIDs) > 0 {
+		results = slices.DeleteFunc(results, func(r *mavenPackageResult) bool {
+			_, corrupted := corruptedVersionIDs[r.PackageVersion.ID]
+			return corrupted
+		})
+	}
+
+	return results, nil
+}
+
+// resolvePackageCollisions handles name collisions by keeping the first existing record and inserting new Package records for subsequent collisions.
+// Each result's Package is renamed to its colon separated PackageName and its ID is set to the resolved package ID.
+// Returns the first database error, or nil.
+func resolvePackageCollisions(results []*mavenPackageResult, sess *xorm.Session) error {
+	// For every current lower name, collect the distinct new names in first-seen order
+	newNamesByOldName := make(map[string][]string)
+	for _, res := range results {
+		oldName := res.Package.LowerName
+		if known := newNamesByOldName[oldName]; !slices.Contains(known, res.PackageName) {
+			newNamesByOldName[oldName] = append(known, res.PackageName)
+		}
+	}
+
+	resolvedIDs := make(map[string]int64)
+
+	for _, res := range results {
+		pkg := res.Package
+		// must be read before the lower name is overwritten below
+		candidates := newNamesByOldName[pkg.LowerName]
+
+		// update to the upcoming package name which is colon separated
+		pkg.Name = res.PackageName
+		pkg.LowerName = res.PackageName
+
+		id, known := resolvedIDs[res.PackageName]
+		switch {
+		case known:
+			// existing entry
+			pkg.ID = id
+		case candidates[0] == res.PackageName:
+			// first package kept the current id
+			resolvedIDs[res.PackageName] = pkg.ID
+
+			if _, err := sess.ID(pkg.ID).Cols("name", "lower_name").Update(pkg); err != nil {
+				return err
+			}
+		default:
+			// create a new entry
+			log.Info("Create new maven package for %s", pkg.Name)
+
+			pkg.ID = 0
+			if _, err := sess.Insert(pkg); err != nil {
+				return err
+			}
+
+			resolvedIDs[res.PackageName] = pkg.ID
+		}
+	}
+
+	return nil
+}
+
+// processPomMetadata processes a Maven package file, parses its POM metadata, and updates PackageVersion information.
+func processPomMetadata(ctx context.Context, mpr *mavenPackageResult) error {
+	s, _, _, err := getPackage(ctx, mpr.PackageFile)
+	if err != nil {
+		return fmt.Errorf("unable to get package stream: %w", err)
+	}
+	defer s.Close()
+
+	// metadata as stored in the POM blob
+	actualPom, err := maven.ParsePackageMetaData(s)
+	if err != nil {
+		return fmt.Errorf("failed to parse POM metadata: %w", err)
+	}
+
+	raw, err := json.Marshal(actualPom)
+	if err != nil {
+		return fmt.Errorf("failed to marshal metadata: %w", err)
+	}
+
+	// metadata as currently stored on the package version row
+	var currentPom *maven.Metadata
+	if err = json.Unmarshal([]byte(mpr.PackageVersion.MetadataJSON), &currentPom); err != nil {
+		return fmt.Errorf("failed to unmarshal metadata: %w", err)
+	}
+
+	// since the rest api can also be (ab)used to upload artifacts wrong, just ignore them
+	if isInvalidMatch(currentPom, actualPom) {
+		return fmt.Errorf("artifact mismatch: actual [%s] expected [%s]", actualPom.ArtifactID, currentPom.ArtifactID)
+	}
+
+	// this will also fix packages that missed its groupID
+	// Ref: https://codeberg.org/forgejo/forgejo/pulls/6329
+	mpr.PackageVersion.MetadataJSON = string(raw)
+
+	// Since Maven packages are case-sensitive, avoid potential clashes and clean-ups
+	// by enforcing consistent case handling similar to RPM packages.
+ mpr.PackageName = fmt.Sprintf("%s:%s", actualPom.GroupID, actualPom.ArtifactID) + + mpr.GroupID = actualPom.GroupID + mpr.ArtifactID = actualPom.ArtifactID + + if strings.HasSuffix(mpr.PackageVersion.Version, "-SNAPSHOT") { + snap, err := extraSnapshotDetails(currentPom, actualPom, mpr) + if err != nil { + return err + } + mpr.Snapshot = snap + } else { + // only snapshots are affected but kept in case of not complete fixtures + expectedFileName := fmt.Sprintf("%s-%s.pom", actualPom.ArtifactID, mpr.PackageVersion.Version) + if mpr.PackageFile.Name != expectedFileName { + log.Warn("invalid package file name - this is a collision which needs to be resolved expected [%s], actual [%s]", expectedFileName, mpr.PackageFile.Name) + } + } + + return nil +} + +// extraSnapshotDetails extracts detailed snapshot information +// Returns a Snapshot object encapsulating the extracted details or an error if the filename is invalid or parsing fails. +func extraSnapshotDetails(currentPom, actualPom *maven.Metadata, mpr *mavenPackageResult) (*Snapshot, error) { + pattern := `^%s-` + + `(?P[\d\.]+)-` + + `(?P\d{8})\.` + + `(?P