Skip to content

Commit

Permalink
fix: in some cases, try to use pom info to guess name and version to …
Browse files Browse the repository at this point in the history
…top level jar (anchore#2080)

Otherwise, small renames like 'hudson-war-2.2.1.war' to 'hudson.war', would cause
syft to incorrectly catalog the archive.

Signed-off-by: Will Murphy <will.murphy@anchore.com>
  • Loading branch information
willmurphyscode authored Aug 31, 2023
1 parent fb633af commit 2e151a2
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 3 deletions.
51 changes: 48 additions & 3 deletions syft/pkg/cataloger/java/archive_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,6 @@ func (j *archiveParser) parse() ([]pkg.Package, []artifact.Relationship, error)
// discoverMainPackage parses the root Java manifest used as the parent package to all discovered nested packages.
func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
// search and parse java manifest files
// TODO: do we want to prefer or check for pom files over manifest here?
manifestMatches := j.fileManifest.GlobMatch(manifestGlob)
if len(manifestMatches) > 1 {
return nil, fmt.Errorf("found multiple manifests in the jar: %+v", manifestMatches)
Expand Down Expand Up @@ -186,9 +185,24 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {

// we use j.location because we want to associate the license declaration with where we discovered the contents in the manifest
licenses := pkg.NewLicensesFromLocation(j.location, selectLicenses(manifest)...)
/*
We should select the name and version from, in this order:
1. pom.properties if we find exactly 1
2. pom.xml if we find exactly 1
3. manifest
4. filename
*/
name, version := j.guessMainPackageNameAndVersionFromPomInfo()
if name == "" {
name = selectName(manifest, j.fileInfo)
}
if version == "" {
version = selectVersion(manifest, j.fileInfo)
}
return &pkg.Package{
Name: selectName(manifest, j.fileInfo),
Version: selectVersion(manifest, j.fileInfo),
// TODO: maybe select name should just have a pom properties in it?
Name: name,
Version: version,
Language: pkg.Java,
Licenses: pkg.NewLicenseSet(licenses...),
Locations: file.NewLocationSet(
Expand All @@ -204,6 +218,37 @@ func (j *archiveParser) discoverMainPackage() (*pkg.Package, error) {
}, nil
}

// guessMainPackageNameAndVersionFromPomInfo makes a best-effort guess at the name and
// version of the archive being parsed from the Maven metadata embedded in it.
//
// A guess is only attempted when the archive contains exactly one pom.properties or
// exactly one pom.xml. A pom.properties is accepted as describing the archive itself
// when its artifact ID is non-empty and starts with the name parsed from the archive
// file name (for example artifact ID "hudson-war" for a file named "hudson.war").
//
// It returns the artifact ID and version of the accepted pom.properties. When that
// pom.properties carries no version, the version of the pom.xml found under the same
// parent path is used instead. Either return value may be empty, in which case the
// caller is expected to fall back to another source.
func (j *archiveParser) guessMainPackageNameAndVersionFromPomInfo() (string, string) {
	pomPropertyMatches := j.fileManifest.GlobMatch(pomPropertiesGlob)
	pomMatches := j.fileManifest.GlobMatch(pomXMLGlob)
	if len(pomPropertyMatches) != 1 && len(pomMatches) != 1 {
		// zero or several candidates of both kinds: nothing identifies the archive itself
		return "", ""
	}

	// Errors are deliberately discarded: this is only a guess, and an empty result makes
	// the caller fall back to other sources for the name and version.
	properties, _ := pomPropertiesByParentPath(j.archivePath, j.location, pomPropertyMatches)
	projects, _ := pomProjectByParentPath(j.archivePath, j.location, pomMatches)

	// Several pom.properties can be plausible at once (the guard above also passes when
	// there is a single pom.xml next to many pom.properties). Map iteration order is
	// unspecified, so pick the lexicographically smallest parent path to keep the result
	// stable from run to run.
	var (
		found      bool
		chosenPath string
	)
	for parentPath, propertiesObj := range properties {
		if propertiesObj.ArtifactID == "" || j.fileInfo.name == "" || !strings.HasPrefix(propertiesObj.ArtifactID, j.fileInfo.name) {
			continue
		}
		if !found || parentPath < chosenPath {
			found = true
			chosenPath = parentPath
		}
	}
	if !found {
		return "", ""
	}

	chosen := properties[chosenPath]
	// the artifact ID is guaranteed non-empty by the plausibility check above
	name := chosen.ArtifactID
	version := chosen.Version
	if version == "" {
		// only consult the pom.xml that sits beside the chosen pom.properties, so that the
		// name and version never come from two unrelated Maven projects
		if proj, exists := projects[chosenPath]; exists {
			version = proj.Version
		}
	}
	return name, version
}

// discoverPkgsFromAllMavenFiles parses Maven POM properties/xml for a given
// parent package, returning all listed Java packages found for each pom
// properties discovered and potentially updating the given parentPkg with new
Expand Down
36 changes: 36 additions & 0 deletions test/integration/regression_java_virtualpath_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package integration

import (
"strings"
"testing"

"github.com/stretchr/testify/assert"

"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)

// TestWarCatalogedCorrectlyIfRenamed is a regression test: a WAR whose file name was
// shortened after download must still be cataloged under its real Maven coordinates,
// with a virtual path equal to the path of the archive itself.
func TestWarCatalogedCorrectlyIfRenamed(t *testing.T) {
	const (
		wrongPURL           = "pkg:maven/hudson/hudson@2.2.1"
		expectedPURL        = "pkg:maven/org.jvnet.hudson.main/hudson-war@2.2.1"
		wrongVirtualPath    = "/hudson.war:org.jvnet.hudson.main:hudson-war"
		expectedVirtualPath = "/hudson.war"
	)

	// the fixture image installs hudson-war@2.2.1 and renames the file to `/hudson.war`
	sbom, _ := catalogFixtureImage(t, "image-java-virtualpath-regression", source.SquashedScope, nil)

	matched := false
	for _, candidate := range sbom.Artifacts.Packages.Sorted() {
		// only Java packages whose name mentions hudson are relevant here
		if candidate.Type != pkg.JavaPkg || !strings.Contains(candidate.Name, "hudson") {
			continue
		}

		assert.NotEqual(t, wrongPURL, candidate.PURL, "must not find bad purl %q", wrongPURL)

		// packages without Java metadata keep an empty virtual path
		var virtualPath string
		if javaMeta, isJava := candidate.Metadata.(pkg.JavaMetadata); isJava {
			virtualPath = javaMeta.VirtualPath
			matched = matched || (candidate.PURL == expectedPURL && virtualPath == expectedVirtualPath)
		}

		assert.NotEqual(t, wrongVirtualPath, virtualPath, "must not find bad virtual path %q", wrongVirtualPath)
	}

	assert.True(t, matched, "must find correct package, but did not")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Fixture image for the renamed-WAR regression test: it contains a Java web archive
# whose file name no longer matches the name it was published under.
FROM alpine:latest

# Download hudson-war 2.2.1 from repo1.maven.org under its original file name.
RUN wget https://repo1.maven.org/maven2/org/jvnet/hudson/main/hudson-war/2.2.1/hudson-war-2.2.1.war

# Rename the archive so the file name drops the "-war" suffix and the version.
RUN mv hudson-war-2.2.1.war hudson.war


0 comments on commit 2e151a2

Please sign in to comment.