From bbcf9652e60430f46632338b47b9b1f1c2f60669 Mon Sep 17 00:00:00 2001 From: Keith Zantow Date: Tue, 23 Jul 2024 15:25:41 -0400 Subject: [PATCH] feat: add maven relativePath parent resolution Signed-off-by: Keith Zantow --- syft/pkg/cataloger/java/archive_parser.go | 2 +- .../pkg/cataloger/java/archive_parser_test.go | 2 +- syft/pkg/cataloger/java/config.go | 2 +- syft/pkg/cataloger/java/maven_resolver.go | 107 +++++++++++++++--- .../pkg/cataloger/java/maven_resolver_test.go | 40 ++++++- syft/pkg/cataloger/java/parse_pom_xml.go | 7 +- syft/pkg/cataloger/java/parse_pom_xml_test.go | 6 +- .../org/child-one/1.3.6/child-one-1.3.6.pom | 1 - .../parent-one/3.11.0/parent-one-3.11.0.pom | 1 - .../parent-two/13.7.8/parent-two-13.7.8.pom | 4 +- .../pom/relative/child-1/pom.xml | 42 +++++++ .../pom/relative/parent-1/pom.xml | 52 +++++++++ .../pom/relative/parent-2/pom.xml | 60 ++++++++++ 13 files changed, 295 insertions(+), 31 deletions(-) create mode 100644 syft/pkg/cataloger/java/test-fixtures/pom/relative/child-1/pom.xml create mode 100644 syft/pkg/cataloger/java/test-fixtures/pom/relative/parent-1/pom.xml create mode 100644 syft/pkg/cataloger/java/test-fixtures/pom/relative/parent-2/pom.xml diff --git a/syft/pkg/cataloger/java/archive_parser.go b/syft/pkg/cataloger/java/archive_parser.go index d477de000c0..c6ed0377064 100644 --- a/syft/pkg/cataloger/java/archive_parser.go +++ b/syft/pkg/cataloger/java/archive_parser.go @@ -106,7 +106,7 @@ func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg fileInfo: newJavaArchiveFilename(currentFilepath), detectNested: detectNested, cfg: cfg, - maven: newMavenResolver(cfg), + maven: newMavenResolver(nil, cfg), }, cleanupFn, nil } diff --git a/syft/pkg/cataloger/java/archive_parser_test.go b/syft/pkg/cataloger/java/archive_parser_test.go index ab6ddbbaafc..2a2f13569de 100644 --- a/syft/pkg/cataloger/java/archive_parser_test.go +++ b/syft/pkg/cataloger/java/archive_parser_test.go @@ -1080,7 +1080,7 @@ func Test_newPackageFromMavenData(t *testing.T) { } test.expectedParent.Locations = locations - r := newMavenResolver(DefaultArchiveCatalogerConfig()) + r := newMavenResolver(nil, DefaultArchiveCatalogerConfig()) actualPackage := newPackageFromMavenData(context.Background(), &r, test.props, test.project, test.parent, file.NewLocation(virtualPath)) if test.expectedPackage == nil { require.Nil(t, actualPackage) diff --git a/syft/pkg/cataloger/java/config.go b/syft/pkg/cataloger/java/config.go index 63566c1d6fa..df48f12bfde 100644 --- a/syft/pkg/cataloger/java/config.go +++ b/syft/pkg/cataloger/java/config.go @@ -20,7 +20,7 @@ func DefaultArchiveCatalogerConfig() ArchiveCatalogerConfig { UseMavenLocalRepository: false, MavenLocalRepositoryDir: defaultMavenLocalRepoDir(), MavenBaseURL: mavenBaseURL, - MaxParentRecursiveDepth: 5, + MaxParentRecursiveDepth: 10, } } diff --git a/syft/pkg/cataloger/java/maven_resolver.go b/syft/pkg/cataloger/java/maven_resolver.go index bcfbef2029e..a5f64478e36 100644 --- a/syft/pkg/cataloger/java/maven_resolver.go +++ b/syft/pkg/cataloger/java/maven_resolver.go @@ -8,6 +8,7 @@ import ( "io" "net/http" "os" + "path" "path/filepath" "reflect" "regexp" @@ -20,6 +21,7 @@ import ( "github.com/anchore/syft/internal" "github.com/anchore/syft/internal/cache" "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/file" ) // mavenID is the unique identifier for a package in Maven @@ -52,7 +54,7 @@ func (m mavenID) Valid() bool { } func (m mavenID) String() string { - return fmt.Sprintf("groupId: %s artifactId:%s version:%s", m.GroupID, m.ArtifactID, m.Version) + return fmt.Sprintf("(groupId: %s artifactId: %s version: %s)", m.GroupID, m.ArtifactID, m.Version) } var expressionMatcher = regexp.MustCompile("[$][{][^}]+[}]") @@ -60,20 +62,26 @@ var expressionMatcher = regexp.MustCompile("[$][{][^}]+[}]") // mavenResolver is a short-lived utility to resolve maven poms from multiple sources, including: // the scanned filesystem, local maven cache directories, remote maven repositories, and the syft cache type mavenResolver struct { - cfg ArchiveCatalogerConfig - // resolver file.Resolver + cfg ArchiveCatalogerConfig cache cache.Cache resolved map[mavenID]*gopom.Project remoteRequestTimeout time.Duration checkedLocalRepo bool + // fileResolver and pomLocations are used to resolve parent poms by relativePath + fileResolver file.Resolver + pomLocations map[*gopom.Project]file.Location } -func newMavenResolver(cfg ArchiveCatalogerConfig) mavenResolver { +// newMavenResolver constructs a new mavenResolver with the given configuration. +// NOTE: the fileResolver is optional and if provided will be used to resolve parent poms by relative path +func newMavenResolver(fileResolver file.Resolver, cfg ArchiveCatalogerConfig) mavenResolver { return mavenResolver{ cfg: cfg, cache: cache.GetManager().GetCache("java/maven/repo", "v1"), resolved: map[mavenID]*gopom.Project{}, remoteRequestTimeout: time.Second * 10, + fileResolver: fileResolver, + pomLocations: map[*gopom.Project]file.Location{}, } } @@ -340,7 +348,7 @@ func (r *mavenResolver) cacheResolveReader(key string, resolve func() (io.ReadCl return bytes.NewBuffer(contents), err } -// resolveParent attempts to resolve +// resolveParent attempts to resolve the parent for the given pom func (r *mavenResolver) resolveParent(ctx context.Context, pom *gopom.Project) (*gopom.Project, error) { if pom == nil || pom.Parent == nil { return nil, nil @@ -351,10 +359,24 @@ func (r *mavenResolver) resolveParent(ctx context.Context, pom *gopom.Project) ( groupID := r.getPropertyValue(ctx, &pomWithoutParent, parent.GroupID) artifactID := r.getPropertyValue(ctx, &pomWithoutParent, parent.ArtifactID) version := r.getPropertyValue(ctx, &pomWithoutParent, parent.Version) + + // check cache before resolving + parentID := mavenID{groupID, artifactID, version} + if resolvedParent, ok := r.resolved[parentID]; ok { + return resolvedParent, nil + } + + // check if the pom exists in the fileResolver + parentPom := r.findParentPomByRelativePath(ctx, pom, parentID) + if parentPom != nil { + return parentPom, nil + } + + // find POM normally return r.findPom(ctx, groupID, artifactID, version) } -// Try to find the version of a dependency (groupID, artifactID) by searching all parent poms and imported managed dependencies +// findInheritedVersion attempts to find the version of a dependency (groupID, artifactID) by searching all parent poms and imported managed dependencies // //nolint:gocognit func (r *mavenResolver) findInheritedVersion(ctx context.Context, root *gopom.Project, pom *gopom.Project, groupID, artifactID string, resolving ...mavenID) (string, error) { @@ -371,7 +393,7 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, root *gopom.Pr var version string // check for entries in dependencyManagement first - for _, dep := range directManagedDependencies(pom) { + for _, dep := range pomManagedDependencies(pom) { depGroupID := r.getPropertyValue(ctx, root, dep.GroupID) depArtifactID := r.getPropertyValue(ctx, root, dep.ArtifactID) if depGroupID == groupID && depArtifactID == artifactID { @@ -415,7 +437,7 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, root *gopom.Pr } // check for inherited dependencies - for _, dep := range directDependencies(pom) { + for _, dep := range pomDependencies(pom) { depGroupID := r.getPropertyValue(ctx, root, dep.GroupID) depArtifactID := r.getPropertyValue(ctx, root, dep.ArtifactID) if depGroupID == groupID && depArtifactID == artifactID { @@ -448,7 +470,7 @@ func (r *mavenResolver) resolveLicenses(ctx context.Context, pom *gopom.Project, return nil, fmt.Errorf("maximum parent recursive depth (%v) reached: %v", r.cfg.MaxParentRecursiveDepth, processing) } - directLicenses := r.directLicenses(ctx, pom) + directLicenses := r.pomLicenses(ctx, pom) if len(directLicenses) > 0 { return directLicenses, nil } @@ -463,8 +485,8 @@ func (r *mavenResolver) resolveLicenses(ctx context.Context, pom *gopom.Project, return r.resolveLicenses(ctx, parent, append(processing, id)...) } -// directLicenses appends the directly specified licenses with non-empty name or url -func (r *mavenResolver) directLicenses(ctx context.Context, pom *gopom.Project) []gopom.License { +// pomLicenses appends the directly specified licenses with non-empty name or url +func (r *mavenResolver) pomLicenses(ctx context.Context, pom *gopom.Project) []gopom.License { var out []gopom.License for _, license := range deref(pom.Licenses) { // if we find non-empty licenses, return them @@ -477,8 +499,62 @@ func (r *mavenResolver) directLicenses(ctx context.Context, pom *gopom.Project) return out } -// directDependencies returns all direct dependencies in a project, including all defined in profiles -func directDependencies(pom *gopom.Project) []gopom.Dependency { +func (r *mavenResolver) findParentPomByRelativePath(ctx context.Context, pom *gopom.Project, parentID mavenID) *gopom.Project { + // don't resolve if no resolver + if r.fileResolver == nil { + return nil + } + + pomLocation, hasPomLocation := r.pomLocations[pom] + if !hasPomLocation || pom == nil || pom.Parent == nil { + return nil + } + relativePath := r.getPropertyValue(ctx, pom, pom.Parent.RelativePath) + if relativePath == "" { + return nil + } + p := pomLocation.Path() + p = path.Dir(p) + p = path.Join(p, relativePath) + p = path.Clean(p) + parentLocations, err := r.fileResolver.FilesByPath(p) + if err != nil || len(parentLocations) == 0 { + log.Debugf("parent not found in by relative path for: %v looking for: %v at %v err: %v", newMavenIDFromPom(pom), parentID, relativePath, err) + return nil + } + parentLocation := parentLocations[0] + + parentContents, err := r.fileResolver.FileContentsByLocation(parentLocation) + if err != nil || parentContents == nil { + log.Debugf("unable to get parent by relative path for: %v parent: %v at %v err: %v", newMavenIDFromPom(pom), parentID, parentLocation, err) + return nil + } + defer internal.CloseAndLogError(parentContents, parentLocation.RealPath) + parentPom, err := decodePomXML(parentContents) + if err != nil || parentPom == nil { + log.Debugf("unable to parse parent by relative path for: %v parent: %v at %v err: %v", newMavenIDFromPom(pom), parentID, parentLocation, err) + return nil + } + // ensure ids match + groupID := r.getPropertyValue(ctx, pom, parentPom.GroupID) + artifactID := r.getPropertyValue(ctx, pom, parentPom.ArtifactID) + version := r.getPropertyValue(ctx, pom, parentPom.Version) + + newParentID := mavenID{groupID, artifactID, version} + if newParentID != parentID { + log.Debugf("parent IDs do not match resolving parent by relative path for: %v parent: %v at %v, got: %v", newMavenIDFromPom(pom), parentID, parentLocation, newParentID) + return nil + } + + r.resolved[parentID] = parentPom + r.pomLocations[parentPom] = parentLocation // for any future parent relativepath lookups + + return parentPom +} + +// pomDependencies returns all dependencies directly defined in a project, including all defined in profiles. +// does not resolve parent dependencies +func pomDependencies(pom *gopom.Project) []gopom.Dependency { dependencies := deref(pom.Dependencies) for _, profile := range deref(pom.Profiles) { dependencies = append(dependencies, deref(profile.Dependencies)...) @@ -486,8 +562,9 @@ func directDependencies(pom *gopom.Project) []gopom.Dependency { return dependencies } -// directManagedDependencies returns all managed dependencies in a project, including all defined in profiles -func directManagedDependencies(pom *gopom.Project) []gopom.Dependency { +// pomManagedDependencies returns all directly defined managed dependencies in a project pom, including all defined in profiles. +// does not resolve parent managed dependencies +func pomManagedDependencies(pom *gopom.Project) []gopom.Dependency { var dependencies []gopom.Dependency if pom.DependencyManagement != nil { dependencies = append(dependencies, deref(pom.DependencyManagement.Dependencies)...) diff --git a/syft/pkg/cataloger/java/maven_resolver_test.go b/syft/pkg/cataloger/java/maven_resolver_test.go index 97d4e0888a8..575baed6c6c 100644 --- a/syft/pkg/cataloger/java/maven_resolver_test.go +++ b/syft/pkg/cataloger/java/maven_resolver_test.go @@ -2,6 +2,8 @@ package java import ( "context" + "github.com/anchore/syft/internal" + "github.com/anchore/syft/syft/internal/fileresolver" "net/http" "net/http/httptest" "os" @@ -155,7 +157,7 @@ func Test_resolveProperty(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - r := newMavenResolver(DefaultArchiveCatalogerConfig()) + r := newMavenResolver(nil, DefaultArchiveCatalogerConfig()) resolved := r.getPropertyValue(context.Background(), &test.pom, ptr(test.property)) require.Equal(t, test.expected, resolved) }) @@ -186,7 +188,7 @@ func Test_mavenResolverLocal(t *testing.T) { for _, test := range tests { t.Run(test.artifactID, func(t *testing.T) { ctx := context.Background() - r := newMavenResolver(ArchiveCatalogerConfig{ + r := newMavenResolver(nil, ArchiveCatalogerConfig{ UseNetwork: false, UseMavenLocalRepository: true, MavenLocalRepositoryDir: dir, @@ -227,7 +229,7 @@ func Test_mavenResolverRemote(t *testing.T) { for _, test := range tests { t.Run(test.artifactID, func(t *testing.T) { ctx := context.Background() - r := newMavenResolver(ArchiveCatalogerConfig{ + r := newMavenResolver(nil, ArchiveCatalogerConfig{ UseNetwork: true, UseMavenLocalRepository: false, MavenBaseURL: url, @@ -245,6 +247,38 @@ func Test_mavenResolverRemote(t *testing.T) { } } +func Test_relativePathParent(t *testing.T) { + resolver, err := fileresolver.NewFromDirectory("test-fixtures/pom/relative", "") + require.NoError(t, err) + + r := newMavenResolver(resolver, DefaultArchiveCatalogerConfig()) + locs, err := resolver.FilesByPath("child-1/pom.xml") + require.NoError(t, err) + require.Len(t, locs, 1) + + loc := locs[0] + contents, err := resolver.FileContentsByLocation(loc) + require.NoError(t, err) + defer internal.CloseAndLogError(contents, loc.RealPath) + + pom, err := decodePomXML(contents) + require.NoError(t, err) + + r.pomLocations[pom] = loc + + ctx := context.Background() + parent, err := r.resolveParent(ctx, pom) + require.NoError(t, err) + require.Contains(t, r.pomLocations, parent) + + parent, err = r.resolveParent(ctx, parent) + require.NoError(t, err) + require.Contains(t, r.pomLocations, parent) + + got := r.getPropertyValue(ctx, pom, ptr("${commons-exec_subversion}")) + require.Equal(t, "3", got) +} + // testRepo starts a remote maven repo serving all the pom files found in the given directory func testRepo(t *testing.T, dir string) (url string) { // mux is the HTTP request multiplexer used with the test server. diff --git a/syft/pkg/cataloger/java/parse_pom_xml.go b/syft/pkg/cataloger/java/parse_pom_xml.go index 8154f4ed01f..27bee293d1f 100644 --- a/syft/pkg/cataloger/java/parse_pom_xml.go +++ b/syft/pkg/cataloger/java/parse_pom_xml.go @@ -22,16 +22,17 @@ import ( const pomXMLGlob = "*pom.xml" -func (gap genericArchiveParserAdapter) parsePomXML(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { +func (gap genericArchiveParserAdapter) parsePomXML(ctx context.Context, fileResolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { pom, err := decodePomXML(reader) if err != nil || pom == nil { return nil, nil, err } - r := newMavenResolver(gap.cfg) + r := newMavenResolver(fileResolver, gap.cfg) + r.pomLocations[pom] = reader.Location // store the location this pom was resolved in order to attempt parent pom lookups var pkgs []pkg.Package - for _, dep := range directDependencies(pom) { + for _, dep := range pomDependencies(pom) { id := newMavenID(dep.GroupID, dep.ArtifactID, dep.Version) log.Tracef("adding dependency to SBOM: %v", id) p, err := newPackageFromDependency( diff --git a/syft/pkg/cataloger/java/parse_pom_xml_test.go b/syft/pkg/cataloger/java/parse_pom_xml_test.go index d2e355a380c..2806324ab5e 100644 --- a/syft/pkg/cataloger/java/parse_pom_xml_test.go +++ b/syft/pkg/cataloger/java/parse_pom_xml_test.go @@ -329,7 +329,7 @@ func Test_parsePomXMLProject(t *testing.T) { t.Run(test.name, func(t *testing.T) { fixture, err := os.Open(test.project.Path) assert.NoError(t, err) - r := newMavenResolver(ArchiveCatalogerConfig{}) + r := newMavenResolver(nil, ArchiveCatalogerConfig{}) pom, err := gopom.ParseFromReader(fixture) require.NoError(t, err) @@ -338,7 +338,7 @@ func Test_parsePomXMLProject(t *testing.T) { assert.NoError(t, err) assert.Equal(t, test.project, actual) - licenses := r.directLicenses(context.Background(), pom) + licenses := r.pomLicenses(context.Background(), pom) assert.NoError(t, err) assert.Equal(t, test.licenses, toPkgLicenses(&jarLocation, licenses)) }) @@ -399,7 +399,7 @@ func Test_pomParent(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { - r := newMavenResolver(DefaultArchiveCatalogerConfig()) + r := newMavenResolver(nil, DefaultArchiveCatalogerConfig()) assert.Equal(t, test.expected, pomParent(context.Background(), &r, &gopom.Project{Parent: test.input})) }) } diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/child-one/1.3.6/child-one-1.3.6.pom b/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/child-one/1.3.6/child-one-1.3.6.pom index 2d42648e51b..6a72f2fd56d 100644 --- a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/child-one/1.3.6/child-one-1.3.6.pom +++ b/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/child-one/1.3.6/child-one-1.3.6.pom @@ -18,7 +18,6 @@ my.org parent-one 3.11.0 - ../../parent-1/pom.xml child-one diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-one/3.11.0/parent-one-3.11.0.pom b/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-one/3.11.0/parent-one-3.11.0.pom index f4be4a02c43..4dd7d533f73 100644 --- a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-one/3.11.0/parent-one-3.11.0.pom +++ b/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-one/3.11.0/parent-one-3.11.0.pom @@ -6,7 +6,6 @@ my.org parent-two 13.7.8 - ../parent-2 my.org diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-two/13.7.8/parent-two-13.7.8.pom b/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-two/13.7.8/parent-two-13.7.8.pom index fd7b3ba6c3e..5864bfa6cf5 100644 --- a/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-two/13.7.8/parent-two-13.7.8.pom +++ b/syft/pkg/cataloger/java/test-fixtures/pom/maven-repo/my/org/parent-two/13.7.8/parent-two-13.7.8.pom @@ -4,8 +4,8 @@ 4.0.0 my.org - parent-2 - 13 + parent-two + 13.7.8 pom diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/relative/child-1/pom.xml b/syft/pkg/cataloger/java/test-fixtures/pom/relative/child-1/pom.xml new file mode 100644 index 00000000000..63ed7d474dd --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/pom/relative/child-1/pom.xml @@ -0,0 +1,42 @@ + + + 4.0.0 + + + + + my.org + parent-one + 3.11.0 + ../parent-1/pom.xml + + + child-one + + ${project.one}.3.6 + jar + + + 3.12.0 + 4.2 + 4.12 + + + + + org.apache.commons + commons-lang3 + + + + diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/relative/parent-1/pom.xml b/syft/pkg/cataloger/java/test-fixtures/pom/relative/parent-1/pom.xml new file mode 100644 index 00000000000..69ff49eff0c --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/pom/relative/parent-1/pom.xml @@ -0,0 +1,52 @@ + + + 4.0.0 + + my.org + parent-two + 13.7.8 + ../parent-2/pom.xml + + + my.org + parent-one + 3.11.0 + pom + + + + 3.1${project.parent.version}.0 + 4.3 + + + + + + org.apache.commons + commons-lang3 + ${commons.lang3.version} + + + + + + + org.apache.commons + commons-text + ${commons.text.version} + + + org.apache.commons + commons-collections4 + ${commons.collections4.version} + + + junit + junit + ${commons.junit.version} + test + + + + diff --git a/syft/pkg/cataloger/java/test-fixtures/pom/relative/parent-2/pom.xml b/syft/pkg/cataloger/java/test-fixtures/pom/relative/parent-2/pom.xml new file mode 100644 index 00000000000..5864bfa6cf5 --- /dev/null +++ b/syft/pkg/cataloger/java/test-fixtures/pom/relative/parent-2/pom.xml @@ -0,0 +1,60 @@ + + + 4.0.0 + + my.org + parent-two + 13.7.8 + pom + + + 3.14.0 + 4.4 + 1.12.0 + 4.13.2 + 3 + 1 + + + + + + org.apache.commons + commons-lang3 + ${commons.lang3.version} + + + org.apache.commons + commons-text + ${commons.text.version} + + + junit + junit + ${commons.junit.version} + test + + + + + + + org.apache.commons + commons-text + ${commons.text.version} + + + org.apache.commons + commons-collections4 + ${commons.collections4.version} + + + junit + junit + ${commons.junit.version} + test + + + +