Skip to content

Commit

Permalink
feat: add maven relativePath parent resolution
Browse files Browse the repository at this point in the history
Signed-off-by: Keith Zantow <kzantow@gmail.com>
  • Loading branch information
kzantow committed Jul 23, 2024
1 parent 4a7b5b6 commit bbcf965
Show file tree
Hide file tree
Showing 13 changed files with 295 additions and 31 deletions.
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/java/archive_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func newJavaArchiveParser(reader file.LocationReadCloser, detectNested bool, cfg
fileInfo: newJavaArchiveFilename(currentFilepath),
detectNested: detectNested,
cfg: cfg,
maven: newMavenResolver(cfg),
maven: newMavenResolver(nil, cfg),
}, cleanupFn, nil
}

Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/java/archive_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1080,7 +1080,7 @@ func Test_newPackageFromMavenData(t *testing.T) {
}
test.expectedParent.Locations = locations

r := newMavenResolver(DefaultArchiveCatalogerConfig())
r := newMavenResolver(nil, DefaultArchiveCatalogerConfig())
actualPackage := newPackageFromMavenData(context.Background(), &r, test.props, test.project, test.parent, file.NewLocation(virtualPath))
if test.expectedPackage == nil {
require.Nil(t, actualPackage)
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/java/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func DefaultArchiveCatalogerConfig() ArchiveCatalogerConfig {
UseMavenLocalRepository: false,
MavenLocalRepositoryDir: defaultMavenLocalRepoDir(),
MavenBaseURL: mavenBaseURL,
MaxParentRecursiveDepth: 5,
MaxParentRecursiveDepth: 10,
}
}

Expand Down
107 changes: 92 additions & 15 deletions syft/pkg/cataloger/java/maven_resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"io"
"net/http"
"os"
"path"
"path/filepath"
"reflect"
"regexp"
Expand All @@ -20,6 +21,7 @@ import (
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/cache"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
)

// mavenID is the unique identifier for a package in Maven
Expand Down Expand Up @@ -52,28 +54,34 @@ func (m mavenID) Valid() bool {
}

func (m mavenID) String() string {
return fmt.Sprintf("groupId: %s artifactId:%s version:%s", m.GroupID, m.ArtifactID, m.Version)
return fmt.Sprintf("(groupId: %s artifactId: %s version: %s)", m.GroupID, m.ArtifactID, m.Version)
}

var expressionMatcher = regexp.MustCompile("[$][{][^}]+[}]")

// mavenResolver is a short-lived utility to resolve maven poms from multiple sources, including:
// the scanned filesystem, local maven cache directories, remote maven repositories, and the syft cache
type mavenResolver struct {
cfg ArchiveCatalogerConfig
// resolver file.Resolver
cfg ArchiveCatalogerConfig
cache cache.Cache
resolved map[mavenID]*gopom.Project
remoteRequestTimeout time.Duration
checkedLocalRepo bool
// fileResolver and pomLocations are used to resolve parent poms by relativePath
fileResolver file.Resolver
pomLocations map[*gopom.Project]file.Location
}

func newMavenResolver(cfg ArchiveCatalogerConfig) mavenResolver {
// newMavenResolver builds a mavenResolver for the supplied cataloger configuration.
// The fileResolver may be nil; when it is non-nil it is kept so that parent poms can be
// looked up by relative path. The resolved-pom and pom-location maps start out empty.
func newMavenResolver(fileResolver file.Resolver, cfg ArchiveCatalogerConfig) mavenResolver {
	// timeout applied to requests against remote maven repositories
	const remoteTimeout = 10 * time.Second

	repoCache := cache.GetManager().GetCache("java/maven/repo", "v1")

	return mavenResolver{
		cfg:                  cfg,
		cache:                repoCache,
		resolved:             make(map[mavenID]*gopom.Project),
		remoteRequestTimeout: remoteTimeout,
		fileResolver:         fileResolver,
		pomLocations:         make(map[*gopom.Project]file.Location),
	}
}

Expand Down Expand Up @@ -340,7 +348,7 @@ func (r *mavenResolver) cacheResolveReader(key string, resolve func() (io.ReadCl
return bytes.NewBuffer(contents), err
}

// resolveParent attempts to resolve
// resolveParent attempts to resolve the parent for the given pom
func (r *mavenResolver) resolveParent(ctx context.Context, pom *gopom.Project) (*gopom.Project, error) {
if pom == nil || pom.Parent == nil {
return nil, nil
Expand All @@ -351,10 +359,24 @@ func (r *mavenResolver) resolveParent(ctx context.Context, pom *gopom.Project) (
groupID := r.getPropertyValue(ctx, &pomWithoutParent, parent.GroupID)
artifactID := r.getPropertyValue(ctx, &pomWithoutParent, parent.ArtifactID)
version := r.getPropertyValue(ctx, &pomWithoutParent, parent.Version)

// check cache before resolving
parentID := mavenID{groupID, artifactID, version}
if resolvedParent, ok := r.resolved[parentID]; ok {
return resolvedParent, nil
}

// check if the pom exists in the fileResolver
parentPom := r.findParentPomByRelativePath(ctx, pom, parentID)
if parentPom != nil {
return parentPom, nil
}

// find POM normally
return r.findPom(ctx, groupID, artifactID, version)
}

// Try to find the version of a dependency (groupID, artifactID) by searching all parent poms and imported managed dependencies
// findInheritedVersion attempts to find the version of a dependency (groupID, artifactID) by searching all parent poms and imported managed dependencies
//
//nolint:gocognit
func (r *mavenResolver) findInheritedVersion(ctx context.Context, root *gopom.Project, pom *gopom.Project, groupID, artifactID string, resolving ...mavenID) (string, error) {
Expand All @@ -371,7 +393,7 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, root *gopom.Pr
var version string

// check for entries in dependencyManagement first
for _, dep := range directManagedDependencies(pom) {
for _, dep := range pomManagedDependencies(pom) {
depGroupID := r.getPropertyValue(ctx, root, dep.GroupID)
depArtifactID := r.getPropertyValue(ctx, root, dep.ArtifactID)
if depGroupID == groupID && depArtifactID == artifactID {
Expand Down Expand Up @@ -415,7 +437,7 @@ func (r *mavenResolver) findInheritedVersion(ctx context.Context, root *gopom.Pr
}

// check for inherited dependencies
for _, dep := range directDependencies(pom) {
for _, dep := range pomDependencies(pom) {
depGroupID := r.getPropertyValue(ctx, root, dep.GroupID)
depArtifactID := r.getPropertyValue(ctx, root, dep.ArtifactID)
if depGroupID == groupID && depArtifactID == artifactID {
Expand Down Expand Up @@ -448,7 +470,7 @@ func (r *mavenResolver) resolveLicenses(ctx context.Context, pom *gopom.Project,
return nil, fmt.Errorf("maximum parent recursive depth (%v) reached: %v", r.cfg.MaxParentRecursiveDepth, processing)
}

directLicenses := r.directLicenses(ctx, pom)
directLicenses := r.pomLicenses(ctx, pom)
if len(directLicenses) > 0 {
return directLicenses, nil
}
Expand All @@ -463,8 +485,8 @@ func (r *mavenResolver) resolveLicenses(ctx context.Context, pom *gopom.Project,
return r.resolveLicenses(ctx, parent, append(processing, id)...)
}

// directLicenses appends the directly specified licenses with non-empty name or url
func (r *mavenResolver) directLicenses(ctx context.Context, pom *gopom.Project) []gopom.License {
// pomLicenses appends the directly specified licenses with non-empty name or url
func (r *mavenResolver) pomLicenses(ctx context.Context, pom *gopom.Project) []gopom.License {
var out []gopom.License
for _, license := range deref(pom.Licenses) {
// if we find non-empty licenses, return them
Expand All @@ -477,17 +499,72 @@ func (r *mavenResolver) directLicenses(ctx context.Context, pom *gopom.Project)
return out
}

// directDependencies returns all direct dependencies in a project, including all defined in profiles
func directDependencies(pom *gopom.Project) []gopom.Dependency {
// findParentPomByRelativePath attempts to find the parent of the given pom on the scanned
// filesystem by following the <relativePath> declared in the pom's <parent> section.
//
// The relative path is resolved against the directory of the location recorded for pom in
// r.pomLocations. The literal path is tried first; because Maven also accepts the parent
// project's directory as a relativePath, a path that does not end in ".xml" is retried with
// "pom.xml" appended when the literal lookup yields nothing.
//
// A candidate is accepted only when its groupId/artifactId/version match parentID. On success the
// parent is stored in r.resolved and its location in r.pomLocations. Returns nil when there is no
// fileResolver, no recorded location for pom, no relativePath, or no matching parent was found.
func (r *mavenResolver) findParentPomByRelativePath(ctx context.Context, pom *gopom.Project, parentID mavenID) *gopom.Project {
	// don't resolve if no resolver, or nothing to resolve
	if r.fileResolver == nil || pom == nil || pom.Parent == nil {
		return nil
	}

	pomLocation, hasPomLocation := r.pomLocations[pom]
	if !hasPomLocation {
		return nil
	}
	relativePath := r.getPropertyValue(ctx, pom, pom.Parent.RelativePath)
	if relativePath == "" {
		return nil
	}

	// path.Join also cleans the result, collapsing any "../" segments
	p := path.Join(path.Dir(pomLocation.Path()), relativePath)

	candidates := []string{p}
	if path.Ext(p) != ".xml" {
		// relativePath may reference the parent project's directory rather than the pom file itself
		candidates = append(candidates, path.Join(p, "pom.xml"))
	}

	for _, candidate := range candidates {
		if parentPom := r.loadParentPomAt(ctx, pom, parentID, relativePath, candidate); parentPom != nil {
			return parentPom
		}
	}
	return nil
}

// loadParentPomAt reads and decodes the pom found at path p via the fileResolver and returns it only
// if its ID matches parentID; a matching pom is stored in r.resolved and r.pomLocations.
func (r *mavenResolver) loadParentPomAt(ctx context.Context, pom *gopom.Project, parentID mavenID, relativePath, p string) *gopom.Project {
	parentLocations, err := r.fileResolver.FilesByPath(p)
	if err != nil || len(parentLocations) == 0 {
		log.Debugf("parent not found in by relative path for: %v looking for: %v at %v err: %v", newMavenIDFromPom(pom), parentID, relativePath, err)
		return nil
	}
	// only the first location returned for the path is considered
	parentLocation := parentLocations[0]

	parentContents, err := r.fileResolver.FileContentsByLocation(parentLocation)
	if err != nil || parentContents == nil {
		log.Debugf("unable to get parent by relative path for: %v parent: %v at %v err: %v", newMavenIDFromPom(pom), parentID, parentLocation, err)
		return nil
	}
	defer internal.CloseAndLogError(parentContents, parentLocation.RealPath)

	parentPom, err := decodePomXML(parentContents)
	if err != nil || parentPom == nil {
		log.Debugf("unable to parse parent by relative path for: %v parent: %v at %v err: %v", newMavenIDFromPom(pom), parentID, parentLocation, err)
		return nil
	}

	// ensure ids match
	// NOTE(review): the parent's id fields are resolved with the child pom as property context — confirm this is intended
	groupID := r.getPropertyValue(ctx, pom, parentPom.GroupID)
	artifactID := r.getPropertyValue(ctx, pom, parentPom.ArtifactID)
	version := r.getPropertyValue(ctx, pom, parentPom.Version)

	newParentID := mavenID{groupID, artifactID, version}
	if newParentID != parentID {
		log.Debugf("parent IDs do not match resolving parent by relative path for: %v parent: %v at %v, got: %v", newMavenIDFromPom(pom), parentID, parentLocation, newParentID)
		return nil
	}

	r.resolved[parentID] = parentPom
	r.pomLocations[parentPom] = parentLocation // for any future parent relativepath lookups

	return parentPom
}

// pomDependencies collects the dependencies declared directly in the given pom: the top-level
// dependencies first, followed by the dependencies of each profile in declaration order.
// Dependencies declared in parent poms are not included.
func pomDependencies(pom *gopom.Project) []gopom.Dependency {
	deps := deref(pom.Dependencies)
	for _, prof := range deref(pom.Profiles) {
		deps = append(deps, deref(prof.Dependencies)...)
	}
	return deps
}

// directManagedDependencies returns all managed dependencies in a project, including all defined in profiles
func directManagedDependencies(pom *gopom.Project) []gopom.Dependency {
// pomManagedDependencies returns all directly defined managed dependencies in a project pom, including all defined in profiles.
// does not resolve parent managed dependencies
func pomManagedDependencies(pom *gopom.Project) []gopom.Dependency {
var dependencies []gopom.Dependency
if pom.DependencyManagement != nil {
dependencies = append(dependencies, deref(pom.DependencyManagement.Dependencies)...)
Expand Down
40 changes: 37 additions & 3 deletions syft/pkg/cataloger/java/maven_resolver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package java

import (
"context"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/internal/fileresolver"
"net/http"
"net/http/httptest"
"os"
Expand Down Expand Up @@ -155,7 +157,7 @@ func Test_resolveProperty(t *testing.T) {

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
r := newMavenResolver(DefaultArchiveCatalogerConfig())
r := newMavenResolver(nil, DefaultArchiveCatalogerConfig())
resolved := r.getPropertyValue(context.Background(), &test.pom, ptr(test.property))
require.Equal(t, test.expected, resolved)
})
Expand Down Expand Up @@ -186,7 +188,7 @@ func Test_mavenResolverLocal(t *testing.T) {
for _, test := range tests {
t.Run(test.artifactID, func(t *testing.T) {
ctx := context.Background()
r := newMavenResolver(ArchiveCatalogerConfig{
r := newMavenResolver(nil, ArchiveCatalogerConfig{
UseNetwork: false,
UseMavenLocalRepository: true,
MavenLocalRepositoryDir: dir,
Expand Down Expand Up @@ -227,7 +229,7 @@ func Test_mavenResolverRemote(t *testing.T) {
for _, test := range tests {
t.Run(test.artifactID, func(t *testing.T) {
ctx := context.Background()
r := newMavenResolver(ArchiveCatalogerConfig{
r := newMavenResolver(nil, ArchiveCatalogerConfig{
UseNetwork: true,
UseMavenLocalRepository: false,
MavenBaseURL: url,
Expand All @@ -245,6 +247,38 @@ func Test_mavenResolverRemote(t *testing.T) {
}
}

// Test_relativePathParent checks that resolveParent can walk two levels up from child-1/pom.xml in
// the test-fixtures/pom/relative directory, recording a location for each resolved parent, and that
// a property lookup starting at the child pom then yields the expected value.
func Test_relativePathParent(t *testing.T) {
	ctx := context.Background()

	fsResolver, err := fileresolver.NewFromDirectory("test-fixtures/pom/relative", "")
	require.NoError(t, err)

	mvn := newMavenResolver(fsResolver, DefaultArchiveCatalogerConfig())

	// load the child pom through the same file resolver the maven resolver uses
	childLocations, err := fsResolver.FilesByPath("child-1/pom.xml")
	require.NoError(t, err)
	require.Len(t, childLocations, 1)
	childLocation := childLocations[0]

	reader, err := fsResolver.FileContentsByLocation(childLocation)
	require.NoError(t, err)
	defer internal.CloseAndLogError(reader, childLocation.RealPath)

	childPom, err := decodePomXML(reader)
	require.NoError(t, err)

	// register where the child pom was read from so relative parent lookups have a starting point
	mvn.pomLocations[childPom] = childLocation

	// first hop: child -> parent
	parentPom, err := mvn.resolveParent(ctx, childPom)
	require.NoError(t, err)
	require.Contains(t, mvn.pomLocations, parentPom)

	// second hop: parent -> its own parent
	grandparentPom, err := mvn.resolveParent(ctx, parentPom)
	require.NoError(t, err)
	require.Contains(t, mvn.pomLocations, grandparentPom)

	value := mvn.getPropertyValue(ctx, childPom, ptr("${commons-exec_subversion}"))
	require.Equal(t, "3", value)
}

// testRepo starts a remote maven repo serving all the pom files found in the given directory
func testRepo(t *testing.T, dir string) (url string) {
// mux is the HTTP request multiplexer used with the test server.
Expand Down
7 changes: 4 additions & 3 deletions syft/pkg/cataloger/java/parse_pom_xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,17 @@ import (

const pomXMLGlob = "*pom.xml"

func (gap genericArchiveParserAdapter) parsePomXML(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
func (gap genericArchiveParserAdapter) parsePomXML(ctx context.Context, fileResolver file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
pom, err := decodePomXML(reader)
if err != nil || pom == nil {
return nil, nil, err
}

r := newMavenResolver(gap.cfg)
r := newMavenResolver(fileResolver, gap.cfg)
r.pomLocations[pom] = reader.Location // store the location this pom was resolved in order to attempt parent pom lookups

var pkgs []pkg.Package
for _, dep := range directDependencies(pom) {
for _, dep := range pomDependencies(pom) {
id := newMavenID(dep.GroupID, dep.ArtifactID, dep.Version)
log.Tracef("adding dependency to SBOM: %v", id)
p, err := newPackageFromDependency(
Expand Down
6 changes: 3 additions & 3 deletions syft/pkg/cataloger/java/parse_pom_xml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ func Test_parsePomXMLProject(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
fixture, err := os.Open(test.project.Path)
assert.NoError(t, err)
r := newMavenResolver(ArchiveCatalogerConfig{})
r := newMavenResolver(nil, ArchiveCatalogerConfig{})

pom, err := gopom.ParseFromReader(fixture)
require.NoError(t, err)
Expand All @@ -338,7 +338,7 @@ func Test_parsePomXMLProject(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, test.project, actual)

licenses := r.directLicenses(context.Background(), pom)
licenses := r.pomLicenses(context.Background(), pom)
assert.NoError(t, err)
assert.Equal(t, test.licenses, toPkgLicenses(&jarLocation, licenses))
})
Expand Down Expand Up @@ -399,7 +399,7 @@ func Test_pomParent(t *testing.T) {

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
r := newMavenResolver(DefaultArchiveCatalogerConfig())
r := newMavenResolver(nil, DefaultArchiveCatalogerConfig())
assert.Equal(t, test.expected, pomParent(context.Background(), &r, &gopom.Project{Parent: test.input}))
})
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
<groupId>my.org</groupId>
<artifactId>parent-one</artifactId>
<version>3.11.0</version>
<relativePath>../../parent-1/pom.xml</relativePath>
</parent>

<artifactId>child-one</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
<groupId>my.org</groupId>
<artifactId>parent-two</artifactId>
<version>13.7.8</version>
<relativePath>../parent-2</relativePath>
</parent>

<groupId>my.org</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
<modelVersion>4.0.0</modelVersion>

<groupId>my.org</groupId>
<artifactId>parent-2</artifactId>
<version>13</version>
<artifactId>parent-two</artifactId>
<version>13.7.8</version>
<packaging>pom</packaging>

<properties>
Expand Down
Loading

0 comments on commit bbcf965

Please sign in to comment.