Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add JVM cataloger #3217

Merged
merged 5 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cmd/syft/internal/options/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,5 +241,10 @@ func (cfg *Catalog) PostLoad() error {
return fmt.Errorf("bad scope value %q", cfg.Scope)
}

// the binary package exclusion code depends on the file overlap relationships being created upstream in processing
if !cfg.Relationships.PackageFileOwnershipOverlap && cfg.Package.ExcludeBinaryOverlapByOwnership {
return fmt.Errorf("cannot enable exclude-binary-overlap-by-ownership without enabling package-file-ownership-overlap")
}

return nil
}
8 changes: 8 additions & 0 deletions cmd/syft/internal/options/catalog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ func TestCatalog_PostLoad(t *testing.T) {
assert.Empty(t, options.Catalogers)
},
},
{
name: "must have package overlap flag when pruning binaries by overlap",
options: Catalog{
Package: packageConfig{ExcludeBinaryOverlapByOwnership: true},
Relationships: relationshipsConfig{PackageFileOwnershipOverlap: false},
},
wantErr: assert.Error,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion internal/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ package internal
const (
	// JSONSchemaVersion is the current schema version output by the JSON encoder
	// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
	JSONSchemaVersion = "16.0.17"
)
91 changes: 69 additions & 22 deletions internal/relationship/exclude_binaries_by_file_ownership_overlap.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,55 +25,102 @@ var (
binaryMetadataTypes = []string{
reflect.TypeOf(pkg.ELFBinaryPackageNoteJSONPayload{}).Name(),
reflect.TypeOf(pkg.BinarySignature{}).Name(),
reflect.TypeOf(pkg.JavaVMInstallation{}).Name(),
}
)

func ExcludeBinariesByFileOwnershipOverlap(accessor sbomsync.Accessor) {
accessor.WriteToSBOM(func(s *sbom.SBOM) {
for _, r := range s.Relationships {
if excludeBinaryByFileOwnershipOverlap(r, s.Artifacts.Packages) {
s.Artifacts.Packages.Delete(r.To.ID())
s.Relationships = RemoveRelationshipsByID(s.Relationships, r.To.ID())
if idToRemove := excludeByFileOwnershipOverlap(r, s.Artifacts.Packages); idToRemove != "" {
wagoodman marked this conversation as resolved.
Show resolved Hide resolved
s.Artifacts.Packages.Delete(idToRemove)
s.Relationships = RemoveRelationshipsByID(s.Relationships, idToRemove)
}
}
})
}

// excludeBinaryByFileOwnershipOverlap will remove packages from a collection given the following properties are true
// 1) the relationship between packages is OwnershipByFileOverlap
// 2) the parent is an "os" package
// 3) the child is a synthetic package generated by the binary cataloger
// 4) the package names are identical
// This was implemented as a way to help resolve: https://github.com/anchore/syft/issues/931
func excludeBinaryByFileOwnershipOverlap(r artifact.Relationship, c *pkg.Collection) bool {
// excludeByFileOwnershipOverlap will remove packages that should be overridden by a more authoritative package,
// such as an OS package or a package from a cataloger with more specific information being raised up.
func excludeByFileOwnershipOverlap(r artifact.Relationship, c *pkg.Collection) artifact.ID {
if artifact.OwnershipByFileOverlapRelationship != r.Type {
return false
return ""
}

parent := c.Package(r.From.ID())
if parent == nil {
return false
}

parentInExclusion := slices.Contains(osCatalogerTypes, parent.Type)
if !parentInExclusion {
return false
return ""
}

child := c.Package(r.To.ID())
if child == nil {
return false
return ""
}

if idToRemove := identifyOverlappingOSRelationship(parent, child); idToRemove != "" {
return idToRemove
}

if idToRemove := identifyOverlappingJVMRelationship(parent, child); idToRemove != "" {
return idToRemove
}

return ""
}

// identifyOverlappingJVMRelationship indicates the package to remove if this is a binary -> binary pkg relationship
// with a java binary signature package and a more authoritative JVM release package.
func identifyOverlappingJVMRelationship(parent *pkg.Package, child *pkg.Package) artifact.ID {
if !slices.Contains(binaryCatalogerTypes, parent.Type) {
return ""
}

if !slices.Contains(binaryCatalogerTypes, child.Type) {
return ""
}

if child.Metadata == nil {
return ""
}

var (
foundJVM bool
idToRemove artifact.ID
)
for _, p := range []*pkg.Package{parent, child} {
wagoodman marked this conversation as resolved.
Show resolved Hide resolved
switch p.Metadata.(type) {
case pkg.JavaVMInstallation:
foundJVM = true
default:
idToRemove = p.ID()
}
}

if foundJVM {
return idToRemove
}

return ""
}

// identifyOverlappingOSRelationship indicates the package ID to remove if this is an OS pkg -> bin pkg relationship.
// This was implemented as a way to help resolve: https://github.com/anchore/syft/issues/931
//
// The parent must have a type listed in osCatalogerTypes. The child ID is returned when the child either has a
// type listed in binaryCatalogerTypes or carries metadata whose type name is listed in binaryMetadataTypes.
// In every other case an empty ID is returned.
func identifyOverlappingOSRelationship(parent *pkg.Package, child *pkg.Package) artifact.ID {
	if !slices.Contains(osCatalogerTypes, parent.Type) {
		return ""
	}

	// a binary-typed child is removed regardless of what metadata it carries
	if slices.Contains(binaryCatalogerTypes, child.Type) {
		return child.ID()
	}

	// guard before reflecting on the metadata: there is no type name to compare for nil metadata
	if child.Metadata == nil {
		return ""
	}

	if !slices.Contains(binaryMetadataTypes, reflect.TypeOf(child.Metadata).Name()) {
		return ""
	}

	return child.ID()
}
184 changes: 131 additions & 53 deletions internal/relationship/exclude_binaries_by_file_ownership_overlap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,17 @@ package relationship
import (
"testing"

"github.com/stretchr/testify/assert"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
)

func TestExclude(t *testing.T) {
func TestExcludeByFileOwnershipOverlap(t *testing.T) {
packageA := pkg.Package{Name: "package-a", Type: pkg.ApkPkg}
packageB := pkg.Package{Name: "package-a", Type: pkg.PythonPkg}
packageC := pkg.Package{Name: "package-a", Type: pkg.BinaryPkg}
packageD := pkg.Package{Name: "package-d", Type: pkg.BinaryPkg}
packageE := pkg.Package{Name: "package-e", Type: pkg.RpmPkg, Metadata: pkg.ELFBinaryPackageNoteJSONPayload{Type: "rpm"}}
packageF := pkg.Package{Name: "package-f", Type: pkg.RpmPkg, Metadata: pkg.BinarySignature{}}
for _, p := range []*pkg.Package{&packageA, &packageB, &packageC, &packageD, &packageE, &packageF} {
packageB := pkg.Package{Name: "package-b", Type: pkg.BinaryPkg, Metadata: pkg.JavaVMInstallation{}}
packageC := pkg.Package{Name: "package-c", Type: pkg.BinaryPkg, Metadata: pkg.ELFBinaryPackageNoteJSONPayload{Type: "rpm"}}
for _, p := range []*pkg.Package{&packageA, &packageB, &packageC} {
p := p
p.SetID()
}
Expand All @@ -26,73 +25,152 @@ func TestExclude(t *testing.T) {
shouldExclude bool
}{
{
name: "no exclusions from os -> python",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageB,
},
packages: pkg.NewCollection(packageA, packageB),
shouldExclude: false,
},
{
name: "exclusions from os -> binary",
// prove that OS -> bin exclusions are wired
name: "exclusions from os -> elf binary (as RPM)",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageC,
From: packageA, // OS
To: packageC, // ELF binary
},
packages: pkg.NewCollection(packageA, packageC),
shouldExclude: true,
},
{
name: "exclusions from os -> elf binary (as RPM)",
// prove that bin -> JVM exclusions are wired
name: "exclusions from binary -> binary with JVM metadata",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageE,
From: packageB, // binary with JVM metadata
To: packageC, // binary
},
packages: pkg.NewCollection(packageA, packageE),
packages: pkg.NewCollection(packageC, packageB),
shouldExclude: true,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
actualExclude := excludeByFileOwnershipOverlap(test.relationship, test.packages)
didExclude := actualExclude != ""
if !didExclude && test.shouldExclude {
t.Errorf("expected to exclude relationship %+v", test.relationship)
}
})

}
}

func TestIdentifyOverlappingOSRelationship(t *testing.T) {
packageA := pkg.Package{Name: "package-a", Type: pkg.ApkPkg} // OS package
packageB := pkg.Package{Name: "package-b", Type: pkg.BinaryPkg}
packageC := pkg.Package{Name: "package-c", Type: pkg.BinaryPkg, Metadata: pkg.BinarySignature{}}
packageD := pkg.Package{Name: "package-d", Type: pkg.PythonPkg} // Language package
packageE := pkg.Package{Name: "package-e", Type: pkg.BinaryPkg, Metadata: pkg.ELFBinaryPackageNoteJSONPayload{}}

for _, p := range []*pkg.Package{&packageA, &packageB, &packageC, &packageD, &packageE} {
p.SetID()
}

tests := []struct {
name string
parent *pkg.Package
child *pkg.Package
expectedID artifact.ID
}{
{
name: "exclusions from os -> binary (masquerading as RPM)",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageF,
},
packages: pkg.NewCollection(packageA, packageF),
shouldExclude: true,
name: "OS -> binary without metadata",
parent: &packageA,
child: &packageB,
expectedID: packageB.ID(), // OS package to binary package, should return child ID
},
{
name: "no exclusions from python -> binary",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageB,
To: packageC,
},
packages: pkg.NewCollection(packageB, packageC),
shouldExclude: false,
name: "OS -> binary with binary metadata",
parent: &packageA,
child: &packageC,
expectedID: packageC.ID(), // OS package to binary package with binary metadata, should return child ID
},
{
name: "no exclusions for different package names",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageD,
},
packages: pkg.NewCollection(packageA, packageD),
shouldExclude: false,
name: "OS -> non-binary package",
parent: &packageA,
child: &packageD,
expectedID: "", // OS package to non-binary package, no exclusion
},
{
name: "OS -> binary with ELF metadata",
parent: &packageA,
child: &packageE,
expectedID: packageE.ID(), // OS package to binary package with ELF metadata, should return child ID
},
{
name: "non-OS parent",
parent: &packageD, // non-OS package
child: &packageC,
expectedID: "", // non-OS parent, no exclusion
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
if !excludeBinaryByFileOwnershipOverlap(test.relationship, test.packages) && test.shouldExclude {
t.Errorf("expected to exclude relationship %+v", test.relationship)
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
resultID := identifyOverlappingOSRelationship(tt.parent, tt.child)
assert.Equal(t, tt.expectedID, resultID)
})
}
}

// TestIdentifyOverlappingJVMRelationship checks which package ID (if any) identifyOverlappingJVMRelationship
// selects for removal across plain binary, binary-signature, JVM, and non-binary package pairings.
func TestIdentifyOverlappingJVMRelationship(t *testing.T) {
	plainBinary := pkg.Package{Name: "package-a", Type: pkg.BinaryPkg}
	signatureBinary := pkg.Package{Name: "package-b", Type: pkg.BinaryPkg, Metadata: pkg.BinarySignature{}}
	jvmBinary := pkg.Package{Name: "package-c", Type: pkg.BinaryPkg, Metadata: pkg.JavaVMInstallation{}}
	pythonPackage := pkg.Package{Name: "package-d", Type: pkg.PythonPkg}
	otherPlainBinary := pkg.Package{Name: "package-e", Type: pkg.BinaryPkg}

	// IDs must be assigned up front because the expectations below reference them
	plainBinary.SetID()
	signatureBinary.SetID()
	jvmBinary.SetID()
	pythonPackage.SetID()
	otherPlainBinary.SetID()

	type testCase struct {
		name   string
		parent *pkg.Package
		child  *pkg.Package
		want   artifact.ID
	}

	cases := []testCase{
		// the JVM package wins, so the plain binary parent is the one reported
		{name: "binary -> binary with JVM installation", parent: &plainBinary, child: &jvmBinary, want: plainBinary.ID()},
		// only binary signatures are involved, nothing to exclude
		{name: "binary -> binary with binary signature", parent: &plainBinary, child: &signatureBinary, want: ""},
		// the child is not a binary package, nothing to exclude
		{name: "binary -> python (non-binary child)", parent: &plainBinary, child: &pythonPackage, want: ""},
		// neither side has JVM or binary signature metadata, nothing to exclude
		{name: "no JVM or signature in binary -> binary", parent: &plainBinary, child: &otherPlainBinary, want: ""},
		// the parent is not a binary package, nothing to exclude
		{name: "non-binary parent", parent: &pythonPackage, child: &jvmBinary, want: ""},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := identifyOverlappingJVMRelationship(tc.parent, tc.child)
			assert.Equal(t, tc.want, got)
		})
	}
}
1 change: 1 addition & 0 deletions internal/task/package_tasks.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ func DefaultPackageTaskFactories() PackageTaskFactories {
newSimplePackageTaskFactory(binary.NewELFPackageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "binary", "elf-package"),
newSimplePackageTaskFactory(githubactions.NewActionUsageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, "github", "github-actions"),
newSimplePackageTaskFactory(githubactions.NewWorkflowUsageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, "github", "github-actions"),
newSimplePackageTaskFactory(java.NewJvmDistributionCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "java", "jvm", "jdk", "jre"),
newPackageTaskFactory(
func(cfg CatalogingFactoryConfig) pkg.Cataloger {
return kernel.NewLinuxKernelCataloger(cfg.PackagesConfig.LinuxKernel)
Expand Down
Loading
Loading