Skip to content

Commit

Permalink
931: binary cataloger exclusion defaults for ownership by overlap (an…
Browse files Browse the repository at this point in the history
…chore#1948)

Fixes anchore#931

PR anchore#1948 introduces a new implicit exclusion for binary packages that overlap by file ownership and have certain characteristics:

1) the relationship between packages is OwnershipByFileOverlap
2) the parent package is an "os" package - see changelog for included catalogers
3) the child is a synthetic package generated by the binary cataloger - see changelog for included catalogers
4) the package names are identical

---------

Signed-off-by: Christopher Phillips <christopher.phillips@anchore.com>
  • Loading branch information
spiffcs authored Aug 8, 2023
1 parent b6cb65d commit c4ca385
Show file tree
Hide file tree
Showing 23 changed files with 295 additions and 109 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,10 @@ default-image-pull-source: ""
# - "./out/**/*.json"
exclude: []

# allows users to exclude synthetic binary packages from the sbom
# these packages are removed if an overlap with a non-synthetic package is found
exclude-overlap-by-ownership: true

# os and/or architecture to use when referencing container images (e.g. "windows/armv6" or "arm64")
# same as --platform; SYFT_PLATFORM env var
platform: ""
Expand Down
57 changes: 30 additions & 27 deletions internal/config/application.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,31 +42,32 @@ type Application struct {
ConfigPath string `yaml:"configPath,omitempty" json:"configPath" mapstructure:"config"`
Verbosity uint `yaml:"verbosity,omitempty" json:"verbosity" mapstructure:"verbosity"`
// -q, indicates to not show any status output to stderr (ETUI or logging UI)
Quiet bool `yaml:"quiet" json:"quiet" mapstructure:"quiet"`
Outputs []string `yaml:"output" json:"output" mapstructure:"output"` // -o, the format to use for output
OutputTemplatePath string `yaml:"output-template-path" json:"output-template-path" mapstructure:"output-template-path"` // -t template file to use for output
File string `yaml:"file" json:"file" mapstructure:"file"` // --file, the file to write report output to
CheckForAppUpdate bool `yaml:"check-for-app-update" json:"check-for-app-update" mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not
Dev development `yaml:"dev" json:"dev" mapstructure:"dev"`
Log logging `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options
Catalogers []string `yaml:"catalogers" json:"catalogers" mapstructure:"catalogers"`
Package pkg `yaml:"package" json:"package" mapstructure:"package"`
Golang golang `yaml:"golang" json:"golang" mapstructure:"golang"`
LinuxKernel linuxKernel `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"`
Python python `yaml:"python" json:"python" mapstructure:"python"`
Attest attest `yaml:"attest" json:"attest" mapstructure:"attest"`
FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"`
FileContents fileContents `yaml:"file-contents" json:"file-contents" mapstructure:"file-contents"`
Secrets secrets `yaml:"secrets" json:"secrets" mapstructure:"secrets"`
Registry registry `yaml:"registry" json:"registry" mapstructure:"registry"`
Exclusions []string `yaml:"exclude" json:"exclude" mapstructure:"exclude"`
Platform string `yaml:"platform" json:"platform" mapstructure:"platform"`
Name string `yaml:"name" json:"name" mapstructure:"name"`
Source sourceCfg `yaml:"source" json:"source" mapstructure:"source"`
Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel
DefaultImagePullSource string `yaml:"default-image-pull-source" json:"default-image-pull-source" mapstructure:"default-image-pull-source"` // specify default image pull source
BasePath string `yaml:"base-path" json:"base-path" mapstructure:"base-path"` // specify base path for all file paths
Quiet bool `yaml:"quiet" json:"quiet" mapstructure:"quiet"`
Outputs []string `yaml:"output" json:"output" mapstructure:"output"` // -o, the format to use for output
OutputTemplatePath string `yaml:"output-template-path" json:"output-template-path" mapstructure:"output-template-path"` // -t template file to use for output
File string `yaml:"file" json:"file" mapstructure:"file"` // --file, the file to write report output to
CheckForAppUpdate bool `yaml:"check-for-app-update" json:"check-for-app-update" mapstructure:"check-for-app-update"` // whether to check for an application update on start up or not
Dev development `yaml:"dev" json:"dev" mapstructure:"dev"`
Log logging `yaml:"log" json:"log" mapstructure:"log"` // all logging-related options
Catalogers []string `yaml:"catalogers" json:"catalogers" mapstructure:"catalogers"`
Package pkg `yaml:"package" json:"package" mapstructure:"package"`
Golang golang `yaml:"golang" json:"golang" mapstructure:"golang"`
LinuxKernel linuxKernel `yaml:"linux-kernel" json:"linux-kernel" mapstructure:"linux-kernel"`
Python python `yaml:"python" json:"python" mapstructure:"python"`
Attest attest `yaml:"attest" json:"attest" mapstructure:"attest"`
FileMetadata FileMetadata `yaml:"file-metadata" json:"file-metadata" mapstructure:"file-metadata"`
FileClassification fileClassification `yaml:"file-classification" json:"file-classification" mapstructure:"file-classification"`
FileContents fileContents `yaml:"file-contents" json:"file-contents" mapstructure:"file-contents"`
Secrets secrets `yaml:"secrets" json:"secrets" mapstructure:"secrets"`
Registry registry `yaml:"registry" json:"registry" mapstructure:"registry"`
Exclusions []string `yaml:"exclude" json:"exclude" mapstructure:"exclude"`
Platform string `yaml:"platform" json:"platform" mapstructure:"platform"`
Name string `yaml:"name" json:"name" mapstructure:"name"`
Source sourceCfg `yaml:"source" json:"source" mapstructure:"source"`
Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel
DefaultImagePullSource string `yaml:"default-image-pull-source" json:"default-image-pull-source" mapstructure:"default-image-pull-source"` // specify default image pull source
BasePath string `yaml:"base-path" json:"base-path" mapstructure:"base-path"` // specify base path for all file paths
ExcludeBinaryOverlapByOwnership bool `yaml:"exclude-binary-overlap-by-ownership" json:"exclude-binary-overlap-by-ownership" mapstructure:"exclude-binary-overlap-by-ownership"` // exclude synthetic binary packages owned by os package files
}

func (cfg Application) ToCatalogerConfig() cataloger.Config {
Expand All @@ -76,8 +77,9 @@ func (cfg Application) ToCatalogerConfig() cataloger.Config {
IncludeUnindexedArchives: cfg.Package.SearchUnindexedArchives,
Scope: cfg.Package.Cataloger.ScopeOpt,
},
Catalogers: cfg.Catalogers,
Parallelism: cfg.Parallelism,
Catalogers: cfg.Catalogers,
Parallelism: cfg.Parallelism,
ExcludeBinaryOverlapByOwnership: cfg.ExcludeBinaryOverlapByOwnership,
Golang: golangCataloger.NewGoCatalogerOpts().
WithSearchLocalModCacheLicenses(cfg.Golang.SearchLocalModCacheLicenses).
WithLocalModCacheDir(cfg.Golang.LocalModCacheDir).
Expand Down Expand Up @@ -221,6 +223,7 @@ func loadDefaultValues(v *viper.Viper) {
v.SetDefault("catalogers", nil)
v.SetDefault("parallelism", 1)
v.SetDefault("default-image-pull-source", "")
v.SetDefault("exclude-binary-overlap-by-ownership", true)

// for each field in the configuration struct, see if the field implements the defaultValueLoader interface and invoke it if it does
value := reflect.ValueOf(Application{})
Expand Down
25 changes: 24 additions & 1 deletion syft/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,34 @@ func CatalogPackages(src source.Source, cfg cataloger.Config) (*pkg.Collection,

catalog, relationships, err := cataloger.Catalog(resolver, release, cfg.Parallelism, catalogers...)

relationships = append(relationships, newSourceRelationshipsFromCatalog(src, catalog)...)
// apply exclusions to the package catalog
// default config value for this is true
// https://github.com/anchore/syft/issues/931
if cfg.ExcludeBinaryOverlapByOwnership {
for _, r := range relationships {
if cataloger.Exclude(r, catalog) {
catalog.Delete(r.To.ID())
relationships = removeRelationshipsByID(relationships, r.To.ID())
}
}
}

// no need to consider source relationships for os -> binary exclusions
relationships = append(relationships, newSourceRelationshipsFromCatalog(src, catalog)...)
return catalog, relationships, release, err
}

func removeRelationshipsByID(relationships []artifact.Relationship, id artifact.ID) []artifact.Relationship {
// https://github.com/golang/go/wiki/SliceTricks#filtering-without-allocating
filtered := relationships[:0]
for _, r := range relationships {
if r.To.ID() != id && r.From.ID() != id {
filtered = append(filtered, r)
}
}
return filtered
}

func newSourceRelationshipsFromCatalog(src source.Source, c *pkg.Collection) []artifact.Relationship {
relationships := make([]artifact.Relationship, 0) // Should we pre-allocate this by giving catalog a Len() method?
for p := range c.Enumerate() {
Expand Down
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/alpm/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

const catalogerName = "alpmdb-cataloger"
const CatalogerName = "alpmdb-cataloger"

func NewAlpmdbCataloger() *generic.Cataloger {
return generic.NewCataloger(catalogerName).
return generic.NewCataloger(CatalogerName).
WithParserByGlobs(parseAlpmDB, pkg.AlpmDBGlob)
}
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/apkdb/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

const catalogerName = "apkdb-cataloger"
const CatalogerName = "apkdb-cataloger"

// NewApkdbCataloger returns a new Alpine DB cataloger object.
func NewApkdbCataloger() *generic.Cataloger {
return generic.NewCataloger(catalogerName).
return generic.NewCataloger(CatalogerName).
WithParserByGlobs(parseApkDB, pkg.ApkDBGlob)
}
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/binary/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"github.com/anchore/syft/syft/pkg"
)

const catalogerName = "binary-cataloger"
const CatalogerName = "binary-cataloger"

func NewCataloger() *Cataloger {
return &Cataloger{}
Expand All @@ -22,7 +22,7 @@ type Cataloger struct{}

// Name returns a string that uniquely describes the Cataloger
func (c Cataloger) Name() string {
return catalogerName
return CatalogerName
}

// Catalog is given an object to resolve file references and content, this function returns any discovered Packages
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/binary/package.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func newPackage(classifier classifier, location file.Location, matchMetadata map
),
Type: pkg.BinaryPkg,
CPEs: cpes,
FoundBy: catalogerName,
FoundBy: CatalogerName,
MetadataType: pkg.BinaryMetadataType,
Metadata: pkg.BinaryMetadata{
Matches: []pkg.ClassifierMatch{
Expand Down
23 changes: 7 additions & 16 deletions syft/pkg/cataloger/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,14 @@ import (
)

// TODO: these field naming vs helper function naming schemes are inconsistent.

type Config struct {
Search SearchConfig
Golang golang.GoCatalogerOpts
LinuxKernel kernel.LinuxCatalogerConfig
Python python.CatalogerConfig
Catalogers []string
Parallelism int
}

func DefaultConfig() Config {
return Config{
Search: DefaultSearchConfig(),
Parallelism: 1,
LinuxKernel: kernel.DefaultLinuxCatalogerConfig(),
Python: python.DefaultCatalogerConfig(),
}
Search SearchConfig
Golang golang.GoCatalogerOpts
LinuxKernel kernel.LinuxCatalogerConfig
Python python.CatalogerConfig
Catalogers []string
Parallelism int
ExcludeBinaryOverlapByOwnership bool
}

func (c Config) Java() java.Config {
Expand Down
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/deb/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)

const catalogerName = "dpkgdb-cataloger"
const CatalogerName = "dpkgdb-cataloger"

// NewDpkgdbCataloger returns a new Deb package cataloger capable of parsing DPKG status DB files.
func NewDpkgdbCataloger() *generic.Cataloger {
return generic.NewCataloger(catalogerName).
return generic.NewCataloger(CatalogerName).
// note: these globs have been intentionally split up in order to improve search performance,
// please do NOT combine into: "**/var/lib/dpkg/{status,status.d/*}"
WithParserByGlobs(parseDpkgDB, "**/var/lib/dpkg/status", "**/var/lib/dpkg/status.d/*", "**/lib/opkg/info/*.control", "**/lib/opkg/status")
Expand Down
4 changes: 2 additions & 2 deletions syft/pkg/cataloger/nix/cataloger.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
)

const (
catalogerName = "nix-store-cataloger"
CatalogerName = "nix-store-cataloger"
nixStoreGlob = "**/nix/store/*"
)

Expand All @@ -24,7 +24,7 @@ func NewStoreCataloger() *StoreCataloger {
}

func (c *StoreCataloger) Name() string {
return catalogerName
return CatalogerName
}

func (c *StoreCataloger) Catalog(resolver file.Resolver) ([]pkg.Package, []artifact.Relationship, error) {
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/nix/cataloger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func TestCataloger_Catalog(t *testing.T) {
Version: "2.34-210",
PURL: "pkg:nix/glibc@2.34-210?output=bin&outputhash=h0cnbmfcn93xm5dg2x27ixhag1cwndga",
Locations: file.NewLocationSet(file.NewLocation("nix/store/h0cnbmfcn93xm5dg2x27ixhag1cwndga-glibc-2.34-210-bin")),
FoundBy: catalogerName,
FoundBy: CatalogerName,
Type: pkg.NixPkg,
MetadataType: pkg.NixStoreMetadataType,
Metadata: pkg.NixStoreMetadata{
Expand Down
2 changes: 1 addition & 1 deletion syft/pkg/cataloger/nix/package.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ func newNixStorePackage(storePath nixStorePath, locations ...file.Location) pkg.
p := pkg.Package{
Name: storePath.name,
Version: storePath.version,
FoundBy: catalogerName,
FoundBy: CatalogerName,
Locations: file.NewLocationSet(locations...),
Type: pkg.NixPkg,
PURL: packageURL(storePath),
Expand Down
55 changes: 55 additions & 0 deletions syft/pkg/cataloger/package_exclusions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package cataloger

import (
"golang.org/x/exp/slices"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/alpm"
"github.com/anchore/syft/syft/pkg/cataloger/apkdb"
"github.com/anchore/syft/syft/pkg/cataloger/binary"
"github.com/anchore/syft/syft/pkg/cataloger/deb"
"github.com/anchore/syft/syft/pkg/cataloger/nix"
"github.com/anchore/syft/syft/pkg/cataloger/rpm"
)

var (
osCatalogerTypes = []string{
apkdb.CatalogerName,
alpm.CatalogerName,
deb.CatalogerName,
nix.CatalogerName,
rpm.DBCatalogerName,
rpm.FileCatalogerName,
}
binaryCatalogerTypes = []string{binary.CatalogerName}
)

// Exclude will remove packages from a collection given the following properties are true
// 1) the relationship between packages is OwnershipByFileOverlap
// 2) the parent is an "os" package
// 3) the child is a synthetic package generated by the binary cataloger
// 4) the package names are identical
// This exclude was implemented as a way to help resolve: https://github.com/anchore/syft/issues/931
func Exclude(r artifact.Relationship, c *pkg.Collection) bool {
if artifact.OwnershipByFileOverlapRelationship != r.Type {
return false
}

parent := c.Package(r.From.ID())
if parent == nil {
return false
}

parentInExclusion := slices.Contains(osCatalogerTypes, parent.FoundBy)
if !parentInExclusion {
return false
}

child := c.Package(r.To.ID())
if child == nil {
return false
}

return slices.Contains(binaryCatalogerTypes, child.FoundBy)
}
78 changes: 78 additions & 0 deletions syft/pkg/cataloger/package_exclusions_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package cataloger

import (
"testing"

"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/apkdb"
"github.com/anchore/syft/syft/pkg/cataloger/binary"
)

func TestExclude(t *testing.T) {
packageA := pkg.Package{Name: "package-a", Type: pkg.ApkPkg, FoundBy: apkdb.CatalogerName}
packageB := pkg.Package{Name: "package-a", Type: pkg.PythonPkg, FoundBy: "language-cataloger"}
packageC := pkg.Package{Name: "package-a", Type: pkg.BinaryPkg, FoundBy: binary.CatalogerName}
packageD := pkg.Package{Name: "package-d", Type: pkg.BinaryPkg, FoundBy: binary.CatalogerName}
for _, p := range []*pkg.Package{&packageA, &packageB, &packageC, &packageD} {
p := p
p.SetID()
}

tests := []struct {
name string
relationship artifact.Relationship
packages *pkg.Collection
shouldExclude bool
}{
{
name: "no exclusions from os -> python",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageB,
},
packages: pkg.NewCollection(packageA, packageB),
shouldExclude: false,
},
{
name: "exclusions from os -> binary",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageC,
},
packages: pkg.NewCollection(packageA, packageC),
shouldExclude: true,
},
{
name: "no exclusions from python -> binary",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageB,
To: packageC,
},
packages: pkg.NewCollection(packageB, packageC),
shouldExclude: false,
},
{
name: "no exclusions for different package names",
relationship: artifact.Relationship{
Type: artifact.OwnershipByFileOverlapRelationship,
From: packageA,
To: packageD,
},
packages: pkg.NewCollection(packageA, packageD),
shouldExclude: false,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
if !Exclude(test.relationship, test.packages) && test.shouldExclude {
t.Errorf("expected to exclude relationship %+v", test.relationship)
}
})

}
}
Loading

0 comments on commit c4ca385

Please sign in to comment.