Skip to content

Commit

Permalink
feat: --enrich flag for data enrichment feature enablement (#3182)
Browse files Browse the repository at this point in the history
Signed-off-by: Keith Zantow <kzantow@gmail.com>
  • Loading branch information
kzantow committed Sep 12, 2024
1 parent fcd5ec9 commit 1b86326
Show file tree
Hide file tree
Showing 8 changed files with 217 additions and 73 deletions.
99 changes: 81 additions & 18 deletions cmd/syft/internal/options/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/anchore/fangs"
intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/task"
"github.com/anchore/syft/syft"
"github.com/anchore/syft/syft/cataloging"
"github.com/anchore/syft/syft/cataloging/filecataloging"
Expand All @@ -36,6 +37,7 @@ type Catalog struct {
Scope string `yaml:"scope" json:"scope" mapstructure:"scope"`
Parallelism int `yaml:"parallelism" json:"parallelism" mapstructure:"parallelism"` // the number of catalog workers to run in parallel
Relationships relationshipsConfig `yaml:"relationships" json:"relationships" mapstructure:"relationships"`
Enrich []string `yaml:"enrich" json:"enrich" mapstructure:"enrich"`

// ecosystem-specific cataloger configuration
Golang golangConfig `yaml:"golang" json:"golang" mapstructure:"golang"`
Expand All @@ -55,7 +57,7 @@ type Catalog struct {
var _ interface {
clio.FlagAdder
clio.PostLoader
fangs.FieldDescriber
clio.FieldDescriber
} = (*Catalog)(nil)

func DefaultCatalog() Catalog {
Expand Down Expand Up @@ -130,9 +132,9 @@ func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config {
return pkgcataloging.Config{
Binary: binary.DefaultClassifierCatalogerConfig(),
Golang: golang.DefaultCatalogerConfig().
WithSearchLocalModCacheLicenses(cfg.Golang.SearchLocalModCacheLicenses).
WithSearchLocalModCacheLicenses(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.Go, task.Golang), cfg.Golang.SearchLocalModCacheLicenses)).
WithLocalModCacheDir(cfg.Golang.LocalModCacheDir).
WithSearchRemoteLicenses(cfg.Golang.SearchRemoteLicenses).
WithSearchRemoteLicenses(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.Go, task.Golang), cfg.Golang.SearchRemoteLicenses)).
WithProxy(cfg.Golang.Proxy).
WithNoProxy(cfg.Golang.NoProxy).
WithMainModuleVersion(
Expand All @@ -142,7 +144,7 @@ func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config {
WithFromLDFlags(cfg.Golang.MainModuleVersion.FromLDFlags),
),
JavaScript: javascript.DefaultCatalogerConfig().
WithSearchRemoteLicenses(cfg.JavaScript.SearchRemoteLicenses).
WithSearchRemoteLicenses(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.JavaScript, task.Node, task.NPM), cfg.JavaScript.SearchRemoteLicenses)).
WithNpmBaseURL(cfg.JavaScript.NpmBaseURL),
LinuxKernel: kernel.LinuxKernelCatalogerConfig{
CatalogModules: cfg.LinuxKernel.CatalogModules,
Expand All @@ -151,9 +153,9 @@ func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config {
GuessUnpinnedRequirements: cfg.Python.GuessUnpinnedRequirements,
},
JavaArchive: java.DefaultArchiveCatalogerConfig().
WithUseMavenLocalRepository(cfg.Java.UseMavenLocalRepository).
WithUseMavenLocalRepository(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.Java, task.Maven), cfg.Java.UseMavenLocalRepository)).
WithMavenLocalRepositoryDir(cfg.Java.MavenLocalRepositoryDir).
WithUseNetwork(cfg.Java.UseNetwork).
WithUseNetwork(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.Java, task.Maven), cfg.Java.UseNetwork)).
WithMavenBaseURL(cfg.Java.MavenURL).
WithArchiveTraversal(archiveSearch, cfg.Java.MaxParentRecursiveDepth),
}
Expand Down Expand Up @@ -193,6 +195,9 @@ func (cfg *Catalog) AddFlags(flags clio.FlagSet) {
flags.StringArrayVarP(&cfg.SelectCatalogers, "select-catalogers", "",
"add, remove, and filter the catalogers to be used")

flags.StringArrayVarP(&cfg.Enrich, "enrich", "",
fmt.Sprintf("enable package data enrichment from local and online sources (options: %s)", strings.Join(publicisedEnrichmentOptions, ", ")))

flags.StringVarP(&cfg.Source.Name, "source-name", "",
"set the name of the target being analyzed")

Expand All @@ -205,6 +210,10 @@ func (cfg *Catalog) AddFlags(flags clio.FlagSet) {

// DescribeFields registers human-readable descriptions for the Parallelism and
// Enrich configuration fields on the given description set.
func (cfg *Catalog) DescribeFields(descriptions fangs.FieldDescriptionSet) {
	descriptions.Add(&cfg.Parallelism, "number of cataloger workers to run in parallel")

	// the enrich description ends with the list of advertised option names
	enrichOptions := strings.Join(publicisedEnrichmentOptions, ", ")
	enrichDescription := fmt.Sprintf(`Enable data enrichment operations, which can utilize services such as Maven Central and NPM.
By default all enrichment is disabled, use: all to enable everything.
Available options are: %s`, enrichOptions)

	descriptions.Add(&cfg.Enrich, enrichDescription)
}

func (cfg *Catalog) PostLoad() error {
Expand All @@ -215,23 +224,12 @@ func (cfg *Catalog) PostLoad() error {
return fmt.Errorf("cannot use both 'catalogers' and 'select-catalogers'/'default-catalogers' flags")
}

flatten := func(l []string) []string {
var out []string
for _, v := range l {
for _, s := range strings.Split(v, ",") {
out = append(out, strings.TrimSpace(s))
}
}
sort.Strings(out)

return out
}

cfg.From = flatten(cfg.From)

cfg.Catalogers = flatten(cfg.Catalogers)
cfg.DefaultCatalogers = flatten(cfg.DefaultCatalogers)
cfg.SelectCatalogers = flatten(cfg.SelectCatalogers)
cfg.Enrich = flatten(cfg.Enrich)

// for backwards compatibility
cfg.DefaultCatalogers = append(cfg.DefaultCatalogers, cfg.Catalogers...)
Expand All @@ -243,3 +241,68 @@ func (cfg *Catalog) PostLoad() error {

return nil
}

// flatten splits every entry on commas, trims surrounding whitespace from each
// piece, and returns all pieces as a single sorted slice. Empty pieces are kept
// as empty strings; a nil or empty input yields a nil slice.
func flatten(commaSeparatedEntries []string) []string {
	var flat []string
	for i := range commaSeparatedEntries {
		pieces := strings.Split(commaSeparatedEntries[i], ",")
		for j := range pieces {
			flat = append(flat, strings.TrimSpace(pieces[j]))
		}
	}

	sort.Strings(flat)

	return flat
}

// publicisedEnrichmentOptions lists the enrichment option names that are
// joined into the --enrich flag help text and the enrich field description.
// It is used for display only: enrichmentEnabled matches directives against
// whatever feature names its caller passes, and also recognizes "none".
var publicisedEnrichmentOptions = []string{
	"all",
	task.Golang,
	task.Java,
	task.JavaScript,
}

// enrichmentEnabled resolves the enrich directives for the given feature names.
//
// Each directive is a name optionally prefixed with "+" (enable, same as no
// prefix) or "-" (disable). The names "all" and "none" act as a global setting,
// with "none" taking precedence over "all". A directive that names one of the
// given features overrides the global setting; features are checked in the
// order given and the first feature with a matching directive wins.
//
// It returns nil when no directives were provided or when none of them apply
// to the given features; otherwise it returns a pointer to the resolved value.
func enrichmentEnabled(enrichDirectives []string, features ...string) *bool {
	if len(enrichDirectives) == 0 {
		return nil
	}

	// lookup returns the setting from the first directive that names the given
	// feature, or nil when no directive mentions it.
	lookup := func(name string) *bool {
		for _, directive := range enrichDirectives {
			enable := true
			directive = strings.TrimPrefix(directive, "+") // +java and java are equivalent
			if strings.HasPrefix(directive, "-") {
				directive = directive[1:]
				enable = false
			}
			if directive == name {
				return &enable
			}
		}
		return nil
	}

	enableAll := lookup("all")
	disableAll := lookup("none")

	if disableAll != nil && *disableAll {
		// Only warn about a genuine conflict: "-all" together with "none" both
		// ask for everything to be disabled, so there is nothing contradictory.
		if enableAll != nil && *enableAll {
			log.Warn("you have specified to both enable and disable all enrichment functionality, defaulting to disabled")
		}
		enableAll = ptr(false)
	}

	// check for explicit enable/disable of feature names
	for _, feat := range features {
		if enableFeature := lookup(feat); enableFeature != nil {
			return enableFeature
		}
	}

	return enableAll
}

// ptr returns a pointer to a newly allocated copy of val.
func ptr[T any](val T) *T {
	out := new(T)
	*out = val
	return out
}
66 changes: 66 additions & 0 deletions cmd/syft/internal/options/catalog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,69 @@ func TestCatalog_PostLoad(t *testing.T) {
})
}
}

// Test_enrichmentEnabled checks how a comma-separated list of enrich directives
// resolves for a single feature: nil when no directive applies to it, otherwise
// a pointer to the enabled/disabled result.
func Test_enrichmentEnabled(t *testing.T) {
	tests := []struct {
		directives string // comma-separated directives, split (and sorted) by flatten
		test       string // feature name being queried
		expected   *bool
	}{
		{
			directives: "",
			test:       "java",
			expected:   nil,
		},
		{
			directives: "none",
			test:       "java",
			expected:   ptr(false),
		},
		{
			directives: "none,+java",
			test:       "java",
			expected:   ptr(true),
		},
		{
			directives: "all,none",
			test:       "java",
			expected:   ptr(false),
		},
		{
			directives: "all",
			test:       "java",
			expected:   ptr(true),
		},
		{
			directives: "golang,js",
			test:       "java",
			expected:   nil,
		},
		{
			directives: "golang,-js,java",
			test:       "java",
			expected:   ptr(true),
		},
		{
			directives: "golang,js,-java",
			test:       "java",
			expected:   ptr(false),
		},
		{
			directives: "all,-java",
			test:       "java",
			expected:   ptr(false),
		},
	}

	for _, test := range tests {
		// the directive string doubles as the subtest name, so entries must be unique
		t.Run(test.directives, func(t *testing.T) {
			got := enrichmentEnabled(flatten([]string{test.directives}), test.test)
			assert.Equal(t, test.expected, got)
		})
	}
}
8 changes: 4 additions & 4 deletions cmd/syft/internal/options/golang.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ import (
)

type golangConfig struct {
SearchLocalModCacheLicenses bool `json:"search-local-mod-cache-licenses" yaml:"search-local-mod-cache-licenses" mapstructure:"search-local-mod-cache-licenses"`
SearchLocalModCacheLicenses *bool `json:"search-local-mod-cache-licenses" yaml:"search-local-mod-cache-licenses" mapstructure:"search-local-mod-cache-licenses"`
LocalModCacheDir string `json:"local-mod-cache-dir" yaml:"local-mod-cache-dir" mapstructure:"local-mod-cache-dir"`
SearchRemoteLicenses bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"`
SearchRemoteLicenses *bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"`
Proxy string `json:"proxy" yaml:"proxy" mapstructure:"proxy"`
NoProxy string `json:"no-proxy" yaml:"no-proxy" mapstructure:"no-proxy"`
MainModuleVersion golangMainModuleVersionConfig `json:"main-module-version" yaml:"main-module-version" mapstructure:"main-module-version"`
Expand Down Expand Up @@ -47,9 +47,9 @@ type golangMainModuleVersionConfig struct {
func defaultGolangConfig() golangConfig {
def := golang.DefaultCatalogerConfig()
return golangConfig{
SearchLocalModCacheLicenses: def.SearchLocalModCacheLicenses,
SearchLocalModCacheLicenses: nil, // this defaults to false, which is the API default
LocalModCacheDir: def.LocalModCacheDir,
SearchRemoteLicenses: def.SearchRemoteLicenses,
SearchRemoteLicenses: nil, // this defaults to false, which is the API default
Proxy: strings.Join(def.Proxies, ","),
NoProxy: strings.Join(def.NoProxy, ","),
MainModuleVersion: golangMainModuleVersionConfig{
Expand Down
8 changes: 4 additions & 4 deletions cmd/syft/internal/options/java.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import (
)

type javaConfig struct {
UseNetwork bool `yaml:"use-network" json:"use-network" mapstructure:"use-network"`
UseMavenLocalRepository bool `yaml:"use-maven-local-repository" json:"use-maven-local-repository" mapstructure:"use-maven-local-repository"`
UseNetwork *bool `yaml:"use-network" json:"use-network" mapstructure:"use-network"`
UseMavenLocalRepository *bool `yaml:"use-maven-local-repository" json:"use-maven-local-repository" mapstructure:"use-maven-local-repository"`
MavenLocalRepositoryDir string `yaml:"maven-local-repository-dir" json:"maven-local-repository-dir" mapstructure:"maven-local-repository-dir"`
MavenURL string `yaml:"maven-url" json:"maven-url" mapstructure:"maven-url"`
MaxParentRecursiveDepth int `yaml:"max-parent-recursive-depth" json:"max-parent-recursive-depth" mapstructure:"max-parent-recursive-depth"`
Expand All @@ -17,9 +17,9 @@ func defaultJavaConfig() javaConfig {
def := java.DefaultArchiveCatalogerConfig()

return javaConfig{
UseNetwork: def.UseNetwork,
UseNetwork: nil, // this defaults to false, which is the API default
MaxParentRecursiveDepth: def.MaxParentRecursiveDepth,
UseMavenLocalRepository: def.UseMavenLocalRepository,
UseMavenLocalRepository: nil, // this defaults to false, which is the API default
MavenLocalRepositoryDir: def.MavenLocalRepositoryDir,
MavenURL: def.MavenBaseURL,
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/syft/internal/options/javascript.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package options
import "github.com/anchore/clio"

type javaScriptConfig struct {
SearchRemoteLicenses bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"`
SearchRemoteLicenses *bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"`
NpmBaseURL string `json:"npm-base-url" yaml:"npm-base-url" mapstructure:"npm-base-url"`
}

Expand Down
24 changes: 12 additions & 12 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ require (
github.com/acobaugh/osrelease v0.1.0
github.com/anchore/bubbly v0.0.0-20231115134915-def0aba654a9
github.com/anchore/clio v0.0.0-20240522144804-d81e109008aa
github.com/anchore/fangs v0.0.0-20240508143433-f016b099950f
github.com/anchore/fangs v0.0.0-20240903175602-e716ef12c23d
github.com/anchore/go-collections v0.0.0-20240216171411-9321230ce537
github.com/anchore/go-logger v0.0.0-20230725134548-c21dafa1ec5a
github.com/anchore/go-macholibre v0.0.0-20220308212642-53e6d0aaf6fb
Expand Down Expand Up @@ -84,7 +84,7 @@ require (
modernc.org/sqlite v1.33.0
)

require google.golang.org/genproto v0.0.0-20231106174013-bbf56f31fb17 // indirect
require google.golang.org/genproto v0.0.0-20240213162025-012b6fc9bca9 // indirect

require (
github.com/BurntSushi/toml v1.4.0
Expand Down Expand Up @@ -135,13 +135,13 @@ require (
github.com/emirpasic/gods v1.18.1 // indirect
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
github.com/felixge/fgprof v0.9.3 // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/gabriel-vasile/mimetype v1.4.4 // indirect
github.com/gkampitakis/ciinfo v0.3.0 // indirect
github.com/gkampitakis/go-diff v1.3.2 // indirect
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-restruct/restruct v1.2.0-alpha // indirect
github.com/gogo/protobuf v1.3.2 // indirect
Expand Down Expand Up @@ -185,7 +185,7 @@ require (
github.com/opencontainers/runtime-spec v1.1.0-rc.1 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/pborman/indent v1.2.1 // indirect
github.com/pelletier/go-toml/v2 v2.1.0 // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
github.com/pierrec/lz4/v4 v4.1.19 // indirect
github.com/pjbgf/sha1cd v0.3.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
Expand All @@ -204,7 +204,7 @@ require (
github.com/sourcegraph/conc v0.3.0 // indirect
github.com/spf13/cast v1.7.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/spf13/viper v1.18.2 // indirect
github.com/spf13/viper v1.19.0 // indirect
github.com/subosito/gotenv v1.6.0 // indirect
github.com/sylabs/sif/v2 v2.17.1 // indirect
github.com/sylabs/squashfs v1.0.0 // indirect
Expand All @@ -221,10 +221,10 @@ require (
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect
go.opentelemetry.io/otel v1.19.0 // indirect
go.opentelemetry.io/otel/metric v1.19.0 // indirect
go.opentelemetry.io/otel/trace v1.19.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect
go.opentelemetry.io/otel v1.24.0 // indirect
go.opentelemetry.io/otel/metric v1.24.0 // indirect
go.opentelemetry.io/otel/trace v1.24.0 // indirect
go.uber.org/atomic v1.9.0 // indirect
go.uber.org/multierr v1.9.0 // indirect
golang.org/x/crypto v0.27.0 // indirect
Expand All @@ -234,8 +234,8 @@ require (
golang.org/x/text v0.18.0 // indirect
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20231120223509-83a465c0220f // indirect
google.golang.org/grpc v1.59.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240314234333-6e1732d8331c // indirect
google.golang.org/grpc v1.62.1 // indirect
google.golang.org/protobuf v1.33.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/warnings.v0 v0.1.2 // indirect
Expand Down
Loading

0 comments on commit 1b86326

Please sign in to comment.