Skip to content

Commit

Permalink
improve on ingestion license check (guacsec#2152)
Browse files Browse the repository at this point in the history
* remove unneeded batching and remove source license query on ingestion

Signed-off-by: pxp928 <parth.psu@gmail.com>

* fix unit test for on ingestion scanner

Signed-off-by: pxp928 <parth.psu@gmail.com>

---------

Signed-off-by: pxp928 <parth.psu@gmail.com>
  • Loading branch information
pxp928 authored Sep 26, 2024
1 parent 5ba890d commit 6ea218b
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 89 deletions.
21 changes: 13 additions & 8 deletions pkg/certifier/clearlydefined/clearlydefined.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ func getDefinitions(ctx context.Context, client *http.Client, purls []string, co
}

// EvaluateClearlyDefinedDefinition converts the purls into coordinates to query clearly defined
func EvaluateClearlyDefinedDefinition(ctx context.Context, client *http.Client, purls []string, docChannel chan<- *processor.Document) ([]*processor.Document, error) {
func EvaluateClearlyDefinedDefinition(ctx context.Context, client *http.Client,
purls []string, docChannel chan<- *processor.Document, collectSourceLicenses bool) ([]*processor.Document, error) {
logger := logging.FromContext(ctx)
var batchCoordinates []string
var queryPurls []string
Expand All @@ -143,7 +144,8 @@ func EvaluateClearlyDefinedDefinition(ctx context.Context, client *http.Client,
batchCoordinates = append(batchCoordinates, coordinate.ToString())
}
}
if genCDDocs, err := generateDefinitions(ctx, client, batchCoordinates, queryPurls, docChannel); err != nil {
if genCDDocs, err := generateDefinitions(ctx, client, batchCoordinates, queryPurls, docChannel,
collectSourceLicenses); err != nil {
return nil, fmt.Errorf("generateDefinitions failed with error: %w", err)
} else {
generatedCDDocs = append(generatedCDDocs, genCDDocs...)
Expand All @@ -154,7 +156,8 @@ func EvaluateClearlyDefinedDefinition(ctx context.Context, client *http.Client,

// generateDefinitions takes in the batched coordinates to retrieve the definitions. It uses the definitions to check if source
// information can be queried in clearly defined.
func generateDefinitions(ctx context.Context, client *http.Client, batchCoordinates, queryPurls []string, docChannel chan<- *processor.Document) ([]*processor.Document, error) {
func generateDefinitions(ctx context.Context, client *http.Client, batchCoordinates,
queryPurls []string, docChannel chan<- *processor.Document, collectSourceLicenses bool) ([]*processor.Document, error) {
var generatedCDDocs []*processor.Document
if len(batchCoordinates) > 0 {
definitionMap, err := getDefinitions(ctx, client, queryPurls, batchCoordinates)
Expand All @@ -168,10 +171,12 @@ func generateDefinitions(ctx context.Context, client *http.Client, batchCoordina
generatedCDDocs = append(generatedCDDocs, genCDPkgDocs...)
}

if genCDSrcDocs, err := evaluateDefinitionForSource(ctx, client, definitionMap, docChannel); err != nil {
return nil, fmt.Errorf("evaluateDefinitionForSource failed with error: %w", err)
} else {
generatedCDDocs = append(generatedCDDocs, genCDSrcDocs...)
if collectSourceLicenses {
if genCDSrcDocs, err := evaluateDefinitionForSource(ctx, client, definitionMap, docChannel); err != nil {
return nil, fmt.Errorf("evaluateDefinitionForSource failed with error: %w", err)
} else {
generatedCDDocs = append(generatedCDDocs, genCDSrcDocs...)
}
}
}
return generatedCDDocs, nil
Expand All @@ -190,7 +195,7 @@ func (c *cdCertifier) CertifyComponent(ctx context.Context, rootComponent interf
purls = append(purls, node.Purl)
}

if _, err := EvaluateClearlyDefinedDefinition(ctx, c.cdHTTPClient, purls, docChannel); err != nil {
if _, err := EvaluateClearlyDefinedDefinition(ctx, c.cdHTTPClient, purls, docChannel, true); err != nil {
return fmt.Errorf("could not generate document from Clearly Defined results: %w", err)
}

Expand Down
40 changes: 6 additions & 34 deletions pkg/ingestor/parser/common/scanner/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,40 +64,12 @@ func PurlsLicenseScan(ctx context.Context, purls []string) ([]assembler.CertifyL
var certLegalIngest []assembler.CertifyLegalIngest
var hasSourceAtIngest []assembler.HasSourceAtIngest

if len(purls) > 249 {
i := 0
var batchPurls []string
for _, purl := range purls {
if i < 248 {
batchPurls = append(batchPurls, purl)
i++
} else {
batchPurls = append(batchPurls, purl)
batchedCL, batchedHSA, err := runQueryOnBatchedPurls(ctx, cdParser, batchPurls)
if err != nil {
return nil, nil, fmt.Errorf("runQueryOnBatchedPurls failed with error: %w", err)
}
certLegalIngest = append(certLegalIngest, batchedCL...)
hasSourceAtIngest = append(hasSourceAtIngest, batchedHSA...)
batchPurls = make([]string, 0)
}
}
if len(batchPurls) > 0 {
batchedCL, batchedHSA, err := runQueryOnBatchedPurls(ctx, cdParser, batchPurls)
if err != nil {
return nil, nil, fmt.Errorf("runQueryOnBatchedPurls failed with error: %w", err)
}
certLegalIngest = append(certLegalIngest, batchedCL...)
hasSourceAtIngest = append(hasSourceAtIngest, batchedHSA...)
}
} else {
batchedCL, batchedHSA, err := runQueryOnBatchedPurls(ctx, cdParser, purls)
if err != nil {
return nil, nil, fmt.Errorf("runQueryOnBatchedPurls failed with error: %w", err)
}
certLegalIngest = append(certLegalIngest, batchedCL...)
hasSourceAtIngest = append(hasSourceAtIngest, batchedHSA...)
batchedCL, batchedHSA, err := runQueryOnBatchedPurls(ctx, cdParser, purls)
if err != nil {
return nil, nil, fmt.Errorf("runQueryOnBatchedPurls failed with error: %w", err)
}
certLegalIngest = append(certLegalIngest, batchedCL...)
hasSourceAtIngest = append(hasSourceAtIngest, batchedHSA...)

return certLegalIngest, hasSourceAtIngest, nil
}
Expand All @@ -109,7 +81,7 @@ func runQueryOnBatchedPurls(ctx context.Context, cdParser common.DocumentParser,
var hasSourceAtIngest []assembler.HasSourceAtIngest
if cdProcessorDocs, err := cd_certifier.EvaluateClearlyDefinedDefinition(ctx, &http.Client{
Transport: version.UATransport,
}, batchPurls, nil); err != nil {
}, batchPurls, nil, false); err != nil {
return nil, nil, fmt.Errorf("failed get definition from clearly defined with error: %w", err)
} else {
for _, doc := range cdProcessorDocs {
Expand Down
21 changes: 0 additions & 21 deletions pkg/ingestor/parser/common/scanner/scanner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,27 +373,6 @@ func TestPurlsLicenseScan(t *testing.T) {
Collector: "clearlydefined",
},
},
{
Src: &generated.SourceInputSpec{
Type: "sourcearchive",
Namespace: "org.apache.logging.log4j",
Name: "log4j-core",
Tag: ptrfrom.String("2.8.1"),
},
Declared: []generated.LicenseInputSpec{},
Discovered: []generated.LicenseInputSpec{
{Name: "Apache-2.0", ListVersion: &lvUnknown},
{Name: "NOASSERTION", ListVersion: &lvUnknown},
},
CertifyLegal: &generated.CertifyLegalInputSpec{
DiscoveredLicense: "Apache-2.0 AND NOASSERTION",
Attribution: "Copyright 2005-2006 Tim Fennell,Copyright 1999-2012 Apache Software Foundation,Copyright 1999-2005 The Apache Software Foundation",
Justification: "Retrieved from ClearlyDefined",
TimeScanned: tm,
Origin: "clearlydefined",
Collector: "clearlydefined",
},
},
},
wantHSAs: []assembler.HasSourceAtIngest{
{
Expand Down
64 changes: 38 additions & 26 deletions pkg/ingestor/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package parser
import (
"context"
"fmt"
"sync"

"github.com/guacsec/guac/pkg/assembler"
"github.com/guacsec/guac/pkg/handler/processor"
Expand Down Expand Up @@ -75,6 +76,8 @@ func RegisterDocumentParser(p func() common.DocumentParser, d processor.Document

// ParseDocumentTree takes the DocumentTree and creates graph inputs (nodes and edges) per document node.
func ParseDocumentTree(ctx context.Context, docTree processor.DocumentTree, scanForVulns bool, scanForLicense bool) ([]assembler.IngestPredicates, []*common.IdentifierStrings, error) {
var wg sync.WaitGroup

assemblerInputs := []assembler.IngestPredicates{}
identifierStrings := []*common.IdentifierStrings{}
logger := docTree.Document.ChildLogger
Expand All @@ -98,40 +101,49 @@ func ParseDocumentTree(ctx context.Context, docTree processor.DocumentTree, scan
}

if scanForVulns {
// scan purls via OSV on initial ingestion to capture vulnerability information
var purls []string
for _, idString := range identifierStrings {
purls = append(purls, idString.PurlStrings...)
}
wg.Add(1)
go func() {
defer wg.Done()
// scan purls via OSV on initial ingestion to capture vulnerability information
var purls []string
for _, idString := range identifierStrings {
purls = append(purls, idString.PurlStrings...)
}

vulnEquals, certVulns, err := scanner.PurlsVulnScan(ctx, purls)
if err != nil {
logger.Errorf("error scanning purls for vulnerabilities %v", err)
} else {
if len(assemblerInputs) > 0 {
assemblerInputs[0].VulnEqual = append(assemblerInputs[0].VulnEqual, vulnEquals...)
assemblerInputs[0].CertifyVuln = append(assemblerInputs[0].CertifyVuln, certVulns...)
vulnEquals, certVulns, err := scanner.PurlsVulnScan(ctx, purls)
if err != nil {
logger.Errorf("error scanning purls for vulnerabilities %v", err)
} else {
if len(assemblerInputs) > 0 {
assemblerInputs[0].VulnEqual = append(assemblerInputs[0].VulnEqual, vulnEquals...)
assemblerInputs[0].CertifyVuln = append(assemblerInputs[0].CertifyVuln, certVulns...)
}
}
}
}()
}

if scanForLicense {
// scan purls via clearly defined on initial ingestion to capture license information
var purls []string
for _, idString := range identifierStrings {
purls = append(purls, idString.PurlStrings...)
}
wg.Add(1)
go func() {
defer wg.Done()
// scan purls via clearly defined on initial ingestion to capture license information
var purls []string
for _, idString := range identifierStrings {
purls = append(purls, idString.PurlStrings...)
}

certLegal, hasSourceAt, err := scanner.PurlsLicenseScan(ctx, purls)
if err != nil {
logger.Errorf("error scanning purls for licenses %v", err)
} else {
if len(assemblerInputs) > 0 {
assemblerInputs[0].CertifyLegal = append(assemblerInputs[0].CertifyLegal, certLegal...)
assemblerInputs[0].HasSourceAt = append(assemblerInputs[0].HasSourceAt, hasSourceAt...)
certLegal, hasSourceAt, err := scanner.PurlsLicenseScan(ctx, purls)
if err != nil {
logger.Errorf("error scanning purls for licenses %v", err)
} else {
if len(assemblerInputs) > 0 {
assemblerInputs[0].CertifyLegal = append(assemblerInputs[0].CertifyLegal, certLegal...)
assemblerInputs[0].HasSourceAt = append(assemblerInputs[0].HasSourceAt, hasSourceAt...)
}
}
}
}()
}
wg.Wait()

return assemblerInputs, identifierStrings, nil
}
Expand Down

0 comments on commit 6ea218b

Please sign in to comment.