Skip to content

Commit

Permalink
fix(license): use a common way to split licenses (#4434)
Browse files Browse the repository at this point in the history
* fix(license): use a common way to split licenses

* add test cases

* TEST new regex

* extract function

* fix version detection

---------

Co-authored-by: Nikita Pivkin <nikita.pivkin@smartforce.io>
  • Loading branch information
afdesk and nikpivkin authored Jul 19, 2023
1 parent 3e2416d commit 9399604
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 10 deletions.
2 changes: 1 addition & 1 deletion pkg/fanal/analyzer/language/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func toApplication(fileType, filePath, libFilePath string, r dio.ReadSeekerAt, l
for _, lib := range libs {
var licenses []string
if lib.License != "" {
licenses = strings.Split(lib.License, ",")
licenses = licensing.SplitLicenses(lib.License)
for i, license := range licenses {
licenses[i] = licensing.Normalize(strings.TrimSpace(license))
}
Expand Down
10 changes: 1 addition & 9 deletions pkg/fanal/analyzer/pkg/dpkg/copyright.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ var (
dpkgLicenseAnalyzerVersion = 1

commonLicenseReferenceRegexp = regexp.MustCompile(`/?usr/share/common-licenses/([0-9A-Za-z_.+-]+[0-9A-Za-z+])`)
licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ])|(,[ ]*)")
)

// dpkgLicenseAnalyzer parses copyright files and detect licenses
Expand Down Expand Up @@ -90,14 +89,7 @@ func (a *dpkgLicenseAnalyzer) parseCopyright(r dio.ReadSeekerAt) ([]types.Licens

l = normalizeLicense(l)
if len(l) > 0 {
// Split licenses without considering "and"/"or"
// examples:
// 'GPL-1+,GPL-2' => {"GPL-1", "GPL-2"}
// 'GPL-1+ or Artistic or Artistic-dist' => {"GPL-1", "Artistic", "Artistic-dist"}
// 'LGPLv3+_or_GPLv2+' => {"LGPLv3", "GPLv2"}
// 'BSD-3-CLAUSE and GPL-2' => {"BSD-3-CLAUSE", "GPL-2"}
// 'GPL-1+ or Artistic, and BSD-4-clause-POWERDOG' => {"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"}
for _, lic := range licenseSplitRegexp.Split(l, -1) {
for _, lic := range licensing.SplitLicenses(l) {
lic = licensing.Normalize(lic)
if !slices.Contains(licenses, lic) {
licenses = append(licenses, lic)
Expand Down
26 changes: 26 additions & 0 deletions pkg/licensing/normalize.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package licensing

import (
"regexp"
"strings"
)

Expand Down Expand Up @@ -80,9 +81,34 @@ var mapping = map[string]string{
"PUBLIC DOMAIN": Unlicense,
}

// licenseSplitRegexp matches the separators between license names in a
// free-form license string. The words "and"/"or" are used purely as
// delimiters; their logical meaning is ignored.
//
// A separator is either
//   - a lowercase "or"/"and" surrounded by spaces or underscores and
//     optionally preceded by a comma, or
//   - a comma followed by any number of spaces.
//
// Matching is case-sensitive, so uppercase "OR"/"AND" do not separate.
//
// Results of splitting on this expression:
//
//	'GPL-1+,GPL-2'                                  => {"GPL-1+", "GPL-2"}
//	'GPL-1+ or Artistic or Artistic-dist'           => {"GPL-1+", "Artistic", "Artistic-dist"}
//	'LGPLv3+_or_GPLv2+'                             => {"LGPLv3+", "GPLv2+"}
//	'BSD-3-CLAUSE and GPL-2'                        => {"BSD-3-CLAUSE", "GPL-2"}
//	'GPL-1+ or Artistic, and BSD-4-clause-POWERDOG' => {"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"}
//
// The comma rule also cuts names such as "Apache License, Version 2.0" into
// "Apache License" and "Version 2.0"; SplitLicenses re-attaches such version
// suffixes, so through it
//
//	'BSD 3-Clause License or Apache License, Version 2.0'
//
// becomes {"BSD 3-Clause License", "Apache License, Version 2.0"}.
var licenseSplitRegexp = regexp.MustCompile(`(,?[_ ]+(?:or|and)[_ ]+)|(,[ ]*)`)

// Normalize returns the canonical license name registered in mapping for
// name. The lookup key is the upper-cased name, so matching ignores case.
// A name without an entry in mapping is returned unchanged.
func Normalize(name string) string {
	canonical, known := mapping[strings.ToUpper(name)]
	if !known {
		return name
	}
	return canonical
}

// SplitLicenses splits a free-form license string into individual license
// names.
//
// str is cut on every separator matched by licenseSplitRegexp. A piece that
// starts with "ver " or "version " (compared case-insensitively) is taken to
// be the version suffix of the license before it and is appended to that
// license with ", " (whatever the original separator was) instead of being
// reported on its own. When there is no preceding license, such a piece is
// kept as a separate entry.
//
// Empty pieces, which arise from an empty str or from leading, trailing or
// repeated separators, are dropped; an empty str therefore yields a nil
// slice rather than a slice holding one empty name.
//
// The returned names are not trimmed or otherwise altered.
func SplitLicenses(str string) []string {
	var licenses []string
	for _, maybeLic := range licenseSplitRegexp.Split(str, -1) {
		// An empty piece is never a license name, and it must not become
		// the target that a following version suffix is attached to.
		if maybeLic == "" {
			continue
		}

		lower := strings.ToLower(maybeLic)
		isVersion := strings.HasPrefix(lower, "ver ") || strings.HasPrefix(lower, "version ")
		if isVersion && len(licenses) > 0 {
			// e.g. "Apache License" + "Version 2.0" => "Apache License, Version 2.0"
			licenses[len(licenses)-1] += ", " + maybeLic
			continue
		}
		licenses = append(licenses, maybeLic)
	}
	return licenses
}
67 changes: 67 additions & 0 deletions pkg/licensing/normalize_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package licensing_test

import (
"testing"

"github.com/stretchr/testify/assert"

"github.com/aquasecurity/trivy/pkg/licensing"
)

// TestSplitLicenses feeds free-form license strings to
// licensing.SplitLicenses and compares the returned names with the expected
// list. Every case has a unique name so that a failing subtest can be
// identified and selected with -run unambiguously.
func TestSplitLicenses(t *testing.T) {
	tests := []struct {
		name     string   // subtest name
		license  string   // raw input passed to SplitLicenses
		licenses []string // expected result
	}{
		{
			name:     "simple list comma-separated",
			license:  "GPL-1+,GPL-2",
			licenses: []string{"GPL-1+", "GPL-2"},
		},
		{
			name:     "simple list comma-separated, three licenses",
			license:  "GPL-1+,GPL-2,GPL-3",
			licenses: []string{"GPL-1+", "GPL-2", "GPL-3"},
		},
		{
			name:     "3 licenses 'or'-separated",
			license:  "GPL-1+ or Artistic or Artistic-dist",
			licenses: []string{"GPL-1+", "Artistic", "Artistic-dist"},
		},
		{
			name:     "two licenses _or_ separated",
			license:  "LGPLv3+_or_GPLv2+",
			licenses: []string{"LGPLv3+", "GPLv2+"},
		},
		{
			name:     "licenses `and`-separated",
			license:  "BSD-3-CLAUSE and GPL-2",
			licenses: []string{"BSD-3-CLAUSE", "GPL-2"},
		},
		{
			name:     "three licenses and/or separated",
			license:  "GPL-1+ or Artistic, and BSD-4-clause-POWERDOG",
			licenses: []string{"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"},
		},
		{
			// The comma before "Version" must not start a new license.
			name:     "two licenses with version",
			license:  "Apache License,Version 2.0, OSET Public License version 2.1",
			licenses: []string{"Apache License, Version 2.0", "OSET Public License version 2.1"},
		},
		{
			// "verbatim" begins with "ver" but is not a version suffix.
			name:     "the license starts with `ver`",
			license:  "verbatim and BSD-4-clause",
			licenses: []string{"verbatim", "BSD-4-clause"},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			res := licensing.SplitLicenses(tt.license)
			assert.Equal(t, tt.licenses, res)
		})
	}
}

0 comments on commit 9399604

Please sign in to comment.