diff --git a/pkg/fanal/analyzer/language/analyze.go b/pkg/fanal/analyzer/language/analyze.go index 33526dbf1707..1f65841eca07 100644 --- a/pkg/fanal/analyzer/language/analyze.go +++ b/pkg/fanal/analyzer/language/analyze.go @@ -92,7 +92,7 @@ func toApplication(fileType, filePath, libFilePath string, r dio.ReadSeekerAt, l for _, lib := range libs { var licenses []string if lib.License != "" { - licenses = strings.Split(lib.License, ",") + licenses = licensing.SplitLicenses(lib.License) for i, license := range licenses { licenses[i] = licensing.Normalize(strings.TrimSpace(license)) } diff --git a/pkg/fanal/analyzer/pkg/dpkg/copyright.go b/pkg/fanal/analyzer/pkg/dpkg/copyright.go index 4b8006e42bbb..9b962b8c3e49 100644 --- a/pkg/fanal/analyzer/pkg/dpkg/copyright.go +++ b/pkg/fanal/analyzer/pkg/dpkg/copyright.go @@ -27,7 +27,6 @@ var ( dpkgLicenseAnalyzerVersion = 1 commonLicenseReferenceRegexp = regexp.MustCompile(`/?usr/share/common-licenses/([0-9A-Za-z_.+-]+[0-9A-Za-z+])`) - licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+or[_ ]+)|(,?[_ ]+and[_ ])|(,[ ]*)") ) // dpkgLicenseAnalyzer parses copyright files and detect licenses @@ -90,14 +89,7 @@ func (a *dpkgLicenseAnalyzer) parseCopyright(r dio.ReadSeekerAt) ([]types.Licens l = normalizeLicense(l) if len(l) > 0 { - // Split licenses without considering "and"/"or" - // examples: - // 'GPL-1+,GPL-2' => {"GPL-1", "GPL-2"} - // 'GPL-1+ or Artistic or Artistic-dist' => {"GPL-1", "Artistic", "Artistic-dist"} - // 'LGPLv3+_or_GPLv2+' => {"LGPLv3", "GPLv2"} - // 'BSD-3-CLAUSE and GPL-2' => {"BSD-3-CLAUSE", "GPL-2"} - // 'GPL-1+ or Artistic, and BSD-4-clause-POWERDOG' => {"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"} - for _, lic := range licenseSplitRegexp.Split(l, -1) { + for _, lic := range licensing.SplitLicenses(l) { lic = licensing.Normalize(lic) if !slices.Contains(licenses, lic) { licenses = append(licenses, lic) diff --git a/pkg/licensing/normalize.go b/pkg/licensing/normalize.go index 5e25fc89ca13..38956108c4fc 
// licenseSplitRegexp matches the separators that may appear between license
// names in a free-form license string:
//   - an optional comma, then a lower-case "or"/"and" surrounded by spaces
//     or underscores (", and ", " or ", "_or_");
//   - a bare comma followed by optional spaces.
//
// The match is case-sensitive, so upper-case "OR"/"AND" are not treated as
// separators.
var licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+(?:or|and)[_ ]+)|(,[ ]*)")

// SplitLicenses splits a free-form license string into individual license
// names, ignoring whether the names were joined by "and", "or" or a comma.
//
// Fragments that are really a continuation of the previous license are glued
// back onto it instead of being reported as separate licenses:
//   - a fragment starting with "ver " or "version " (any case) is re-attached
//     with ", " (e.g. "Apache License, Version 2.0");
//   - a fragment that is "later" or starts with "later " (any case) is
//     re-attached with " or " (e.g. "GPL-2 or later").
//
// Blank fragments (empty input, trailing or doubled separators) are dropped,
// so the result never contains an empty license name. Retained fragments are
// returned as-is; callers are expected to trim/normalize them.
//
// Examples:
//
//	"GPL-1+,GPL-2"                                        => {"GPL-1+", "GPL-2"}
//	"GPL-1+ or Artistic or Artistic-dist"                 => {"GPL-1+", "Artistic", "Artistic-dist"}
//	"LGPLv3+_or_GPLv2+"                                   => {"LGPLv3+", "GPLv2+"}
//	"BSD-3-CLAUSE and GPL-2"                              => {"BSD-3-CLAUSE", "GPL-2"}
//	"GPL-1+ or Artistic, and BSD-4-clause-POWERDOG"       => {"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"}
//	"BSD 3-Clause License or Apache License, Version 2.0" => {"BSD 3-Clause License", "Apache License, Version 2.0"}
//	"GPL-2 or later"                                      => {"GPL-2 or later"}
//	"MIT,"                                                => {"MIT"}
//	""                                                    => nil
func SplitLicenses(str string) []string {
	var licenses []string
	for _, maybeLic := range licenseSplitRegexp.Split(str, -1) {
		// Trailing/doubled separators and empty input produce blank
		// fragments; they are never a license name.
		if strings.TrimSpace(maybeLic) == "" {
			continue
		}

		if len(licenses) > 0 {
			lower := strings.ToLower(maybeLic)
			last := &licenses[len(licenses)-1]
			switch {
			case strings.HasPrefix(lower, "ver "), strings.HasPrefix(lower, "version "):
				// The comma belonged to the license name itself,
				// e.g. "Apache License, Version 2.0".
				*last += ", " + maybeLic
				continue
			case lower == "later", strings.HasPrefix(lower, "later "):
				// The "or" belonged to an "or later" suffix,
				// e.g. "GNU Lesser General Public License v2.1 or later".
				*last += " or " + maybeLic
				continue
			}
		}

		licenses = append(licenses, maybeLic)
	}
	return licenses
}
comma-separated", + "GPL-1+,GPL-2,GPL-3", + []string{"GPL-1+", "GPL-2", "GPL-3"}, + }, + { + "3 licenses 'or'-separated", + "GPL-1+ or Artistic or Artistic-dist", + []string{"GPL-1+", "Artistic", "Artistic-dist"}, + }, + // ' + { + "two licenses _or_ separated", + "LGPLv3+_or_GPLv2+", + []string{"LGPLv3+", "GPLv2+"}, + }, + // ' + { + "licenses `and`-separated", + "BSD-3-CLAUSE and GPL-2", + []string{"BSD-3-CLAUSE", "GPL-2"}, + }, + { + "three licenses and/or separated", + "GPL-1+ or Artistic, and BSD-4-clause-POWERDOG", + []string{"GPL-1+", "Artistic", "BSD-4-clause-POWERDOG"}, + }, + { + "two licenses with version", + "Apache License,Version 2.0, OSET Public License version 2.1", + []string{"Apache License, Version 2.0", "OSET Public License version 2.1"}, + }, + { + "the license starts with `ver`", + "verbatim and BSD-4-clause", + []string{"verbatim", "BSD-4-clause"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + res := licensing.SplitLicenses(tt.license) + assert.Equal(t, tt.licenses, res) + }) + } +}