diff --git a/pkg/license/catalog.go b/pkg/license/catalog.go
new file mode 100644
index 00000000000..1f9d690490b
--- /dev/null
+++ b/pkg/license/catalog.go
@@ -0,0 +1,109 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package license
+
+import (
+ "path/filepath"
+ "sync"
+
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+)
+
+// CatalogOptions are the settings of a license Catalog
+type CatalogOptions struct {
+ CacheDir string // Directory to cache the licenses we download from SPDX.org
+}
+
+// DefaultCatalogOpts are the predetermined settings. License and cache directories
+// are in the temporary OS directory and are created if they do not exist.
+// This is a single shared pointer: writing to its fields changes the defaults
+// seen by every other user of DefaultCatalogOpts.
+var DefaultCatalogOpts = &CatalogOptions{}
+
+// NewCatalogWithOptions returns a license Catalog configured with the
+// specified options. A nil opts is treated as an empty CatalogOptions.
+// It returns an error if the license downloader cannot be created.
+func NewCatalogWithOptions(opts *CatalogOptions) (catalog *Catalog, err error) {
+	// Guard against a nil pointer so reading opts.CacheDir below cannot panic
+	if opts == nil {
+		opts = &CatalogOptions{}
+	}
+
+	// Create the license downloader, pointing it at the catalog's cache dir.
+	// NOTE(review): if DefaultDownloaderOpts is a pointer, the assignment below
+	// writes CacheDir into the shared defaults — confirm and copy if so.
+	doptions := DefaultDownloaderOpts
+	doptions.CacheDir = opts.CacheDir
+	downloader, err := NewDownloaderWithOptions(doptions)
+	if err != nil {
+		return nil, errors.Wrap(err, "creating downloader")
+	}
+
+	return &Catalog{
+		Downloader: downloader,
+		opts:       opts,
+	}, nil
+}
+
+// Options returns a pointer to the catalog options
+func (catalog *Catalog) Options() *CatalogOptions {
+ return catalog.opts
+}
+
+// LoadLicenses asks the catalog's downloader for the license list and
+// stores the result in the catalog's List field. If the downloader fails,
+// the error is returned wrapped and List is left untouched.
+func (catalog *Catalog) LoadLicenses() error {
+	logrus.Info("Loading license data from downloader")
+
+	list, err := catalog.Downloader.GetLicenses()
+	if err != nil {
+		return errors.Wrap(err, "getting licenses from downloader")
+	}
+
+	// Store the list first, then report how many licenses it carries
+	catalog.List = list
+	logrus.Infof("Got %d licenses from downloader", len(catalog.List.Licenses))
+	return nil
+}
+
+// Catalog is an object to interact with licenses and manifest creation
+type Catalog struct {
+ Downloader *Downloader // License Downloader
+ List *List // List of licenses
+ opts *CatalogOptions // Catalog options
+}
+
+// WriteLicensesAsText writes the SPDX license collection to text files.
+// Every license in the catalog is written to targetDir as <LicenseID>.txt.
+// The files are written concurrently; when one or more writes fail, the
+// errors are chained together and returned after all writers have finished.
+// It returns an error without writing anything if the licenses have not
+// been loaded yet.
+func (catalog *Catalog) WriteLicensesAsText(targetDir string) error {
+	logrus.Info("Writing SPDX licenses to " + targetDir)
+	if catalog.List == nil || catalog.List.Licenses == nil {
+		return errors.New("unable to write licenses, they have not been loaded yet")
+	}
+
+	var (
+		wg  sync.WaitGroup
+		mtx sync.Mutex // guards err, which all writer goroutines share
+		err error
+	)
+	for _, l := range catalog.List.Licenses {
+		wg.Add(1)
+		go func(l *License) {
+			defer wg.Done()
+			lerr := l.WriteText(filepath.Join(targetDir, l.LicenseID+".txt"))
+			if lerr == nil {
+				return
+			}
+			// Record the failure, chaining it to any error seen before
+			mtx.Lock()
+			defer mtx.Unlock()
+			if err == nil {
+				err = lerr
+			} else {
+				err = errors.Wrap(err, lerr.Error())
+			}
+		}(l)
+	}
+	wg.Wait()
+
+	// errors.Wrap returns nil for a nil error, so success yields nil here
+	return errors.Wrap(err, "caught errors while writing license files")
+}
+
+// GetLicense returns a license struct from its SPDX ID label. It logs a
+// warning and returns nil when the label is not known to the catalog or
+// when no license list has been loaded.
+func (catalog *Catalog) GetLicense(label string) *License {
+	// Reading from a nil map is safe; only the nil List pointer needs a guard
+	if catalog.List != nil {
+		if lic, ok := catalog.List.Licenses[label]; ok {
+			return lic
+		}
+	}
+	logrus.Warnf("Label %s is not an identifier of a known license", label)
+	return nil
+}
diff --git a/pkg/license/download.go b/pkg/license/download.go
index ac62cc3bbb1..f84a71001f0 100644
--- a/pkg/license/download.go
+++ b/pkg/license/download.go
@@ -20,19 +20,22 @@ import (
"crypto/sha1"
"encoding/json"
"fmt"
- "io"
- "net/http"
"os"
"path/filepath"
+ "strings"
"github.com/nozzle/throttler"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
+ "sigs.k8s.io/release-utils/http"
"sigs.k8s.io/release-utils/util"
)
// ListURL is the json list of all spdx licenses
-const ListURL = "https://spdx.org/licenses/licenses.json"
+const (
+ LicenseDataURL = "https://spdx.org/licenses/"
+ LicenseListFilename = "licenses.json"
+)
// NewDownloader returns a downloader with the default options
func NewDownloader() (*Downloader, error) {
@@ -72,7 +75,7 @@ func (do *DownloaderOptions) Validate() error {
}
// And no cache dir was specified
if do.CacheDir == "" {
- dir, err := os.MkdirTemp("", "license-cache-")
+ dir, err := os.MkdirTemp(os.TempDir(), "license-cache-")
if err != nil {
return errors.Wrap(err, "creating temporary directory")
}
@@ -94,7 +97,7 @@ func (d *Downloader) SetImplementation(di DownloaderImplementation) {
// GetLicenses is the mina function of the downloader. Returns a license list
// or an error if could get them
-func (d *Downloader) GetLicenses() (*SPDXLicenseList, error) {
+func (d *Downloader) GetLicenses() (*List, error) {
return d.impl.GetLicenses()
}
@@ -102,7 +105,7 @@ func (d *Downloader) GetLicenses() (*SPDXLicenseList, error) {
// DownloaderImplementation has only one method
type DownloaderImplementation interface {
- GetLicenses() (*SPDXLicenseList, error)
+ GetLicenses() (*List, error)
SetOptions(*DownloaderOptions)
}
@@ -124,23 +127,17 @@ func (ddi *DefaultDownloaderImpl) SetOptions(opts *DownloaderOptions) {
}
// GetLicenses downloads the main json file listing all SPDX supported licenses
-func (ddi *DefaultDownloaderImpl) GetLicenses() (licenses *SPDXLicenseList, err error) {
+func (ddi *DefaultDownloaderImpl) GetLicenses() (licenses *List, err error) {
// TODO: Cache licenselist
- logrus.Info("Downloading main SPDX license data")
+ logrus.Info("Downloading main SPDX license data from " + LicenseDataURL)
// Get the list of licenses
- resp, err := http.Get(ListURL)
+ licensesJSON, err := http.NewAgent().Get(LicenseDataURL + LicenseListFilename)
if err != nil {
return nil, errors.Wrap(err, "fetching licenses list")
}
- defer resp.Body.Close()
- licensesJSON, err := io.ReadAll(resp.Body)
- if err != nil {
- return nil, errors.Wrap(err, "reading license list response body")
- }
-
- licenseList := &SPDXLicenseList{}
+ licenseList := &List{}
if err := json.Unmarshal(licensesJSON, licenseList); err != nil {
return nil, errors.Wrap(err, "parsing SPDX licence list")
}
@@ -150,6 +147,11 @@ func (ddi *DefaultDownloaderImpl) GetLicenses() (licenses *SPDXLicenseList, err
// Create a new Throttler that will get `parallelDownloads` urls at a time
t := throttler.New(ddi.Options.parallelDownloads, len(licenseList.LicenseData))
for _, l := range licenseList.LicenseData {
+ licURL := l.Reference
+ // If the license URLs have a local reference
+ if strings.HasPrefix(licURL, "./") {
+ licURL = LicenseDataURL + strings.TrimPrefix(licURL, "./")
+ }
// Launch a goroutine to fetch the URL.
go func(url string) {
var err error
@@ -158,8 +160,9 @@ func (ddi *DefaultDownloaderImpl) GetLicenses() (licenses *SPDXLicenseList, err
if err != nil {
return
}
+ logrus.Debugf("Got license: %s from %s", l.LicenseID, url)
licenseList.Add(l)
- }(l.DetailsURL)
+ }(licURL)
t.Throttle()
}
@@ -214,7 +217,7 @@ func (ddi *DefaultDownloaderImpl) getCachedData(url string) ([]byte, error) {
}
// getLicenseFromURL downloads a license in json and returns it parsed into a struct
-func (ddi *DefaultDownloaderImpl) getLicenseFromURL(url string) (license *SPDXLicense, err error) {
+func (ddi *DefaultDownloaderImpl) getLicenseFromURL(url string) (license *License, err error) {
licenseJSON := []byte{}
// Determine the cache file name
if ddi.Options.EnableCache {
@@ -230,15 +233,10 @@ func (ddi *DefaultDownloaderImpl) getLicenseFromURL(url string) (license *SPDXLi
// If we still don't have json data, download it
if len(licenseJSON) == 0 {
logrus.Infof("Downloading license data from %s", url)
- resp, err := http.Get(url)
+ licenseJSON, err = http.NewAgent().Get(url)
if err != nil {
return nil, errors.Wrapf(err, "getting %s", url)
}
- defer resp.Body.Close()
- licenseJSON, err = io.ReadAll(resp.Body)
- if err != nil {
- return nil, errors.Wrap(err, "reading response body")
- }
logrus.Infof("Downloaded %d bytes from %s", len(licenseJSON), url)
@@ -249,5 +247,10 @@ func (ddi *DefaultDownloaderImpl) getLicenseFromURL(url string) (license *SPDXLi
}
}
- return ParseSPDXLicense(licenseJSON)
+ // Parse the SPDX license from the JSON data
+ l, err := ParseLicense(licenseJSON)
+ if err != nil {
+ return nil, errors.Wrap(err, "parsing license json data")
+ }
+ return l, err
}
diff --git a/pkg/license/implementation.go b/pkg/license/implementation.go
new file mode 100644
index 00000000000..3915528b5f5
--- /dev/null
+++ b/pkg/license/implementation.go
@@ -0,0 +1,188 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package license
+
+import (
+ "fmt"
+ "os"
+ "path/filepath"
+ "regexp"
+
+ licenseclassifier "github.com/google/licenseclassifier/v2"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+)
+
+// ReaderDefaultImpl is the default license reader implementation, uses
+// Google's license classifier
+type ReaderDefaultImpl struct {
+ lc *licenseclassifier.Classifier // classifier, created by Initialize
+ catalog *Catalog // license catalog, created by Initialize
+}
+
+// ClassifyFile takes a file path and returns the most probable license tag.
+// licenseTag is the name of the match with the highest confidence, or the
+// empty string when the classifier returns no match. moreTags holds the
+// names of all the matches the classifier returned. An error is returned
+// if the file cannot be opened or the classifier fails while reading it.
+func (d *ReaderDefaultImpl) ClassifyFile(path string) (licenseTag string, moreTags []string, err error) {
+	file, err := os.Open(path)
+	if err != nil {
+		return licenseTag, nil, errors.Wrap(err, "opening file for analysis")
+	}
+	defer file.Close()
+
+	// Get the classification
+	matches, err := d.Classifier().MatchFrom(file)
+	if err != nil {
+		return "", nil, errors.Wrap(err, "classifying file contents")
+	}
+	if len(matches) == 0 {
+		logrus.Warn("File does not match a known license: " + path)
+	}
+
+	// Keep every matched name and pick the one with the highest confidence
+	var highestConf float64
+	moreTags = []string{}
+	for _, match := range matches {
+		moreTags = append(moreTags, match.Name)
+		if match.Confidence > highestConf {
+			highestConf = match.Confidence
+			licenseTag = match.Name
+		}
+	}
+	return licenseTag, moreTags, nil
+}
+
+// ClassifyLicenseFiles takes a list of paths and returns the licenses found
+// in them. Files for which the classifier yields no label are returned in
+// unrecognizedPaths. Classification stops with an error at the first file
+// that cannot be classified or whose label is not in the catalog.
+func (d *ReaderDefaultImpl) ClassifyLicenseFiles(paths []string) (
+	licenseList []ClassifyResult, unrecognizedPaths []string, err error) {
+	// Run the files through the classifier
+	for _, f := range paths {
+		label, _, err := d.ClassifyFile(f)
+		if err != nil {
+			return nil, unrecognizedPaths, errors.Wrap(err, "classifying file")
+		}
+		if label == "" {
+			unrecognizedPaths = append(unrecognizedPaths, f)
+			continue
+		}
+		// Get the license corresponding to the ID label
+		license := d.catalog.GetLicense(label)
+		if license == nil {
+			return nil, unrecognizedPaths,
+				errors.New(fmt.Sprintf("ID does not correspond to a valid license: '%s'", label))
+		}
+		// Append to the return results
+		licenseList = append(licenseList, ClassifyResult{f, license})
+	}
+
+	// Multiply before dividing so the integer division does not truncate the
+	// ratio to zero, and skip the division altogether for an empty path list
+	percent := 0
+	if len(paths) > 0 {
+		percent = len(licenseList) * 100 / len(paths)
+	}
+	logrus.Infof(
+		"License classifier recognized %d/%d (%d%%) os the files",
+		len(licenseList), len(paths), percent,
+	)
+	return licenseList, unrecognizedPaths, nil
+}
+
+// LicenseFromLabel returns a spdx license from its label, as looked up
+// in the reader's catalog
+func (d *ReaderDefaultImpl) LicenseFromLabel(label string) (license *License) {
+ return d.catalog.GetLicense(label)
+}
+
+// LicenseFromFile classifies the file at path and returns the catalog
+// license matching the resulting label. When the classifier yields no
+// label, both the license and the error are nil.
+func (d *ReaderDefaultImpl) LicenseFromFile(path string) (license *License, err error) {
+	// Ask the classifier for the most probable license label
+	tag, _, err := d.ClassifyFile(path)
+	switch {
+	case err != nil:
+		return nil, errors.Wrap(err, "classifying file")
+	case tag == "":
+		logrus.Info("File does not contain a known license: " + path)
+		return nil, nil
+	}
+
+	// Look the label up in the catalog
+	if license = d.catalog.GetLicense(tag); license != nil {
+		return license, nil
+	}
+	return nil, errors.New(fmt.Sprintf("ID does not correspond to a valid license: %s", tag))
+}
+
+// FindLicenseFiles walks the directory tree rooted at path and collects the
+// files whose base name matches the license filename regexp. Directories
+// and go source files are never reported.
+func (d *ReaderDefaultImpl) FindLicenseFiles(path string) ([]string, error) {
+	logrus.Infof("Scanning %s for license files", path)
+	found := []string{}
+	nameRe := regexp.MustCompile(licenseFilanameRe)
+
+	visit := func(filePath string, info os.FileInfo, walkErr error) error {
+		if walkErr != nil {
+			return walkErr
+		}
+		// Only regular entries that are not go sources are candidates
+		isCandidate := !info.IsDir() && filepath.Ext(filePath) != ".go"
+		if isCandidate && nameRe.MatchString(filepath.Base(filePath)) {
+			found = append(found, filePath)
+		}
+		return nil
+	}
+
+	if err := filepath.Walk(path, visit); err != nil {
+		return nil, errors.Wrap(err, "scanning the directory for license files")
+	}
+	logrus.Infof("%d license files found in directory", len(found))
+	return found, nil
+}
+
+// Initialize checks the options and creates the needed objects: it
+// validates opts, builds the license catalog, loads the licenses into it,
+// writes them as text files to opts.LicensesPath() and finally creates the
+// classifier and loads those files into it. It returns the first error hit.
+func (d *ReaderDefaultImpl) Initialize(opts *ReaderOptions) error {
+	// Validate our options before starting
+	if err := opts.Validate(); err != nil {
+		return errors.Wrap(err, "validating the license reader options")
+	}
+
+	// Create the implementation's catalog. Work on a copy of the defaults:
+	// DefaultCatalogOpts is a shared pointer, so writing to it directly would
+	// leak this reader's cache path into the package-level defaults.
+	catalogOpts := *DefaultCatalogOpts
+	catalogOpts.CacheDir = opts.CachePath()
+	catalog, err := NewCatalogWithOptions(&catalogOpts)
+	if err != nil {
+		return errors.Wrap(err, "creating SPDX object")
+	}
+	d.catalog = catalog
+
+	if err := d.catalog.LoadLicenses(); err != nil {
+		return errors.Wrap(err, "loading licenses")
+	}
+
+	// Write the licenses to disk as the classifier will need them
+	if err := catalog.WriteLicensesAsText(opts.LicensesPath()); err != nil {
+		return errors.Wrap(err, "writing license data to disk")
+	}
+
+	// Create the implementation's classifier
+	d.lc = licenseclassifier.NewClassifier(opts.ConfidenceThreshold)
+	return errors.Wrap(d.lc.LoadLicenses(opts.LicensesPath()), "loading licenses at init")
+}
+
+// Classifier returns the license classifier. The value is nil until
+// Initialize has created it.
+func (d *ReaderDefaultImpl) Classifier() *licenseclassifier.Classifier {
+ return d.lc
+}
+
+// Catalog returns the reader's license catalog
+func (d *ReaderDefaultImpl) Catalog() *Catalog {
+ return d.catalog
+}
diff --git a/pkg/license/license.go b/pkg/license/license.go
index e6b6f74e9d3..6f6793ad0be 100644
--- a/pkg/license/license.go
+++ b/pkg/license/license.go
@@ -19,12 +19,13 @@ limitations under the License.
package license
import (
- "fmt"
+ "bufio"
+ "encoding/json"
"os"
"path/filepath"
- "regexp"
+ "strings"
+ "sync"
- licenseclassifier "github.com/google/licenseclassifier/v2"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"sigs.k8s.io/release-utils/util"
@@ -36,6 +37,31 @@ const (
defaultLicenseSubDir = "licenses"
)
+const kubernetesBoilerPlate = `# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0`
+
+// DebianLicenseLabels is a map to get the SPDX label from a debian label.
+// NOTE(review): the unversioned debian labels ("BSD", "GPL") are mapped to
+// one specific SPDX identifier — confirm these match the license texts
+// Debian actually ships under those names.
+var DebianLicenseLabels = map[string]string{
+ "Apache-2.0": "Apache-2.0",
+ "Artistic": "Artistic-1.0-Perl",
+ "BSD": "BSD-1-Clause",
+ "CC0-1.0": "CC0-1.0",
+ "GFDL-1.2": "GFDL-1.2",
+ "GFDL-1.3": "GFDL-1.3",
+ "GPL": "GPL-1.0",
+ "GPL-1": "GPL-1.0",
+ "GPL-2": "GPL-2.0",
+ "GPL-3": "GPL-3.0",
+ "LGPL-2": "LGPL-2.0",
+ "LGPL-2.1": "LGPL-2.1",
+ "LGPL-3": "LGPL-3.0",
+ "MPL-1.1": "MPL-1.1",
+ "MPL-2.0": "MPL-2.0",
+}
+
// Reader is an object that finds and interprets license files
type Reader struct {
impl ReaderImplementation
@@ -147,6 +173,20 @@ var DefaultReaderOptions = &ReaderOptions{
ConfidenceThreshold: 0.9,
}
+// LicenseFromLabel returns a spdx license from its label. The lookup is
+// delegated to the reader's implementation.
+func (r *Reader) LicenseFromLabel(label string) (license *License) {
+ return r.impl.LicenseFromLabel(label)
+}
+
+// LicenseFromFile hands the file path to the reader's implementation and
+// returns the license it determines. Implementation errors are returned
+// wrapped, together with a nil license.
+func (r *Reader) LicenseFromFile(filePath string) (license *License, err error) {
+	if license, err = r.impl.LicenseFromFile(filePath); err != nil {
+		return nil, errors.Wrap(err, "classifying file to determine license")
+	}
+	return license, nil
+}
+
// ReadLicenses returns an array of all licenses found in the specified path
func (r *Reader) ReadLicenses(path string) (licenseList []ClassifyResult, unknownPaths []string, err error) {
licenseFiles, err := r.impl.FindLicenseFiles(path)
@@ -164,7 +204,7 @@ func (r *Reader) ReadLicenses(path string) (licenseList []ClassifyResult, unknow
// ClassifyResult abstracts the data resulting from a file classification
type ClassifyResult struct {
File string
- License *SPDXLicense
+ License *License
}
//counterfeiter:generate . ReaderImplementation
@@ -175,138 +215,100 @@ type ReaderImplementation interface {
Initialize(*ReaderOptions) error
ClassifyLicenseFiles([]string) ([]ClassifyResult, []string, error)
ClassifyFile(string) (string, []string, error)
+ LicenseFromFile(string) (*License, error)
+ LicenseFromLabel(string) *License
FindLicenseFiles(string) ([]string, error)
}
-// ReaderDefaultImpl the default license reader imlementation, uses
-// Google's cicense classifier
-type ReaderDefaultImpl struct {
- lc *licenseclassifier.Classifier
- spdx *SPDX
-}
-
-// Initialize checks the options and creates the needed objects
-func (d *ReaderDefaultImpl) Initialize(opts *ReaderOptions) error {
- // Validate our options before startin
- if err := opts.Validate(); err != nil {
- return errors.Wrap(err, "validating the license reader options")
- }
-
- // Create the implementation's SPDX object
- spdxopts := DefaultSPDXOpts
- spdxopts.CacheDir = opts.CachePath()
- spdx, err := NewSPDXWithOptions(spdxopts)
+// HasKubernetesBoilerPlate checks if a file contains the Kubernetes License boilerplate
+func HasKubernetesBoilerPlate(filePath string) (bool, error) {
+ // kubernetesBoilerPlate
+ sut, err := os.Open(filePath)
if err != nil {
- return errors.Wrap(err, "creating SPDX object")
+ return false, errors.Wrap(err, "opening file to check for k8s boilerplate")
}
- d.spdx = spdx
-
- if err := d.spdx.LoadLicenses(); err != nil {
- return errors.Wrap(err, "loading licenses")
+ defer sut.Close()
+
+ // Accumulate the leading lines of the file; the loop stops once i exceeds 100
+ scanner := bufio.NewScanner(sut)
+ scanner.Split(bufio.ScanLines)
+ text := ""
+ i := 0
+ for scanner.Scan() {
+ text = text + scanner.Text() + "\n"
+ i++
+ if i > 100 {
+ break
+ }
}
-
- // Write the licenses to disk as th classifier will need them
- if err := spdx.WriteLicensesAsText(opts.LicensesPath()); err != nil {
- return errors.Wrap(err, "writing license data to disk")
+ // Only the lines read above are searched for the boilerplate
+ if strings.Contains(text, kubernetesBoilerPlate) {
+ logrus.Infof("Found Kubernetes boilerplate in %s", filePath)
+ return true, nil
}
- // Create the implementation's classifier
- d.lc = licenseclassifier.NewClassifier(opts.ConfidenceThreshold)
- return errors.Wrap(d.lc.LoadLicenses(opts.LicensesPath()), "loading licenses at init")
-}
-
-// Classifier returns the license classifier
-func (d *ReaderDefaultImpl) Classifier() *licenseclassifier.Classifier {
- return d.lc
+ return false, nil
}
-// SPDX returns the reader's SPDX object
-func (d *ReaderDefaultImpl) SPDX() *SPDX {
- return d.spdx
+// List abstracts the list of licenses published by SPDX.org. The embedded
+// RWMutex is taken by Add while it writes to the Licenses map.
+type List struct {
+	sync.RWMutex
+	Version string `json:"licenseListVersion"`
+	// The tag must not carry whitespace: with a trailing space the JSON key
+	// "releaseDate" never matches and the field is left empty
+	ReleaseDateString string      `json:"releaseDate"`
+	LicenseData       []ListEntry `json:"licenses"`
+	Licenses          map[string]*License // licenses indexed by their LicenseID
}
-// ClassifyFile takes a file path and returns the most probable license tag
-func (d *ReaderDefaultImpl) ClassifyFile(path string) (licenseTag string, moreTags []string, err error) {
- file, err := os.Open(path)
- if err != nil {
- return licenseTag, nil, errors.Wrap(err, "opening file for analysis")
+// Add appends a license to the license list
+func (list *List) Add(license *License) {
+ list.Lock()
+ defer list.Unlock()
+ if list.Licenses == nil {
+ list.Licenses = map[string]*License{}
}
- defer file.Close()
+ list.Licenses[license.LicenseID] = license
+}
- // Get the classsification
- matches, err := d.Classifier().MatchFrom(file)
- if len(matches) == 0 {
- logrus.Warn("File does not match a known license: " + path)
- }
- var highestConf float64
- moreTags = []string{}
- for _, match := range matches {
- if match.Confidence > highestConf {
- highestConf = match.Confidence
- licenseTag = match.Name
- moreTags = append(moreTags, match.Name)
- }
- }
- return licenseTag, []string{}, nil
+// License is a SPDX license described in JSON
+type License struct {
+ IsDeprecatedLicenseID bool `json:"isDeprecatedLicenseId"`
+ IsFsfLibre bool `json:"isFsfLibre"`
+ IsOsiApproved bool `json:"isOsiApproved"`
+ LicenseText string `json:"licenseText"`
+ StandardLicenseHeaderTemplate string `json:"standardLicenseHeaderTemplate"`
+ StandardLicenseTemplate string `json:"standardLicenseTemplate"`
+ Name string `json:"name"`
+ LicenseID string `json:"licenseId"`
+ StandardLicenseHeader string `json:"standardLicenseHeader"`
+ SeeAlso []string `json:"seeAlso"`
}
-// ClassifyLicenseFiles takes a list of paths and tries to find return all licenses found in it
-func (d *ReaderDefaultImpl) ClassifyLicenseFiles(paths []string) (
- licenseList []ClassifyResult, unrecognizedPaths []string, err error) {
- // Run the files through the clasifier
- for _, f := range paths {
- label, _, err := d.ClassifyFile(f)
- if err != nil {
- return nil, unrecognizedPaths, errors.Wrap(err, "classifying file")
- }
- if label == "" {
- unrecognizedPaths = append(unrecognizedPaths, f)
- continue
- }
- // Get the license corresponding to the ID label
- license := d.spdx.GetLicense(label)
- if license == nil {
- return nil, unrecognizedPaths,
- errors.New(fmt.Sprintf("ID does not correspond to a valid license: %s", label))
- }
- // Apend to the return results
- licenseList = append(licenseList, ClassifyResult{f, license})
- }
- logrus.Infof(
- "License classifier recognized %d/%d (%d%%) os the files",
- len(licenseList), len(paths), (len(licenseList)/len(paths))*100,
+// WriteText writes the SPDX license text to a text file
+func (license *License) WriteText(filePath string) error {
+ return errors.Wrap(
+ os.WriteFile(
+ filePath, []byte(license.LicenseText), os.FileMode(0o644),
+ ), "while writing license to text file",
)
- return licenseList, unrecognizedPaths, nil
}
-// FindLicenseFiles will scan a directory and return files that may be licenses
-func (d *ReaderDefaultImpl) FindLicenseFiles(path string) ([]string, error) {
- logrus.Infof("Scanning %s for license files", path)
- licenseList := []string{}
- re := regexp.MustCompile(licenseFilanameRe)
- if err := filepath.Walk(path,
- func(path string, finfo os.FileInfo, err error) error {
- if err != nil {
- return err
- }
-
- // Directories are ignored
- if finfo.IsDir() {
- return nil
- }
+// ListEntry is a license entry in the list, decoded from one element of
+// the "licenses" array
+type ListEntry struct {
+ IsOsiApproved bool `json:"isOsiApproved"`
+ // NOTE(review): the field name is misspelled (IsDeprectaed); it is
+ // exported, so renaming it is an API change.
+ IsDeprectaed bool `json:"isDeprecatedLicenseId"`
+ Reference string `json:"reference"`
+ DetailsURL string `json:"detailsUrl"`
+ ReferenceNumber int `json:"referenceNumber"`
+ Name string `json:"name"`
+ LicenseID string `json:"licenseId"`
+ SeeAlso []string `json:"seeAlso"`
+}
- // No go source files are considered
- if filepath.Ext(path) == ".go" {
- return nil
- }
- // Check if tehe file matches the license regexp
- if re.MatchString(filepath.Base(path)) {
- licenseList = append(licenseList, path)
- }
- return nil
- }); err != nil {
- return nil, errors.Wrap(err, "scanning the directory for license files")
+// ParseLicense parses a SPDX license from its JSON source
+func ParseLicense(licenseJSON []byte) (license *License, err error) {
+ license = &License{}
+ if err := json.Unmarshal(licenseJSON, license); err != nil {
+ return nil, errors.Wrap(err, "parsing SPDX licence")
}
- logrus.Infof("%d license files found in directory", len(licenseList))
- return licenseList, nil
+ return license, nil
}
diff --git a/pkg/license/license_test.go b/pkg/license/license_test.go
index b58ae5aad35..1391aa14636 100644
--- a/pkg/license/license_test.go
+++ b/pkg/license/license_test.go
@@ -46,21 +46,20 @@ const testFullLicense = `
}
`
-func TestISPDXLoadLicenses(t *testing.T) {
+func TestISCatalogLoadLicenses(t *testing.T) {
downloader := &license.Downloader{}
// Create a SPDX to test
- spdx := &license.SPDX{
- Downloader: downloader,
- Options: license.DefaultSPDXOpts,
- }
+ spdx, err := license.NewCatalogWithOptions(license.DefaultCatalogOpts)
+ require.Nil(t, err)
+ spdx.Downloader = downloader
for _, tc := range []struct {
mustFail bool
- dnLoaderReturns *license.SPDXLicenseList
+ dnLoaderReturns *license.List
dnLoaderError error
}{
{true, nil, errors.New("Some download error")},
- {false, &license.SPDXLicenseList{}, nil},
+ {false, &license.List{}, nil},
} {
impl := licensefakes.FakeDownloaderImplementation{}
impl.GetLicensesReturns(tc.dnLoaderReturns, tc.dnLoaderError)
@@ -76,20 +75,21 @@ func TestISPDXLoadLicenses(t *testing.T) {
func TestUSPDXWriteLicensesAsText(t *testing.T) {
testLicenseID := "test-license"
+ testLicenseID2 := "test-license2"
downloader := &license.Downloader{}
impl := licensefakes.FakeDownloaderImplementation{}
- impl.GetLicensesReturns(&license.SPDXLicenseList{
- Licenses: map[string]*license.SPDXLicense{
- testLicenseID: {LicenseID: testLicenseID, LicenseText: "Test"},
+ impl.GetLicensesReturns(&license.List{
+ Licenses: map[string]*license.License{
+ testLicenseID: {LicenseID: testLicenseID, LicenseText: "Test"},
+ testLicenseID2: {LicenseID: testLicenseID2, LicenseText: "Test2"},
},
}, nil)
downloader.SetImplementation(&impl)
// Create a SPDX to test
- spdx := &license.SPDX{
- Downloader: downloader,
- Options: license.DefaultSPDXOpts,
- }
+ spdx, err := license.NewCatalogWithOptions(license.DefaultCatalogOpts)
+ require.Nil(t, err)
+ spdx.Downloader = downloader
// Get the licenses from the fke downloader
require.Nil(t, spdx.LoadLicenses())
@@ -109,29 +109,27 @@ func TestUSPDXWriteLicensesAsText(t *testing.T) {
func TestUSPDXGetLicense(t *testing.T) {
testLicenseID := "test-license"
testLicenseContent := "Test license content"
- spdx := license.SPDX{
- Downloader: &license.Downloader{},
- Licenses: &license.SPDXLicenseList{
- Licenses: map[string]*license.SPDXLicense{
- testLicenseID: {LicenseID: testLicenseID, LicenseText: testLicenseContent},
- },
+ catalog, err := license.NewCatalogWithOptions(&license.CatalogOptions{})
+ require.Nil(t, err)
+ catalog.List = &license.List{
+ Licenses: map[string]*license.License{
+ testLicenseID: {LicenseID: testLicenseID, LicenseText: testLicenseContent},
},
- Options: &license.SPDXOptions{},
}
- testTicense := spdx.GetLicense(testLicenseID)
+ testTicense := catalog.GetLicense(testLicenseID)
require.NotNil(t, testTicense)
require.Equal(t, testTicense.LicenseID, testLicenseID)
require.Equal(t, testTicense.LicenseText, testLicenseContent)
- testTicense = spdx.GetLicense("invalid-license-id")
+ testTicense = catalog.GetLicense("invalid-license-id")
require.Nil(t, testTicense)
}
func TestUSPDXLicenseListAdd(t *testing.T) {
// Create a sample license
- licenseList := &license.SPDXLicenseList{}
- testLicense := &license.SPDXLicense{LicenseID: "test-license", LicenseText: "test text"}
+ licenseList := &license.List{}
+ testLicense := &license.License{LicenseID: "test-license", LicenseText: "test text"}
// Use the Add method to add it to the collection
licenseList.Add(testLicense)
// Retrieve the data from the struct
@@ -167,7 +165,7 @@ func CheckFileExists(t *testing.T, path string) error {
}
func TestULicenseWriteText(t *testing.T) {
- testLicense := license.SPDXLicense{
+ testLicense := license.License{
LicenseText: "Test license text",
LicenseID: "test-license",
}
@@ -182,7 +180,7 @@ func TestULicenseWriteText(t *testing.T) {
}
func TestParseSPDXLicense(t *testing.T) {
- testsLicense, err := license.ParseSPDXLicense([]byte(testFullLicense))
+ testsLicense, err := license.ParseLicense([]byte(testFullLicense))
require.Nil(t, err)
require.NotNil(t, testsLicense)
diff --git a/pkg/license/licensefakes/fake_downloader_implementation.go b/pkg/license/licensefakes/fake_downloader_implementation.go
index de7875293fc..6857600b04c 100644
--- a/pkg/license/licensefakes/fake_downloader_implementation.go
+++ b/pkg/license/licensefakes/fake_downloader_implementation.go
@@ -1,19 +1,3 @@
-/*
-Copyright The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
// Code generated by counterfeiter. DO NOT EDIT.
package licensefakes
@@ -24,16 +8,16 @@ import (
)
type FakeDownloaderImplementation struct {
- GetLicensesStub func() (*license.SPDXLicenseList, error)
+ GetLicensesStub func() (*license.List, error)
getLicensesMutex sync.RWMutex
getLicensesArgsForCall []struct {
}
getLicensesReturns struct {
- result1 *license.SPDXLicenseList
+ result1 *license.List
result2 error
}
getLicensesReturnsOnCall map[int]struct {
- result1 *license.SPDXLicenseList
+ result1 *license.List
result2 error
}
SetOptionsStub func(*license.DownloaderOptions)
@@ -45,7 +29,7 @@ type FakeDownloaderImplementation struct {
invocationsMutex sync.RWMutex
}
-func (fake *FakeDownloaderImplementation) GetLicenses() (*license.SPDXLicenseList, error) {
+func (fake *FakeDownloaderImplementation) GetLicenses() (*license.List, error) {
fake.getLicensesMutex.Lock()
ret, specificReturn := fake.getLicensesReturnsOnCall[len(fake.getLicensesArgsForCall)]
fake.getLicensesArgsForCall = append(fake.getLicensesArgsForCall, struct {
@@ -69,34 +53,34 @@ func (fake *FakeDownloaderImplementation) GetLicensesCallCount() int {
return len(fake.getLicensesArgsForCall)
}
-func (fake *FakeDownloaderImplementation) GetLicensesCalls(stub func() (*license.SPDXLicenseList, error)) {
+func (fake *FakeDownloaderImplementation) GetLicensesCalls(stub func() (*license.List, error)) {
fake.getLicensesMutex.Lock()
defer fake.getLicensesMutex.Unlock()
fake.GetLicensesStub = stub
}
-func (fake *FakeDownloaderImplementation) GetLicensesReturns(result1 *license.SPDXLicenseList, result2 error) {
+func (fake *FakeDownloaderImplementation) GetLicensesReturns(result1 *license.List, result2 error) {
fake.getLicensesMutex.Lock()
defer fake.getLicensesMutex.Unlock()
fake.GetLicensesStub = nil
fake.getLicensesReturns = struct {
- result1 *license.SPDXLicenseList
+ result1 *license.List
result2 error
}{result1, result2}
}
-func (fake *FakeDownloaderImplementation) GetLicensesReturnsOnCall(i int, result1 *license.SPDXLicenseList, result2 error) {
+func (fake *FakeDownloaderImplementation) GetLicensesReturnsOnCall(i int, result1 *license.List, result2 error) {
fake.getLicensesMutex.Lock()
defer fake.getLicensesMutex.Unlock()
fake.GetLicensesStub = nil
if fake.getLicensesReturnsOnCall == nil {
fake.getLicensesReturnsOnCall = make(map[int]struct {
- result1 *license.SPDXLicenseList
+ result1 *license.List
result2 error
})
}
fake.getLicensesReturnsOnCall[i] = struct {
- result1 *license.SPDXLicenseList
+ result1 *license.List
result2 error
}{result1, result2}
}
diff --git a/pkg/license/licensefakes/fake_reader_implementation.go b/pkg/license/licensefakes/fake_reader_implementation.go
index 2b67ed8ad0b..405c6176081 100644
--- a/pkg/license/licensefakes/fake_reader_implementation.go
+++ b/pkg/license/licensefakes/fake_reader_implementation.go
@@ -1,19 +1,3 @@
-/*
-Copyright The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
// Code generated by counterfeiter. DO NOT EDIT.
package licensefakes
@@ -78,6 +62,30 @@ type FakeReaderImplementation struct {
initializeReturnsOnCall map[int]struct {
result1 error
}
+ LicenseFromFileStub func(string) (*license.License, error)
+ licenseFromFileMutex sync.RWMutex
+ licenseFromFileArgsForCall []struct {
+ arg1 string
+ }
+ licenseFromFileReturns struct {
+ result1 *license.License
+ result2 error
+ }
+ licenseFromFileReturnsOnCall map[int]struct {
+ result1 *license.License
+ result2 error
+ }
+ LicenseFromLabelStub func(string) *license.License
+ licenseFromLabelMutex sync.RWMutex
+ licenseFromLabelArgsForCall []struct {
+ arg1 string
+ }
+ licenseFromLabelReturns struct {
+ result1 *license.License
+ }
+ licenseFromLabelReturnsOnCall map[int]struct {
+ result1 *license.License
+ }
invocations map[string][][]interface{}
invocationsMutex sync.RWMutex
}
@@ -346,6 +354,131 @@ func (fake *FakeReaderImplementation) InitializeReturnsOnCall(i int, result1 err
}{result1}
}
+func (fake *FakeReaderImplementation) LicenseFromFile(arg1 string) (*license.License, error) {
+ fake.licenseFromFileMutex.Lock()
+ ret, specificReturn := fake.licenseFromFileReturnsOnCall[len(fake.licenseFromFileArgsForCall)]
+ fake.licenseFromFileArgsForCall = append(fake.licenseFromFileArgsForCall, struct {
+ arg1 string
+ }{arg1})
+ stub := fake.LicenseFromFileStub
+ fakeReturns := fake.licenseFromFileReturns
+ fake.recordInvocation("LicenseFromFile", []interface{}{arg1})
+ fake.licenseFromFileMutex.Unlock()
+ if stub != nil {
+ return stub(arg1)
+ }
+ if specificReturn {
+ return ret.result1, ret.result2
+ }
+ return fakeReturns.result1, fakeReturns.result2
+}
+
+func (fake *FakeReaderImplementation) LicenseFromFileCallCount() int {
+ fake.licenseFromFileMutex.RLock()
+ defer fake.licenseFromFileMutex.RUnlock()
+ return len(fake.licenseFromFileArgsForCall)
+}
+
+func (fake *FakeReaderImplementation) LicenseFromFileCalls(stub func(string) (*license.License, error)) {
+ fake.licenseFromFileMutex.Lock()
+ defer fake.licenseFromFileMutex.Unlock()
+ fake.LicenseFromFileStub = stub
+}
+
+func (fake *FakeReaderImplementation) LicenseFromFileArgsForCall(i int) string {
+ fake.licenseFromFileMutex.RLock()
+ defer fake.licenseFromFileMutex.RUnlock()
+ argsForCall := fake.licenseFromFileArgsForCall[i]
+ return argsForCall.arg1
+}
+
+func (fake *FakeReaderImplementation) LicenseFromFileReturns(result1 *license.License, result2 error) {
+ fake.licenseFromFileMutex.Lock()
+ defer fake.licenseFromFileMutex.Unlock()
+ fake.LicenseFromFileStub = nil
+ fake.licenseFromFileReturns = struct {
+ result1 *license.License
+ result2 error
+ }{result1, result2}
+}
+
+func (fake *FakeReaderImplementation) LicenseFromFileReturnsOnCall(i int, result1 *license.License, result2 error) {
+ fake.licenseFromFileMutex.Lock()
+ defer fake.licenseFromFileMutex.Unlock()
+ fake.LicenseFromFileStub = nil
+ if fake.licenseFromFileReturnsOnCall == nil {
+ fake.licenseFromFileReturnsOnCall = make(map[int]struct {
+ result1 *license.License
+ result2 error
+ })
+ }
+ fake.licenseFromFileReturnsOnCall[i] = struct {
+ result1 *license.License
+ result2 error
+ }{result1, result2}
+}
+
+func (fake *FakeReaderImplementation) LicenseFromLabel(arg1 string) *license.License {
+ fake.licenseFromLabelMutex.Lock()
+ ret, specificReturn := fake.licenseFromLabelReturnsOnCall[len(fake.licenseFromLabelArgsForCall)]
+ fake.licenseFromLabelArgsForCall = append(fake.licenseFromLabelArgsForCall, struct {
+ arg1 string
+ }{arg1})
+ stub := fake.LicenseFromLabelStub
+ fakeReturns := fake.licenseFromLabelReturns
+ fake.recordInvocation("LicenseFromLabel", []interface{}{arg1})
+ fake.licenseFromLabelMutex.Unlock()
+ if stub != nil {
+ return stub(arg1)
+ }
+ if specificReturn {
+ return ret.result1
+ }
+ return fakeReturns.result1
+}
+
+func (fake *FakeReaderImplementation) LicenseFromLabelCallCount() int {
+ fake.licenseFromLabelMutex.RLock()
+ defer fake.licenseFromLabelMutex.RUnlock()
+ return len(fake.licenseFromLabelArgsForCall)
+}
+
+func (fake *FakeReaderImplementation) LicenseFromLabelCalls(stub func(string) *license.License) {
+ fake.licenseFromLabelMutex.Lock()
+ defer fake.licenseFromLabelMutex.Unlock()
+ fake.LicenseFromLabelStub = stub
+}
+
+func (fake *FakeReaderImplementation) LicenseFromLabelArgsForCall(i int) string {
+ fake.licenseFromLabelMutex.RLock()
+ defer fake.licenseFromLabelMutex.RUnlock()
+ argsForCall := fake.licenseFromLabelArgsForCall[i]
+ return argsForCall.arg1
+}
+
+func (fake *FakeReaderImplementation) LicenseFromLabelReturns(result1 *license.License) {
+ fake.licenseFromLabelMutex.Lock()
+ defer fake.licenseFromLabelMutex.Unlock()
+ fake.LicenseFromLabelStub = nil
+ fake.licenseFromLabelReturns = struct {
+ result1 *license.License
+ }{result1}
+}
+
+func (fake *FakeReaderImplementation) LicenseFromLabelReturnsOnCall(i int, result1 *license.License) {
+ fake.licenseFromLabelMutex.Lock()
+ defer fake.licenseFromLabelMutex.Unlock()
+ fake.LicenseFromLabelStub = nil
+ if fake.licenseFromLabelReturnsOnCall == nil {
+ fake.licenseFromLabelReturnsOnCall = make(map[int]struct {
+ result1 *license.License
+ })
+ }
+ fake.licenseFromLabelReturnsOnCall[i] = struct {
+ result1 *license.License
+ }{result1}
+}
+
func (fake *FakeReaderImplementation) Invocations() map[string][][]interface{} {
fake.invocationsMutex.RLock()
defer fake.invocationsMutex.RUnlock()
@@ -357,6 +490,10 @@ func (fake *FakeReaderImplementation) Invocations() map[string][][]interface{} {
defer fake.findLicenseFilesMutex.RUnlock()
fake.initializeMutex.RLock()
defer fake.initializeMutex.RUnlock()
+ fake.licenseFromFileMutex.RLock()
+ defer fake.licenseFromFileMutex.RUnlock()
+ fake.licenseFromLabelMutex.RLock()
+ defer fake.licenseFromLabelMutex.RUnlock()
copiedInvocations := map[string][][]interface{}{}
for key, value := range fake.invocations {
copiedInvocations[key] = value
diff --git a/pkg/license/spdx.go b/pkg/license/spdx.go
deleted file mode 100644
index 2d8e56569ec..00000000000
--- a/pkg/license/spdx.go
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
-Copyright 2021 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package license
-
-import (
- "encoding/json"
- "os"
- "path/filepath"
- "sync"
-
- "github.com/pkg/errors"
- "github.com/sirupsen/logrus"
-)
-
-// NewSPDX returns a SPDX object with the default options
-func NewSPDX() (spdx *SPDX, err error) {
- return NewSPDXWithOptions(DefaultSPDXOpts)
-}
-
-// NewSPDXWithOptions returns a SPDX object with the specified options
-func NewSPDXWithOptions(opts *SPDXOptions) (spdx *SPDX, err error) {
- // Create the license Downloader
- doptions := DefaultDownloaderOpts
- doptions.CacheDir = opts.CacheDir
- downloader, err := NewDownloaderWithOptions(doptions)
- if err != nil {
- return nil, errors.Wrap(err, "creating downloader")
- }
- spdx = &SPDX{
- Downloader: downloader,
- Options: DefaultSPDXOpts,
- }
- if err := spdx.Options.Validate(); err != nil {
- return nil, err
- }
- return spdx, nil
-}
-
-// SPDX is an objec to interact with licenses and manifest creation
-type SPDX struct {
- Downloader *Downloader // License Downloader
- Licenses *SPDXLicenseList // List of licenses
- Options *SPDXOptions // SPDX Options
-}
-
-// SPDXOptions are the spdx settings
-type SPDXOptions struct {
- CacheDir string
-}
-
-// Validate checks the spdx options
-func (o *SPDXOptions) Validate() error {
- return nil
-}
-
-// DefaultSPDXOpts are the predetermined settings. License and cache directories
-// are in the temporary OS directory and are created if the do not exist
-var DefaultSPDXOpts = &SPDXOptions{}
-
-// SPDXLicenseList abstracts the list of licenses published by SPDX.org
-type SPDXLicenseList struct {
- sync.RWMutex
- Version string `json:"licenseListVersion"`
- ReleaseDateString string `json:"releaseDate "`
- LicenseData []SPDXLicenseListEntry `json:"licenses"`
- Licenses map[string]*SPDXLicense
-}
-
-// Add appends a license to the license list
-func (list *SPDXLicenseList) Add(license *SPDXLicense) {
- list.Lock()
- defer list.Unlock()
- if list.Licenses == nil {
- list.Licenses = map[string]*SPDXLicense{}
- }
- list.Licenses[license.LicenseID] = license
-}
-
-// SPDXLicense is a license described in JSON
-type SPDXLicense struct {
- IsDeprecatedLicenseID bool `json:"isDeprecatedLicenseId"`
- IsFsfLibre bool `json:"isFsfLibre"`
- IsOsiApproved bool `json:"isOsiApproved"`
- LicenseText string `json:"licenseText"`
- StandardLicenseHeaderTemplate string `json:"standardLicenseHeaderTemplate"`
- StandardLicenseTemplate string `json:"standardLicenseTemplate"`
- Name string `json:"name"`
- LicenseID string `json:"licenseId"`
- StandardLicenseHeader string `json:"standardLicenseHeader"`
- SeeAlso []string `json:"seeAlso"`
-}
-
-// WriteText writes the SPDX license text to a text file
-func (license *SPDXLicense) WriteText(filePath string) error {
- return errors.Wrap(
- os.WriteFile(
- filePath, []byte(license.LicenseText), os.FileMode(0o644),
- ), "while writing license to text file",
- )
-}
-
-// SPDXLicenseListEntry a license entry in the list
-type SPDXLicenseListEntry struct {
- IsOsiApproved bool `json:"isOsiApproved"`
- IsDeprectaed bool `json:"isDeprecatedLicenseId"`
- Reference string `json:"reference"`
- DetailsURL string `json:"detailsUrl"`
- ReferenceNumber string `json:"referenceNumber"`
- Name string `json:"name"`
- LicenseID string `json:"licenseId"`
- SeeAlso []string `json:"seeAlso"`
-}
-
-// LoadLicenses reads the license data from the downloader
-func (spdx *SPDX) LoadLicenses() error {
- logrus.Info("Loading license data from downloader")
- licenses, err := spdx.Downloader.GetLicenses()
- if err != nil {
- return errors.Wrap(err, "getting licenses from downloader")
- }
- spdx.Licenses = licenses
- logrus.Infof("SPDX: Got %d licenses from downloader", len(licenses.Licenses))
- return nil
-}
-
-// WriteLicensesAsText writes the SPDX license collection to text files
-func (spdx *SPDX) WriteLicensesAsText(targetDir string) error {
- logrus.Info("Writing SPDX licenses to " + targetDir)
- if spdx.Licenses.Licenses == nil {
- return errors.New("unable to write licenses, they have not been loaded yet")
- }
- for _, l := range spdx.Licenses.Licenses {
- if err := l.WriteText(filepath.Join(targetDir, l.LicenseID+".txt")); err != nil {
- return errors.Wrapf(err, "while writing license %s", l.LicenseID)
- }
- }
- return nil
-}
-
-// GetLicense returns a license struct from its SPDX ID label
-func (spdx *SPDX) GetLicense(label string) *SPDXLicense {
- if lic, ok := spdx.Licenses.Licenses[label]; ok {
- return lic
- }
- logrus.Warn("Label %s is not an ID of a known license " + label)
- return nil
-}
-
-// ParseSPDXLicense parses a SPDX license from its JSON source
-func ParseSPDXLicense(licenseJSON []byte) (license *SPDXLicense, err error) {
- license = &SPDXLicense{}
- if err := json.Unmarshal(licenseJSON, license); err != nil {
- return nil, errors.Wrap(err, "parsing SPDX licence")
- }
- return license, nil
-}
diff --git a/pkg/spdx/builder.go b/pkg/spdx/builder.go
new file mode 100644
index 00000000000..ee2f62a6619
--- /dev/null
+++ b/pkg/spdx/builder.go
@@ -0,0 +1,200 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package spdx
+
+import (
+ "os"
+ "path/filepath"
+
+ "github.com/google/go-containerregistry/pkg/name"
+ "github.com/google/uuid"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "sigs.k8s.io/release-utils/util"
+)
+
+// NewDocBuilder returns a DocBuilder set up with the default options and implementation
+func NewDocBuilder() *DocBuilder {
+	return &DocBuilder{
+		options: &defaultDocBuilderOpts,
+		impl:    defaultDocBuilderImpl{},
+	}
+}
+
+// DocBuilder is a tool to write spdx manifests
+type DocBuilder struct {
+ options *DocBuilderOptions
+ impl DocBuilderImplementation
+}
+
+// Generate creates a new SPDX document describing the artifacts specified in the options
+func (db *DocBuilder) Generate(genopts *DocGenerateOptions) (*Document, error) {
+ // Create the SPDX document
+ doc, err := db.impl.GenerateDoc(db.options, genopts)
+ if err != nil {
+ return nil, errors.Wrap(err, "creating SPDX document")
+ }
+
+ // If no output file was specified, return the document without writing it
+ if genopts.OutputFile == "" {
+ return doc, nil
+ }
+
+ return doc, errors.Wrapf(
+ db.impl.WriteDoc(doc, genopts.OutputFile),
+ "writing doc to %s", genopts.OutputFile,
+ )
+}
+
+type DocGenerateOptions struct {
+ Tarballs []string // A slice of tar paths
+ Files []string // A slice of naked files to include in the bom
+ Images []string // A slice of docker images
+ OutputFile string // Output location
+ Namespace string // Namespace for the document (a unique URI)
+ AnalyseLayers bool // A flag that controls if deep layer analysis should be performed
+}
+
+// Validate checks that the options list at least one artifact (an image,
+// a tarball or a file) to describe in the document
+func (o *DocGenerateOptions) Validate() error {
+	if len(o.Tarballs)+len(o.Files)+len(o.Images) == 0 {
+		return errors.New("To build a document at least an image, tarball or a file has to be specified")
+	}
+	return nil
+}
+
+type DocBuilderOptions struct {
+ WorkDir string // Working directory (defaults to a tmp dir)
+}
+
+var defaultDocBuilderOpts = DocBuilderOptions{
+ WorkDir: filepath.Join(os.TempDir(), "spdx-docbuilder"),
+}
+
+type DocBuilderImplementation interface {
+ GenerateDoc(*DocBuilderOptions, *DocGenerateOptions) (*Document, error)
+ WriteDoc(*Document, string) error
+}
+
+// defaultDocBuilderImpl is the default implementation for the
+// SPDX document builder
+type defaultDocBuilderImpl struct{}
+
+// GenerateDoc validates the options and builds a document from the images, tarballs and files they list
+func (builder defaultDocBuilderImpl) GenerateDoc(
+ opts *DocBuilderOptions, genopts *DocGenerateOptions,
+) (doc *Document, err error) {
+ if err := genopts.Validate(); err != nil {
+ return nil, errors.Wrap(err, "checking build options")
+ }
+
+ spdx := NewSPDX()
+ spdx.options.AnalyzeLayers = genopts.AnalyseLayers
+
+ if !util.Exists(opts.WorkDir) {
+ if err := os.MkdirAll(opts.WorkDir, os.FileMode(0o755)); err != nil {
+ return nil, errors.Wrap(err, "creating builder worskpace dir")
+ }
+ }
+
+ tmpdir, err := os.MkdirTemp(opts.WorkDir, "doc-build-")
+ if err != nil {
+ return nil, errors.Wrapf(err, "creating temporary workdir in %s", opts.WorkDir)
+ }
+ defer os.RemoveAll(tmpdir)
+
+ // Create the new document
+ doc = NewDocument()
+ doc.Namespace = genopts.Namespace
+
+ if genopts.Namespace == "" {
+ logrus.Warn("Document namespace is empty, a mock URI will be supplied but the doc will not be valid")
+ doc.Namespace = "http://example.com/"
+ }
+
+ for _, i := range genopts.Images {
+ logrus.Infof("Processing image: %s", i)
+ tararchive := filepath.Join(tmpdir, uuid.New().String()+".tar")
+ if err := spdx.PullImagesToArchive(i, tararchive); err != nil {
+ return nil, errors.Wrapf(err, "writing image %s to file", i)
+ }
+ p, err := spdx.PackageFromImageTarball(tararchive, &TarballOptions{})
+ if err != nil {
+ return nil, errors.Wrap(err, "generating tarball package")
+ }
+ ref, err := name.ParseReference(i)
+ if err != nil {
+ return nil, errors.Wrapf(err, "parsing image reference %q", i)
+ }
+
+ // Grab the package data from either the tag or, if it's a digest,
+ // from parsing the digest
+ tag, ok := ref.(name.Tag)
+ if ok {
+ p.Name = tag.RepositoryStr()
+ p.DownloadLocation = tag.Name()
+ p.Version = tag.Identifier()
+ } else {
+ dgst, ok := ref.(name.Digest)
+ if ok {
+ p.Version = dgst.DigestStr()
+ p.Name = dgst.RepositoryStr()
+ p.DownloadLocation = dgst.Name()
+ }
+ }
+ if err := doc.AddPackage(p); err != nil {
+ return nil, errors.Wrap(err, "adding package to document")
+ }
+ }
+
+ for _, tb := range genopts.Tarballs {
+ logrus.Infof("Processing tarball %s", tb)
+ p, err := spdx.PackageFromImageTarball(tb, &TarballOptions{})
+ if err != nil {
+ return nil, errors.Wrap(err, "generating tarball package")
+ }
+ if err := doc.AddPackage(p); err != nil {
+ return nil, errors.Wrap(err, "adding package to document")
+ }
+ }
+
+ for _, f := range genopts.Files {
+ logrus.Infof("Processing file %s", f)
+ f, err := spdx.FileFromPath(f)
+ if err != nil {
+ return nil, errors.Wrap(err, "adding file")
+ }
+ if err := doc.AddFile(f); err != nil {
+ return nil, errors.Wrap(err, "adding file to document")
+ }
+ }
+ return doc, nil
+}
+
+// WriteDoc renders the document and writes the resulting markup to path
+func (builder defaultDocBuilderImpl) WriteDoc(doc *Document, path string) error {
+	content, err := doc.Render()
+	if err != nil {
+		return errors.Wrap(err, "generating document markup")
+	}
+	logrus.Infof("writing document to %s", path)
+
+	// errors.Wrap returns nil when the write succeeded
+	writeErr := os.WriteFile(path, []byte(content), os.FileMode(0o644))
+	return errors.Wrap(writeErr, "writing document markup to file")
+}
diff --git a/pkg/spdx/document.go b/pkg/spdx/document.go
new file mode 100644
index 00000000000..2fc43dbf49c
--- /dev/null
+++ b/pkg/spdx/document.go
@@ -0,0 +1,206 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package spdx
+
+import (
+ "bytes"
+ "crypto/sha1"
+ "fmt"
+ "html/template"
+ "log"
+ "os"
+ "regexp"
+ "time"
+
+ "github.com/google/uuid"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+)
+
+var docTemplate = `{{ if .Version }}SPDXVersion: {{.Version}}
+{{ end -}}
+DataLicense: CC0-1.0
+{{ if .ID }}SPDXID: {{ .ID }}
+{{ end -}}
+{{ if .Name }}DocumentName: {{ .Name }}
+{{ end -}}
+{{ if .Namespace }}DocumentNamespace: {{ .Namespace }}
+{{ end -}}
+{{ if .Creator -}}
+{{- if .Creator.Person }}Creator: Person: {{ .Creator.Person }}
+{{ end -}}
+{{- if .Creator.Tool -}}
+{{- range $key, $value := .Creator.Tool }}Creator: Tool: {{ $value }}
+{{ end -}}
+{{- end -}}
+{{ end -}}
+{{ if .Created }}Created: {{ dateFormat .Created }}
+{{ end }}
+
+`
+
+// Document abstracts the SPDX document
+type Document struct {
+ Version string // SPDX-2.2
+ DataLicense string // CC0-1.0
+ ID string // SPDXRef-DOCUMENT
+ Name string // hello-go-src
+ Namespace string // https://swinslow.net/spdx-examples/example6/hello-go-src-v1
+ Creator struct {
+ Person string // Steve Winslow (steve@swinslow.net)
+ Tool []string // github.com/spdx/tools-golang/builder
+ }
+ Created time.Time // 2020-11-24T01:12:27Z
+ Packages map[string]*Package
+ Files map[string]*File // List of files
+}
+
+// NewDocument returns a new SPDX document with some defaults preloaded
+func NewDocument() *Document {
+ return &Document{
+ ID: "SPDXRef-DOCUMENT",
+ Version: "SPDX-2.2",
+ DataLicense: "CC0-1.0",
+ Created: time.Now().UTC(),
+ Creator: struct {
+ Person string
+ Tool []string
+ }{
+ Person: defaultDocumentAuthor,
+ Tool: []string{"k8s.io/release/pkg/spdx"},
+ },
+ }
+}
+
+// AddPackage adds a package to the document, deriving its ID from its name when the ID is not set
+func (d *Document) AddPackage(pkg *Package) error {
+ if d.Packages == nil {
+ d.Packages = map[string]*Package{}
+ }
+
+ if pkg.ID == "" {
+ // If we do not have an ID but have a name, generate it from there
+ reg := regexp.MustCompile("[^a-zA-Z0-9-]+")
+ id := reg.ReplaceAllString(pkg.Name, "")
+ if id != "" {
+ pkg.ID = "SPDXRef-Package-" + id
+ }
+ }
+ if pkg.ID == "" {
+ return errors.New("package id is needed to add a new package")
+ }
+ if _, ok := d.Packages[pkg.ID]; ok {
+ return errors.New("a package named " + pkg.ID + " already exists in the document")
+ }
+
+ d.Packages[pkg.ID] = pkg
+ return nil
+}
+
+// Write outputs the SPDX document into a file
+func (d *Document) Write(path string) error {
+ content, err := d.Render()
+ if err != nil {
+ return errors.Wrap(err, "rendering SPDX code")
+ }
+ if err := os.WriteFile(path, []byte(content), os.FileMode(0o644)); err != nil {
+ return errors.Wrap(err, "writing SPDX code to file")
+ }
+ logrus.Infof("SPDX SBOM written to %s", path)
+ return nil
+}
+
+// Render returns the SPDX markup of the document, its files and its packages; an empty Name is set to a generated one
+func (d *Document) Render() (doc string, err error) {
+	var buf bytes.Buffer
+	funcMap := template.FuncMap{
+		// dateFormat renders a timestamp in UTC as YYYY-MM-DDThh:mm:ssZ (Go layout: 2006-01-02)
+		"dateFormat": func(t time.Time) string { return t.UTC().Format("2006-01-02T15:04:05Z") },
+	}
+
+	if d.Name == "" {
+		d.Name = "BOM-SPDX-" + uuid.New().String()
+		logrus.Warnf("Document has no name defined, automatically set to %s", d.Name)
+	}
+	// NOTE(review): a template parse failure exits the process through log.Fatalf
+	tmpl, err := template.New("document").Funcs(funcMap).Parse(docTemplate)
+	if err != nil {
+		log.Fatalf("parsing: %s", err)
+	}
+
+	// Execute the template to render the document header
+	if err := tmpl.Execute(&buf, d); err != nil {
+		return "", errors.Wrap(err, "executing spdx document template")
+	}
+
+	doc = buf.String()
+
+	// List files in the document. Files listed directly on the
+	// document do not contain relationships yet.
+	filesDescribed := ""
+	if len(d.Files) > 0 {
+		doc += "\n##### Files independent of packages\n\n"
+		filesDescribed = "\n"
+	}
+
+	for _, file := range d.Files {
+		fileDoc, err := file.Render()
+		if err != nil {
+			return "", errors.Wrap(err, "rendering file "+file.Name)
+		}
+		doc += fileDoc
+		filesDescribed += fmt.Sprintf("Relationship: %s DESCRIBES %s\n\n", d.ID, file.ID)
+	}
+	doc += filesDescribed
+
+	// Cycle all packages and get their data
+	for _, pkg := range d.Packages {
+		pkgDoc, err := pkg.Render()
+		if err != nil {
+			return "", errors.Wrap(err, "rendering pkg "+pkg.Name)
+		}
+
+		doc += pkgDoc
+		doc += fmt.Sprintf("Relationship: %s DESCRIBES %s\n\n", d.ID, pkg.ID)
+	}
+
+	return doc, nil
+}
+
+// AddFile adds a file to the document, generating an ID for it when it has none
+func (d *Document) AddFile(file *File) error {
+	if d.Files == nil {
+		d.Files = map[string]*File{}
+	}
+	// If file does not have an ID, we try to build one
+	// by hashing the document name and the file name
+	if file.ID == "" {
+		if file.Name == "" {
+			return errors.New("unable to generate file ID, filename not set")
+		}
+		if d.Name == "" {
+			return errors.New("unable to generate file ID, document name not set")
+		}
+		h := sha1.New()
+		if _, err := h.Write([]byte(d.Name + ":" + file.Name)); err != nil {
+			return errors.Wrap(err, "getting sha1 of filename")
+		}
+		file.ID = "SPDXRef-File-" + fmt.Sprintf("%x", h.Sum(nil))
+	}
+	d.Files[file.ID] = file
+	return nil
+}
diff --git a/pkg/spdx/file.go b/pkg/spdx/file.go
new file mode 100644
index 00000000000..7b547b2a7f9
--- /dev/null
+++ b/pkg/spdx/file.go
@@ -0,0 +1,149 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package spdx
+
+import (
+ "bytes"
+ "crypto/sha1"
+ "html/template"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "sigs.k8s.io/release-utils/hash"
+ "sigs.k8s.io/release-utils/util"
+)
+
+var fileTemplate = `{{ if .Name }}FileName: {{ .Name }}
+{{ end -}}
+{{ if .ID }}SPDXID: {{ .ID }}
+{{ end -}}
+{{- if .Checksum -}}
+{{- range $key, $value := .Checksum -}}
+{{ if . }}FileChecksum: {{ $key }}: {{ $value }}
+{{ end -}}
+{{- end -}}
+{{- end -}}
+LicenseConcluded: {{ if .LicenseConcluded }}{{ .LicenseConcluded }}{{ else }}NOASSERTION{{ end }}
+LicenseInfoInFile: {{ if .LicenseInfoInFile }}{{ .LicenseInfoInFile }}{{ else }}NOASSERTION{{ end }}
+FileCopyrightText: {{ if .CopyrightText }}{{ .CopyrightText }}
+{{ else }}NOASSERTION{{ end }}
+
+`
+
+// File abstracts a file contained in a package
+type File struct {
+ Name string // string /Makefile
+ FileName string // Name of the file
+ ID string // SPDXRef-Makefile
+ LicenseConcluded string // GPL-3.0-or-later
+ LicenseInfoInFile string // GPL-3.0-or-later
+ CopyrightText string // NOASSERTION
+ SourceFile string // Source file to read from (not part of the spec)
+ Checksum map[string]string
+
+ options *FileOptions // Options
+}
+
+// NewFile returns a File initialized with an empty set of options
+func NewFile() (f *File) {
+	return &File{
+		options: &FileOptions{},
+	}
+}
+
+func (f *File) Options() *FileOptions {
+ return f.options
+}
+
+// FileOptions holds the settings used when processing a File
+type FileOptions struct {
+ WorkDir string // Directory prefix that ReadSourceFile trims from the path to get the file name
+}
+
+// ReadChecksums receives a path to a file and calculates its checksums
+func (f *File) ReadChecksums(filePath string) error {
+ if f.Checksum == nil {
+ f.Checksum = map[string]string{}
+ }
+ file, err := os.Open(filePath)
+ if err != nil {
+ return errors.Wrap(err, "opening file for reading: "+filePath)
+ }
+ defer file.Close()
+ // TODO: Make this line like the others once this PR is
+ // included in a k-sigs/release-util release:
+ // https://github.com/kubernetes-sigs/release-utils/pull/16
+ s1, err := hash.ForFile(filePath, sha1.New())
+ if err != nil {
+ return errors.Wrap(err, "getting sha1 sum for file")
+ }
+ s256, err := hash.SHA256ForFile(filePath)
+ if err != nil {
+ return errors.Wrap(err, "getting file checksums")
+ }
+ s512, err := hash.SHA512ForFile(filePath)
+ if err != nil {
+ return errors.Wrap(err, "getting file checksums")
+ }
+
+ f.Checksum = map[string]string{
+ "SHA1": s1,
+ "SHA256": s256,
+ "SHA512": s512,
+ }
+ return nil
+}
+
+// Render renders the document fragment of a file
+func (f *File) Render() (docFragment string, err error) {
+ var buf bytes.Buffer
+ tmpl, err := template.New("file").Parse(fileTemplate)
+ if err != nil {
+ return "", errors.Wrap(err, "parsing file template")
+ }
+
+ // Execute the template against the file to produce its fragment
+ if err := tmpl.Execute(&buf, f); err != nil {
+ return "", errors.Wrap(err, "executing spdx file template")
+ }
+
+ docFragment = buf.String()
+ return docFragment, nil
+}
+
+// ReadSourceFile reads the source file for the package and populates
+// the fields derived from it (Checksums and FileName)
+func (f *File) ReadSourceFile(path string) error {
+ if !util.Exists(path) {
+ return errors.New("unable to find package source file")
+ }
+
+ if err := f.ReadChecksums(path); err != nil {
+ return errors.Wrap(err, "reading file checksums")
+ }
+
+ f.SourceFile = path
+ f.Name = strings.TrimPrefix(
+ path, f.Options().WorkDir+string(filepath.Separator),
+ )
+ f.ID = "SPDXRef-File-" + f.Checksum["SHA256"][0:15]
+ logrus.Infof("Added file %s as %s", f.Name, f.ID)
+ return nil
+}
diff --git a/pkg/spdx/imageanalyzer.go b/pkg/spdx/imageanalyzer.go
new file mode 100644
index 00000000000..b00316bd253
--- /dev/null
+++ b/pkg/spdx/imageanalyzer.go
@@ -0,0 +1,86 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package spdx
+
+import (
+ "os"
+ "path/filepath"
+
+ "github.com/pkg/errors"
+
+ "github.com/sirupsen/logrus"
+)
+
+// ImageAnalyzer is an object that checks images to see if we can add more
+// information to a spdx package based on its content. Each analyzer is
+// written specifically for a layer type. The idea is to be able to enrich
+// common base images with more data to have the most common images covered.
+type ImageAnalyzer struct {
+ Analyzers map[string]ContainerLayerAnalyzer
+}
+
+func NewImageAnalyzer() *ImageAnalyzer {
+ // Default options for all analyzers
+ opts := &ContainerLayerAnalyzerOptions{
+ LicenseCacheDir: filepath.Join(os.TempDir(), spdxLicenseCacheDir),
+ }
+
+ // Create the instance with all the drivers we have so far
+ return &ImageAnalyzer{
+ Analyzers: map[string]ContainerLayerAnalyzer{
+ "distroless": &distrolessHandler{
+ Options: opts,
+ },
+ "go-runner": &goRunnerHandler{
+ Options: opts,
+ },
+ },
+ }
+}
+
+// AnalyzeLayer is the main method of the analyzer
+// it will query each of the analyzers to see if we can
+// extract more information from the layer and enrich the
+// spdx package referenced by pkg
+func (ia *ImageAnalyzer) AnalyzeLayer(layerPath string, pkg *Package) error {
+ if pkg == nil {
+ return errors.New("Unable to analyze layer, package is null")
+ }
+ for label, handler := range ia.Analyzers {
+ logrus.Infof("Scanning layer with %s", label)
+ can, err := handler.CanHandle(layerPath)
+ if err != nil {
+ return errors.Wrapf(err, "checking if layer can be handled with %s", label)
+ }
+ // The first analyzer that can handle the layer is used; the rest are not queried
+ if can {
+ return handler.ReadPackageData(layerPath, pkg)
+ }
+ }
+ return nil
+}
+
+// ContainerLayerAnalyzer is an interface that knows how to read a
+// known container layer and populate a SPDX package
+type ContainerLayerAnalyzer interface {
+ ReadPackageData(layerPath string, pkg *Package) error
+ CanHandle(layerPath string) (bool, error)
+}
+
+type ContainerLayerAnalyzerOptions struct {
+ LicenseCacheDir string
+}
diff --git a/pkg/spdx/imageanalyzer_distroless.go b/pkg/spdx/imageanalyzer_distroless.go
new file mode 100644
index 00000000000..a80585ee64e
--- /dev/null
+++ b/pkg/spdx/imageanalyzer_distroless.go
@@ -0,0 +1,277 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package spdx
+
+import (
+ "archive/tar"
+ "bytes"
+ "compress/gzip"
+ "encoding/json"
+ "io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "regexp"
+ "strings"
+
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "k8s.io/release/pkg/license"
+ "sigs.k8s.io/release-utils/http"
+ "sigs.k8s.io/release-utils/util"
+)
+
+const (
+ // Base URL of the distroless repository; the bundle file name is appended
+ // to it to fetch the package list
+ distrolessBundleURL = "https://raw.githubusercontent.com/GoogleContainerTools/distroless/master/"
+ // File in the distroless repository listing the packages and their versions
+ distrolessBundle = "package_bundle_amd64_debian10.versions" // TODO: Perhaps make an option
+ // Tar entries starting with this prefix and ending in distrolessLicenseName
+ // are treated as the copyright file of an installed package
+ distrolessLicensePath = "./usr/share/doc/"
+ distrolessLicenseName = "/copyright"
+ // Directory prefix stripped from license references found in copyright files
+ distrolessCommonLicenseDir = "/usr/share/common-licenses/"
+ // Case-insensitive pattern matching a reference to a common license file
+ commonLicensesRe = `(?i)/usr/share/common-licenses/[-A-Z0-9\.]+`
+ // Extension that marks a layer tarball as gzip compressed
+ gzExt = ".gz"
+)
+
+// distrolessHandler is the layer analyzer for distroless base image layers
+type distrolessHandler struct {
+ // reader caches the license reader created on the first licenseReader() call
+ reader *license.Reader
+ // Options are the analyzer settings (license cache directory)
+ Options *ContainerLayerAnalyzerOptions
+}
+
+// ReadPackageData reads a distroless layer tarball and records what it finds
+// in pkg. The package itself is named "distroless" and, for every copyright
+// file found under ./usr/share/doc/<name>/copyright, a subpackage named
+// <name> is added to it. Subpackage versions come from the package bundle
+// published in the distroless repository. Licenses are first detected with
+// the license classifier; when that yields nothing the copyright file is
+// searched for a public domain declaration or for a reference to a file in
+// /usr/share/common-licenses/.
+//
+// It returns an error if the license reader or the package list cannot be
+// obtained, if the tarball cannot be read, or if a subpackage cannot be added.
+func (h *distrolessHandler) ReadPackageData(layerPath string, pkg *Package) error {
+	// Create a new license reader to scan license files
+	licenseReader, err := h.licenseReader(h.Options)
+	if err != nil {
+		return errors.Wrap(
+			err, "creating license reader to scan distroless image",
+		)
+	}
+
+	// Create the package representing the distroless layer
+	pkg.Name = "distroless"
+	pkg.ID = "SPDXRef-Package-distroless"
+	pkg.FilesAnalyzed = false
+
+	// Fetch the current distroless package list
+	packageList, err := h.fetchDistrolessPackages()
+	if err != nil {
+		return errors.Wrap(err, "getting package lists")
+	}
+
+	// Open the distroless layer tar for reading
+	tarfile, err := os.Open(layerPath)
+	if err != nil {
+		return errors.Wrap(err, "opening distroless image layer ")
+	}
+	defer tarfile.Close()
+
+	// Scratch directory where the copyright files are extracted
+	dir, err := os.MkdirTemp(os.TempDir(), "image-process-")
+	if err != nil {
+		return errors.Wrap(err, "creating temporary directory")
+	}
+	defer os.RemoveAll(dir)
+
+	var tr *tar.Reader
+	if filepath.Ext(layerPath) == gzExt {
+		gzf, err := gzip.NewReader(tarfile)
+		if err != nil {
+			return errors.Wrap(err, "creating gzip reader")
+		}
+		defer gzf.Close()
+		tr = tar.NewReader(gzf)
+	} else {
+		tr = tar.NewReader(tarfile)
+	}
+
+	// Compile the pattern once instead of once per copyright file
+	commonLicenses := regexp.MustCompile(commonLicensesRe)
+
+	for {
+		hdr, err := tr.Next()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return errors.Wrap(err, "reading the image tarfile")
+		}
+
+		// Scan the license directories to determine the installed packages
+		if !strings.HasPrefix(hdr.Name, distrolessLicensePath) ||
+			!strings.HasSuffix(hdr.Name, distrolessLicenseName) {
+			continue
+		}
+
+		// We infer the name of the package from the license directory
+		packageName := strings.TrimSuffix(
+			strings.TrimPrefix(hdr.Name, distrolessLicensePath), distrolessLicenseName,
+		)
+		logrus.Infof("Creating SPDX subpackage %s", packageName)
+		subpkg := NewPackage()
+		subpkg.Name = packageName
+		if version, ok := packageList[subpkg.Name]; ok {
+			logrus.Infof(" distroless uses version %s of %s", version, subpkg.Name)
+			subpkg.Version = version
+		} else {
+			logrus.Warnf("could not determine version for package %s", subpkg.Name)
+		}
+
+		// Extract the package license to a file. Slashes in nested doc
+		// directories are flattened so the file always lands directly in dir.
+		licensePath := filepath.Join(
+			dir, strings.ReplaceAll(packageName, "/", "_")+".license",
+		)
+		if err := extractDistrolessLicense(licensePath, tr); err != nil {
+			return errors.Wrap(err, "writing license file for "+subpkg.Name)
+		}
+
+		// Use our license classifier to try to determine
+		// the license we are dealing with
+		spdxlicense, err := licenseReader.LicenseFromFile(licensePath)
+		if err != nil {
+			return errors.Wrap(err, "reading license from file")
+		}
+
+		if spdxlicense != nil {
+			subpkg.LicenseDeclared = spdxlicense.LicenseID
+		} else {
+			// The classifier did not recognize the text, try to get the
+			// license from the debian copyright file contents
+			fileData, err := ioutil.ReadFile(licensePath)
+			if err != nil {
+				return errors.Wrap(err, "reading license file")
+			}
+			copyrightText := string(fileData)
+
+			if strings.Contains(copyrightText, "is in the public domain") {
+				// Option 1: File is in the public domain
+				logrus.Info("File is the public domain")
+
+				// In this case we include the full license text in the manifest
+				subpkg.CopyrightText = copyrightText
+				subpkg.LicenseComments = "Found public domain declaration in copyright text file"
+			} else {
+				// Option 2: Copyright file references an installed license.
+				label := commonLicenses.FindString(copyrightText)
+				label = strings.TrimPrefix(label, distrolessCommonLicenseDir)
+				label = strings.TrimSuffix(label, ".")
+
+				// Translate from debian to SPDX label
+				if spdxLabel := license.DebianLicenseLabels[label]; spdxLabel != "" {
+					// Guard against a label the reader does not know, the
+					// result is dereferenced right below
+					if found := licenseReader.LicenseFromLabel(spdxLabel); found != nil {
+						logrus.Infof(
+							"Found license %s for package %s by reading copyright file",
+							found.LicenseID, subpkg.Name,
+						)
+						subpkg.LicenseDeclared = found.LicenseID
+					} else {
+						logrus.Warnf(
+							"license %s referenced by package %s was not found by the license reader",
+							spdxLabel, subpkg.Name,
+						)
+					}
+				}
+			}
+		}
+
+		// Add the debian package to the layer package
+		if err := pkg.AddPackage(subpkg); err != nil {
+			return errors.Wrapf(err, "adding %s subpackage", subpkg.Name)
+		}
+	}
+	return nil
+}
+
+// extractDistrolessLicense copies src into a new file at dst and closes the
+// file before returning, so no file handles pile up while the tar is scanned.
+func extractDistrolessLicense(dst string, src io.Reader) error {
+	f, err := os.Create(dst)
+	if err != nil {
+		return errors.Wrap(err, "creating image layer file")
+	}
+	if _, err := io.Copy(f, src); err != nil {
+		f.Close()
+		return errors.Wrap(err, "extracting license data")
+	}
+	if err := f.Close(); err != nil {
+		return errors.Wrap(err, "closing license file")
+	}
+	return nil
+}
+
+// fetchDistrolessPackages downloads the package bundle published in the
+// distroless repository and decodes it into a map whose keys are package
+// names and whose values are their versions. An error is returned when the
+// download fails or the payload is not valid JSON.
+func (h *distrolessHandler) fetchDistrolessPackages() (pkgInfo map[string]string, err error) {
+	logrus.Info("Fetching distroless image package list")
+	bundleURL := distrolessBundleURL + distrolessBundle
+	payload, err := http.NewAgent().Get(bundleURL)
+	if err != nil {
+		return nil, errors.Wrap(err, "fetching distroless image package manifest")
+	}
+
+	versions := map[string]string{}
+	if jsonErr := json.Unmarshal(payload, &versions); jsonErr != nil {
+		return nil, errors.Wrap(jsonErr, "unmarshalling the distroless package list")
+	}
+
+	logrus.Infof(
+		"Distroless bundle for %s lists %d packages",
+		distrolessBundle, len(versions),
+	)
+	return versions, nil
+}
+
+// licenseReader returns the handler's license reader, creating it on the
+// first call and handing back the same instance afterwards. The reader's
+// cache lives in o.LicenseCacheDir or, when that is empty, in a directory
+// under the OS temporary directory; the directory is created if missing.
+//
+// NOTE(review): if license.DefaultReaderOptions is a pointer, setting
+// CacheDir below also changes the package default - confirm.
+func (h *distrolessHandler) licenseReader(o *ContainerLayerAnalyzerOptions) (*license.Reader, error) {
+	// Reuse the reader if we already built one
+	if h.reader != nil {
+		return h.reader, nil
+	}
+
+	logrus.Info("Initializing licence reader with default options")
+	readerOpts := license.DefaultReaderOptions
+
+	// Honor the configured cache, otherwise use the default one
+	cacheDir := o.LicenseCacheDir
+	if cacheDir == "" {
+		cacheDir = filepath.Join(os.TempDir(), "spdx-license-reader-licenses")
+	}
+
+	// Make sure the license cache exists
+	if !util.Exists(cacheDir) {
+		if err := os.MkdirAll(cacheDir, os.FileMode(0o0755)); err != nil {
+			return nil, errors.Wrap(err, "creating license cache directory")
+		}
+	}
+	readerOpts.CacheDir = cacheDir
+
+	newReader, err := license.NewReaderWithOptions(readerOpts)
+	if err != nil {
+		return nil, errors.Wrap(err, "creating reusable license reader")
+	}
+	h.reader = newReader
+	return h.reader, nil
+}
+
+// CanHandle returns a bool indicating if this handler can supply more
+// data about the specified tarball. A layer is considered a distroless layer
+// when it contains a ./etc/os-release file with PRETTY_NAME="Distroless" in
+// it. Tarballs whose name ends in .gz are decompressed on the fly.
+//
+// It returns an error when the tarball cannot be opened or read.
+func (h *distrolessHandler) CanHandle(layerPath string) (can bool, err error) {
+	// Open the tar file
+	f, err := os.Open(layerPath)
+	if err != nil {
+		return false, errors.Wrap(err, "opening tarball")
+	}
+	// Release the file handle when done, this runs once per scanned layer
+	defer f.Close()
+
+	var tr *tar.Reader
+	if filepath.Ext(layerPath) == gzExt {
+		gzf, err := gzip.NewReader(f)
+		if err != nil {
+			return false, errors.Wrap(err, "creating gzip reader")
+		}
+		defer gzf.Close()
+		tr = tar.NewReader(gzf)
+	} else {
+		tr = tar.NewReader(f)
+	}
+
+	var osRelease bytes.Buffer
+	// Search for the os-release file in the tar contents
+	for {
+		hdr, err := tr.Next()
+		if err == io.EOF {
+			break // End of archive
+		}
+		if err != nil {
+			return false, errors.Wrap(err, "reading the image tarfile")
+		}
+
+		if hdr.FileInfo().IsDir() || hdr.Name != "./etc/os-release" {
+			continue
+		}
+
+		if _, err := io.Copy(&osRelease, tr); err != nil {
+			return false, errors.Wrap(err, "extracting os-release file")
+		}
+		// We have what we came for, no need to read the rest of the layer
+		break
+	}
+
+	// If the image has the Distroless tag in the OS file, we can handle it
+	if strings.Contains(osRelease.String(), `PRETTY_NAME="Distroless"`) {
+		logrus.Infof("👍 Tarball %s identified as distroless layer", layerPath)
+		return true, nil
+	}
+	return false, nil
+}
diff --git a/pkg/spdx/imageanalyzer_gorunner.go b/pkg/spdx/imageanalyzer_gorunner.go
new file mode 100644
index 00000000000..93808414b27
--- /dev/null
+++ b/pkg/spdx/imageanalyzer_gorunner.go
@@ -0,0 +1,172 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package spdx
+
+import (
+ "archive/tar"
+ "compress/gzip"
+ "io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "k8s.io/release/pkg/license"
+ "sigs.k8s.io/release-utils/http"
+ "sigs.k8s.io/release-utils/util"
+)
+
+const (
+ // URL of the VERSION file read to determine the go-runner version
+ goRunnerVersionURL = "https://raw.githubusercontent.com/kubernetes/release/master/images/build/go-runner/VERSION"
+ // URL of the go-runner Dockerfile, which is the file scanned for a license
+ goRunnerLicenseURL = "https://raw.githubusercontent.com/kubernetes/release/master/images/build/go-runner/Dockerfile"
+)
+
+// goRunnerHandler is the layer analyzer for layers shipping the go-runner binary
+type goRunnerHandler struct {
+ // reader caches the license reader created on the first licenseReader() call
+ reader *license.Reader
+ // Options are the analyzer settings (license cache directory)
+ Options *ContainerLayerAnalyzerOptions
+}
+
+// ReadPackageData fills pkg with the data known about the go-runner image:
+// its name, supplier, the version published in the VERSION file of the
+// kubernetes/release repository and the license detected in its Dockerfile.
+// The layer tarball at layerPath is not read, all data is fetched remotely.
+//
+// It returns an error when any of the remote files cannot be fetched, when
+// the Dockerfile cannot be staged in a temporary file or when the license
+// reader fails. If no license can be determined the declared license is left
+// unset and a warning is logged.
+func (h *goRunnerHandler) ReadPackageData(layerPath string, pkg *Package) error {
+	pkg.Supplier.Person = "Kubernetes Release Managers (release-managers@kubernetes.io)"
+	pkg.Name = "go-runner"
+
+	// Get the go-runner version
+	// TODO: Add http retries
+	versionb, err := http.NewAgent().Get(goRunnerVersionURL)
+	if err != nil {
+		return errors.Wrap(err, "fetching go-runner VERSION file")
+	}
+	logrus.Infof("go-runner image is at version %s", string(versionb))
+	// NOTE(review): the payload is stored verbatim; if the VERSION file ends
+	// in a newline it ends up in the package version - confirm and trim.
+	pkg.Version = string(versionb)
+
+	// Read the docker file to scan for license
+	lic, err := http.NewAgent().Get(goRunnerLicenseURL)
+	if err != nil {
+		return errors.Wrap(err, "fetching go-runner Dockerfile")
+	}
+
+	df, err := ioutil.TempFile(os.TempDir(), "gorunner-dockerfile-")
+	if err != nil {
+		return errors.Wrap(err, "creating temporary file to read go-runner license")
+	}
+	defer os.Remove(df.Name())
+
+	// Write through the handle we already hold and close it before the
+	// license tooling reopens the file by name
+	if _, err := df.Write(lic); err != nil {
+		df.Close()
+		return errors.Wrap(err, "writing go-runner license to temp file")
+	}
+	if err := df.Close(); err != nil {
+		return errors.Wrap(err, "closing go-runner license temp file")
+	}
+
+	// Let's extract the license for the layer:
+	var grlic *license.License
+	licenseReader, err := h.licenseReader(h.Options)
+	if err != nil {
+		return errors.Wrap(err, "getting license reader")
+	}
+	// First, check if the file has our boiler plate
+	hasbp, err := license.HasKubernetesBoilerPlate(df.Name())
+	if err != nil {
+		return errors.Wrap(err, "checking for k8s boilerplate in go-runner")
+	}
+	if hasbp {
+		// If the boilerplate was found, we know it is apache2
+		grlic = licenseReader.LicenseFromLabel("Apache-2.0")
+	} else {
+		// Otherwise, as a fallback, try to classify the file
+		grlic, err = licenseReader.LicenseFromFile(df.Name())
+		if err != nil {
+			return errors.Wrap(err, "attempting to read go-runner license")
+		}
+	}
+
+	// Neither lookup is guaranteed to produce a license, do not dereference
+	// a nil result
+	if grlic == nil {
+		logrus.Warn("Unable to determine the license of the go-runner image")
+		return nil
+	}
+	pkg.LicenseDeclared = grlic.LicenseID
+	logrus.Infof("Found license %s in go-runner image", grlic.LicenseID)
+	return nil
+}
+
+// licenseReader returns the handler's license reader, building it on first
+// use and returning the cached instance on later calls. The cache directory
+// is o.LicenseCacheDir when set, otherwise a directory under the OS
+// temporary directory; it is created when it does not exist.
+//
+// NOTE(review): if license.DefaultReaderOptions is a pointer, setting
+// CacheDir below also changes the package default - confirm.
+func (h *goRunnerHandler) licenseReader(o *ContainerLayerAnalyzerOptions) (*license.Reader, error) {
+	// Nothing to do if the reader was already initialized
+	if h.reader != nil {
+		return h.reader, nil
+	}
+
+	logrus.Info("Initializing licence reader with default options")
+	readerOpts := license.DefaultReaderOptions
+
+	// Use the configured license cache or fall back to the default
+	cacheDir := o.LicenseCacheDir
+	if cacheDir == "" {
+		cacheDir = filepath.Join(os.TempDir(), "spdx-license-reader-licenses")
+	}
+
+	// Create the cache directory when missing
+	if !util.Exists(cacheDir) {
+		if err := os.MkdirAll(cacheDir, os.FileMode(0o0755)); err != nil {
+			return nil, errors.Wrap(err, "creating license cache directory")
+		}
+	}
+	readerOpts.CacheDir = cacheDir
+
+	newReader, err := license.NewReaderWithOptions(readerOpts)
+	if err != nil {
+		return nil, errors.Wrap(err, "creating reusable license reader")
+	}
+	h.reader = newReader
+	return h.reader, nil
+}
+
+// CanHandle reports whether the tarball at layerPath is a go-runner layer,
+// which is the case when it contains a non-directory entry named exactly
+// "go-runner". Tarballs with a .gz extension are decompressed while reading.
+// An error is returned when the tarball cannot be opened or read.
+func (h *goRunnerHandler) CanHandle(layerPath string) (can bool, err error) {
+	layer, err := os.Open(layerPath)
+	if err != nil {
+		return can, errors.Wrap(err, "opening tarball")
+	}
+	defer layer.Close()
+
+	// Pick the raw or the decompressed stream depending on the extension
+	var stream io.Reader = layer
+	if filepath.Ext(layerPath) == ".gz" {
+		gz, gzErr := gzip.NewReader(layer)
+		if gzErr != nil {
+			return can, errors.Wrap(gzErr, "creating gzip reader")
+		}
+		stream = gz
+	}
+	entries := tar.NewReader(stream)
+
+	// Walk the archive looking for the go-runner binary
+	for {
+		entry, nextErr := entries.Next()
+		if nextErr == io.EOF {
+			// End of archive, the binary is not in this layer
+			return false, nil
+		}
+		if nextErr != nil {
+			return can, errors.Wrapf(nextErr, "reading the image tarfile at %s", layerPath)
+		}
+
+		if entry.FileInfo().IsDir() {
+			continue
+		}
+
+		if entry.Name == "go-runner" {
+			logrus.Infof("👍 Tarball %s identified as a go-runner layer", layerPath)
+			return true, nil
+		}
+	}
+}
diff --git a/pkg/spdx/implementation.go b/pkg/spdx/implementation.go
new file mode 100644
index 00000000000..e19bc6a353e
--- /dev/null
+++ b/pkg/spdx/implementation.go
@@ -0,0 +1,179 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package spdx
+
+//go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 -generate
+
+import (
+ "archive/tar"
+ "crypto/sha1"
+ "encoding/json"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/google/go-containerregistry/pkg/name"
+ v1 "github.com/google/go-containerregistry/pkg/v1"
+ "github.com/google/go-containerregistry/pkg/v1/remote"
+ "github.com/google/go-containerregistry/pkg/v1/tarball"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "sigs.k8s.io/release-utils/util"
+)
+
+//counterfeiter:generate . spdxImplementation
+
+// spdxImplementation groups the operations the SPDX object delegates to its
+// implementation, so they can be replaced through SetImplementation (a
+// counterfeiter fake is generated for this interface).
+type spdxImplementation interface {
+ // ExtractTarballTmp extracts a tarball to a temporary directory and returns its path
+ ExtractTarballTmp(string) (string, error)
+ // ReadArchiveManifest parses the manifest json file found at the given path
+ ReadArchiveManifest(string) (*ArchiveManifest, error)
+ // PullImagesToArchive writes the referenced image into a tar archive at the given path
+ PullImagesToArchive(string, string) error
+ // PackageFromLayerTarBall builds a SPDX package from a layer tarball
+ PackageFromLayerTarBall(string, *TarballOptions) (*Package, error)
+}
+
+// spdxDefaultImplementation is the spdxImplementation used by NewSPDX()
+type spdxDefaultImplementation struct{}
+
+// ExtractTarballTmp extracts the (uncompressed) tarball at tarPath to a new
+// temporary directory and returns the path of that directory. Directory
+// entries are not created on their own (parent directories are created as
+// files need them) and whiteout files (base name starting with ".wh") are
+// skipped.
+//
+// Entries whose name would place them outside of the temporary directory
+// (for example through "../" segments) abort the extraction with an error.
+// On error the path of the temporary directory is still returned so the
+// caller can clean it up.
+func (di *spdxDefaultImplementation) ExtractTarballTmp(tarPath string) (tmpDir string, err error) {
+	tmpDir, err = os.MkdirTemp(os.TempDir(), "spdx-tar-extract-")
+	if err != nil {
+		return tmpDir, errors.Wrap(err, "creating temporary directory for tar extraction")
+	}
+
+	// Open the tar file
+	tarball, err := os.Open(tarPath)
+	if err != nil {
+		return tmpDir, errors.Wrap(err, "opening tarball")
+	}
+	defer tarball.Close()
+
+	// Every extracted file has to live below this prefix
+	rootPrefix := filepath.Clean(tmpDir) + string(filepath.Separator)
+
+	tr := tar.NewReader(tarball)
+	numFiles := 0
+	for {
+		hdr, err := tr.Next()
+		if err == io.EOF {
+			break // End of archive
+		}
+		if err != nil {
+			return tmpDir, errors.Wrap(err, "reading the image tarfile")
+		}
+
+		if hdr.FileInfo().IsDir() {
+			continue
+		}
+
+		if strings.HasPrefix(filepath.Base(hdr.FileInfo().Name()), ".wh") {
+			logrus.Info("Skipping extraction of whiteout file")
+			continue
+		}
+
+		// filepath.Join cleans the path, so any "../" in the entry name has
+		// been resolved by the time we check where the file would land
+		targetFile := filepath.Join(tmpDir, hdr.Name)
+		if !strings.HasPrefix(targetFile, rootPrefix) {
+			return tmpDir, errors.Errorf(
+				"tar entry %q would be extracted outside of the target directory", hdr.Name,
+			)
+		}
+
+		if err := os.MkdirAll(filepath.Dir(targetFile), os.FileMode(0o755)); err != nil {
+			return tmpDir, errors.Wrap(err, "creating image directory structure")
+		}
+
+		if err := writeExtractedFile(targetFile, tr); err != nil {
+			return tmpDir, err
+		}
+		numFiles++
+	}
+	logrus.Infof("Successfully extracted %d files from image tarball %s", numFiles, tarPath)
+	return tmpDir, nil
+}
+
+// writeExtractedFile copies src into a new file at dst, closing the file
+// before returning so descriptors do not accumulate during the extraction.
+func writeExtractedFile(dst string, src io.Reader) error {
+	f, err := os.Create(dst)
+	if err != nil {
+		return errors.Wrap(err, "creating image layer file")
+	}
+	if _, err := io.Copy(f, src); err != nil {
+		f.Close()
+		return errors.Wrap(err, "extracting image data")
+	}
+	if err := f.Close(); err != nil {
+		return errors.Wrap(err, "closing extracted file")
+	}
+	return nil
+}
+
+// ReadArchiveManifest reads the manifest json file of an image tar archive
+// from manifestPath and returns its first entry as a struct.
+//
+// It returns an error when the file does not exist, cannot be read, is not
+// valid JSON or does not list any image.
+func (di *spdxDefaultImplementation) ReadArchiveManifest(manifestPath string) (manifest *ArchiveManifest, err error) {
+	// Check that we have the archive manifest.json file
+	if !util.Exists(manifestPath) {
+		return nil, errors.New("unable to find manifest file " + manifestPath)
+	}
+
+	// Parse the json file
+	manifestData := []ArchiveManifest{}
+	manifestJSON, err := os.ReadFile(manifestPath)
+	if err != nil {
+		return nil, errors.Wrap(err, "unable to read from tarfile")
+	}
+	if err := json.Unmarshal(manifestJSON, &manifestData); err != nil {
+		return nil, errors.Wrap(err, "unmarshalling image manifest")
+	}
+
+	// An empty list (or a JSON null) parses fine but has no entry to return
+	if len(manifestData) == 0 {
+		return nil, errors.New("image manifest " + manifestPath + " does not contain any entries")
+	}
+	return &manifestData[0], nil
+}
+
+// PullImagesToArchive takes an image reference (a tag or a digest)
+// and writes it into a docker tar archive in path
+func (di *spdxDefaultImplementation) PullImagesToArchive(referenceString, path string) error {
+ // Parse the string to get a reference (tag or digest)
+ ref, err := name.ParseReference(referenceString)
+ if err != nil {
+ return errors.Wrapf(err, "parsing reference %s", referenceString)
+ }
+
+ // Build an image from the reference
+ img, err := remote.Image(ref)
+ if err != nil {
+ return errors.Wrap(err, "getting image")
+ }
+
+ // This algo comes from crane:
+ // Try to cast the reference as a tag:
+ tag, ok := ref.(name.Tag)
+ // if it fails
+ if !ok {
+ // .. and it is a digest
+ d, ok := ref.(name.Digest)
+ if !ok {
+ return fmt.Errorf("reference is not a tag or digest")
+ }
+ // We add a mock tag
+ tag = d.Repository.Tag("from-digest") // Append digest here?
+ }
+
+ return tarball.MultiWriteToFile(path, map[name.Tag]v1.Image{tag: img})
+}
+
+// PackageFromLayerTarBall creates a SPDX package describing the layer
+// tarball layerFile, a path relative to opts.ExtractDir. Checksums and file
+// name are read from the tarball; the package is named after the hex encoded
+// SHA1 of the layerFile string (the path, not the file contents).
+func (di *spdxDefaultImplementation) PackageFromLayerTarBall(
+	layerFile string, opts *TarballOptions,
+) (*Package, error) {
+	logrus.Infof("Generating SPDX package from layer in %s", layerFile)
+
+	layerPkg := NewPackage()
+	layerPkg.options.WorkDir = opts.ExtractDir
+
+	sourcePath := filepath.Join(opts.ExtractDir, layerFile)
+	if err := layerPkg.ReadSourceFile(sourcePath); err != nil {
+		return nil, errors.Wrap(err, "reading source file")
+	}
+
+	// Build the pkg name from its internal path
+	layerPkg.Name = fmt.Sprintf("%x", sha1.Sum([]byte(layerFile)))
+
+	return layerPkg, nil
+}
diff --git a/pkg/spdx/package.go b/pkg/spdx/package.go
new file mode 100644
index 00000000000..e5768ed4b9c
--- /dev/null
+++ b/pkg/spdx/package.go
@@ -0,0 +1,243 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package spdx
+
+import (
+ "bytes"
+ "crypto/sha1"
+ "fmt"
+ "html/template"
+ "path/filepath"
+ "regexp"
+ "sort"
+ "strings"
+
+ "github.com/pkg/errors"
+ "sigs.k8s.io/release-utils/hash"
+ "sigs.k8s.io/release-utils/util"
+)
+
+// packageTemplate is the template Package.Render() executes to produce the
+// package section of the document. Optional tags are only written when the
+// corresponding field is set; the download location falls back to NONE and
+// the concluded license, declared license and copyright text fall back to
+// NOASSERTION.
+var packageTemplate = `##### Package: {{ .Name }}
+
+{{ if .Name }}PackageName: {{ .Name }}
+{{ end -}}
+{{ if .ID }}SPDXID: {{ .ID }}
+{{ end -}}
+{{- if .Checksum -}}
+{{- range $key, $value := .Checksum -}}
+{{ if . }}PackageChecksum: {{ $key }}: {{ $value }}
+{{ end -}}
+{{- end -}}
+{{- end -}}
+PackageDownloadLocation: {{ if .DownloadLocation }}{{ .DownloadLocation }}{{ else }}NONE{{ end }}
+FilesAnalyzed: {{ .FilesAnalyzed }}
+{{ if .VerificationCode }}PackageVerificationCode: {{ .VerificationCode }}
+{{ end -}}
+PackageLicenseConcluded: {{ if .LicenseConcluded }}{{ .LicenseConcluded }}{{ else }}NOASSERTION{{ end }}
+{{ if .FileName }}PackageFileName: {{ .FileName }}
+{{ end -}}
+{{ if .LicenseInfoFromFiles }}PackageLicenseInfoFromFiles: {{ .LicenseInfoFromFiles }}
+{{ end -}}
+{{ if .Version }}PackageVersion: {{ .Version }}
+{{ end -}}
+PackageLicenseDeclared: {{ if .LicenseDeclared }}{{ .LicenseDeclared }}{{ else }}NOASSERTION{{ end }}
+PackageCopyrightText: {{ if .CopyrightText }}{{ .CopyrightText }}
+{{ else }}NOASSERTION{{ end }}
+`
+
+// Package groups a set of files and, optionally, other packages. The
+// trailing comment on most fields shows an example value.
+type Package struct {
+ FilesAnalyzed bool // true
+ Name string // hello-go-src
+ ID string // SPDXRef-Package-hello-go-src
+ DownloadLocation string // git@github.com:swinslow/spdx-examples.git#example6/content/src
+ VerificationCode string // 6486e016b01e9ec8a76998cefd0705144d869234
+ LicenseConcluded string // License ID or NOASSERTION
+ LicenseInfoFromFiles string // GPL-3.0-or-later
+ LicenseDeclared string // GPL-3.0-or-later
+ LicenseComments string // record any relevant background information or analysis that went into arriving at the Concluded License
+ CopyrightText string // Copyright text; rendered as NOASSERTION when empty
+ Version string // Package version
+ FileName string // File name of the package (set by ReadSourceFile)
+ SourceFile string // Source file for the package (tarball for images, rpm, deb, etc)
+
+ // Supplier: the actual distribution source for the package/directory
+ Supplier struct {
+ Person string // person name and optional ()
+ Organization string // organization name and optional ()
+ }
+ // Originator: For example, the SPDX file identifies the package glibc and Red Hat as the Package Supplier,
+ // but the Free Software Foundation is the Package Originator.
+ Originator struct {
+ Person string // person name and optional ()
+ Organization string // organization name and optional ()
+ }
+ // Subpackages and files contained in the package, keyed by their IDs
+ Packages map[string]*Package // Sub packages contained in this pkg
+ Files map[string]*File // List of files
+ Checksum map[string]string // Checksums of the package, keyed by algorithm name
+
+ options *PackageOptions // Options
+}
+
+// NewPackage returns a new, empty Package with its options initialized.
+func NewPackage() (p *Package) {
+	return &Package{options: &PackageOptions{}}
+}
+
+// PackageOptions are the settings of a Package
+type PackageOptions struct {
+ // WorkDir is stripped (as a prefix) from the source file path to
+ // compute the package FileName
+ WorkDir string // Working directory to read files from
+}
+
+// Options returns a pointer to the package options, so callers can modify
+// them in place
+func (p *Package) Options() *PackageOptions {
+ return p.options
+}
+
+// ReadSourceFile takes the path of the package's source file and fills in
+// the fields derived from it: the SHA256 and SHA512 checksums, SourceFile
+// and FileName (the path with the options' WorkDir prefix removed). It
+// returns an error if the file does not exist or cannot be hashed.
+func (p *Package) ReadSourceFile(path string) error {
+	if !util.Exists(path) {
+		return errors.New("unable to find package source file")
+	}
+
+	sha256sum, err := hash.SHA256ForFile(path)
+	if err != nil {
+		return errors.Wrap(err, "getting source file sha256")
+	}
+	sha512sum, err := hash.SHA512ForFile(path)
+	if err != nil {
+		return errors.Wrap(err, "getting source file sha512")
+	}
+
+	// The file name is reported relative to the working directory
+	workDirPrefix := p.Options().WorkDir + string(filepath.Separator)
+
+	p.SourceFile = path
+	p.FileName = strings.TrimPrefix(path, workDirPrefix)
+	p.Checksum = map[string]string{
+		"SHA256": sha256sum,
+		"SHA512": sha512sum,
+	}
+	return nil
+}
+
+// AddFile adds a file contained in the package, indexing it by its ID.
+// When the file has no ID one is generated by hashing the package name and
+// the file name, which requires both to be set. Adding a file whose ID is
+// already registered replaces the previous entry.
+func (p *Package) AddFile(file *File) error {
+	if file == nil {
+		return errors.New("unable to add file to package, file is nil")
+	}
+	if p.Files == nil {
+		p.Files = map[string]*File{}
+	}
+	// If file does not have an ID, we try to build one
+	// by hashing the package and file names
+	if file.ID == "" {
+		if file.Name == "" {
+			return errors.New("unable to generate file ID, filename not set")
+		}
+		if p.Name == "" {
+			return errors.New("unable to generate file ID, package name not set")
+		}
+		file.ID = "SPDXRef-File-" + fmt.Sprintf("%x", sha1.Sum([]byte(p.Name+":"+file.Name)))
+	}
+	p.Files[file.ID] = file
+	return nil
+}
+
+// AddPackage registers pkg as a subpackage, indexed by its ID. If pkg has no
+// ID, one is derived from its name by dropping every character that is not a
+// letter, a digit or a dash. An error is returned when no ID can be derived
+// or when a subpackage with the same ID is already registered.
+func (p *Package) AddPackage(pkg *Package) error {
+	if p.Packages == nil {
+		p.Packages = map[string]*Package{}
+	}
+
+	if pkg.ID == "" {
+		// No ID yet, try to generate it from the package name
+		sanitized := regexp.MustCompile("[^a-zA-Z0-9-]+").ReplaceAllString(pkg.Name, "")
+		if sanitized == "" {
+			return errors.New("package name is needed to add a new package")
+		}
+		pkg.ID = "SPDXRef-Package-" + sanitized
+	}
+
+	if _, exists := p.Packages[pkg.ID]; exists {
+		return errors.New("a package named " + pkg.ID + " already exists in the document")
+	}
+
+	p.Packages[pkg.ID] = pkg
+	return nil
+}
+
+// Render renders the document fragment of the package: the package section
+// itself, followed by each of its files and each of its subpackages, every
+// one of them followed by a CONTAINS relationship from this package.
+//
+// Files and subpackages are written sorted by their map keys so rendering
+// the same package twice yields the same document. When FilesAnalyzed is
+// true the package verification code is (re)computed as the SHA1 of the
+// sorted, concatenated SHA1 checksums of the files, which requires the
+// package to have files and every file to carry a SHA1 checksum.
+//
+// NOTE(review): the template is executed with html/template, which HTML
+// escapes the values it writes (a "+" in a license ID, for instance). The
+// output is plain text, so text/template looks like the right package.
+func (p *Package) Render() (docFragment string, err error) {
+	var buf bytes.Buffer
+	tmpl, err := template.New("package").Parse(packageTemplate)
+	if err != nil {
+		return "", errors.Wrap(err, "parsing package template")
+	}
+
+	// Map iteration order is random, walk the files in a stable order
+	fileIDs := make([]string, 0, len(p.Files))
+	for id := range p.Files {
+		fileIDs = append(fileIDs, id)
+	}
+	sort.Strings(fileIDs)
+
+	// If files were analyzed, calculate the verification
+	if p.FilesAnalyzed {
+		if len(p.Files) == 0 {
+			return "", errors.New("unable to get package verification code, package has no files")
+		}
+		shaList := make([]string, 0, len(p.Files))
+		for _, id := range fileIDs {
+			f := p.Files[id]
+			if f.Checksum == nil {
+				return "", errors.New("unable to render package, file has no checksums")
+			}
+			sha, ok := f.Checksum["SHA1"]
+			if !ok {
+				return "", errors.New("unable to render package, files were analyzed but some do not have sha1 checksum")
+			}
+			shaList = append(shaList, sha)
+		}
+		sort.Strings(shaList)
+		p.VerificationCode = fmt.Sprintf("%x", sha1.Sum([]byte(strings.Join(shaList, ""))))
+	}
+
+	// Run the template to render the package section
+	if err := tmpl.Execute(&buf, p); err != nil {
+		return "", errors.Wrap(err, "executing spdx package template")
+	}
+
+	// Print the files contained in the package
+	for _, id := range fileIDs {
+		f := p.Files[id]
+		fileFragment, err := f.Render()
+		if err != nil {
+			return "", errors.Wrap(err, "rendering file "+f.Name)
+		}
+		buf.WriteString(fileFragment)
+		fmt.Fprintf(&buf, "Relationship: %s CONTAINS %s\n\n", p.ID, f.ID)
+	}
+
+	// Print the contained sub packages, also in a stable order
+	pkgIDs := make([]string, 0, len(p.Packages))
+	for id := range p.Packages {
+		pkgIDs = append(pkgIDs, id)
+	}
+	sort.Strings(pkgIDs)
+	for _, id := range pkgIDs {
+		pkg := p.Packages[id]
+		pkgDoc, err := pkg.Render()
+		if err != nil {
+			return "", errors.Wrap(err, "rendering pkg "+pkg.Name)
+		}
+		buf.WriteString(pkgDoc)
+		fmt.Fprintf(&buf, "Relationship: %s CONTAINS %s\n\n", p.ID, pkg.ID)
+	}
+	return buf.String(), nil
+}
diff --git a/pkg/spdx/spdx.go b/pkg/spdx/spdx.go
new file mode 100644
index 00000000000..07ea678494f
--- /dev/null
+++ b/pkg/spdx/spdx.go
@@ -0,0 +1,170 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package spdx
+
+import (
+ "os"
+ "path/filepath"
+
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+
+ "sigs.k8s.io/release-utils/util"
+)
+
+const (
+ // defaultDocumentAuthor is not referenced in this part of the file;
+ // presumably it is the author recorded in generated documents (TODO confirm).
+ defaultDocumentAuthor = "Kubernetes Release Managers (release-managers@kubernetes.io)"
+ // archiveManifestFilename is the name of the manifest file read from the
+ // root of an extracted image tarball.
+ archiveManifestFilename = "manifest.json"
+ // spdxLicenseCacheDir is the path, relative to the OS temporary directory,
+ // used as the default license cache directory.
+ spdxLicenseCacheDir = "spdx/lic"
+)
+
+// SPDX is the entry point for building SPDX packages and files. Its
+// operations are delegated to a replaceable implementation.
+type SPDX struct {
+ impl spdxImplementation // implementation the public methods delegate to
+ options *Options // settings of this object, exposed through Options()
+}
+
+// NewSPDX returns a new SPDX object backed by the default implementation
+// and initialized with its own copy of the default options.
+func NewSPDX() *SPDX {
+	// Copy the defaults so that changes made through Options() stay local to
+	// this object instead of leaking into the package-level defaults (and
+	// into every other SPDX object).
+	opts := defaultSPDXOptions
+	return &SPDX{
+		impl:    &spdxDefaultImplementation{},
+		options: &opts,
+	}
+}
+
+// SetImplementation replaces the implementation the SPDX object delegates
+// its operations to (presumably so tests can inject a fake one).
+func (spdx *SPDX) SetImplementation(impl spdxImplementation) {
+ spdx.impl = impl
+}
+
+// Options are the settings of an SPDX object.
+type Options struct {
+ LicenseCacheDir string // Directory to cache SPDX license information
+ // AnalyzeLayers controls whether PackageFromImageTarball runs
+ // AnalyzeImageLayer on each layer of the image.
+ AnalyzeLayers bool
+}
+
+// Options returns a pointer to the options of the SPDX object. The pointer
+// is not a copy: changes made through it modify the object's settings.
+func (spdx *SPDX) Options() *Options {
+ return spdx.options
+}
+
+// defaultSPDXOptions are the options used by NewSPDX: the license cache is
+// placed under the OS temporary directory and layer analysis is enabled.
+var defaultSPDXOptions = Options{
+ LicenseCacheDir: filepath.Join(os.TempDir(), spdxLicenseCacheDir),
+ AnalyzeLayers: true,
+}
+
+// ArchiveManifest holds the fields read from the manifest of an image
+// archive. Each field is mapped to the JSON key named in its tag.
+type ArchiveManifest struct {
+ ConfigFilename string `json:"Config"`
+ RepoTags []string `json:"RepoTags"` // the first entry is used as the image package name
+ LayerFiles []string `json:"Layers"` // layer file paths, relative to the extracted archive
+}
+
+// TarballOptions is the set of options for processing tar files
+type TarballOptions struct {
+ ExtractDir string // Directory where the docker tar archive will be extracted
+}
+
+// PackageFromImageTarball returns a SPDX package from an image tarball.
+//
+// The tarball at tarPath is extracted to a temporary directory and its
+// manifest is read. The returned package is named after the first RepoTags
+// entry of the manifest and contains one sub package per layer listed in it.
+// When the AnalyzeLayers option is enabled, each layer is also passed to
+// AnalyzeImageLayer.
+//
+// opts may be nil. opts.ExtractDir is always overwritten with the temporary
+// extraction directory, and that directory is removed before the function
+// returns, so it must not be relied upon afterwards.
+//
+// An error is returned if the extraction or the manifest read fails, if the
+// manifest has no usable RepoTags entry, or if any layer cannot be turned
+// into a package, analyzed or added to the image package.
+func (spdx *SPDX) PackageFromImageTarball(
+	tarPath string, opts *TarballOptions,
+) (imagePackage *Package, err error) {
+	logrus.Infof("Generating SPDX package from image tarball %s", tarPath)
+
+	// Tolerate nil options: the only field is set a few lines below
+	if opts == nil {
+		opts = &TarballOptions{}
+	}
+
+	// Extract all files from tarfile
+	opts.ExtractDir, err = spdx.impl.ExtractTarballTmp(tarPath)
+	if err != nil {
+		return nil, errors.Wrap(err, "extracting tarball to temp dir")
+	}
+	// The extracted files are only needed while the package is built
+	defer os.RemoveAll(opts.ExtractDir)
+
+	// Read the archive manifest json:
+	manifest, err := spdx.impl.ReadArchiveManifest(
+		filepath.Join(opts.ExtractDir, archiveManifestFilename),
+	)
+	if err != nil {
+		return nil, errors.Wrap(err, "while reading docker archive manifest")
+	}
+
+	// The first repo tag names the package, so it has to exist and be set
+	if len(manifest.RepoTags) == 0 {
+		return nil, errors.New("No RepoTags found in manifest")
+	}
+
+	if manifest.RepoTags[0] == "" {
+		return nil, errors.New(
+			"unable to add tar archive, manifest does not have a RepoTags entry",
+		)
+	}
+
+	logrus.Infof("Package describes %s image", manifest.RepoTags[0])
+
+	// Create the new SPDX package
+	imagePackage = NewPackage()
+	imagePackage.Options().WorkDir = opts.ExtractDir
+	imagePackage.Name = manifest.RepoTags[0]
+
+	logrus.Infof("Image manifest lists %d layers", len(manifest.LayerFiles))
+
+	// Cycle all the layers from the manifest and add them as packages
+	for _, layerFile := range manifest.LayerFiles {
+		// Generate a package from a layer
+		pkg, err := spdx.impl.PackageFromLayerTarBall(layerFile, opts)
+		if err != nil {
+			return nil, errors.Wrap(err, "building package from layer")
+		}
+
+		// If the option is enabled, scan the container layers
+		if spdx.options.AnalyzeLayers {
+			if err := spdx.AnalyzeImageLayer(filepath.Join(opts.ExtractDir, layerFile), pkg); err != nil {
+				return nil, errors.Wrap(err, "scanning layer "+pkg.ID)
+			}
+		} else {
+			logrus.Info("Not performing deep image analysis (opts.AnalyzeLayers = false)")
+		}
+
+		// Add the layer package to the image package
+		if err := imagePackage.AddPackage(pkg); err != nil {
+			return nil, errors.Wrap(err, "adding layer to image package")
+		}
+	}
+
+	// return the finished package
+	return imagePackage, nil
+}
+
+// FileFromPath builds a File object from the file found at filePath. It
+// returns an error when the path does not exist or when the source file
+// cannot be read into the new object.
+func (spdx *SPDX) FileFromPath(filePath string) (*File, error) {
+	if exists := util.Exists(filePath); !exists {
+		return nil, errors.New("file does not exist")
+	}
+
+	file := NewFile()
+	err := file.ReadSourceFile(filePath)
+	if err != nil {
+		return nil, errors.Wrap(err, "creating file from path")
+	}
+
+	return file, nil
+}
+
+// AnalyzeImageLayer uses the collection of image analyzers to see if
+// the layer matches a known image from which a spdx package can be
+// enriched with more information. A new image analyzer is created on
+// every call and run against layerPath and pkg.
+func (spdx *SPDX) AnalyzeImageLayer(layerPath string, pkg *Package) error {
+ return NewImageAnalyzer().AnalyzeLayer(layerPath, pkg)
+}
+
+// ExtractTarballTmp extracts a tarball to a temporary directory and returns
+// the path of that directory. The work is delegated to the implementation.
+// NOTE(review): removing the directory appears to be left to the caller
+// (PackageFromImageTarball removes it itself) — confirm.
+func (spdx *SPDX) ExtractTarballTmp(tarPath string) (tmpDir string, err error) {
+ return spdx.impl.ExtractTarballTmp(tarPath)
+}
+
+// PullImagesToArchive delegates to the implementation. Presumably it pulls
+// the image(s) identified by reference and writes them to an archive at
+// path — confirm against the implementation.
+func (spdx *SPDX) PullImagesToArchive(reference, path string) error {
+ return spdx.impl.PullImagesToArchive(reference, path)
+}