diff --git a/pkg/license/catalog.go b/pkg/license/catalog.go
new file mode 100644
index 00000000000..1f9d690490b
--- /dev/null
+++ b/pkg/license/catalog.go
@@ -0,0 +1,135 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package license
+
+import (
+	"path/filepath"
+	"sync"
+
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+)
+
+// CatalogOptions are the settings of the license catalog
+type CatalogOptions struct {
+	CacheDir string // Directory to cache the licenses we download from SPDX.org
+}
+
+// DefaultCatalogOpts are the predetermined settings. The cache directory is
+// left empty here, so its location is decided by the license downloader.
+var DefaultCatalogOpts = &CatalogOptions{}
+
+// NewCatalogWithOptions returns a Catalog whose downloader is configured to
+// cache license data in opts.CacheDir. It returns an error when opts is nil
+// or when the downloader cannot be created.
+func NewCatalogWithOptions(opts *CatalogOptions) (catalog *Catalog, err error) {
+	if opts == nil {
+		return nil, errors.New("unable to create catalog, options are nil")
+	}
+
+	// Create the license downloader
+	// NOTE(review): if DefaultDownloaderOpts is a pointer, setting CacheDir
+	// below changes the shared package defaults. Confirm and copy if so.
+	doptions := DefaultDownloaderOpts
+	doptions.CacheDir = opts.CacheDir
+	downloader, err := NewDownloaderWithOptions(doptions)
+	if err != nil {
+		return nil, errors.Wrap(err, "creating downloader")
+	}
+	catalog = &Catalog{
+		Downloader: downloader,
+		opts:       opts,
+	}
+
+	return catalog, nil
+}
+
+// Options returns a pointer to the catalog options
+func (catalog *Catalog) Options() *CatalogOptions {
+	return catalog.opts
+}
+
+// LoadLicenses reads the license data from the downloader and stores it in
+// the catalog. It fails if the downloader returns an error or no list at all.
+func (catalog *Catalog) LoadLicenses() error {
+	logrus.Info("Loading license data from downloader")
+	licenses, err := catalog.Downloader.GetLicenses()
+	if err != nil {
+		return errors.Wrap(err, "getting licenses from downloader")
+	}
+	if licenses == nil {
+		return errors.New("downloader did not return a license list")
+	}
+	catalog.List = licenses
+	logrus.Infof("Got %d licenses from downloader", len(licenses.Licenses))
+	return nil
+}
+
+// Catalog is an object to interact with licenses and manifest creation
+type Catalog struct {
+	Downloader *Downloader     // License Downloader
+	List       *List           // List of licenses
+	opts       *CatalogOptions // Catalog options
+}
+
+// WriteLicensesAsText writes the text of every loaded license to targetDir,
+// one file per license named after its ID with a .txt extension. The files
+// are written concurrently. If any write fails, the returned error carries
+// the messages of all the failures; it is nil when every file was written.
+func (catalog *Catalog) WriteLicensesAsText(targetDir string) error {
+	logrus.Info("Writing SPDX licenses to " + targetDir)
+	if catalog.List == nil || catalog.List.Licenses == nil {
+		return errors.New("unable to write licenses, they have not been loaded yet")
+	}
+	var (
+		wg  sync.WaitGroup
+		mtx sync.Mutex // guards err, which every writer goroutine may set
+		err error
+	)
+	for _, l := range catalog.List.Licenses {
+		wg.Add(1)
+		go func(l *License) {
+			defer wg.Done()
+			lerr := l.WriteText(filepath.Join(targetDir, l.LicenseID+".txt"))
+			if lerr == nil {
+				return
+			}
+			mtx.Lock()
+			defer mtx.Unlock()
+			if err == nil {
+				err = lerr
+			} else {
+				err = errors.Wrap(err, lerr.Error())
+			}
+		}(l)
+	}
+	wg.Wait()
+	// errors.Wrap returns nil when err is nil, so success yields a nil error
+	return errors.Wrap(err, "caught errors while writing license files")
+}
+
+// GetLicense returns a license struct from its SPDX ID label, or nil if the
+// label is unknown or no licenses have been loaded.
+func (catalog *Catalog) GetLicense(label string) *License {
+	if catalog.List != nil {
+		if lic, ok := catalog.List.Licenses[label]; ok {
+			return lic
+		}
+	}
+	logrus.Warnf("Label %s is not an identifier of a known license", label)
+	return nil
+}
diff --git a/pkg/license/download.go b/pkg/license/download.go
index ac62cc3bbb1..f84a71001f0 100644
--- a/pkg/license/download.go
+++ b/pkg/license/download.go
@@ -20,19 +20,22 @@ import (
 	"crypto/sha1"
 	"encoding/json"
 	"fmt"
-	"io"
-	"net/http"
 	"os"
 	"path/filepath"
+	"strings"
 
 	"github.com/nozzle/throttler"
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
+	"sigs.k8s.io/release-utils/http"
 	"sigs.k8s.io/release-utils/util"
 )
 
 // ListURL is the json list of all spdx licenses
-const ListURL = "https://spdx.org/licenses/licenses.json"
+const (
+	LicenseDataURL      = "https://spdx.org/licenses/"
+	LicenseListFilename = "licenses.json"
+)
 
 // NewDownloader returns a downloader with the default options
 func NewDownloader() (*Downloader, error) {
@@ -72,7 +75,7 @@ func (do *DownloaderOptions) Validate() error {
 	}
 	// And no cache dir was specified
 	if do.CacheDir == "" {
-		dir, err := os.MkdirTemp("", "license-cache-")
+		dir, err := os.MkdirTemp(os.TempDir(), "license-cache-")
 		if err != nil {
 			return errors.Wrap(err, "creating temporary directory")
 		}
@@ -94,7 +97,7 @@ func (d *Downloader) SetImplementation(di DownloaderImplementation) {
 
 // GetLicenses is the mina function of the downloader.
Returns a license list // or an error if could get them -func (d *Downloader) GetLicenses() (*SPDXLicenseList, error) { +func (d *Downloader) GetLicenses() (*List, error) { return d.impl.GetLicenses() } @@ -102,7 +105,7 @@ func (d *Downloader) GetLicenses() (*SPDXLicenseList, error) { // DownloaderImplementation has only one method type DownloaderImplementation interface { - GetLicenses() (*SPDXLicenseList, error) + GetLicenses() (*List, error) SetOptions(*DownloaderOptions) } @@ -124,23 +127,17 @@ func (ddi *DefaultDownloaderImpl) SetOptions(opts *DownloaderOptions) { } // GetLicenses downloads the main json file listing all SPDX supported licenses -func (ddi *DefaultDownloaderImpl) GetLicenses() (licenses *SPDXLicenseList, err error) { +func (ddi *DefaultDownloaderImpl) GetLicenses() (licenses *List, err error) { // TODO: Cache licenselist - logrus.Info("Downloading main SPDX license data") + logrus.Info("Downloading main SPDX license data from " + LicenseDataURL) // Get the list of licenses - resp, err := http.Get(ListURL) + licensesJSON, err := http.NewAgent().Get(LicenseDataURL + LicenseListFilename) if err != nil { return nil, errors.Wrap(err, "fetching licenses list") } - defer resp.Body.Close() - licensesJSON, err := io.ReadAll(resp.Body) - if err != nil { - return nil, errors.Wrap(err, "reading license list response body") - } - - licenseList := &SPDXLicenseList{} + licenseList := &List{} if err := json.Unmarshal(licensesJSON, licenseList); err != nil { return nil, errors.Wrap(err, "parsing SPDX licence list") } @@ -150,6 +147,11 @@ func (ddi *DefaultDownloaderImpl) GetLicenses() (licenses *SPDXLicenseList, err // Create a new Throttler that will get `parallelDownloads` urls at a time t := throttler.New(ddi.Options.parallelDownloads, len(licenseList.LicenseData)) for _, l := range licenseList.LicenseData { + licURL := l.Reference + // If the license URLs have a local reference + if strings.HasPrefix(licURL, "./") { + licURL = LicenseDataURL + 
strings.TrimPrefix(licURL, "./")
+		}
 		// Launch a goroutine to fetch the URL.
 		go func(url string) {
 			var err error
@@ -158,8 +160,9 @@ func (ddi *DefaultDownloaderImpl) GetLicenses() (licenses *SPDXLicenseList, err
 			if err != nil {
 				return
 			}
+			logrus.Debugf("Got license: %s from %s", l.LicenseID, url)
 			licenseList.Add(l)
-		}(l.DetailsURL)
+		}(licURL)
 		t.Throttle()
 	}
 
@@ -214,7 +217,7 @@ func (ddi *DefaultDownloaderImpl) getCachedData(url string) ([]byte, error) {
 }
 
 // getLicenseFromURL downloads a license in json and returns it parsed into a struct
-func (ddi *DefaultDownloaderImpl) getLicenseFromURL(url string) (license *SPDXLicense, err error) {
+func (ddi *DefaultDownloaderImpl) getLicenseFromURL(url string) (license *License, err error) {
 	licenseJSON := []byte{}
 	// Determine the cache file name
 	if ddi.Options.EnableCache {
@@ -230,15 +233,10 @@ func (ddi *DefaultDownloaderImpl) getLicenseFromURL(url string) (license *SPDXLi
 	// If we still don't have json data, download it
 	if len(licenseJSON) == 0 {
 		logrus.Infof("Downloading license data from %s", url)
-		resp, err := http.Get(url)
+		licenseJSON, err = http.NewAgent().Get(url)
 		if err != nil {
 			return nil, errors.Wrapf(err, "getting %s", url)
 		}
-		defer resp.Body.Close()
-		licenseJSON, err = io.ReadAll(resp.Body)
-		if err != nil {
-			return nil, errors.Wrap(err, "reading response body")
-		}
 
 		logrus.Infof("Downloaded %d bytes from %s", len(licenseJSON), url)
 
@@ -249,5 +247,10 @@ func (ddi *DefaultDownloaderImpl) getLicenseFromURL(url string) (license *SPDXLi
 		}
 	}
 
-	return ParseSPDXLicense(licenseJSON)
+	// Parse the SPDX license from the JSON data
+	l, err := ParseLicense(licenseJSON)
+	if err != nil {
+		return nil, errors.Wrap(err, "parsing license json data")
+	}
+	return l, err
 }
diff --git a/pkg/license/implementation.go b/pkg/license/implementation.go
new file mode 100644
index 00000000000..3915528b5f5
--- /dev/null
+++ b/pkg/license/implementation.go
@@ -0,0 +1,227 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package license
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"regexp"
+
+	licenseclassifier "github.com/google/licenseclassifier/v2"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+)
+
+// ReaderDefaultImpl is the default license reader implementation. It uses
+// Google's license classifier to identify license texts.
+type ReaderDefaultImpl struct {
+	lc      *licenseclassifier.Classifier
+	catalog *Catalog
+}
+
+// ClassifyFile runs the file at path through the license classifier.
+//
+// licenseTag is the name of the match with the highest confidence, or ""
+// when nothing matched. moreTags holds the distinct names of the remaining
+// matches in the order the classifier reported them; on success it is empty
+// rather than nil. An error is returned if the file cannot be opened or the
+// classifier reports a failure.
+func (d *ReaderDefaultImpl) ClassifyFile(path string) (licenseTag string, moreTags []string, err error) {
+	file, err := os.Open(path)
+	if err != nil {
+		return licenseTag, nil, errors.Wrap(err, "opening file for analysis")
+	}
+	defer file.Close()
+
+	// Get the classification. A classifier failure must not be mistaken for
+	// a file without a license, so the error is returned to the caller.
+	matches, err := d.Classifier().MatchFrom(file)
+	if err != nil {
+		return "", nil, errors.Wrap(err, "classifying file contents")
+	}
+	if len(matches) == 0 {
+		logrus.Warn("File does not match a known license: " + path)
+	}
+
+	// Pick the match with the highest confidence
+	var highestConf float64
+	for _, match := range matches {
+		if match.Confidence > highestConf {
+			highestConf = match.Confidence
+			licenseTag = match.Name
+		}
+	}
+
+	// Report every other distinct match name as an additional tag
+	moreTags = []string{}
+	seen := map[string]struct{}{licenseTag: {}}
+	for _, match := range matches {
+		if _, ok := seen[match.Name]; ok {
+			continue
+		}
+		seen[match.Name] = struct{}{}
+		moreTags = append(moreTags, match.Name)
+	}
+	return licenseTag, moreTags, nil
+}
+
+// ClassifyLicenseFiles classifies each of the files in paths. It returns the
+// recognized files paired with their license and, separately, the paths for
+// which the classifier produced no label. Classification stops with an error
+// at the first file that cannot be classified or whose label is not in the
+// catalog.
+func (d *ReaderDefaultImpl) ClassifyLicenseFiles(paths []string) (
+	licenseList []ClassifyResult, unrecognizedPaths []string, err error) {
+	// Run the files through the classifier
+	for _, f := range paths {
+		label, _, err := d.ClassifyFile(f)
+		if err != nil {
+			return nil, unrecognizedPaths, errors.Wrap(err, "classifying file")
+		}
+		if label == "" {
+			unrecognizedPaths = append(unrecognizedPaths, f)
+			continue
+		}
+		// Get the license corresponding to the ID label
+		license := d.catalog.GetLicense(label)
+		if license == nil {
+			return nil, unrecognizedPaths,
+				errors.New(fmt.Sprintf("ID does not correspond to a valid license: '%s'", label))
+		}
+		// Append to the return results
+		licenseList = append(licenseList, ClassifyResult{f, license})
+	}
+
+	// Multiply before dividing so the integer result is not truncated to 0,
+	// and skip the division when there are no paths at all.
+	percent := 0
+	if len(paths) > 0 {
+		percent = len(licenseList) * 100 / len(paths)
+	}
+	logrus.Infof(
+		"License classifier recognized %d/%d (%d%%) of the files",
+		len(licenseList), len(paths), percent,
+	)
+	return licenseList, unrecognizedPaths, nil
+}
+
+// LicenseFromLabel returns an SPDX license from its label
+func (d *ReaderDefaultImpl) LicenseFromLabel(label string) (license *License) {
+	return d.catalog.GetLicense(label)
+}
+
+// LicenseFromFile takes a file path and returns its license. A nil license
+// together with a nil error means the file holds no known license.
+func (d *ReaderDefaultImpl) LicenseFromFile(path string) (license *License, err error) {
+	// Run the file through the classifier
+	label, _, err := d.ClassifyFile(path)
+	if err != nil {
+		return nil, errors.Wrap(err, "classifying file")
+	}
+
+	if label == "" {
+		logrus.Info("File does not contain a known license: " + path)
+		return nil, nil
+	}
+
+	// Get the license corresponding to the ID label
+	license = d.catalog.GetLicense(label)
+	if license == nil {
+		return nil, errors.New(fmt.Sprintf("ID does not correspond to a valid license: %s", label))
+	}
+
+	return license, nil
+}
+
+// FindLicenseFiles walks the directory tree rooted at path and returns the
+// files whose name matches the license file name pattern. Go source files
+// are never considered.
+func (d *ReaderDefaultImpl) FindLicenseFiles(path string) ([]string, error) {
+	logrus.Infof("Scanning %s for license files", path)
+	licenseList := []string{}
+	re := regexp.MustCompile(licenseFilanameRe)
+	if err := filepath.Walk(path,
+		func(path string, finfo os.FileInfo, err error) error {
+			if err != nil {
+				return err
+			}
+
+			// Directories are ignored
+			if finfo.IsDir() {
+				return nil
+			}
+
+			// No go source files are considered
+			if filepath.Ext(path) == ".go" {
+				return nil
+			}
+			// Check if the file matches the license regexp
+			if re.MatchString(filepath.Base(path)) {
+				licenseList = append(licenseList, path)
+			}
+			return nil
+		}); err != nil {
+		return nil, errors.Wrap(err, "scanning the directory for license files")
+	}
+	logrus.Infof("%d license files found in directory", len(licenseList))
+	return licenseList, nil
+}
+
+// Initialize checks the options and creates the needed objects: the license
+// catalog is created and loaded, the license texts are written to disk and
+// the classifier is created and fed with them.
+func (d *ReaderDefaultImpl) Initialize(opts *ReaderOptions) error {
+	// Validate our options before starting
+	if err := opts.Validate(); err != nil {
+		return errors.Wrap(err, "validating the license reader options")
+	}
+
+	// Create the implementation's catalog. The defaults are copied so that
+	// setting the cache directory does not alter the shared
+	// DefaultCatalogOpts.
+	catalogOpts := *DefaultCatalogOpts
+	catalogOpts.CacheDir = opts.CachePath()
+	catalog, err := NewCatalogWithOptions(&catalogOpts)
+	if err != nil {
+		return errors.Wrap(err, "creating SPDX object")
+	}
+	d.catalog = catalog
+
+	if err := d.catalog.LoadLicenses(); err != nil {
+		return errors.Wrap(err, "loading licenses")
+	}
+
+	// Write the licenses to disk as the classifier will need them
+	if err := catalog.WriteLicensesAsText(opts.LicensesPath()); err != nil {
+		return errors.Wrap(err, "writing license data to disk")
+	}
+
+	// Create the implementation's classifier
+	d.lc = licenseclassifier.NewClassifier(opts.ConfidenceThreshold)
+	return errors.Wrap(d.lc.LoadLicenses(opts.LicensesPath()), "loading licenses at init")
+}
+
+// Classifier returns the license classifier
+func (d *ReaderDefaultImpl) Classifier() *licenseclassifier.Classifier {
+	return d.lc
+}
+
+// Catalog returns the reader's license catalog
+func (d *ReaderDefaultImpl) Catalog() *Catalog {
+	return d.catalog
+}
diff --git a/pkg/license/license.go b/pkg/license/license.go
index e6b6f74e9d3..6f6793ad0be 100644
--- a/pkg/license/license.go
+++ 
b/pkg/license/license.go @@ -19,12 +19,13 @@ limitations under the License. package license import ( - "fmt" + "bufio" + "encoding/json" "os" "path/filepath" - "regexp" + "strings" + "sync" - licenseclassifier "github.com/google/licenseclassifier/v2" "github.com/pkg/errors" "github.com/sirupsen/logrus" "sigs.k8s.io/release-utils/util" @@ -36,6 +37,31 @@ const ( defaultLicenseSubDir = "licenses" ) +const kubernetesBoilerPlate = `# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0` + +// DebianLicenseLabels is a map to get the SPDX label from a debian label +var DebianLicenseLabels = map[string]string{ + "Apache-2.0": "Apache-2.0", + "Artistic": "Artistic-1.0-Perl", + "BSD": "BSD-1-Clause", + "CC0-1.0": "CC0-1.0", + "GFDL-1.2": "GFDL-1.2", + "GFDL-1.3": "GFDL-1.3", + "GPL": "GPL-1.0", + "GPL-1": "GPL-1.0", + "GPL-2": "GPL-2.0", + "GPL-3": "GPL-3.0", + "LGPL-2": "LGPL-2.0", + "LGPL-2.1": "LGPL-2.1", + "LGPL-3": "LGPL-3.0", + "MPL-1.1": "MPL-1.1", + "MPL-2.0": "MPL-2.0", +} + // Reader is an object that finds and interprets license files type Reader struct { impl ReaderImplementation @@ -147,6 +173,20 @@ var DefaultReaderOptions = &ReaderOptions{ ConfidenceThreshold: 0.9, } +// LicenseFromLabel returns a spdx license from its label +func (r *Reader) LicenseFromLabel(label string) (license *License) { + return r.impl.LicenseFromLabel(label) +} + +// LicenseFromFile reads a file ans returns its license +func (r *Reader) LicenseFromFile(filePath string) (license *License, err error) { + license, err = r.impl.LicenseFromFile(filePath) + if err != nil { + return nil, errors.Wrap(err, "classifying file to determine license") + } + return license, err +} + // ReadLicenses returns an array of all licenses found in the specified path func (r *Reader) ReadLicenses(path string) (licenseList []ClassifyResult, 
unknownPaths []string, err error) {
 	licenseFiles, err := r.impl.FindLicenseFiles(path)
@@ -164,7 +204,7 @@ func (r *Reader) ReadLicenses(path string) (licenseList []ClassifyResult, unknow
 // ClassifyResult abstracts the data resulting from a file classification
 type ClassifyResult struct {
 	File    string
-	License *SPDXLicense
+	License *License
 }
 
 //counterfeiter:generate . ReaderImplementation
@@ -175,138 +215,110 @@ type ReaderImplementation interface {
 	Initialize(*ReaderOptions) error
 	ClassifyLicenseFiles([]string) ([]ClassifyResult, []string, error)
 	ClassifyFile(string) (string, []string, error)
+	LicenseFromFile(string) (*License, error)
+	LicenseFromLabel(string) *License
 	FindLicenseFiles(string) ([]string, error)
 }
 
-// ReaderDefaultImpl the default license reader imlementation, uses
-// Google's cicense classifier
-type ReaderDefaultImpl struct {
-	lc   *licenseclassifier.Classifier
-	spdx *SPDX
-}
-
-// Initialize checks the options and creates the needed objects
-func (d *ReaderDefaultImpl) Initialize(opts *ReaderOptions) error {
-	// Validate our options before startin
-	if err := opts.Validate(); err != nil {
-		return errors.Wrap(err, "validating the license reader options")
-	}
-
-	// Create the implementation's SPDX object
-	spdxopts := DefaultSPDXOpts
-	spdxopts.CacheDir = opts.CachePath()
-	spdx, err := NewSPDXWithOptions(spdxopts)
+// HasKubernetesBoilerPlate reports whether the Kubernetes license boilerplate
+// is found in the first lines (101 at most) of the file at filePath. An error
+// is returned if the file cannot be opened or read.
+func HasKubernetesBoilerPlate(filePath string) (bool, error) {
+	sut, err := os.Open(filePath)
 	if err != nil {
-		return errors.Wrap(err, "creating SPDX object")
+		return false, errors.Wrap(err, "opening file to check for k8s boilerplate")
 	}
-	d.spdx = spdx
-
-	if err := d.spdx.LoadLicenses(); err != nil {
-		return errors.Wrap(err, "loading licenses")
+	defer sut.Close()
+
+	// Gather the head of the file, one newline-terminated line at a time
+	scanner := bufio.NewScanner(sut)
+	scanner.Split(bufio.ScanLines)
+	var text strings.Builder
+	i := 0
+	for scanner.Scan() {
+		text.WriteString(scanner.Text())
+		text.WriteByte('\n')
+		i++
+		// Stop reading once more than 100 lines have been gathered
+		if i > 100 {
+			break
+		}
 	}
-
-	// Write the licenses to disk as th classifier will need them
-	if err := spdx.WriteLicensesAsText(opts.LicensesPath()); err != nil {
-		return errors.Wrap(err, "writing license data to disk")
+	// A line too long for the scanner only ends the scan early. Any other
+	// scanner error means the file could not be read and is reported.
+	if err := scanner.Err(); err != nil && err != bufio.ErrTooLong {
+		return false, errors.Wrap(err, "reading file to check for k8s boilerplate")
+	}
+
+	if strings.Contains(text.String(), kubernetesBoilerPlate) {
+		logrus.Infof("Found Kubernetes boilerplate in %s", filePath)
+		return true, nil
 	}
 
-	// Create the implementation's classifier
-	d.lc = licenseclassifier.NewClassifier(opts.ConfidenceThreshold)
-	return errors.Wrap(d.lc.LoadLicenses(opts.LicensesPath()), "loading licenses at init")
-}
-
-// Classifier returns the license classifier
-func (d *ReaderDefaultImpl) Classifier() *licenseclassifier.Classifier {
-	return d.lc
+	return false, nil
 }
 
-// SPDX returns the reader's SPDX object
-func (d *ReaderDefaultImpl) SPDX() *SPDX {
-	return d.spdx
+// List abstracts the list of licenses published by SPDX.org. The embedded
+// lock guards the Licenses map when licenses are added through Add.
+type List struct {
+	sync.RWMutex
+	Version           string      `json:"licenseListVersion"`
+	ReleaseDateString string      `json:"releaseDate"`
+	LicenseData       []ListEntry `json:"licenses"`
+	Licenses          map[string]*License
 }
 
-// ClassifyFile takes a file path and returns the most probable license tag
-func (d *ReaderDefaultImpl) ClassifyFile(path string) (licenseTag string, moreTags []string, err error) {
-	file, err := os.Open(path)
-	if err != nil {
-		return licenseTag, nil, errors.Wrap(err, "opening file for analysis")
+// Add stores a license in the list under its license ID, creating the map on
+// first use. The list is locked for the duration of the call.
+func (list *List) Add(license *License) {
+	list.Lock()
+	defer list.Unlock()
+	if list.Licenses == nil {
+		list.Licenses = map[string]*License{}
 	}
-	defer file.Close()
+	list.Licenses[license.LicenseID] = license
+}
 
-	// Get the classsification
-	matches, err := d.Classifier().MatchFrom(file)
-	if len(matches) == 0 {
-		logrus.Warn("File does not match a known license: " + path)
-	}
-	var highestConf float64
-	moreTags = []string{}
-	for _, match := range matches {
-		if match.Confidence > highestConf {
-			highestConf = match.Confidence
-			licenseTag = match.Name
-			moreTags = append(moreTags, match.Name)
-		}
-	}
-	return licenseTag, []string{}, nil
+// License is an SPDX license described in JSON
+type License struct {
+	IsDeprecatedLicenseID         bool     `json:"isDeprecatedLicenseId"`
+	IsFsfLibre                    bool     `json:"isFsfLibre"`
+	IsOsiApproved                 bool     `json:"isOsiApproved"`
+	LicenseText                   string   `json:"licenseText"`
+	StandardLicenseHeaderTemplate string   `json:"standardLicenseHeaderTemplate"`
+	StandardLicenseTemplate       string   `json:"standardLicenseTemplate"`
+	Name                          string   `json:"name"`
+	LicenseID                     string   `json:"licenseId"`
+	StandardLicenseHeader         string   `json:"standardLicenseHeader"`
+	SeeAlso                       []string `json:"seeAlso"`
 }
 
-// ClassifyLicenseFiles takes a list of paths and tries to find return all licenses found in it
-func (d *ReaderDefaultImpl) ClassifyLicenseFiles(paths []string) (
-	licenseList []ClassifyResult, unrecognizedPaths []string, err error) {
-	// Run the files through the clasifier
-	for _, f := range paths {
-		label, _, err := d.ClassifyFile(f)
-		if err != nil {
-			return nil, unrecognizedPaths, errors.Wrap(err, "classifying file")
-		}
-		if label == "" {
-			unrecognizedPaths = append(unrecognizedPaths, f)
-			continue
-		}
-		// Get the license corresponding to the ID label
-		license := d.spdx.GetLicense(label)
-		if license == nil {
-			return nil, unrecognizedPaths,
-				errors.New(fmt.Sprintf("ID does not correspond to a valid license: %s", label))
-		}
-		// Apend to the return results
-		licenseList = append(licenseList, ClassifyResult{f, license})
-	}
-	logrus.Infof(
-		"License classifier recognized %d/%d (%d%%) os the files",
-		len(licenseList), len(paths), (len(licenseList)/len(paths))*100,
+// WriteText writes the SPDX license text to a text file
+func (license *License) WriteText(filePath string) error {
+	return errors.Wrap(
+		os.WriteFile(
+			filePath, []byte(license.LicenseText), os.FileMode(0o644),
+		), "while writing license to text file",
 	)
-	return licenseList, unrecognizedPaths, nil
 }
 
-// FindLicenseFiles will scan a directory and return files that may be licenses
-func (d *ReaderDefaultImpl) FindLicenseFiles(path string) ([]string, error) {
-	logrus.Infof("Scanning %s for license files", path)
-	licenseList := []string{}
-	re := regexp.MustCompile(licenseFilanameRe)
-	if err := filepath.Walk(path,
-		func(path string, finfo os.FileInfo, err error) error {
-			if err != nil {
-				return err
-			}
-
-			// Directories are ignored
-			if finfo.IsDir() {
-				return nil
-			}
+// ListEntry is a license entry in the list
+type ListEntry struct {
+	IsOsiApproved   bool     `json:"isOsiApproved"`
+	IsDeprectaed    bool     `json:"isDeprecatedLicenseId"`
+	Reference       string   `json:"reference"`
+	DetailsURL      string   `json:"detailsUrl"`
+	ReferenceNumber int      `json:"referenceNumber"`
+	Name            string   `json:"name"`
+	LicenseID       string   `json:"licenseId"`
+	SeeAlso         []string `json:"seeAlso"`
+}
 
-			// No go source files are considered
-			if filepath.Ext(path) == ".go" {
-				return nil
-			}
-			// Check if tehe file matches the license regexp
-			if re.MatchString(filepath.Base(path)) {
-				licenseList = append(licenseList, path)
-			}
-			return nil
-		}); err != nil {
-		return nil, errors.Wrap(err, "scanning the directory for license files")
+// ParseLicense parses a SPDX license from its JSON source
+func ParseLicense(licenseJSON []byte) (license *License, err error) {
+	license = &License{}
+	if err := json.Unmarshal(licenseJSON, license); err != nil {
+		return nil, errors.Wrap(err, "parsing SPDX licence")
 	}
-	logrus.Infof("%d license files found in directory", len(licenseList))
-	return licenseList, nil
+	return license, nil
 }
diff --git a/pkg/license/license_test.go b/pkg/license/license_test.go
index b58ae5aad35..1391aa14636 100644
--- a/pkg/license/license_test.go
+++ b/pkg/license/license_test.go
@@ -46,21 +46,20 @@ const testFullLicense = `
 }
 `
 
-func TestISPDXLoadLicenses(t *testing.T) {
+func TestISCatalogLoadLicenses(t *testing.T) {
 	downloader := 
&license.Downloader{} // Create a SPDX to test - spdx := &license.SPDX{ - Downloader: downloader, - Options: license.DefaultSPDXOpts, - } + spdx, err := license.NewCatalogWithOptions(license.DefaultCatalogOpts) + require.Nil(t, err) + spdx.Downloader = downloader for _, tc := range []struct { mustFail bool - dnLoaderReturns *license.SPDXLicenseList + dnLoaderReturns *license.List dnLoaderError error }{ {true, nil, errors.New("Some download error")}, - {false, &license.SPDXLicenseList{}, nil}, + {false, &license.List{}, nil}, } { impl := licensefakes.FakeDownloaderImplementation{} impl.GetLicensesReturns(tc.dnLoaderReturns, tc.dnLoaderError) @@ -76,20 +75,21 @@ func TestISPDXLoadLicenses(t *testing.T) { func TestUSPDXWriteLicensesAsText(t *testing.T) { testLicenseID := "test-license" + testLicenseID2 := "test-license2" downloader := &license.Downloader{} impl := licensefakes.FakeDownloaderImplementation{} - impl.GetLicensesReturns(&license.SPDXLicenseList{ - Licenses: map[string]*license.SPDXLicense{ - testLicenseID: {LicenseID: testLicenseID, LicenseText: "Test"}, + impl.GetLicensesReturns(&license.List{ + Licenses: map[string]*license.License{ + testLicenseID: {LicenseID: testLicenseID, LicenseText: "Test"}, + testLicenseID2: {LicenseID: testLicenseID2, LicenseText: "Test2"}, }, }, nil) downloader.SetImplementation(&impl) // Create a SPDX to test - spdx := &license.SPDX{ - Downloader: downloader, - Options: license.DefaultSPDXOpts, - } + spdx, err := license.NewCatalogWithOptions(license.DefaultCatalogOpts) + require.Nil(t, err) + spdx.Downloader = downloader // Get the licenses from the fke downloader require.Nil(t, spdx.LoadLicenses()) @@ -109,29 +109,27 @@ func TestUSPDXWriteLicensesAsText(t *testing.T) { func TestUSPDXGetLicense(t *testing.T) { testLicenseID := "test-license" testLicenseContent := "Test license content" - spdx := license.SPDX{ - Downloader: &license.Downloader{}, - Licenses: &license.SPDXLicenseList{ - Licenses: 
map[string]*license.SPDXLicense{ - testLicenseID: {LicenseID: testLicenseID, LicenseText: testLicenseContent}, - }, + catalog, err := license.NewCatalogWithOptions(&license.CatalogOptions{}) + require.Nil(t, err) + catalog.List = &license.List{ + Licenses: map[string]*license.License{ + testLicenseID: {LicenseID: testLicenseID, LicenseText: testLicenseContent}, }, - Options: &license.SPDXOptions{}, } - testTicense := spdx.GetLicense(testLicenseID) + testTicense := catalog.GetLicense(testLicenseID) require.NotNil(t, testTicense) require.Equal(t, testTicense.LicenseID, testLicenseID) require.Equal(t, testTicense.LicenseText, testLicenseContent) - testTicense = spdx.GetLicense("invalid-license-id") + testTicense = catalog.GetLicense("invalid-license-id") require.Nil(t, testTicense) } func TestUSPDXLicenseListAdd(t *testing.T) { // Create a sample license - licenseList := &license.SPDXLicenseList{} - testLicense := &license.SPDXLicense{LicenseID: "test-license", LicenseText: "test text"} + licenseList := &license.List{} + testLicense := &license.License{LicenseID: "test-license", LicenseText: "test text"} // Use the Add method to add it to the collection licenseList.Add(testLicense) // Retrieve the data from the struct @@ -167,7 +165,7 @@ func CheckFileExists(t *testing.T, path string) error { } func TestULicenseWriteText(t *testing.T) { - testLicense := license.SPDXLicense{ + testLicense := license.License{ LicenseText: "Test license text", LicenseID: "test-license", } @@ -182,7 +180,7 @@ func TestULicenseWriteText(t *testing.T) { } func TestParseSPDXLicense(t *testing.T) { - testsLicense, err := license.ParseSPDXLicense([]byte(testFullLicense)) + testsLicense, err := license.ParseLicense([]byte(testFullLicense)) require.Nil(t, err) require.NotNil(t, testsLicense) diff --git a/pkg/license/licensefakes/fake_downloader_implementation.go b/pkg/license/licensefakes/fake_downloader_implementation.go index de7875293fc..6857600b04c 100644 --- 
a/pkg/license/licensefakes/fake_downloader_implementation.go +++ b/pkg/license/licensefakes/fake_downloader_implementation.go @@ -1,19 +1,3 @@ -/* -Copyright The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - // Code generated by counterfeiter. DO NOT EDIT. package licensefakes @@ -24,16 +8,16 @@ import ( ) type FakeDownloaderImplementation struct { - GetLicensesStub func() (*license.SPDXLicenseList, error) + GetLicensesStub func() (*license.List, error) getLicensesMutex sync.RWMutex getLicensesArgsForCall []struct { } getLicensesReturns struct { - result1 *license.SPDXLicenseList + result1 *license.List result2 error } getLicensesReturnsOnCall map[int]struct { - result1 *license.SPDXLicenseList + result1 *license.List result2 error } SetOptionsStub func(*license.DownloaderOptions) @@ -45,7 +29,7 @@ type FakeDownloaderImplementation struct { invocationsMutex sync.RWMutex } -func (fake *FakeDownloaderImplementation) GetLicenses() (*license.SPDXLicenseList, error) { +func (fake *FakeDownloaderImplementation) GetLicenses() (*license.List, error) { fake.getLicensesMutex.Lock() ret, specificReturn := fake.getLicensesReturnsOnCall[len(fake.getLicensesArgsForCall)] fake.getLicensesArgsForCall = append(fake.getLicensesArgsForCall, struct { @@ -69,34 +53,34 @@ func (fake *FakeDownloaderImplementation) GetLicensesCallCount() int { return len(fake.getLicensesArgsForCall) } -func (fake *FakeDownloaderImplementation) GetLicensesCalls(stub func() (*license.SPDXLicenseList, 
error)) { +func (fake *FakeDownloaderImplementation) GetLicensesCalls(stub func() (*license.List, error)) { fake.getLicensesMutex.Lock() defer fake.getLicensesMutex.Unlock() fake.GetLicensesStub = stub } -func (fake *FakeDownloaderImplementation) GetLicensesReturns(result1 *license.SPDXLicenseList, result2 error) { +func (fake *FakeDownloaderImplementation) GetLicensesReturns(result1 *license.List, result2 error) { fake.getLicensesMutex.Lock() defer fake.getLicensesMutex.Unlock() fake.GetLicensesStub = nil fake.getLicensesReturns = struct { - result1 *license.SPDXLicenseList + result1 *license.List result2 error }{result1, result2} } -func (fake *FakeDownloaderImplementation) GetLicensesReturnsOnCall(i int, result1 *license.SPDXLicenseList, result2 error) { +func (fake *FakeDownloaderImplementation) GetLicensesReturnsOnCall(i int, result1 *license.List, result2 error) { fake.getLicensesMutex.Lock() defer fake.getLicensesMutex.Unlock() fake.GetLicensesStub = nil if fake.getLicensesReturnsOnCall == nil { fake.getLicensesReturnsOnCall = make(map[int]struct { - result1 *license.SPDXLicenseList + result1 *license.List result2 error }) } fake.getLicensesReturnsOnCall[i] = struct { - result1 *license.SPDXLicenseList + result1 *license.List result2 error }{result1, result2} } diff --git a/pkg/license/licensefakes/fake_reader_implementation.go b/pkg/license/licensefakes/fake_reader_implementation.go index 2b67ed8ad0b..405c6176081 100644 --- a/pkg/license/licensefakes/fake_reader_implementation.go +++ b/pkg/license/licensefakes/fake_reader_implementation.go @@ -1,19 +1,3 @@ -/* -Copyright The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - // Code generated by counterfeiter. DO NOT EDIT. package licensefakes @@ -78,6 +62,30 @@ type FakeReaderImplementation struct { initializeReturnsOnCall map[int]struct { result1 error } + LicenseFromFileStub func(string) (*license.License, error) + licenseFromFileMutex sync.RWMutex + licenseFromFileArgsForCall []struct { + arg1 string + } + licenseFromFileReturns struct { + result1 *license.License + result2 error + } + licenseFromFileReturnsOnCall map[int]struct { + result1 *license.License + result2 error + } + LicenseFromLabelStub func(string) *license.License + licenseFromLabelMutex sync.RWMutex + licenseFromLabelArgsForCall []struct { + arg1 string + } + licenseFromLabelReturns struct { + result1 *license.License + } + licenseFromLabelReturnsOnCall map[int]struct { + result1 *license.License + } invocations map[string][][]interface{} invocationsMutex sync.RWMutex } @@ -346,6 +354,131 @@ func (fake *FakeReaderImplementation) InitializeReturnsOnCall(i int, result1 err }{result1} } +func (fake *FakeReaderImplementation) LicenseFromFile(arg1 string) (*license.License, error) { + fake.licenseFromFileMutex.Lock() + ret, specificReturn := fake.licenseFromFileReturnsOnCall[len(fake.licenseFromFileArgsForCall)] + fake.licenseFromFileArgsForCall = append(fake.licenseFromFileArgsForCall, struct { + arg1 string + }{arg1}) + stub := fake.LicenseFromFileStub + fakeReturns := fake.licenseFromFileReturns + fake.recordInvocation("LicenseFromFile", []interface{}{arg1}) + fake.licenseFromFileMutex.Unlock() + if stub != nil { + return stub(arg1) + } + if specificReturn { + return 
ret.result1, ret.result2 + } + return fakeReturns.result1, fakeReturns.result2 +} + +func (fake *FakeReaderImplementation) LicenseFromFileCallCount() int { + fake.licenseFromFileMutex.RLock() + defer fake.licenseFromFileMutex.RUnlock() + return len(fake.licenseFromFileArgsForCall) +} + +func (fake *FakeReaderImplementation) LicenseFromFileCalls(stub func(string) (*license.License, error)) { + fake.licenseFromFileMutex.Lock() + defer fake.licenseFromFileMutex.Unlock() + fake.LicenseFromFileStub = stub +} + +func (fake *FakeReaderImplementation) LicenseFromFileArgsForCall(i int) string { + fake.licenseFromFileMutex.RLock() + defer fake.licenseFromFileMutex.RUnlock() + argsForCall := fake.licenseFromFileArgsForCall[i] + return argsForCall.arg1 +} + +func (fake *FakeReaderImplementation) LicenseFromFileReturns(result1 *license.License, result2 error) { + fake.licenseFromFileMutex.Lock() + defer fake.licenseFromFileMutex.Unlock() + fake.LicenseFromFileStub = nil + fake.licenseFromFileReturns = struct { + result1 *license.License + result2 error + }{result1, result2} +} + +func (fake *FakeReaderImplementation) LicenseFromFileReturnsOnCall(i int, result1 *license.License, result2 error) { + fake.licenseFromFileMutex.Lock() + defer fake.licenseFromFileMutex.Unlock() + fake.LicenseFromFileStub = nil + if fake.licenseFromFileReturnsOnCall == nil { + fake.licenseFromFileReturnsOnCall = make(map[int]struct { + result1 *license.License + result2 error + }) + } + fake.licenseFromFileReturnsOnCall[i] = struct { + result1 *license.License + result2 error + }{result1, result2} +} + +func (fake *FakeReaderImplementation) LicenseFromLabel(arg1 string) *license.License { + fake.licenseFromLabelMutex.Lock() + ret, specificReturn := fake.licenseFromLabelReturnsOnCall[len(fake.licenseFromLabelArgsForCall)] + fake.licenseFromLabelArgsForCall = append(fake.licenseFromLabelArgsForCall, struct { + arg1 string + }{arg1}) + stub := fake.LicenseFromLabelStub + fakeReturns := 
fake.licenseFromLabelReturns + fake.recordInvocation("LicenseFromLabel", []interface{}{arg1}) + fake.licenseFromLabelMutex.Unlock() + if stub != nil { + return stub(arg1) + } + if specificReturn { + return ret.result1 + } + return fakeReturns.result1 +} + +func (fake *FakeReaderImplementation) LicenseFromLabelCallCount() int { + fake.licenseFromLabelMutex.RLock() + defer fake.licenseFromLabelMutex.RUnlock() + return len(fake.licenseFromLabelArgsForCall) +} + +func (fake *FakeReaderImplementation) LicenseFromLabelCalls(stub func(string) *license.License) { + fake.licenseFromLabelMutex.Lock() + defer fake.licenseFromLabelMutex.Unlock() + fake.LicenseFromLabelStub = stub +} + +func (fake *FakeReaderImplementation) LicenseFromLabelArgsForCall(i int) string { + fake.licenseFromLabelMutex.RLock() + defer fake.licenseFromLabelMutex.RUnlock() + argsForCall := fake.licenseFromLabelArgsForCall[i] + return argsForCall.arg1 +} + +func (fake *FakeReaderImplementation) LicenseFromLabelReturns(result1 *license.License) { + fake.licenseFromLabelMutex.Lock() + defer fake.licenseFromLabelMutex.Unlock() + fake.LicenseFromLabelStub = nil + fake.licenseFromLabelReturns = struct { + result1 *license.License + }{result1} +} + +func (fake *FakeReaderImplementation) LicenseFromLabelReturnsOnCall(i int, result1 *license.License) { + fake.licenseFromLabelMutex.Lock() + defer fake.licenseFromLabelMutex.Unlock() + fake.LicenseFromLabelStub = nil + if fake.licenseFromLabelReturnsOnCall == nil { + fake.licenseFromLabelReturnsOnCall = make(map[int]struct { + result1 *license.License + }) + } + fake.licenseFromLabelReturnsOnCall[i] = struct { + result1 *license.License + }{result1} +} + func (fake *FakeReaderImplementation) Invocations() map[string][][]interface{} { fake.invocationsMutex.RLock() defer fake.invocationsMutex.RUnlock() @@ -357,6 +490,10 @@ func (fake *FakeReaderImplementation) Invocations() map[string][][]interface{} { defer fake.findLicenseFilesMutex.RUnlock() 
fake.initializeMutex.RLock() defer fake.initializeMutex.RUnlock() + fake.licenseFromFileMutex.RLock() + defer fake.licenseFromFileMutex.RUnlock() + fake.licenseFromLabelMutex.RLock() + defer fake.licenseFromLabelMutex.RUnlock() copiedInvocations := map[string][][]interface{}{} for key, value := range fake.invocations { copiedInvocations[key] = value diff --git a/pkg/license/spdx.go b/pkg/license/spdx.go deleted file mode 100644 index 2d8e56569ec..00000000000 --- a/pkg/license/spdx.go +++ /dev/null @@ -1,170 +0,0 @@ -/* -Copyright 2021 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package license - -import ( - "encoding/json" - "os" - "path/filepath" - "sync" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" -) - -// NewSPDX returns a SPDX object with the default options -func NewSPDX() (spdx *SPDX, err error) { - return NewSPDXWithOptions(DefaultSPDXOpts) -} - -// NewSPDXWithOptions returns a SPDX object with the specified options -func NewSPDXWithOptions(opts *SPDXOptions) (spdx *SPDX, err error) { - // Create the license Downloader - doptions := DefaultDownloaderOpts - doptions.CacheDir = opts.CacheDir - downloader, err := NewDownloaderWithOptions(doptions) - if err != nil { - return nil, errors.Wrap(err, "creating downloader") - } - spdx = &SPDX{ - Downloader: downloader, - Options: DefaultSPDXOpts, - } - if err := spdx.Options.Validate(); err != nil { - return nil, err - } - return spdx, nil -} - -// SPDX is an objec to interact with licenses and manifest creation -type SPDX struct { - Downloader *Downloader // License Downloader - Licenses *SPDXLicenseList // List of licenses - Options *SPDXOptions // SPDX Options -} - -// SPDXOptions are the spdx settings -type SPDXOptions struct { - CacheDir string -} - -// Validate checks the spdx options -func (o *SPDXOptions) Validate() error { - return nil -} - -// DefaultSPDXOpts are the predetermined settings. 
License and cache directories -// are in the temporary OS directory and are created if the do not exist -var DefaultSPDXOpts = &SPDXOptions{} - -// SPDXLicenseList abstracts the list of licenses published by SPDX.org -type SPDXLicenseList struct { - sync.RWMutex - Version string `json:"licenseListVersion"` - ReleaseDateString string `json:"releaseDate "` - LicenseData []SPDXLicenseListEntry `json:"licenses"` - Licenses map[string]*SPDXLicense -} - -// Add appends a license to the license list -func (list *SPDXLicenseList) Add(license *SPDXLicense) { - list.Lock() - defer list.Unlock() - if list.Licenses == nil { - list.Licenses = map[string]*SPDXLicense{} - } - list.Licenses[license.LicenseID] = license -} - -// SPDXLicense is a license described in JSON -type SPDXLicense struct { - IsDeprecatedLicenseID bool `json:"isDeprecatedLicenseId"` - IsFsfLibre bool `json:"isFsfLibre"` - IsOsiApproved bool `json:"isOsiApproved"` - LicenseText string `json:"licenseText"` - StandardLicenseHeaderTemplate string `json:"standardLicenseHeaderTemplate"` - StandardLicenseTemplate string `json:"standardLicenseTemplate"` - Name string `json:"name"` - LicenseID string `json:"licenseId"` - StandardLicenseHeader string `json:"standardLicenseHeader"` - SeeAlso []string `json:"seeAlso"` -} - -// WriteText writes the SPDX license text to a text file -func (license *SPDXLicense) WriteText(filePath string) error { - return errors.Wrap( - os.WriteFile( - filePath, []byte(license.LicenseText), os.FileMode(0o644), - ), "while writing license to text file", - ) -} - -// SPDXLicenseListEntry a license entry in the list -type SPDXLicenseListEntry struct { - IsOsiApproved bool `json:"isOsiApproved"` - IsDeprectaed bool `json:"isDeprecatedLicenseId"` - Reference string `json:"reference"` - DetailsURL string `json:"detailsUrl"` - ReferenceNumber string `json:"referenceNumber"` - Name string `json:"name"` - LicenseID string `json:"licenseId"` - SeeAlso []string `json:"seeAlso"` -} - -// LoadLicenses 
reads the license data from the downloader -func (spdx *SPDX) LoadLicenses() error { - logrus.Info("Loading license data from downloader") - licenses, err := spdx.Downloader.GetLicenses() - if err != nil { - return errors.Wrap(err, "getting licenses from downloader") - } - spdx.Licenses = licenses - logrus.Infof("SPDX: Got %d licenses from downloader", len(licenses.Licenses)) - return nil -} - -// WriteLicensesAsText writes the SPDX license collection to text files -func (spdx *SPDX) WriteLicensesAsText(targetDir string) error { - logrus.Info("Writing SPDX licenses to " + targetDir) - if spdx.Licenses.Licenses == nil { - return errors.New("unable to write licenses, they have not been loaded yet") - } - for _, l := range spdx.Licenses.Licenses { - if err := l.WriteText(filepath.Join(targetDir, l.LicenseID+".txt")); err != nil { - return errors.Wrapf(err, "while writing license %s", l.LicenseID) - } - } - return nil -} - -// GetLicense returns a license struct from its SPDX ID label -func (spdx *SPDX) GetLicense(label string) *SPDXLicense { - if lic, ok := spdx.Licenses.Licenses[label]; ok { - return lic - } - logrus.Warn("Label %s is not an ID of a known license " + label) - return nil -} - -// ParseSPDXLicense parses a SPDX license from its JSON source -func ParseSPDXLicense(licenseJSON []byte) (license *SPDXLicense, err error) { - license = &SPDXLicense{} - if err := json.Unmarshal(licenseJSON, license); err != nil { - return nil, errors.Wrap(err, "parsing SPDX licence") - } - return license, nil -} diff --git a/pkg/spdx/builder.go b/pkg/spdx/builder.go new file mode 100644 index 00000000000..ee2f62a6619 --- /dev/null +++ b/pkg/spdx/builder.go @@ -0,0 +1,200 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package spdx + +import ( + "os" + "path/filepath" + + "github.com/google/go-containerregistry/pkg/name" + "github.com/google/uuid" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "sigs.k8s.io/release-utils/util" +) + +func NewDocBuilder() *DocBuilder { + db := &DocBuilder{ + options: &defaultDocBuilderOpts, + impl: defaultDocBuilderImpl{}, + } + return db +} + +// DocBuilder is a tool to write spdx manifests +type DocBuilder struct { + options *DocBuilderOptions + impl DocBuilderImplementation +} + +// Generate creates anew SPDX document describing the artifacts specified in the options +func (db *DocBuilder) Generate(genopts *DocGenerateOptions) (*Document, error) { + // Create the SPDX document + doc, err := db.impl.GenerateDoc(db.options, genopts) + if err != nil { + return nil, errors.Wrap(err, "creating SPDX document") + } + + // If we have a specified output file, write it + if genopts.OutputFile == "" { + return doc, nil + } + + return doc, errors.Wrapf( + db.impl.WriteDoc(doc, genopts.OutputFile), + "writing doc to %s", genopts.OutputFile, + ) +} + +type DocGenerateOptions struct { + Tarballs []string // A slice of tar paths + Files []string // A slice of naked files to include in the bom + Images []string // A slice of docker images + OutputFile string // Output location + Namespace string // Namespace for the document (a unique URI) + AnalyseLayers bool // A flag that controls if deep layer analysis should be performed +} + +func (o *DocGenerateOptions) Validate() error { + if len(o.Tarballs) == 0 && len(o.Files) == 0 && len(o.Images) == 0 { + 
return errors.New( + "To build a document at least an image, tarball or a file has to be specified", + ) + } + return nil +} + +type DocBuilderOptions struct { + WorkDir string // Working directory (defaults to a tmp dir) +} + +var defaultDocBuilderOpts = DocBuilderOptions{ + WorkDir: filepath.Join(os.TempDir(), "spdx-docbuilder"), +} + +type DocBuilderImplementation interface { + GenerateDoc(*DocBuilderOptions, *DocGenerateOptions) (*Document, error) + WriteDoc(*Document, string) error +} + +// defaultDocBuilderImpl is the default implementation for the +// SPDX document builder +type defaultDocBuilderImpl struct{} + +// Generate generates a document +func (builder defaultDocBuilderImpl) GenerateDoc( + opts *DocBuilderOptions, genopts *DocGenerateOptions, +) (doc *Document, err error) { + if err := genopts.Validate(); err != nil { + return nil, errors.Wrap(err, "checking build options") + } + + spdx := NewSPDX() + spdx.options.AnalyzeLayers = genopts.AnalyseLayers + + if !util.Exists(opts.WorkDir) { + if err := os.MkdirAll(opts.WorkDir, os.FileMode(0o755)); err != nil { + return nil, errors.Wrap(err, "creating builder worskpace dir") + } + } + + tmpdir, err := os.MkdirTemp(opts.WorkDir, "doc-build-") + if err != nil { + return nil, errors.Wrapf(err, "creating temporary workdir in %s", opts.WorkDir) + } + defer os.RemoveAll(tmpdir) + + // Create the new document + doc = NewDocument() + doc.Namespace = genopts.Namespace + + if genopts.Namespace == "" { + logrus.Warn("Document namespace is empty, a mock URI will be supplied but the doc will not be valid") + doc.Namespace = "http://example.com/" + } + + for _, i := range genopts.Images { + logrus.Infof("Processing image: %s", i) + tararchive := filepath.Join(tmpdir, uuid.New().String()+".tar") + if err := spdx.PullImagesToArchive(i, tararchive); err != nil { + return nil, errors.Wrapf(err, "writing image %s to file", i) + } + p, err := spdx.PackageFromImageTarball(tararchive, &TarballOptions{}) + if err != nil { + 
return nil, errors.Wrap(err, "generating tarball package") + } + ref, err := name.ParseReference(i) + if err != nil { + return nil, errors.Wrapf(err, "parsing image reference %q", i) + } + + // Grab the package data from wither the tag or, if it's a digest, + // from parsing the digest + tag, ok := ref.(name.Tag) + if ok { + p.Name = tag.RepositoryStr() + p.DownloadLocation = tag.Name() + p.Version = tag.Identifier() + } else { + dgst, ok := ref.(name.Digest) + if ok { + p.Version = dgst.DigestStr() + p.Name = dgst.RepositoryStr() + p.DownloadLocation = dgst.Name() + } + } + if err := doc.AddPackage(p); err != nil { + return nil, errors.Wrap(err, "adding package to document") + } + } + + for _, tb := range genopts.Tarballs { + logrus.Infof("Processing tarball %s", tb) + p, err := spdx.PackageFromImageTarball(tb, &TarballOptions{}) + if err != nil { + return nil, errors.Wrap(err, "generating tarball package") + } + if err := doc.AddPackage(p); err != nil { + return nil, errors.Wrap(err, "adding package to document") + } + } + + for _, f := range genopts.Files { + logrus.Infof("Processing file %s", f) + f, err := spdx.FileFromPath(f) + if err != nil { + return nil, errors.Wrap(err, "adding file") + } + if err := doc.AddFile(f); err != nil { + return nil, errors.Wrap(err, "adding file to document") + } + } + return doc, nil +} + +// WriteDoc renders the document to a file +func (builder defaultDocBuilderImpl) WriteDoc(doc *Document, path string) error { + markup, err := doc.Render() + if err != nil { + return errors.Wrap(err, "generating document markup") + } + logrus.Infof("writing document to %s", path) + return errors.Wrap( + os.WriteFile(path, []byte(markup), os.FileMode(0o644)), + "writing document markup to file", + ) +} diff --git a/pkg/spdx/document.go b/pkg/spdx/document.go new file mode 100644 index 00000000000..2fc43dbf49c --- /dev/null +++ b/pkg/spdx/document.go @@ -0,0 +1,206 @@ +/* +Copyright 2021 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package spdx + +import ( + "bytes" + "crypto/sha1" + "fmt" + "html/template" + "log" + "os" + "regexp" + "time" + + "github.com/google/uuid" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +var docTemplate = `{{ if .Version }}SPDXVersion: {{.Version}} +{{ end -}} +DataLicense: CC0-1.0 +{{ if .ID }}SPDXID: {{ .ID }} +{{ end -}} +{{ if .Name }}DocumentName: {{ .Name }} +{{ end -}} +{{ if .Namespace }}DocumentNamespace: {{ .Namespace }} +{{ end -}} +{{ if .Creator -}} +{{- if .Creator.Person }}Creator: Person: {{ .Creator.Person }} +{{ end -}} +{{- if .Creator.Tool -}} +{{- range $key, $value := .Creator.Tool }}Creator: Tool: {{ $value }} +{{ end -}} +{{- end -}} +{{ end -}} +{{ if .Created }}Created: {{ dateFormat .Created }} +{{ end }} + +` + +// Document abstracts the SPDX document +type Document struct { + Version string // SPDX-2.2 + DataLicense string // CC0-1.0 + ID string // SPDXRef-DOCUMENT + Name string // hello-go-src + Namespace string // https://swinslow.net/spdx-examples/example6/hello-go-src-v1 + Creator struct { + Person string // Steve Winslow (steve@swinslow.net) + Tool []string // github.com/spdx/tools-golang/builder + } + Created time.Time // 2020-11-24T01:12:27Z + Packages map[string]*Package + Files map[string]*File // List of files +} + +// NewDocument returns a new SPDX document with some defaults preloaded +func NewDocument() *Document { + return &Document{ + ID: "SPDXRef-DOCUMENT", + Version: 
"SPDX-2.2", + DataLicense: "CC0-1.0", + Created: time.Now().UTC(), + Creator: struct { + Person string + Tool []string + }{ + Person: defaultDocumentAuthor, + Tool: []string{"k8s.io/release/pkg/spdx"}, + }, + } +} + +// AddPackage adds a new empty package to the document +func (d *Document) AddPackage(pkg *Package) error { + if d.Packages == nil { + d.Packages = map[string]*Package{} + } + + if pkg.ID == "" { + // If we so not have an ID but have a name generate it fro there + reg := regexp.MustCompile("[^a-zA-Z0-9-]+") + id := reg.ReplaceAllString(pkg.Name, "") + if id != "" { + pkg.ID = "SPDXRef-Package-" + id + } + } + if pkg.ID == "" { + return errors.New("package id is needed to add a new package") + } + if _, ok := d.Packages[pkg.ID]; ok { + return errors.New("a package named " + pkg.ID + " already exists in the document") + } + + d.Packages[pkg.ID] = pkg + return nil +} + +// Write outputs the SPDX document into a file +func (d *Document) Write(path string) error { + content, err := d.Render() + if err != nil { + return errors.Wrap(err, "rendering SPDX code") + } + if err := os.WriteFile(path, []byte(content), os.FileMode(0o644)); err != nil { + return errors.Wrap(err, "writing SPDX code to file") + } + logrus.Infof("SPDX SBOM written to %s", path) + return nil +} + +// Render reders the spdx manifest +func (d *Document) Render() (doc string, err error) { + var buf bytes.Buffer + funcMap := template.FuncMap{ + // The name "title" is what the function will be called in the template text. + "dateFormat": func(t time.Time) string { return t.UTC().Format("2006-02-01T15:04:05Z") }, + } + + if d.Name == "" { + d.Name = "BOM-SPDX-" + uuid.New().String() + logrus.Warnf("Document has no name defined, automatically set to " + d.Name) + } + + tmpl, err := template.New("document").Funcs(funcMap).Parse(docTemplate) + if err != nil { + log.Fatalf("parsing: %s", err) + } + + // Run the template to verify the output. 
+ if err := tmpl.Execute(&buf, d); err != nil { + return "", errors.Wrap(err, "executing spdx document template") + } + + doc = buf.String() + + // List files in the document. Files listed directly on the + // document do not contain relationships yet. + filesDescribed := "" + if len(d.Files) > 0 { + doc += "\n##### Files independent of packages\n\n" + filesDescribed = "\n" + } + + for _, file := range d.Files { + fileDoc, err := file.Render() + if err != nil { + return "", errors.Wrap(err, "rendering file "+file.Name) + } + doc += fileDoc + filesDescribed += fmt.Sprintf("Relationship: %s DESCRIBES %s\n\n", d.ID, file.ID) + } + doc += filesDescribed + + // Cycle all packages and get their data + for _, pkg := range d.Packages { + pkgDoc, err := pkg.Render() + if err != nil { + return "", errors.Wrap(err, "rendering pkg "+pkg.Name) + } + + doc += pkgDoc + doc += fmt.Sprintf("Relationship: %s DESCRIBES %s\n\n", d.ID, pkg.ID) + } + + return doc, err +} + +// AddFile adds a file contained in the package +func (d *Document) AddFile(file *File) error { + if d.Files == nil { + d.Files = map[string]*File{} + } + // If file does not have an ID, we try to build one + // by hashing the file name + if file.ID == "" { + if file.Name == "" { + return errors.New("unable to generate file ID, filename not set") + } + if d.Name == "" { + return errors.New("unable to generate file ID, filename not set") + } + h := sha1.New() + if _, err := h.Write([]byte(d.Name + ":" + file.Name)); err != nil { + return errors.Wrap(err, "getting sha1 of filename") + } + file.ID = "SPDXRef-File-" + fmt.Sprintf("%x", h.Sum(nil)) + } + d.Files[file.ID] = file + return nil +} diff --git a/pkg/spdx/file.go b/pkg/spdx/file.go new file mode 100644 index 00000000000..7b547b2a7f9 --- /dev/null +++ b/pkg/spdx/file.go @@ -0,0 +1,149 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package spdx + +import ( + "bytes" + "crypto/sha1" + "html/template" + "os" + "path/filepath" + "strings" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "sigs.k8s.io/release-utils/hash" + "sigs.k8s.io/release-utils/util" +) + +var fileTemplate = `{{ if .Name }}FileName: {{ .Name }} +{{ end -}} +{{ if .ID }}SPDXID: {{ .ID }} +{{ end -}} +{{- if .Checksum -}} +{{- range $key, $value := .Checksum -}} +{{ if . }}FileChecksum: {{ $key }}: {{ $value }} +{{ end -}} +{{- end -}} +{{- end -}} +LicenseConcluded: {{ if .LicenseConcluded }}{{ .LicenseConcluded }}{{ else }}NOASSERTION{{ end }} +LicenseInfoInFile: {{ if .LicenseInfoInFile }}LicenseInfoInFile: {{ .LicenseInfoInFile }}{{ else }}NOASSERTION{{ end }} +FileCopyrightText: {{ if .CopyrightText }}{{ .CopyrightText }} +{{ else }}NOASSERTION{{ end }} + +` + +// File abstracts a file contained in a package +type File struct { + Name string // string /Makefile + FileName string // Name of the file + ID string // SPDXRef-Makefile + LicenseConcluded string // GPL-3.0-or-later + LicenseInfoInFile string // GPL-3.0-or-later + CopyrightText string // NOASSERTION + SourceFile string // Source file to read from (not part of the spec) + Checksum map[string]string + + options *FileOptions // Options +} + +func NewFile() (f *File) { + f = &File{ + options: &FileOptions{}, + } + return f +} + +func (f *File) Options() *FileOptions { + return f.options +} + +// FileOptions +type FileOptions struct { + WorkDir string +} + +// ReadChecksums receives a path to a file and calculates its checksums +func (f *File) 
ReadChecksums(filePath string) error { + if f.Checksum == nil { + f.Checksum = map[string]string{} + } + file, err := os.Open(filePath) + if err != nil { + return errors.Wrap(err, "opening file for reading: "+filePath) + } + defer file.Close() + // TODO: Make this line like the others once this PR is + // included in a k-sigs/release-util release: + // https://github.com/kubernetes-sigs/release-utils/pull/16 + s1, err := hash.ForFile(filePath, sha1.New()) + if err != nil { + return errors.Wrap(err, "getting sha1 sum for file") + } + s256, err := hash.SHA256ForFile(filePath) + if err != nil { + return errors.Wrap(err, "getting file checksums") + } + s512, err := hash.SHA512ForFile(filePath) + if err != nil { + return errors.Wrap(err, "getting file checksums") + } + + f.Checksum = map[string]string{ + "SHA1": s1, + "SHA256": s256, + "SHA512": s512, + } + return nil +} + +// Render renders the document fragment of a file +func (f *File) Render() (docFragment string, err error) { + var buf bytes.Buffer + tmpl, err := template.New("file").Parse(fileTemplate) + if err != nil { + return "", errors.Wrap(err, "parsing file template") + } + + // Run the template to verify the output. 
+ if err := tmpl.Execute(&buf, f); err != nil { + return "", errors.Wrap(err, "executing spdx file template") + } + + docFragment = buf.String() + return docFragment, nil +} + +// ReadSourceFile reads the source file for the package and populates +// the fields derived from it (Checksums and FileName) +func (f *File) ReadSourceFile(path string) error { + if !util.Exists(path) { + return errors.New("unable to find package source file") + } + + if err := f.ReadChecksums(path); err != nil { + return errors.Wrap(err, "reading file checksums") + } + + f.SourceFile = path + f.Name = strings.TrimPrefix( + path, f.Options().WorkDir+string(filepath.Separator), + ) + f.ID = "SPDXRef-File-" + f.Checksum["SHA256"][0:15] + logrus.Infof("Added file %s as %s", f.Name, f.ID) + return nil +} diff --git a/pkg/spdx/imageanalyzer.go b/pkg/spdx/imageanalyzer.go new file mode 100644 index 00000000000..b00316bd253 --- /dev/null +++ b/pkg/spdx/imageanalyzer.go @@ -0,0 +1,86 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package spdx + +import ( + "os" + "path/filepath" + + "github.com/pkg/errors" + + "github.com/sirupsen/logrus" +) + +// ImageAnalyzer is an object that checks images to see if we can add more +// information to a spdx package based on its content. Each analyzer is +// written specifically for a layer type. The idea is to be able to enrich +// common base images with more data to have the most common images covered. 
+type ImageAnalyzer struct { + Analyzers map[string]ContainerLayerAnalyzer +} + +func NewImageAnalyzer() *ImageAnalyzer { + // Default options for all analyzers + opts := &ContainerLayerAnalyzerOptions{ + LicenseCacheDir: filepath.Join(os.TempDir(), spdxLicenseCacheDir), + } + + // Create the instance with all the drivers we have so far + return &ImageAnalyzer{ + Analyzers: map[string]ContainerLayerAnalyzer{ + "distroless": &distrolessHandler{ + Options: opts, + }, + "go-runner": &goRunnerHandler{ + Options: opts, + }, + }, + } +} + +// AnalyzeLayer is the main method of the analyzer +// it will query each of the analyzers to see if we can +// extract more image from the layer and enrich the +// spdx package referenced by pkg +func (ia *ImageAnalyzer) AnalyzeLayer(layerPath string, pkg *Package) error { + if pkg == nil { + return errors.New("Unable to analyze layer, package is null") + } + for label, handler := range ia.Analyzers { + logrus.Infof("Scanning layer with %s", label) + can, err := handler.CanHandle(layerPath) + if err != nil { + return errors.Wrapf(err, "checking if layer can be handled with %s", label) + } + + if can { + return handler.ReadPackageData(layerPath, pkg) + } + } + return nil +} + +// ContainerLayerAnalyzer is an interface that knows how to read a +// known container layer and populate a SPDX package +type ContainerLayerAnalyzer interface { + ReadPackageData(layerPath string, pkg *Package) error + CanHandle(layerPath string) (bool, error) +} + +type ContainerLayerAnalyzerOptions struct { + LicenseCacheDir string +} diff --git a/pkg/spdx/imageanalyzer_distroless.go b/pkg/spdx/imageanalyzer_distroless.go new file mode 100644 index 00000000000..a80585ee64e --- /dev/null +++ b/pkg/spdx/imageanalyzer_distroless.go @@ -0,0 +1,277 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package spdx + +import ( + "archive/tar" + "bytes" + "compress/gzip" + "encoding/json" + "io" + "io/ioutil" + "os" + "path/filepath" + "regexp" + "strings" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "k8s.io/release/pkg/license" + "sigs.k8s.io/release-utils/http" + "sigs.k8s.io/release-utils/util" +) + +const ( + distrolessBundleURL = "https://raw.githubusercontent.com/GoogleContainerTools/distroless/master/" + distrolessBundle = "package_bundle_amd64_debian10.versions" // TODO: Perhaps make an option + distrolessLicensePath = "./usr/share/doc/" + distrolessLicenseName = "/copyright" + distrolessCommonLicenseDir = "/usr/share/common-licenses/" + commonLicensesRe = `(?i)/usr/share/common-licenses/[-A-Z0-9\.]+` + gzExt = ".gz" +) + +type distrolessHandler struct { + reader *license.Reader + Options *ContainerLayerAnalyzerOptions +} + +// ReadPackageData reads the distroless +func (h *distrolessHandler) ReadPackageData(layerPath string, pkg *Package) error { + // Create a new license reader to scan license files + licenseReader, err := h.licenseReader(h.Options) + if err != nil { + return errors.Wrap( + err, "creating license reader to scan distroless image", + ) + } + + // Create the package representing the distroless layer + pkg.Name = "distroless" + pkg.ID = "SPDXRef-Package-distroless" + pkg.FilesAnalyzed = false + + // Fetch the current distrolless package list + packageList, err := h.fetchDistrolessPackages() + if err != nil { + return errors.Wrap(err, "getting package lists") + } + + // Open the distroless layer tar for reading + tarfile, err 
:= os.Open(layerPath) + if err != nil { + return errors.Wrap(err, "opening distroless image layer ") + } + defer tarfile.Close() + dir, err := os.MkdirTemp(os.TempDir(), "image-process-") + if err != nil { + return errors.Wrap(err, "creating temporary directory") + } + defer os.RemoveAll(dir) + var tr *tar.Reader + if filepath.Ext(layerPath) == gzExt { + gzf, err := gzip.NewReader(tarfile) + if err != nil { + return errors.Wrap(err, "creating gzip reader") + } + tr = tar.NewReader(gzf) + } else { + tr = tar.NewReader(tarfile) + } + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + return errors.Wrap(err, "reading the image tarfile") + } + + // Scan the license directories to to determine the installed packages + if strings.HasPrefix(hdr.Name, distrolessLicensePath) && strings.HasSuffix(hdr.Name, distrolessLicenseName) { + // We infer the name of the package from the license directory + packageName := strings.TrimSuffix(strings.TrimPrefix(hdr.Name, distrolessLicensePath), distrolessLicenseName) + logrus.Infof("Creating SPDX subpackage " + packageName) + subpkg := NewPackage() + subpkg.Name = packageName + if _, ok := packageList[subpkg.Name]; ok { + logrus.Infof(" distroless uses version %s of %s", packageList[subpkg.Name], subpkg.Name) + subpkg.Version = packageList[subpkg.Name] + } else { + logrus.Warnf("could not determine version for package %s", subpkg.Name) + } + + // Extract the package license to a file + f, err := os.Create(filepath.Join(dir, packageName+".license")) + if err != nil { + return errors.Wrap(err, "creating image layer file") + } + defer f.Close() + + if _, err := io.Copy(f, tr); err != nil { + return errors.Wrap(err, "extracting license data for "+subpkg.Name) + } + + // Use our license classifier to try to determine + // the license we are dealing with + spdxlicense, err := licenseReader.LicenseFromFile(f.Name()) + if err != nil { + return errors.Wrap(err, "reading license from file") + } + + // If we still do 
not have a license, try to get it from the + // devian copyright files. We have to read the files so... + if spdxlicense == nil { + // ...open the file + fileData, err := ioutil.ReadFile(filepath.Join(dir, packageName+".license")) + if err != nil { + return errors.Wrap(err, "reading license file") + } + + // We will try to look for the license in two ways: + if strings.Contains(string(fileData), "is in the public domain") { + // Option 1: File is in the public domain + logrus.Info("File is the public domain") + + // In this case we include the full license text in the manifest + subpkg.CopyrightText = string(fileData) + subpkg.LicenseComments = "Found public domain declaration in copyright text file" + } else { + // Option 2: Copyright file references an installed license. + re := regexp.MustCompile(commonLicensesRe) + label := re.FindString(string(fileData)) + label = strings.TrimPrefix(label, distrolessCommonLicenseDir) + label = strings.TrimSuffix(label, ".") + + // Translate from debian to SPDX label + label = license.DebianLicenseLabels[label] + if label != "" { + spdxlicense = licenseReader.LicenseFromLabel(label) + logrus.Infof("Found license %s for package %s by reading copyright file", spdxlicense.LicenseID, subpkg.Name) + subpkg.LicenseDeclared = spdxlicense.LicenseID + } + } + } else { + subpkg.LicenseDeclared = spdxlicense.LicenseID + } + + // Add the debian package to the layer package + if err := pkg.AddPackage(subpkg); err != nil { + return errors.Wrapf(err, "adding %s subpackage", subpkg.Name) + } + } + } + return nil +} + +// fetchDistrolessPackages retrieves the package list published at the +// distroless repository keyed by package name and version +func (h *distrolessHandler) fetchDistrolessPackages() (pkgInfo map[string]string, err error) { + logrus.Info("Fetching distroless image package list") + body, err := http.NewAgent().Get(distrolessBundleURL + distrolessBundle) + if err != nil { + return nil, errors.Wrap(err, "fetching distroless image 
package manifest") + } + + pkgInfo = map[string]string{} + if err := json.Unmarshal(body, &pkgInfo); err != nil { + return nil, errors.Wrap(err, "unmarshalling the distroless package list") + } + logrus.Infof( + "Distroless bundle for %s lists %d packages", + distrolessBundle, len(pkgInfo), + ) + return pkgInfo, nil +} + +// licenseReader returns a reusable license reader +func (h *distrolessHandler) licenseReader(o *ContainerLayerAnalyzerOptions) (*license.Reader, error) { + if h.reader == nil { + logrus.Info("Initializing licence reader with default options") + // We use a default license cache + opts := license.DefaultReaderOptions + ldir := filepath.Join(os.TempDir(), "spdx-license-reader-licenses") + // ... unless overridden by the options + if o.LicenseCacheDir != "" { + ldir = o.LicenseCacheDir + } + + // If the license cache does not exist, create it + if !util.Exists(ldir) { + if err := os.MkdirAll(ldir, os.FileMode(0o0755)); err != nil { + return nil, errors.Wrap(err, "creating license cache directory") + } + } + opts.CacheDir = ldir + // Create the new reader + reader, err := license.NewReaderWithOptions(opts) + if err != nil { + return nil, errors.Wrap(err, "creating reusable license reader") + } + h.reader = reader + } + return h.reader, nil +} + +// CanHandle returns a bools indicating if this handle can supply more +// data about the specified tarball +func (h *distrolessHandler) CanHandle(layerPath string) (can bool, err error) { + // Open the tar file + f, err := os.Open(layerPath) + if err != nil { + return can, errors.Wrap(err, "opening tarball") + } + + var tr *tar.Reader + if filepath.Ext(layerPath) == gzExt { + gzf, err := gzip.NewReader(f) + if err != nil { + return can, errors.Wrap(err, "creating gzip reader") + } + tr = tar.NewReader(gzf) + } else { + tr = tar.NewReader(f) + } + b := bytes.NewBuffer(make([]byte, 0)) + // Search for the os-file in the tar contents + for { + hdr, err := tr.Next() + if err == io.EOF { + break // End of archive 
+ } + if err != nil { + return can, errors.Wrap(err, "reading the image tarfile") + } + + if hdr.FileInfo().IsDir() { + continue + } + + // Scan for the os-release file in the tarball + if hdr.Name == "./etc/os-release" { + if _, err = io.Copy(b, tr); err != nil { + return can, errors.Wrap(err, "extracting os-release file") + } + } + } + // If the image has the Distroless tag in the OS file, we can handle it + if strings.Contains(b.String(), `PRETTY_NAME="Distroless"`) { + logrus.Infof("👍 Tarball %s identified as distroless layer", layerPath) + return true, nil + } + return can, nil +} diff --git a/pkg/spdx/imageanalyzer_gorunner.go b/pkg/spdx/imageanalyzer_gorunner.go new file mode 100644 index 00000000000..93808414b27 --- /dev/null +++ b/pkg/spdx/imageanalyzer_gorunner.go @@ -0,0 +1,172 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package spdx + +import ( + "archive/tar" + "compress/gzip" + "io" + "io/ioutil" + "os" + "path/filepath" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "k8s.io/release/pkg/license" + "sigs.k8s.io/release-utils/http" + "sigs.k8s.io/release-utils/util" +) + +const ( + goRunnerVersionURL = "https://raw.githubusercontent.com/kubernetes/release/master/images/build/go-runner/VERSION" + goRunnerLicenseURL = "https://raw.githubusercontent.com/kubernetes/release/master/images/build/go-runner/Dockerfile" +) + +type goRunnerHandler struct { + reader *license.Reader + Options *ContainerLayerAnalyzerOptions +} + +func (h *goRunnerHandler) ReadPackageData(layerPath string, pkg *Package) error { + pkg.Supplier.Person = "Kubernetes Release Managers (release-managers@kubernetes.io)" + pkg.Name = "go-runner" + + // Get the go-runner version + // TODO: Add http retries + versionb, err := http.NewAgent().Get(goRunnerVersionURL) + if err != nil { + return errors.Wrap(err, "fetching go-runner VERSION file") + } + logrus.Infof("go-runner image is at version %s", string(versionb)) + pkg.Version = string(versionb) + + // Read the docker file to scan for license + lic, err := http.NewAgent().Get(goRunnerLicenseURL) + if err != nil { + return errors.Wrap(err, "fetching go-runner VERSION file") + } + + df, err := ioutil.TempFile(os.TempDir(), "gorunner-dockerfile-") + if err != nil { + return errors.Wrap(err, "creating temporary file to read go-runner license") + } + defer df.Close() + defer os.Remove(df.Name()) + + if err := ioutil.WriteFile(df.Name(), lic, os.FileMode(0o644)); err != nil { + return errors.Wrap(err, "writing go-runner license to temp file") + } + + // Let's extract the license for the layer: + var grlic *license.License + licenseReader, err := h.licenseReader(h.Options) + if err != nil { + return errors.Wrap(err, "getting license reader") + } + // First, check if the file has our boiler plate + hasbp, err := license.HasKubernetesBoilerPlate(df.Name()) + 
if err != nil { + return errors.Wrap(err, "checking for k8s boilerplate in go-runner") + } + // If the boilerplate was found, we know it is apache2 + if hasbp { + grlic = licenseReader.LicenseFromLabel("Apache-2.0") + // Otherwise, as a fallback, try to classify the file + } else { + grlic, err = licenseReader.LicenseFromFile(df.Name()) + if err != nil { + return errors.Wrap(err, "attempting to read go-runner license") + } + } + pkg.LicenseDeclared = grlic.LicenseID + logrus.Infof("Found license %s in go-runner image", grlic.LicenseID) + return nil +} + +// licenseReader returns a reusable license reader +func (h *goRunnerHandler) licenseReader(o *ContainerLayerAnalyzerOptions) (*license.Reader, error) { + if h.reader == nil { + logrus.Info("Initializing licence reader with default options") + // We use a default license cache + opts := license.DefaultReaderOptions + ldir := filepath.Join(os.TempDir(), "spdx-license-reader-licenses") + // ... unless overridden by the options + if o.LicenseCacheDir != "" { + ldir = o.LicenseCacheDir + } + + // If the license cache does not exist, create it + if !util.Exists(ldir) { + if err := os.MkdirAll(ldir, os.FileMode(0o0755)); err != nil { + return nil, errors.Wrap(err, "creating license cache directory") + } + } + opts.CacheDir = ldir + // Create the new reader + reader, err := license.NewReaderWithOptions(opts) + if err != nil { + return nil, errors.Wrap(err, "creating reusable license reader") + } + h.reader = reader + } + return h.reader, nil +} + +// CanHandle returns a bools indicating if this handle can supply more +// data about the specified tarball +func (h *goRunnerHandler) CanHandle(layerPath string) (can bool, err error) { + // Open the tar file + f, err := os.Open(layerPath) + if err != nil { + return can, errors.Wrap(err, "opening tarball") + } + defer f.Close() + var tr *tar.Reader + if filepath.Ext(layerPath) == ".gz" { + gzf, err := gzip.NewReader(f) + if err != nil { + return can, errors.Wrap(err, "creating 
gzip reader") + } + tr = tar.NewReader(gzf) + } else { + tr = tar.NewReader(f) + } + + binaryFound := false + // Search for the os-file in the tar contents + for { + hdr, err := tr.Next() + if err == io.EOF { + break // End of archive + } + if err != nil { + return can, errors.Wrapf(err, "reading the image tarfile at %s", layerPath) + } + + if hdr.FileInfo().IsDir() { + continue + } + + // Scan for the os-release file in the tarball + if hdr.Name == "go-runner" { + binaryFound = true + logrus.Infof("👍 Tarball %s identified as a go-runner layer", layerPath) + break + } + } + return binaryFound, nil +} diff --git a/pkg/spdx/implementation.go b/pkg/spdx/implementation.go new file mode 100644 index 00000000000..e19bc6a353e --- /dev/null +++ b/pkg/spdx/implementation.go @@ -0,0 +1,179 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package spdx + +//go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 -generate + +import ( + "archive/tar" + "crypto/sha1" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/google/go-containerregistry/pkg/name" + v1 "github.com/google/go-containerregistry/pkg/v1" + "github.com/google/go-containerregistry/pkg/v1/remote" + "github.com/google/go-containerregistry/pkg/v1/tarball" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + "sigs.k8s.io/release-utils/util" +) + +//counterfeiter:generate . 
spdxImplementation + +type spdxImplementation interface { + ExtractTarballTmp(string) (string, error) + ReadArchiveManifest(string) (*ArchiveManifest, error) + PullImagesToArchive(string, string) error + PackageFromLayerTarBall(string, *TarballOptions) (*Package, error) +} + +type spdxDefaultImplementation struct{} + +// ExtractTarballTmp extracts a tarball to a temporary directory +func (di *spdxDefaultImplementation) ExtractTarballTmp(tarPath string) (tmpDir string, err error) { + tmpDir, err = os.MkdirTemp(os.TempDir(), "spdx-tar-extract-") + if err != nil { + return tmpDir, errors.Wrap(err, "creating temporary directory for tar extraction") + } + + // Open the tar file + f, err := os.Open(tarPath) + if err != nil { + return tmpDir, errors.Wrap(err, "opening tarball") + } + + tr := tar.NewReader(f) + numFiles := 0 + for { + hdr, err := tr.Next() + if err == io.EOF { + break // End of archive + } + if err != nil { + return tmpDir, errors.Wrap(err, "reading the image tarfile") + } + + if hdr.FileInfo().IsDir() { + continue + } + + if strings.HasPrefix(filepath.Base(hdr.FileInfo().Name()), ".wh") { + logrus.Info("Skipping extraction of whiteout file") + continue + } + + if err := os.MkdirAll( + filepath.Join(tmpDir, filepath.Dir(hdr.Name)), os.FileMode(0o755), + ); err != nil { + return tmpDir, errors.Wrap(err, "creating image directory structure") + } + + targetFile := filepath.Join(tmpDir, hdr.Name) + f, err := os.Create(targetFile) + if err != nil { + return tmpDir, errors.Wrap(err, "creating image layer file") + } + defer f.Close() + + if _, err := io.Copy(f, tr); err != nil { + return tmpDir, errors.Wrap(err, "extracting image data") + } + numFiles++ + } + logrus.Infof("Successfully extracted %d files from image tarball %s", numFiles, tarPath) + return tmpDir, err +} + +// readArchiveManifest extracts the manifest json from an image tar +// archive and returns the data as a struct +func (di *spdxDefaultImplementation) ReadArchiveManifest(manifestPath string) 
(manifest *ArchiveManifest, err error) { + // Check that we have the archive manifest.json file + if !util.Exists(manifestPath) { + return manifest, errors.New("unable to find manifest file " + manifestPath) + } + + // Parse the json file + manifestData := []ArchiveManifest{} + manifestJSON, err := os.ReadFile(manifestPath) + if err != nil { + return manifest, errors.Wrap(err, "unable to read from tarfile") + } + if err := json.Unmarshal(manifestJSON, &manifestData); err != nil { + fmt.Println(string(manifestJSON)) + return manifest, errors.Wrap(err, "unmarshalling image manifest") + } + return &manifestData[0], nil +} + +// PullImagesToArchive takes an image reference (a tag or a digest) +// and writes it into a docker tar archive in path +func (di *spdxDefaultImplementation) PullImagesToArchive(referenceString, path string) error { + // Parse the string to get a reference (tag or digest) + ref, err := name.ParseReference(referenceString) + if err != nil { + return errors.Wrapf(err, "parsing reference %s", referenceString) + } + + // Build an image from the reference + img, err := remote.Image(ref) + if err != nil { + return errors.Wrap(err, "getting image") + } + + // This algo comes from crane: + // Try to cast the reference as a tag: + tag, ok := ref.(name.Tag) + // if it fails + if !ok { + // .. and it is a digest + d, ok := ref.(name.Digest) + if !ok { + return fmt.Errorf("reference is not a tag or digest") + } + // We add a mock tag + tag = d.Repository.Tag("from-digest") // Append digest here? 
+ } + + return tarball.MultiWriteToFile(path, map[name.Tag]v1.Image{tag: img}) +} + +// PackageFromLayerTarBall builds a SPDX package from an image +// tarball +func (di *spdxDefaultImplementation) PackageFromLayerTarBall( + layerFile string, opts *TarballOptions, +) (*Package, error) { + logrus.Infof("Generating SPDX package from layer in %s", layerFile) + + pkg := NewPackage() + pkg.options.WorkDir = opts.ExtractDir + if err := pkg.ReadSourceFile(filepath.Join(opts.ExtractDir, layerFile)); err != nil { + return nil, errors.Wrap(err, "reading source file") + } + // Build the pkg name from its internal path + h := sha1.New() + if _, err := h.Write([]byte(layerFile)); err != nil { + return nil, errors.Wrap(err, "hashing file path") + } + pkg.Name = fmt.Sprintf("%x", h.Sum(nil)) + + return pkg, nil +} diff --git a/pkg/spdx/package.go b/pkg/spdx/package.go new file mode 100644 index 00000000000..e5768ed4b9c --- /dev/null +++ b/pkg/spdx/package.go @@ -0,0 +1,243 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package spdx + +import ( + "bytes" + "crypto/sha1" + "fmt" + "html/template" + "path/filepath" + "regexp" + "sort" + "strings" + + "github.com/pkg/errors" + "sigs.k8s.io/release-utils/hash" + "sigs.k8s.io/release-utils/util" +) + +var packageTemplate = `##### Package: {{ .Name }} + +{{ if .Name }}PackageName: {{ .Name }} +{{ end -}} +{{ if .ID }}SPDXID: {{ .ID }} +{{ end -}} +{{- if .Checksum -}} +{{- range $key, $value := .Checksum -}} +{{ if . }}PackageChecksum: {{ $key }}: {{ $value }} +{{ end -}} +{{- end -}} +{{- end -}} +PackageDownloadLocation: {{ if .DownloadLocation }}{{ .DownloadLocation }}{{ else }}NONE{{ end }} +FilesAnalyzed: {{ .FilesAnalyzed }} +{{ if .VerificationCode }}PackageVerificationCode: {{ .VerificationCode }} +{{ end -}} +PackageLicenseConcluded: {{ if .LicenseConcluded }}{{ .LicenseConcluded }}{{ else }}NOASSERTION{{ end }} +{{ if .FileName }}PackageFileName: {{ .FileName }} +{{ end -}} +{{ if .LicenseInfoFromFiles }}PackageLicenseInfoFromFiles: {{ .LicenseInfoFromFiles }} +{{ end -}} +{{ if .Version }}PackageVersion: {{ .Version }} +{{ end -}} +PackageLicenseDeclared: {{ if .LicenseDeclared }}{{ .LicenseDeclared }}{{ else }}NOASSERTION{{ end }} +PackageCopyrightText: {{ if .CopyrightText }}{{ .CopyrightText }} +{{ else }}NOASSERTION{{ end }} +` + +// Package groups a set of files +type Package struct { + FilesAnalyzed bool // true + Name string // hello-go-src + ID string // SPDXRef-Package-hello-go-src + DownloadLocation string // git@github.com:swinslow/spdx-examples.git#example6/content/src + VerificationCode string // 6486e016b01e9ec8a76998cefd0705144d869234 + LicenseConcluded string // LicenseID o NOASSERTION + LicenseInfoFromFiles string // GPL-3.0-or-later + LicenseDeclared string // GPL-3.0-or-later + LicenseComments string // record any relevant background information or analysis that went in to arriving at the Concluded License + CopyrightText string // string NOASSERTION + Version string // Package version + FileName 
string // Name of the package + SourceFile string // Source file for the package (taball for images, rpm, deb, etc) + + // Supplier: the actual distribution source for the package/directory + Supplier struct { + Person string // person name and optional () + Organization string // organization name and optional () + } + // Originator: For example, the SPDX file identifies the package glibc and Red Hat as the Package Supplier, + // but the Free Software Foundation is the Package Originator. + Originator struct { + Person string // person name and optional () + Organization string // organization name and optional () + } + // Subpackages contained + Packages map[string]*Package // Sub packages conatined in this pkg + Files map[string]*File // List of files + Checksum map[string]string // Checksum of the package + + options *PackageOptions // Options +} + +func NewPackage() (p *Package) { + p = &Package{ + options: &PackageOptions{}, + } + return p +} + +type PackageOptions struct { + WorkDir string // Working directory to read files from +} + +func (p *Package) Options() *PackageOptions { + return p.options +} + +// ReadSourceFile reads the source file for the package and populates +// the package fields derived from it (Checksums and FileName) +func (p *Package) ReadSourceFile(path string) error { + if !util.Exists(path) { + return errors.New("unable to find package source file") + } + s256, err := hash.SHA256ForFile(path) + if err != nil { + return errors.Wrap(err, "getting source file sha256") + } + s512, err := hash.SHA512ForFile(path) + if err != nil { + return errors.Wrap(err, "getting source file sha512") + } + p.Checksum = map[string]string{ + "SHA256": s256, + "SHA512": s512, + } + p.SourceFile = path + p.FileName = strings.TrimPrefix(path, p.Options().WorkDir+string(filepath.Separator)) + return nil +} + +// AddFile adds a file contained in the package +func (p *Package) AddFile(file *File) error { + if p.Files == nil { + p.Files = map[string]*File{} + } + 
// If file does not have an ID, we try to build one + // by hashing the file name + if file.ID == "" { + if file.Name == "" { + return errors.New("unable to generate file ID, filename not set") + } + if p.Name == "" { + return errors.New("unable to generate file ID, filename not set") + } + h := sha1.New() + if _, err := h.Write([]byte(p.Name + ":" + file.Name)); err != nil { + return errors.Wrap(err, "getting sha1 of filename") + } + file.ID = "SPDXRef-File-" + fmt.Sprintf("%x", h.Sum(nil)) + } + p.Files[file.ID] = file + return nil +} + +// AddPackage adds a new subpackage to a package +func (p *Package) AddPackage(pkg *Package) error { + if p.Packages == nil { + p.Packages = map[string]*Package{} + } + if pkg.ID == "" { + // If we so not have an ID but have a name generate it fro there + reg := regexp.MustCompile("[^a-zA-Z0-9-]+") + id := reg.ReplaceAllString(pkg.Name, "") + if id != "" { + pkg.ID = "SPDXRef-Package-" + id + } + } + if pkg.ID == "" { + return errors.New("package name is needed to add a new package") + } + if _, ok := p.Packages[pkg.ID]; ok { + return errors.New("a package named " + pkg.ID + " already exists in the document") + } + + p.Packages[pkg.ID] = pkg + return nil +} + +// Render renders the document fragment of the package +func (p *Package) Render() (docFragment string, err error) { + var buf bytes.Buffer + tmpl, err := template.New("package").Parse(packageTemplate) + if err != nil { + return "", errors.Wrap(err, "parsing package template") + } + + // If files were analyzed, calculate the verification + if p.FilesAnalyzed { + if len(p.Files) == 0 { + return docFragment, errors.New("unable to get package verification code, package has no files") + } + shaList := []string{} + for _, f := range p.Files { + if f.Checksum == nil { + return docFragment, errors.New("unable to render package, file has no checksums") + } + if _, ok := f.Checksum["SHA1"]; !ok { + return docFragment, errors.New("unable to render package, files were analyzed but 
some do not have sha1 checksum") + } + shaList = append(shaList, f.Checksum["SHA1"]) + } + sort.Strings(shaList) + h := sha1.New() + if _, err := h.Write([]byte(strings.Join(shaList, ""))); err != nil { + return docFragment, errors.Wrap(err, "getting sha1 verification of files") + } + p.VerificationCode = fmt.Sprintf("%x", h.Sum(nil)) + } + + // Run the template to verify the output. + if err := tmpl.Execute(&buf, p); err != nil { + return "", errors.Wrap(err, "executing spdx package template") + } + + docFragment = buf.String() + + for _, f := range p.Files { + fileFragment, err := f.Render() + if err != nil { + return "", errors.Wrap(err, "rendering file "+f.Name) + } + docFragment += fileFragment + docFragment += fmt.Sprintf("Relationship: %s CONTAINS %s\n\n", p.ID, f.ID) + } + + // Print the contained sub packages + if p.Packages != nil { + for _, pkg := range p.Packages { + pkgDoc, err := pkg.Render() + if err != nil { + return "", errors.Wrap(err, "rendering pkg "+pkg.Name) + } + + docFragment += pkgDoc + docFragment += fmt.Sprintf("Relationship: %s CONTAINS %s\n\n", p.ID, pkg.ID) + } + } + return docFragment, nil +} diff --git a/pkg/spdx/spdx.go b/pkg/spdx/spdx.go new file mode 100644 index 00000000000..07ea678494f --- /dev/null +++ b/pkg/spdx/spdx.go @@ -0,0 +1,170 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package spdx + +import ( + "os" + "path/filepath" + + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "sigs.k8s.io/release-utils/util" +) + +const ( + defaultDocumentAuthor = "Kubernetes Release Managers (release-managers@kubernetes.io)" + archiveManifestFilename = "manifest.json" + spdxLicenseCacheDir = "spdx/lic" +) + +type SPDX struct { + impl spdxImplementation + options *Options +} + +func NewSPDX() *SPDX { + return &SPDX{ + impl: &spdxDefaultImplementation{}, + options: &defaultSPDXOptions, + } +} + +func (spdx *SPDX) SetImplementation(impl spdxImplementation) { + spdx.impl = impl +} + +type Options struct { + LicenseCacheDir string // Directory to cache SPDX license information + AnalyzeLayers bool +} + +func (spdx *SPDX) Options() *Options { + return spdx.options +} + +var defaultSPDXOptions = Options{ + LicenseCacheDir: filepath.Join(os.TempDir(), spdxLicenseCacheDir), + AnalyzeLayers: true, +} + +type ArchiveManifest struct { + ConfigFilename string `json:"Config"` + RepoTags []string `json:"RepoTags"` + LayerFiles []string `json:"Layers"` +} + +// ImageOptions set of options for processing tar files +type TarballOptions struct { + ExtractDir string // Directory where the docker tar archive will be extracted +} + +// PackageFromImageTarball returns a SPDX package from a tarball +func (spdx *SPDX) PackageFromImageTarball( + tarPath string, opts *TarballOptions, +) (imagePackage *Package, err error) { + logrus.Infof("Generating SPDX package from image tarball %s", tarPath) + + // Extract all files from tarfile + opts.ExtractDir, err = spdx.impl.ExtractTarballTmp(tarPath) + if err != nil { + return nil, errors.Wrap(err, "extracting tarball to temp dir") + } + defer os.RemoveAll(opts.ExtractDir) + + // Read the archive manifest json: + manifest, err := spdx.impl.ReadArchiveManifest( + filepath.Join(opts.ExtractDir, archiveManifestFilename), + ) + if err != nil { + return nil, errors.Wrap(err, "while reading docker archive manifest") + } + + 
if len(manifest.RepoTags) == 0 { + return nil, errors.New("No RepoTags found in manifest") + } + + if manifest.RepoTags[0] == "" { + return nil, errors.New( + "unable to add tar archive, manifest does not have a RepoTags entry", + ) + } + + logrus.Infof("Package describes %s image", manifest.RepoTags[0]) + + // Create the new SPDX package + imagePackage = NewPackage() + imagePackage.Options().WorkDir = opts.ExtractDir + imagePackage.Name = manifest.RepoTags[0] + + logrus.Infof("Image manifest lists %d layers", len(manifest.LayerFiles)) + + // Cycle all the layers from the manifest and add them as packages + for _, layerFile := range manifest.LayerFiles { + // Generate a package from a layer + pkg, err := spdx.impl.PackageFromLayerTarBall(layerFile, opts) + if err != nil { + return nil, errors.Wrap(err, "building package from layer") + } + + // If the option is enabled, scan the container layers + if spdx.options.AnalyzeLayers { + if err := spdx.AnalyzeImageLayer(filepath.Join(opts.ExtractDir, layerFile), pkg); err != nil { + return nil, errors.Wrap(err, "scanning layer "+pkg.ID) + } + } else { + logrus.Info("Not performing deep image analysis (opts.AnalyzeLayers = false)") + } + + // Add the layer package to the image package + if err := imagePackage.AddPackage(pkg); err != nil { + return nil, errors.Wrap(err, "adding layer to image package") + } + } + + // return the finished package + return imagePackage, nil +} + +// FileFromPath creates a File object from a path +func (spdx *SPDX) FileFromPath(filePath string) (*File, error) { + if !util.Exists(filePath) { + return nil, errors.New("file does not exist") + } + f := NewFile() + if err := f.ReadSourceFile(filePath); err != nil { + return nil, errors.Wrap(err, "creating file from path") + } + return f, nil +} + +// AnalyzeLayer uses the collection of image analyzers to see if +// it matches a known image from which a spdx package can be +// enriched with more information +func (spdx *SPDX) 
AnalyzeImageLayer(layerPath string, pkg *Package) error { + return NewImageAnalyzer().AnalyzeLayer(layerPath, pkg) +} + +// ExtractTarballTmp extracts a tarball to a temp file +func (spdx *SPDX) ExtractTarballTmp(tarPath string) (tmpDir string, err error) { + return spdx.impl.ExtractTarballTmp(tarPath) +} + +// PullImagesToArchive +func (spdx *SPDX) PullImagesToArchive(reference, path string) error { + return spdx.impl.PullImagesToArchive(reference, path) +}