Skip to content

Commit

Permalink
Add whitelist option (#2)
Browse files Browse the repository at this point in the history
* Add whitelist option

* Add PR hook to GH action

* Address CR comments
  • Loading branch information
charith-elastic authored Apr 23, 2020
1 parent a1d9780 commit 9ffa081
Show file tree
Hide file tree
Showing 10 changed files with 214 additions and 29 deletions.
3 changes: 3 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ on:
push:
branches:
- '*'
pull_request:
branches:
- '*'
jobs:

build:
Expand Down
30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Go Licence Detector

This is a tool designed to generate licence notices and dependency listings for Go projects at Elastic. It parses the output of `go list -m -json all` to produce its output.


## Usage

```
Expand All @@ -25,13 +26,34 @@ Flags:
Path to the NOTICE template file. (default "example/templates/NOTICE.txt.tmpl")
-overrides string
Path to the file containing override directives.
-rules string
Path to file containing rules regarding licence types. Uses embedded rules if empty.
-validate
Validate results (slow).
Example:
$ go list -m -json all | go-licence-detector -includeIndirect -depsOut=dependencies.asciidoc -noticeOut=NOTICE.txt
```

If no file path is provided for `-noticeOut` or `-depsOut`, the corresponding output will not be generated.


## Adding rules

Allowed licence types can be specified using a JSON file with the following structure:

```json
{
"whitelist": [
"Apache-2.0",
"MIT"
]
}
```

A partial list of allowed licences at Elastic is included in `assets/rules.json` and used by default if no other rules file is specified using the `-rules` flag.


## Adding overrides

In some cases, the application will not be able to detect the licence type or infer the correct URL for a dependency. When there are issues with licences (no licence file or unknown licence type), the application will fail with an error message instructing the user to add an override to continue. The overrides file is a file containing newline-delimited JSON where each line contains a JSON object in the following format:
Expand All @@ -49,6 +71,14 @@ Example overrides file:
{"name": "github.com/russross/blackfriday/v2", "url": "https://gopkg.in/russross/blackfriday.v2"}
```

See `example/overrides` for the suggested way to structure overrides.


## Validating URLs

Dependency URLs are inferred from the module path. In some rare cases, these URLs could be invalid. Passing the `-validate` flag will make the licence-detector attempt to validate each URL it detects. Please note that this process makes network requests to each of the detected URLs. Running this step in an automated fashion (such as a CI environment) is not recommended.


## Updating the licence database

The licence database file `licence.db` contains all the currently known licence types found in https://github.com/google/licenseclassifier/tree/master/licenses. In the rare case that entirely new licence types have been introduced to the codebase, follow the instructions at https://github.com/google/licenseclassifier to execute the `license_serializer` tool.
11 changes: 11 additions & 0 deletions assets/rules.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"whitelist": [
"Apache-2.0",
"BSD-2-Clause",
"BSD-3-Clause",
"ISC",
"MIT",
"MPL-2.0",
"Public Domain"
]
}
38 changes: 12 additions & 26 deletions detector/detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

//go:generate pkger -include=github.com/elastic/go-licence-detector:/assets/licence.db -o=detector
//go:generate pkger -include=github.com/elastic/go-licence-detector:/assets -o=detector

package detector

Expand All @@ -37,9 +37,9 @@ import (
)

const (
assetsPath = "github.com/elastic/go-licence-detector:/assets/licence.db"
// detectionThreshold is the minimum confidence score required from the licence classifier.
detectionThreshold = 0.85
licenceDBPath = "github.com/elastic/go-licence-detector:/assets/licence.db"
)

var errLicenceNotFound = errors.New("failed to detect licence")
Expand Down Expand Up @@ -74,7 +74,7 @@ func NewClassifier(dataPath string) (*licenseclassifier.License, error) {
}

func newClassiferFromEmbeddedDB() (*licenseclassifier.License, error) {
f, err := pkger.Open(assetsPath)
f, err := pkger.Open(licenceDBPath)
if err != nil {
return nil, fmt.Errorf("failed to open bundled licence database: %w", err)
}
Expand All @@ -90,15 +90,15 @@ func newClassiferFromEmbeddedDB() (*licenseclassifier.License, error) {
}

// Detect searches the dependencies on disk and detects licences.
func Detect(data io.Reader, classifier *licenseclassifier.License, overrides dependency.Overrides, includeIndirect bool) (*dependency.List, error) {
func Detect(data io.Reader, classifier *licenseclassifier.License, rules *Rules, overrides dependency.Overrides, includeIndirect bool) (*dependency.List, error) {
// parse the output of go mod list
deps, err := parseDependencies(data, includeIndirect)
if err != nil {
return nil, err
}

// find licences for each dependency
return detectLicences(classifier, deps, overrides)
return detectLicences(classifier, rules, deps, overrides)
}

func parseDependencies(data io.Reader, includeIndirect bool) (*dependencies, error) {
Expand All @@ -125,42 +125,27 @@ func parseDependencies(data io.Reader, includeIndirect bool) (*dependencies, err
}
}

func detectLicences(classifier *licenseclassifier.License, deps *dependencies, overrides dependency.Overrides) (*dependency.List, error) {
func detectLicences(classifier *licenseclassifier.License, rules *Rules, deps *dependencies, overrides dependency.Overrides) (*dependency.List, error) {
depList := &dependency.List{}
licenceRegex := buildLicenceRegex()

var err error
if depList.Direct, err = doDetectLicences(licenceRegex, classifier, deps.direct, overrides); err != nil {
if depList.Direct, err = doDetectLicences(licenceRegex, classifier, rules, deps.direct, overrides); err != nil {
return depList, err
}

if depList.Indirect, err = doDetectLicences(licenceRegex, classifier, deps.indirect, overrides); err != nil {
if depList.Indirect, err = doDetectLicences(licenceRegex, classifier, rules, deps.indirect, overrides); err != nil {
return depList, err
}

return depList, nil
}

func doDetectLicences(licenceRegex *regexp.Regexp, classifier *licenseclassifier.License, depList []*module, overrides dependency.Overrides) ([]dependency.Info, error) {
func doDetectLicences(licenceRegex *regexp.Regexp, classifier *licenseclassifier.License, rules *Rules, depList []*module, overrides dependency.Overrides) ([]dependency.Info, error) {
if len(depList) == 0 {
return nil, nil
}

// this is not an exhaustive list of Elastic-approved licences, but includes all the ones we use to date
whitelist := map[string]struct{}{
"Apache-2.0": struct{}{},
"BSD-2-Clause": struct{}{},
"BSD-3-Clause": struct{}{},
"ISC": struct{}{},
"MIT": struct{}{},
// Yellow list: Mozilla Public License 1.1 or 2.0 (“MPL”) Exception:
// "Incorporation of unmodified source or binaries into Elastic products is permitted,
// provided that the product's NOTICE file links to a URL providing the MPL-covered source code"
// We do not modify any of the dependencies and we link to the source code, so we are okay.
"MPL-2.0": struct{}{},
"Public Domain": struct{}{},
}

depInfoList := make([]dependency.Info, len(depList))
for i, mod := range depList {
depInfo := mkDepInfo(mod, overrides)
Expand Down Expand Up @@ -191,9 +176,10 @@ func doDetectLicences(licenceRegex *regexp.Regexp, classifier *licenseclassifier
}
}

if _, ok := whitelist[depInfo.LicenceType]; !ok {
return nil, fmt.Errorf("dependency %s uses licence %s which is not whitelisted", depInfo.Name, depInfo.LicenceType)
if !rules.IsAllowed(depInfo.LicenceType) {
return nil, fmt.Errorf("dependency %s uses licence %s which is not allowed by the rules file", depInfo.Name, depInfo.LicenceType)
}

depInfoList[i] = depInfo
}

Expand Down
5 changes: 4 additions & 1 deletion detector/detector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,10 @@ func TestDetect(t *testing.T) {
require.NoError(t, err)
defer f.Close()

gotDependencies, err := Detect(f, classifier, tc.overrides, tc.includeIndirect)
rules, err := LoadRules("")
require.NoError(t, err)

gotDependencies, err := Detect(f, classifier, rules, tc.overrides, tc.includeIndirect)
if tc.wantErr {
require.Error(t, err)
return
Expand Down
2 changes: 1 addition & 1 deletion detector/pkged.go

Large diffs are not rendered by default.

83 changes: 83 additions & 0 deletions detector/rules.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package detector

import (
"encoding/json"
"fmt"
"io"
"io/ioutil"
"os"

"github.com/markbates/pkger"
)

// embeddedRulesFile is the pkger path of the default rules file. LoadRules
// opens it when it is called with an empty path.
const embeddedRulesFile = "github.com/elastic/go-licence-detector:/assets/rules.json"

// rulesFile mirrors the JSON layout of a rules file. LoadRules unmarshals the
// file contents into it before building the Rules lookup set.
type rulesFile struct {
// Whitelist lists the allowed licence identifiers, read from the "whitelist" JSON key.
Whitelist []string `json:"whitelist"`
}

// Rules holds the licence rules consulted by the detector.
type Rules struct {
// WhiteList is the set of allowed licence identifiers; IsAllowed reports
// whether a given identifier is a key of this map.
WhiteList map[string]struct{}
}

// LoadRules reads a JSON rules file and converts its whitelist into a Rules
// value. When path is empty the embedded rules file is opened through pkger;
// otherwise the file at path is opened from disk. An error is returned if the
// file cannot be opened, read or unmarshalled.
func LoadRules(path string) (*Rules, error) {
	var (
		src io.ReadCloser
		err error
	)

	// Pick the source: embedded asset for an empty path, the file system otherwise.
	switch path {
	case "":
		src, err = pkger.Open(embeddedRulesFile)
	default:
		src, err = os.Open(path)
	}

	if err != nil {
		return nil, fmt.Errorf("failed to open rules file: %w", err)
	}
	defer src.Close()

	raw, err := ioutil.ReadAll(src)
	if err != nil {
		return nil, fmt.Errorf("failed to read rules: %w", err)
	}

	var parsed rulesFile
	if err = json.Unmarshal(raw, &parsed); err != nil {
		return nil, fmt.Errorf("failed to unmarshal rules: %w", err)
	}

	// Turn the list into a set so that lookups are a single map access.
	allowed := make(map[string]struct{}, len(parsed.Whitelist))
	for _, licence := range parsed.Whitelist {
		allowed[licence] = struct{}{}
	}

	return &Rules{WhiteList: allowed}, nil
}

// IsAllowed reports whether licenceID is present in the whitelist held by r.
func (r *Rules) IsAllowed(licenceID string) bool {
	if _, found := r.WhiteList[licenceID]; found {
		return true
	}

	return false
}
50 changes: 50 additions & 0 deletions detector/rules_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Licensed to Elasticsearch B.V. under one or more contributor
// license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright
// ownership. Elasticsearch B.V. licenses this file to you under
// the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package detector

import (
"testing"

"github.com/stretchr/testify/require"
)

// TestLoadRules checks that both the embedded rules (empty path) and an
// external rules file load without error into a non-empty whitelist.
func TestLoadRules(t *testing.T) {
	testCases := []struct {
		name string
		path string
	}{
		{name: "embedded", path: ""},
		{name: "external", path: "testdata/rules.json"},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			rules, err := LoadRules(tc.path)

			require.NoError(t, err)
			require.NotNil(t, rules)
			require.True(t, len(rules.WhiteList) > 0)
		})
	}
}

// TestRulesWhiteList loads the test rules file and checks that a listed
// licence is allowed while one absent from the file is rejected.
func TestRulesWhiteList(t *testing.T) {
	loaded, loadErr := LoadRules("testdata/rules.json")
	require.NoError(t, loadErr)

	// Apache-2.0 appears in testdata/rules.json.
	listed := loaded.IsAllowed("Apache-2.0")
	require.True(t, listed)

	// WTFPL does not appear in testdata/rules.json.
	unlisted := loaded.IsAllowed("WTFPL")
	require.False(t, unlisted)
}
11 changes: 11 additions & 0 deletions detector/testdata/rules.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"whitelist": [
"Apache-2.0",
"BSD-2-Clause",
"BSD-3-Clause",
"ISC",
"MIT",
"MPL-2.0",
"Public Domain"
]
}
10 changes: 9 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ var (
noticeTemplateFlag = flag.String("noticeTemplate", "example/templates/NOTICE.txt.tmpl", "Path to the NOTICE template file.")
noticeOutFlag = flag.String("noticeOut", "", "Path to output the notice.")
overridesFlag = flag.String("overrides", "", "Path to the file containing override directives.")
rulesFlag = flag.String("rules", "", "Path to file containing rules regarding licence types. Uses embedded rules if empty.")
validateFlag = flag.Bool("validate", false, "Validate results (slow).")
)

Expand All @@ -64,7 +65,14 @@ func main() {
log.Fatalf("Failed to load overrides: %v", err)
}

dependencies, err := detector.Detect(depInput, classifier, overrides, *includeIndirectFlag)
// load rules
rules, err := detector.LoadRules(*rulesFlag)
if err != nil {
log.Fatalf("Failed to load rules: %v", err)
}

// detect dependencies
dependencies, err := detector.Detect(depInput, classifier, rules, overrides, *includeIndirectFlag)
if err != nil {
log.Fatalf("Failed to detect licences: %v", err)
}
Expand Down

0 comments on commit 9ffa081

Please sign in to comment.