-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
License crawler for third party golang libraries (#2393)
* license file crawler tools * Add get-github-repo cli tool to resolve github repo for golang libraries * improve get_github_repo message * Add get_github_license_info.py script and related documentation, it fetches license info from github api * Add license files * Add concatenate_license.py and update other CLI tools + documentation * add license for parse_toml_dep.py
- Loading branch information
1 parent
0b8d2e1
commit 3bda9e8
Showing
15 changed files
with
1,902 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
github.com/argoproj/argo | ||
cloud.google.com/go | ||
contrib.go.opencensus.io/exporter/ocagent | ||
github.com/Azure/go-autorest | ||
github.com/Knetic/govaluate | ||
github.com/PuerkitoBio/purell | ||
github.com/PuerkitoBio/urlesc | ||
github.com/argoproj/pkg | ||
github.com/aws/aws-sdk-go | ||
github.com/beorn7/perks | ||
github.com/census-instrumentation/opencensus-proto | ||
github.com/colinmarc/hdfs | ||
github.com/davecgh/go-spew | ||
github.com/dgrijalva/jwt-go | ||
github.com/docker/spdystream | ||
github.com/dustin/go-humanize | ||
github.com/emicklei/go-restful | ||
github.com/emirpasic/gods | ||
github.com/evanphx/json-patch | ||
github.com/go-openapi/jsonpointer | ||
github.com/go-openapi/jsonreference | ||
github.com/go-openapi/spec | ||
github.com/go-openapi/swag | ||
github.com/go-sql-driver/mysql | ||
github.com/gogo/protobuf | ||
github.com/golang/protobuf | ||
github.com/google/gofuzz | ||
github.com/googleapis/gnostic | ||
github.com/gorilla/websocket | ||
github.com/grpc-ecosystem/grpc-gateway | ||
github.com/hashicorp/go-uuid | ||
github.com/hashicorp/golang-lru | ||
github.com/imdario/mergo | ||
github.com/inconshreveable/mousetrap | ||
github.com/jbenet/go-context | ||
github.com/jcmturner/gofork | ||
github.com/jmespath/go-jmespath | ||
github.com/json-iterator/go | ||
github.com/kevinburke/ssh_config | ||
github.com/konsorten/go-windows-terminal-sequences | ||
github.com/lib/pq | ||
github.com/mailru/easyjson | ||
github.com/matttproud/golang_protobuf_extensions | ||
github.com/minio/minio-go | ||
github.com/mitchellh/go-homedir | ||
github.com/mitchellh/go-ps | ||
github.com/modern-go/concurrent | ||
github.com/modern-go/reflect2 | ||
github.com/pkg/errors | ||
github.com/pmezard/go-difflib | ||
github.com/prometheus/client_golang | ||
github.com/prometheus/client_model | ||
github.com/prometheus/common | ||
github.com/prometheus/procfs | ||
github.com/sergi/go-diff | ||
github.com/sirupsen/logrus | ||
github.com/spf13/cobra | ||
github.com/spf13/pflag | ||
github.com/src-d/gcfg | ||
github.com/stretchr/objx | ||
github.com/stretchr/testify | ||
github.com/tidwall/gjson | ||
github.com/tidwall/match | ||
github.com/tidwall/pretty | ||
github.com/valyala/bytebufferpool | ||
github.com/valyala/fasttemplate | ||
github.com/xanzy/ssh-agent | ||
go.opencensus.io | ||
golang.org/x/crypto | ||
golang.org/x/net | ||
golang.org/x/oauth2 | ||
golang.org/x/sync | ||
golang.org/x/sys | ||
golang.org/x/text | ||
golang.org/x/time | ||
golang.org/x/tools | ||
google.golang.org/api | ||
google.golang.org/appengine | ||
google.golang.org/genproto | ||
google.golang.org/grpc | ||
gopkg.in/inf.v0 | ||
gopkg.in/ini.v1 | ||
gopkg.in/jcmturner/aescts.v1 | ||
gopkg.in/jcmturner/dnsutils.v1 | ||
gopkg.in/jcmturner/gokrb5.v5 | ||
gopkg.in/jcmturner/rpc.v0 | ||
gopkg.in/src-d/go-billy.v4 | ||
gopkg.in/src-d/go-git.v4 | ||
gopkg.in/warnings.v0 | ||
gopkg.in/yaml.v2 | ||
k8s.io/api | ||
k8s.io/apimachinery | ||
k8s.io/client-go | ||
k8s.io/code-generator | ||
k8s.io/gengo | ||
k8s.io/klog | ||
k8s.io/kube-openapi | ||
k8s.io/utils | ||
sigs.k8s.io/yaml | ||
upper.io/db.v3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
upper.io/db.v3,upper/db |
99 changes: 99 additions & 0 deletions
99
third_party/argo/license-intermediate-data/license_info.csv
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
argoproj/argo | ||
GoogleCloudPlatform/gcloud-golang | ||
census-ecosystem/opencensus-go-exporter-ocagent | ||
Azure/go-autorest | ||
Knetic/govaluate | ||
PuerkitoBio/purell | ||
PuerkitoBio/urlesc | ||
argoproj/pkg | ||
aws/aws-sdk-go | ||
beorn7/perks | ||
census-instrumentation/opencensus-proto | ||
colinmarc/hdfs | ||
davecgh/go-spew | ||
dgrijalva/jwt-go | ||
docker/spdystream | ||
dustin/go-humanize | ||
emicklei/go-restful | ||
emirpasic/gods | ||
evanphx/json-patch | ||
go-openapi/jsonpointer | ||
go-openapi/jsonreference | ||
go-openapi/spec | ||
go-openapi/swag | ||
go-sql-driver/mysql | ||
gogo/protobuf | ||
golang/protobuf | ||
google/gofuzz | ||
googleapis/gnostic | ||
gorilla/websocket | ||
grpc-ecosystem/grpc-gateway | ||
hashicorp/go-uuid | ||
hashicorp/golang-lru | ||
imdario/mergo | ||
inconshreveable/mousetrap | ||
jbenet/go-context | ||
jcmturner/gofork | ||
jmespath/go-jmespath | ||
json-iterator/go | ||
kevinburke/ssh_config | ||
konsorten/go-windows-terminal-sequences | ||
lib/pq | ||
mailru/easyjson | ||
matttproud/golang_protobuf_extensions | ||
minio/minio-go | ||
mitchellh/go-homedir | ||
mitchellh/go-ps | ||
modern-go/concurrent | ||
modern-go/reflect2 | ||
pkg/errors | ||
pmezard/go-difflib | ||
prometheus/client_golang | ||
prometheus/client_model | ||
prometheus/common | ||
prometheus/procfs | ||
sergi/go-diff | ||
sirupsen/logrus | ||
spf13/cobra | ||
spf13/pflag | ||
src-d/gcfg | ||
stretchr/objx | ||
stretchr/testify | ||
tidwall/gjson | ||
tidwall/match | ||
tidwall/pretty | ||
valyala/bytebufferpool | ||
valyala/fasttemplate | ||
xanzy/ssh-agent | ||
census-instrumentation/opencensus-go | ||
golang/crypto | ||
golang/net | ||
golang/oauth2 | ||
golang/sync | ||
golang/sys | ||
golang/text | ||
golang/time | ||
golang/tools | ||
google/google-api-go-client | ||
golang/appengine | ||
google/go-genproto | ||
grpc/grpc-go | ||
go-inf/inf | ||
go-ini/ini | ||
jcmturner/aescts | ||
jcmturner/dnsutils | ||
jcmturner/gokrb5 | ||
jcmturner/rpc | ||
src-d/go-billy | ||
src-d/go-git | ||
go-warnings/warnings | ||
go-yaml/yaml | ||
kubernetes/api | ||
kubernetes/apimachinery | ||
kubernetes/client-go | ||
kubernetes/code-generator | ||
kubernetes/gengo | ||
kubernetes/klog | ||
kubernetes/kube-openapi | ||
kubernetes/utils | ||
kubernetes-sigs/yaml | ||
upper/db |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# CLI tools to fetch license info | ||
|
||
## Why do we need this? | ||
|
||
When we release third-party images (which can be considered redistributing | ||
third-party binaries), we must comply with their licenses: not just each library's | ||
own license, but also the licenses of its dependencies and transitive dependencies. | ||
|
||
We need to do the following to be compliant: | ||
* Put license declarations in the image for all licenses. | ||
* Mirror source code in the image for code with MPL, EPL, GPL or CDDL licenses. | ||
|
||
It's not an easy task to get license of all (transitive) dependencies of a go | ||
library. Thus, we need these tools to automate this task. | ||
|
||
## How to get all dependencies with license and source code? | ||
|
||
1. Install CLI tools here: `python setup.py install` | ||
1. Collect dependencies + transitive dependencies in a go library. Put them together in a text file called `dep.txt`. Format: each line has a library name. The library name should be a valid golang import module name. | ||
|
||
Example ways to get it: | ||
* argo uses `dep` for package management. It has a [Gopkg.lock file](https://github.com/argoproj/argo/blob/master/Gopkg.lock) | ||
with all of its dependencies and transitive dependencies. All the name fields in this file are what we need. You can run `parse-toml-dep` to parse it. | ||
* minio uses [official go modules](https://blog.golang.org/using-go-modules), there's a [go.mod file](https://github.com/minio/minio/blob/master/go.mod) describing its direct dependencies. Run command `go list -m all` to get final versions that will be used in a build for all direct and indirect dependencies, [reference](https://github.com/golang/go/wiki/Modules#daily-workflow). Parse its output to make a file we need. | ||
|
||
Reminder: don't forget to put the library itself into `dep.txt`. | ||
1. Run `get-github-repo` to resolve the github repos of golang imports. Not all | ||
imports can be resolved by the script; fewer than 2% of libraries need manual help. | ||
|
||
For a library we cannot resolve, manually put it in `dep-repo-mapping.manual.csv`, so the tool knows how to find its github repo the next time. | ||
|
||
Defaults to read dependencies from `dep.txt` and writes to `repo.txt`. | ||
1. Run `get-github-license-info` to crawl github license info of these libraries. (Not all repos have github recognizable license, needs manual help for <2% of libraries) | ||
|
||
Defaults to read repos from `repo.txt` and writes to `license-info.csv`. You | ||
need to configure github personal access token because it sends a lot of | ||
requests to github. Follow instructions in `get-github-license-info -h`. | ||
|
||
For repos whose license could not be fetched, it's usually because the github repo | ||
doesn't have a license file that github can recognize. Check the repo's README and | ||
put the correct info into `license-info.csv`. (Usually, you can use its README file, which mentions the license.) | ||
1. Edit license info file. Manually check the license file for all repos with a license categorized as "Other" by github. Figure out their true license names. | ||
1. Run `concatenate-license` to crawl full text license files for all dependencies and concat them into one file. | ||
|
||
By default, it reads license info from `license_info.csv` and writes to `license.txt`. | ||
Put `license.txt` to `third_party/library/license.txt` where it is read when building docker images. | ||
1. Manually update the list of dependencies that require source code, and put it into `third_party/library/repo-MPL.txt`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
# Copyright 2019 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import argparse | ||
import requests | ||
import sys | ||
import traceback | ||
|
||
# Command-line interface: an optional positional input (the license info CSV)
# and an optional output path. Arguments are parsed at module level, so `args`
# is a module-level value available to the rest of the script.
parser = argparse.ArgumentParser(
    description=
    'Concatenate the full license texts of all repos listed in a license info CSV file into one file.'
)
parser.add_argument(
    'license_info_file',
    nargs='?',
    default='license_info.csv',
    help=
    'CSV file with license info fetched from github using get-github-license-info CLI tool. (default: %(default)s)',
)
parser.add_argument(
    '-o',
    '--output',
    dest='output_file',
    nargs='?',
    # With nargs='?', a bare `-o` (flag present, no value) stores `const`.
    # Without this it would store None, which is not a usable output path.
    const='license.txt',
    default='license.txt',
    help=
    'Concatenated license file path this command generates. (default: %(default)s)'
)
args = parser.parse_args()
|
||
|
||
def fetch_license_text(download_link, timeout=60):
    """Download a license file and return its content as text.

    Args:
        download_link: URL the license text is fetched from.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the whole run.

    Returns:
        The body of the HTTP response, decoded as text.

    Raises:
        RuntimeError: If the server answers with a non-success status.
        requests.RequestException: If the request itself fails (connection
            error, timeout, ...).
    """
    response = requests.get(download_link, timeout=timeout)
    # Raise explicitly instead of using `assert`: assertions are stripped when
    # Python runs with -O, which would let error pages through as license text.
    if not response.ok:
        raise RuntimeError('Fetching {} failed with {} {}'.format(
            download_link, response.status_code, response.reason))
    return response.text
|
||
|
||
def main():
    """Concatenate the license texts of all listed repos into one file.

    Reads `args.license_info_file` line by line. Each non-blank line is
    expected to hold four comma-separated fields: repo, license link, license
    name and license download link. For every repo the license text is
    downloaded with `fetch_license_text` and appended to `args.output_file`
    under a header naming the repo, its license name and its license link.

    Processing is best-effort: a malformed line or a failed download is
    reported on stderr and recorded, and the remaining lines are still
    processed. A summary of the failed repos is printed to stderr at the end.
    """
    separator = '--------------------------------------------------------------------------------'
    repo_failed = []
    # Explicit UTF-8: license texts commonly contain non-ASCII characters
    # (e.g. the copyright sign), which a non-UTF-8 locale default cannot write.
    with open(args.license_info_file, 'r',
              encoding='utf-8') as license_info_file, open(
                  args.output_file, 'w', encoding='utf-8') as output_file:
        for line_number, line in enumerate(license_info_file, start=1):
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline) carry no repo.
                continue
            fields = line.split(',')
            if len(fields) != 4:
                # Record the bad line instead of letting the unpacking raise
                # ValueError and abort the whole run.
                print('[failed] line {}: expected 4 comma-separated fields, '
                      'got {}: {}'.format(line_number, len(fields), line),
                      file=sys.stderr)
                repo_failed.append(fields[0])
                continue
            repo, license_link, license_name, license_download_link = fields
            try:
                print('Repo {} has license download link {}'.format(
                    repo, license_download_link),
                      file=sys.stderr)
                # Fetch before writing anything, so a failed download leaves
                # no orphaned header in the output file.
                license_text = fetch_license_text(license_download_link)
                print(separator, file=output_file)
                print('{} {} {}'.format(repo, license_name, license_link),
                      file=output_file)
                print(separator, file=output_file)
                print(license_text, file=output_file)
            except Exception as e:
                # Deliberately broad: one failing repo must not stop the rest.
                print('[failed]', e, file=sys.stderr)
                traceback.print_exc(file=sys.stderr)
                repo_failed.append(repo)
        print('Failed to download license file for {} repos.'.format(
            len(repo_failed)),
              file=sys.stderr)
        for repo in repo_failed:
            print(repo, file=sys.stderr)


# Invoked unconditionally at module level (there is no `__main__` guard).
main()
Oops, something went wrong.