Skip to content

Commit

Permalink
🌱 cron: support reading prefix from file for controller input files (…
Browse files Browse the repository at this point in the history
…7/n) (ossf#2445)

* add prefix marker file to config

Signed-off-by: Spencer Schrock <sschrock@google.com>

* Read the new config values, if they exist.

Signed-off-by: Spencer Schrock <sschrock@google.com>

* Add function to fetch prefix file config value.

Signed-off-by: Spencer Schrock <sschrock@google.com>

* Read prefix file if prefix not set.

Signed-off-by: Spencer Schrock <sschrock@google.com>

* Add tests to verify how List works with various prefixes

Signed-off-by: Spencer Schrock <sschrock@google.com>

* Add tests for getPrefix

Signed-off-by: Spencer Schrock <sschrock@google.com>

* Remove panics from iterator helper functions

Signed-off-by: Spencer Schrock <sschrock@google.com>

Signed-off-by: Spencer Schrock <sschrock@google.com>
  • Loading branch information
spencerschrock authored and raghavkaul committed Feb 9, 2023
1 parent 6591d8d commit c9dae73
Show file tree
Hide file tree
Showing 9 changed files with 305 additions and 63 deletions.
37 changes: 29 additions & 8 deletions cron/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ const (
// TransferStatusFilename file identifies if shard transfer to BigQuery is completed.
TransferStatusFilename string = ".transfer_complete"

configFlag string = "config"
configDefault string = ""
configUsage string = "Location of config file. Required"
configFlag string = "config"
configDefault string = ""
configUsage string = "Location of config file. Required"
inputBucketParams string = "input-bucket"

projectID string = "SCORECARD_PROJECT_ID"
requestTopicURL string = "SCORECARD_REQUEST_TOPIC_URL"
Expand Down Expand Up @@ -300,16 +301,36 @@ func GetAPIResultsBucketURL() (string, error) {

// GetInputBucketURL() returns the bucket URL for input files.
func GetInputBucketURL() (string, error) {
return getStringConfigValue(inputBucketURL, configYAML, "InputBucketURL", "input-bucket-url")
bucketParams, err := GetAdditionalParams(inputBucketParams)
bURL, ok := bucketParams["url"]
if err != nil || !ok {
// TODO temporarily falling back to old variables until changes propagate to production
return getStringConfigValue(inputBucketURL, configYAML, "InputBucketURL", "input-bucket-url")
}
return bURL, nil
}

// GetInputBucketPrefix() returns the prefix used when fetching files from a bucket.
func GetInputBucketPrefix() (string, error) {
prefix, err := getStringConfigValue(inputBucketPrefix, configYAML, "InputBucketPrefix", "input-bucket-prefix")
if err != nil && !errors.Is(err, ErrorEmptyConfigValue) {
return "", err
bucketParams, err := GetAdditionalParams(inputBucketParams)
if err != nil {
// TODO temporarily falling back to old variables until changes propagate to production
prefix, err := getStringConfigValue(inputBucketPrefix, configYAML, "InputBucketPrefix", "input-bucket-prefix")
if err != nil && !errors.Is(err, ErrorEmptyConfigValue) {
return "", err
}
return prefix, nil
}
return bucketParams["prefix"], nil
}

// GetInputBucketPrefixFile() returns the file whose contents specify the prefix to use.
func GetInputBucketPrefixFile() (string, error) {
bucketParams, err := GetAdditionalParams(inputBucketParams)
if err != nil {
return "", fmt.Errorf("getting config for %s: %w", inputBucketParams, err)
}
return prefix, nil
return bucketParams["prefix-file"], nil
}

func GetAdditionalParams(subMapName string) (map[string]string, error) {
Expand Down
10 changes: 9 additions & 1 deletion cron/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,20 @@ shard-size: 10
webhook-url:
metric-exporter: stackdriver
result-data-bucket-url: gs://ossf-scorecard-data2

# TODO temporarily leaving old variables until changes propagate to production
input-bucket-url: gs://ossf-scorecard-input-projects
# Can be used to specify directories within a bucket. Can be empty.
input-bucket-prefix:

additional-params:
criticality:
input-bucket:
url: gs://ossf-scorecard-input-projects
# Optional prefix to limit files used as input files within a bucket (e.g. a specific file or directory)
prefix:
# Optional file to read a prefix from, instead of statically defining prefix above (note: prefix must be blank to use this option)
# This is good in situations where the prefix changes frequently (e.g. always using the most recent folder in a bucket)
prefix-file:

scorecard:
# API results bucket
Expand Down
61 changes: 52 additions & 9 deletions cron/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,25 +38,30 @@ const (
prodShardSize int = 10
prodMetricExporter string = "stackdriver"
// Raw results.
prodRawBucket = "gs://ossf-scorecard-rawdata"
prodRawBigQueryTable = "scorecard-rawdata"
prodAPIBucketURL = "gs://ossf-scorecard-cron-results"
prodInputBucketURL = "gs://ossf-scorecard-input-projects"
prodInputBucketPrefix = ""
prodRawBucket = "gs://ossf-scorecard-rawdata"
prodRawBigQueryTable = "scorecard-rawdata"
prodAPIBucketURL = "gs://ossf-scorecard-cron-results"
prodInputBucketURL = "gs://ossf-scorecard-input-projects"
prodInputBucketPrefix = ""
prodInputBucketPrefixFile = ""
)

var (
prodInputBucketParams = map[string]string{
"url": prodInputBucketURL,
"prefix": prodInputBucketPrefix,
"prefix-file": prodInputBucketPrefixFile,
}
prodScorecardParams = map[string]string{
"api-results-bucket-url": prodAPIBucketURL,
"blacklisted-checks": prodBlacklistedChecks,
"cii-data-bucket-url": prodCIIDataBucket,
"raw-bigquery-table": prodRawBigQueryTable,
"raw-result-data-bucket-url": prodRawBucket,
}
prodCriticalityParams map[string]string = nil
prodAdditionalParams = map[string]map[string]string{
"scorecard": prodScorecardParams,
"criticality": prodCriticalityParams,
prodAdditionalParams = map[string]map[string]string{
"input-bucket": prodInputBucketParams,
"scorecard": prodScorecardParams,
}
)

Expand Down Expand Up @@ -474,3 +479,41 @@ func TestEnvVarName(t *testing.T) {
})
}
}

func TestGetAdditionalParams(t *testing.T) {
t.Parallel()
//nolint:govet
tests := []struct {
name string
mapName string
want map[string]string
wantErr bool
}{
{
name: "scorecard values",
mapName: "scorecard",
want: prodScorecardParams,
wantErr: false,
},
{
name: "nonexistant value",
mapName: "this-value-should-never-exist",
want: map[string]string{},
wantErr: true,
},
}
for _, testcase := range tests {
testcase := testcase
t.Run(testcase.name, func(t *testing.T) {
t.Parallel()
got, err := GetAdditionalParams(testcase.mapName)
if testcase.wantErr != (err != nil) {
t.Fatalf("unexpected error value for GetAdditionalParams: %v", err)
}
if !cmp.Equal(got, testcase.want) {
diff := cmp.Diff(got, testcase.want)
t.Errorf("test failed: expected - %v, got - %v. \n%s", testcase.want, got, diff)
}
})
}
}
17 changes: 16 additions & 1 deletion cron/data/blob_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,26 @@ func TestBlobKeysPrefix(t *testing.T) {
{
name: "no prefix",
prefix: "",
want: []string{"key1.txt", "key2.txt", "key3.txt", "subdir/key4.txt"},
want: []string{"key1.txt", "key2.txt", "key3.txt", "subdir/key4.txt", "subdir/nested/key5.txt"},
},
{
name: "subdir prefix",
prefix: "subdir/",
want: []string{"subdir/key4.txt", "subdir/nested/key5.txt"},
},
{
name: "subdir prefix no terminal slash",
prefix: "subdir",
want: []string{"subdir/key4.txt", "subdir/nested/key5.txt"},
},
{
name: "nested prefix",
prefix: "subdir/nested/",
want: []string{"subdir/nested/key5.txt"},
},
{
name: "file prefix",
prefix: "subdir/key4.txt",
want: []string{"subdir/key4.txt"},
},
}
Expand Down
Empty file.
92 changes: 92 additions & 0 deletions cron/internal/controller/bucket.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Copyright 2022 Security Scorecard Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package main implements the PubSub controller.
package main

import (
"bytes"
"context"
"fmt"
"strings"

"github.com/ossf/scorecard/v4/cron/config"
"github.com/ossf/scorecard/v4/cron/data"
)

// getPrefix returns the prefix used when reading input files from a bucket.
// If "prefix" is set, the value is used irrespective of the value of "prefix-file".
// Otherwise, the contents of "prefix-file" (if set) are used.
func getPrefix(ctx context.Context, bucket string) (string, error) {
prefix, err := config.GetInputBucketPrefix()
if err != nil {
return "", fmt.Errorf("config.GetInputBucketPrefix: %w", err)
}
if prefix != "" {
// prioritize prefix if set
return prefix, nil
}

prefixFile, err := config.GetInputBucketPrefixFile()
if err != nil {
return "", fmt.Errorf("config.GetInputBucketPrefixFile: %w", err)
}
if prefixFile == "" {
// cant read a file which doesnt exist, but the value is optional so no error
return "", nil
}

b, err := data.GetBlobContent(ctx, bucket, prefixFile)
if err != nil {
return "", fmt.Errorf("fetching contents of prefix-file: %w", err)
}
s := string(b)
return strings.TrimSpace(s), nil
}

func bucketFiles(ctx context.Context) (data.Iterator, error) {
var iters []data.Iterator

bucket, err := config.GetInputBucketURL()
if err != nil {
return nil, fmt.Errorf("config.GetInputBucketURL: %w", err)
}
prefix, err := getPrefix(ctx, bucket)
if err != nil {
return nil, fmt.Errorf("getPrefix: %w", err)
}

files, err := data.GetBlobKeysWithPrefix(ctx, bucket, prefix)
if err != nil {
return nil, fmt.Errorf("data.GetBlobKeysWithPrefix: %w", err)
}

for _, f := range files {
b, err := data.GetBlobContent(ctx, bucket, f)
if err != nil {
return nil, fmt.Errorf("data.GetBlobContent: %w", err)
}
r := bytes.NewReader(b)
i, err := data.MakeIteratorFrom(r)
if err != nil {
return nil, fmt.Errorf("data.MakeIteratorFrom: %w", err)
}
iters = append(iters, i)
}
iter, err := data.MakeNestedIterator(iters)
if err != nil {
return nil, fmt.Errorf("data.MakeNestedIterator: %w", err)
}
return iter, nil
}
95 changes: 95 additions & 0 deletions cron/internal/controller/bucket_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Copyright 2022 Security Scorecard Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"context"
"path/filepath"
"testing"
)

//nolint:tparallel,paralleltest // since t.Setenv is used
func TestGetPrefix(t *testing.T) {
//nolint:govet
testcases := []struct {
name string
url string
prefix string
prefixFile string
want string
wantErr bool
}{
{
name: "no prefix",
url: "testdata/getPrefix",
prefix: "",
prefixFile: "",
want: "",
wantErr: false,
},
{
name: "prefix set",
url: "testdata/getPrefix",
prefix: "foo",
prefixFile: "",
want: "foo",
wantErr: false,
},
{
name: "prefix file set",
url: "testdata/getPrefix",
prefix: "",
prefixFile: "marker",
want: "bar",
wantErr: false,
},
{
name: "prefix and prefix file set",
url: "testdata/getPrefix",
prefix: "foo",
prefixFile: "marker",
want: "foo",
wantErr: false,
},
{
name: "non existent prefix file",
url: "testdata/getPrefix",
prefix: "",
prefixFile: "baz",
want: "",
wantErr: true,
},
}

for _, testcase := range testcases {
testcase := testcase
t.Run(testcase.name, func(t *testing.T) {
t.Setenv("INPUT_BUCKET_URL", testcase.url)
t.Setenv("INPUT_BUCKET_PREFIX", testcase.prefix)
t.Setenv("INPUT_BUCKET_PREFIX_FILE", testcase.prefixFile)
testdataPath, err := filepath.Abs(testcase.url)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
got, err := getPrefix(context.Background(), "file:///"+testdataPath)
if (err != nil) != testcase.wantErr {
t.Errorf("unexpected error produced: %v", err)
}
if got != testcase.want {
t.Errorf("test failed: expected - %s, got = %s", testcase.want, got)
}
})
}
}
Loading

0 comments on commit c9dae73

Please sign in to comment.