Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(secret): enhance secret scanning for python binary files #7223

Merged
merged 14 commits into from
Sep 30, 2024
4 changes: 3 additions & 1 deletion docs/docs/scanner/secret.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
Trivy scans any container image, filesystem and git repository to detect exposed secrets like passwords, api keys, and tokens.
Secret scanning is enabled by default.

Trivy will scan every plaintext file, according to builtin rules or configuration. There are plenty of builtin rules:
Trivy will scan every plaintext file, according to builtin rules or configuration. Also, Trivy can detect secrets in compiled Python files (`.pyc`).

There are plenty of builtin rules:

- AWS access key
- GCP service account
Expand Down
28 changes: 22 additions & 6 deletions pkg/fanal/analyzer/secret/secret.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ var (
".gz",
".gzip",
".tar",
}

allowedBinaries = []string{
".pyc",
}
)
Expand All @@ -63,6 +66,10 @@ func init() {
analyzer.RegisterAnalyzer(NewSecretAnalyzer(secret.Scanner{}, ""))
}

func isAllowedBinary(filename string) bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if we omit the is prefix?

Suggested change
func isAllowedBinary(filename string) bool {
func allowedBinary(filename string) bool {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

return slices.Contains(allowedBinaries, filepath.Ext(filename))
}

// SecretAnalyzer is an analyzer for secrets
type SecretAnalyzer struct {
scanner secret.Scanner
Expand Down Expand Up @@ -96,20 +103,28 @@ func (a *SecretAnalyzer) Init(opt analyzer.AnalyzerOptions) error {
func (a *SecretAnalyzer) Analyze(_ context.Context, input analyzer.AnalysisInput) (*analyzer.AnalysisResult, error) {
// Do not scan binaries
binary, err := utils.IsBinary(input.Content, input.Info.Size())
if binary || err != nil {
if err != nil || (binary && !isAllowedBinary(input.FilePath)) {
return nil, nil
}

if size := input.Info.Size(); size > 10485760 { // 10MB
log.WithPrefix("secret").Warn("The size of the scanned file is too large. It is recommended to use `--skip-files` for this file to avoid high memory consumption.", log.FilePath(input.FilePath), log.Int64("size (MB)", size/1048576))
}

content, err := io.ReadAll(input.Content)
if err != nil {
return nil, xerrors.Errorf("read error %s: %w", input.FilePath, err)
}
var content []byte

content = bytes.ReplaceAll(content, []byte("\r"), []byte(""))
if !binary {
content, err = io.ReadAll(input.Content)
if err != nil {
return nil, xerrors.Errorf("read error %s: %w", input.FilePath, err)
}
content = bytes.ReplaceAll(content, []byte("\r"), []byte(""))
} else {
content, err = utils.ExtractPrintableBytes(input.Content)
if err != nil {
return nil, xerrors.Errorf("binary read error %s: %w", input.FilePath, err)
}
}

filePath := input.FilePath
// Files extracted from the image have an empty input.Dir.
Expand All @@ -122,6 +137,7 @@ func (a *SecretAnalyzer) Analyze(_ context.Context, input analyzer.AnalysisInput
result := a.scanner.Scan(secret.ScanArgs{
FilePath: filePath,
Content: content,
Binary: binary,
})

if len(result.Findings) == 0 {
Expand Down
25 changes: 25 additions & 0 deletions pkg/fanal/analyzer/secret/secret_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,16 @@ func TestSecretAnalyzer(t *testing.T) {
},
},
}
wantFindingGH_PAT := types.SecretFinding{
RuleID: "github-fine-grained-pat",
Category: "GitHub",
Title: "GitHub Fine-grained personal access tokens",
Severity: "CRITICAL",
StartLine: 1,
EndLine: 1,
Match: "Binary file \"/testdata/secret.cpython-310.pyc\" matches a rule \"GitHub Fine-grained personal access tokens\"",
}

tests := []struct {
name string
configPath string
Expand Down Expand Up @@ -153,6 +163,21 @@ func TestSecretAnalyzer(t *testing.T) {
filePath: "testdata/binaryfile",
want: nil,
},
{
name: "python binary file",
configPath: "testdata/skip-tests-config.yaml",
filePath: "testdata/secret.cpython-310.pyc",
want: &analyzer.AnalysisResult{
Secrets: []types.Secret{
{
FilePath: "/testdata/secret.cpython-310.pyc",
Findings: []types.SecretFinding{
wantFindingGH_PAT,
},
},
},
},
},
}

for _, tt := range tests {
Expand Down
Binary file not shown.
19 changes: 17 additions & 2 deletions pkg/fanal/secret/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ func NewScanner(config *Config) Scanner {
// ScanArgs carries the input for a single Scanner.Scan call.
type ScanArgs struct {
// FilePath is the path of the scanned file. For binary input it is also
// quoted in the Match message of each finding.
FilePath string
// Content holds the bytes to scan.
Content []byte
// Binary marks Content as originating from a binary file. When set, Scan
// reports each finding with a generic "Binary file ... matches a rule ..."
// message and StartLine/EndLine fixed to 1.
Binary bool
}

type Match struct {
Expand Down Expand Up @@ -435,8 +436,22 @@ func (s *Scanner) Scan(args ScanArgs) types.Secret {
}
}

for _, match := range matched {
findings = append(findings, toFinding(match.Rule, match.Location, censored))
if args.Binary {
for _, match := range matched {
findings = append(findings, types.SecretFinding{
RuleID: match.Rule.ID,
Category: match.Rule.Category,
Severity: lo.Ternary(match.Rule.Severity == "", "UNKNOWN", match.Rule.Severity),
Title: match.Rule.Title,
Match: fmt.Sprintf("Binary file %q matches a rule %q", args.FilePath, match.Rule.Title),
StartLine: 1,
EndLine: 1,
})
}
} else {
for _, match := range matched {
findings = append(findings, toFinding(match.Rule, match.Location, censored))
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest using the toFinding function.
something like this:

Suggested change
if args.Binary {
for _, match := range matched {
findings = append(findings, types.SecretFinding{
RuleID: match.Rule.ID,
Category: match.Rule.Category,
Severity: lo.Ternary(match.Rule.Severity == "", "UNKNOWN", match.Rule.Severity),
Title: match.Rule.Title,
Match: fmt.Sprintf("Binary file %q matches a rule %q", args.FilePath, match.Rule.Title),
StartLine: 1,
EndLine: 1,
})
}
} else {
for _, match := range matched {
findings = append(findings, toFinding(match.Rule, match.Location, censored))
}
for _, match := range matched {
finding := toFinding(match.Rule, match.Location, censored)
// These fields will be unreadable for binaries,
// Therefore overwrite them.
if args.Binary {
finding.Match = fmt.Sprintf("Binary file %q matches a rule %q", args.FilePath, match.Rule.Title)
finding.Code = types.Code{}
}
findings = append(findings, finding)
}

It might make sense to add a flag to the toFinding function to skip findLocation.

wdyt?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure! it's a better way!

}

if len(findings) == 0 {
Expand Down
34 changes: 34 additions & 0 deletions pkg/fanal/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"os"
"os/exec"
"path/filepath"
"unicode"

xio "github.com/aquasecurity/trivy/pkg/x/io"
)
Expand Down Expand Up @@ -93,3 +94,36 @@ func IsBinary(content xio.ReadSeekerAt, fileSize int64) (bool, error) {

return false, nil
}

// ExtractPrintableBytes reads content until EOF and returns the printable
// text found in it.
//
// Every byte is converted to a rune on its own and tested with
// unicode.IsPrint. Consecutive printable bytes form a run; a run is kept only
// when it is longer than minLength bytes, and each kept run is followed by a
// single space so that neighbouring runs stay separated in the output.
//
// It returns a nil slice when no run qualifies. Any read error other than
// io.EOF is returned unchanged together with a nil slice.
func ExtractPrintableBytes(content xio.ReadSeekerAt) ([]byte, error) {
	const (
		minLength = 4    // runs of this many bytes or fewer are discarded
		chunkSize = 4096 // number of bytes requested per Read call
	)

	var (
		result []byte
		run    []byte // printable run currently being accumulated
	)

	// flush moves the pending run into result (when it is long enough) and
	// starts a new, empty run while keeping the allocated capacity.
	flush := func() {
		if len(run) > minLength {
			result = append(result, run...)
			// add a space between printable lines to separate them
			result = append(result, ' ')
		}
		run = run[:0]
	}

	buf := make([]byte, chunkSize)
	for {
		n, err := content.Read(buf)

		// A Read may return data together with an error (including io.EOF),
		// or no data and no error. Consume exactly the n bytes that were read
		// before inspecting err so that no byte is lost or processed twice.
		for _, b := range buf[:n] {
			if unicode.IsPrint(rune(b)) {
				run = append(run, b)
				continue
			}
			flush()
		}

		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
	}

	// The input may end in the middle of a printable run.
	flush()
	return result, nil
}
Loading