Skip to content

Commit

Permalink
fix(python): skip dev group's deps for poetry (#8106)
Browse files Browse the repository at this point in the history
Signed-off-by: nikpivkin <nikita.pivkin@smartforce.io>
  • Loading branch information
nikpivkin authored Dec 23, 2024
1 parent 7558df7 commit a034d26
Show file tree
Hide file tree
Showing 11 changed files with 650 additions and 50 deletions.
15 changes: 2 additions & 13 deletions pkg/dependency/parser/python/poetry/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ package poetry

import (
"sort"
"strings"

"github.com/BurntSushi/toml"
"golang.org/x/xerrors"

version "github.com/aquasecurity/go-pep440-version"
"github.com/aquasecurity/trivy/pkg/dependency"
"github.com/aquasecurity/trivy/pkg/dependency/parser/python"
ftypes "github.com/aquasecurity/trivy/pkg/fanal/types"
"github.com/aquasecurity/trivy/pkg/log"
xio "github.com/aquasecurity/trivy/pkg/x/io"
Expand Down Expand Up @@ -105,7 +105,7 @@ func (p *Parser) parseDependencies(deps map[string]any, pkgVersions map[string][
}

func (p *Parser) parseDependency(name string, versRange any, pkgVersions map[string][]string) (string, error) {
name = NormalizePkgName(name)
name = python.NormalizePkgName(name)
vers, ok := pkgVersions[name]
if !ok {
return "", xerrors.Errorf("no version found for %q", name)
Expand Down Expand Up @@ -149,17 +149,6 @@ func matchVersion(currentVersion, constraint string) (bool, error) {
return c.Check(v), nil
}

// NormalizePkgName normalizes the package name based on pep-0426
func NormalizePkgName(name string) string {
// The package names don't use `_`, `.` or upper case, but dependency names can contain them.
// We need to normalize those names.
// cf. https://peps.python.org/pep-0426/#name
name = strings.ToLower(name) // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L819
name = strings.ReplaceAll(name, "_", "-") // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L50
name = strings.ReplaceAll(name, ".", "-") // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L816
return name
}

func packageID(name, ver string) string {
return dependency.ID(ftypes.Poetry, name, ver)
}
12 changes: 6 additions & 6 deletions pkg/dependency/parser/python/poetry/parse_testcase.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ package poetry
import ftypes "github.com/aquasecurity/trivy/pkg/fanal/types"

var (
// docker run --name pipenv --rm -it python@sha256:e1141f10176d74d1a0e87a7c0a0a5a98dd98ec5ac12ce867768f40c6feae2fd9 sh
// docker run --name poetry --rm -it python@sha256:e1141f10176d74d1a0e87a7c0a0a5a98dd98ec5ac12ce867768f40c6feae2fd9 sh
// apk add curl
// curl -sSL https://install.python-poetry.org | python3 -
// curl -sSL https://install.python-poetry.org | POETRY_VERSION=1.1.7 python3 -
// export PATH=/root/.local/bin:$PATH
// poetry new normal && cd normal
// poetry add pypi@2.1
Expand All @@ -14,9 +14,9 @@ var (
{ID: "pypi@2.1", Name: "pypi", Version: "2.1"},
}

// docker run --name pipenv --rm -it python@sha256:e1141f10176d74d1a0e87a7c0a0a5a98dd98ec5ac12ce867768f40c6feae2fd9 sh
// docker run --name poetry --rm -it python@sha256:e1141f10176d74d1a0e87a7c0a0a5a98dd98ec5ac12ce867768f40c6feae2fd9 sh
// apk add curl
// curl -sSL https://install.python-poetry.org | python3 -
// curl -sSL https://install.python-poetry.org | POETRY_VERSION=1.1.7 python3 -
// export PATH=/root/.local/bin:$PATH
// poetry new many && cd many
// curl -o poetry.lock https://raw.githubusercontent.com/python-poetry/poetry/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock
Expand Down Expand Up @@ -108,9 +108,9 @@ var (
{ID: "xattr@0.10.1", DependsOn: []string{"cffi@1.15.1"}},
}

// docker run --name pipenv --rm -it python@sha256:e1141f10176d74d1a0e87a7c0a0a5a98dd98ec5ac12ce867768f40c6feae2fd9 sh
// docker run --name poetry --rm -it python@sha256:e1141f10176d74d1a0e87a7c0a0a5a98dd98ec5ac12ce867768f40c6feae2fd9 sh
// apk add curl
// curl -sSL https://install.python-poetry.org | python3 -
// curl -sSL https://install.python-poetry.org | POETRY_VERSION=1.1.7 python3 -
// export PATH=/root/.local/bin:$PATH
// poetry new web && cd web
// poetry add flask@1.0.3
Expand Down
30 changes: 26 additions & 4 deletions pkg/dependency/parser/python/pyproject/pyproject.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ import (
"io"

"github.com/BurntSushi/toml"
"github.com/samber/lo"
"golang.org/x/xerrors"

"github.com/aquasecurity/trivy/pkg/dependency/parser/python"
)

type PyProject struct {
Expand All @@ -16,7 +19,26 @@ type Tool struct {
}

type Poetry struct {
Dependencies map[string]any `toml:"dependencies"`
Dependencies dependencies `toml:"dependencies"`
Groups map[string]Group `toml:"group"`
}

type Group struct {
Dependencies dependencies `toml:"dependencies"`
}

type dependencies map[string]struct{}

func (d *dependencies) UnmarshalTOML(data any) error {
m, ok := data.(map[string]any)
if !ok {
return xerrors.Errorf("dependencies must be map, but got: %T", data)
}

*d = lo.MapEntries(m, func(pkgName string, _ any) (string, struct{}) {
return python.NormalizePkgName(pkgName), struct{}{}
})
return nil
}

// Parser parses pyproject.toml defined in PEP518.
Expand All @@ -28,10 +50,10 @@ func NewParser() *Parser {
return &Parser{}
}

func (p *Parser) Parse(r io.Reader) (map[string]any, error) {
func (p *Parser) Parse(r io.Reader) (PyProject, error) {
var conf PyProject
if _, err := toml.NewDecoder(r).Decode(&conf); err != nil {
return nil, xerrors.Errorf("toml decode error: %w", err)
return PyProject{}, xerrors.Errorf("toml decode error: %w", err)
}
return conf.Tool.Poetry.Dependencies, nil
return conf, nil
}
37 changes: 22 additions & 15 deletions pkg/dependency/parser/python/pyproject/pyproject_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,33 @@ func TestParser_Parse(t *testing.T) {
tests := []struct {
name string
file string
want map[string]any
want pyproject.PyProject
wantErr assert.ErrorAssertionFunc
}{
{
name: "happy path",
file: "testdata/happy.toml",
want: map[string]any{
"flask": "^1.0",
"python": "^3.9",
"requests": map[string]any{
"version": "2.28.1",
"optional": true,
},
"virtualenv": []any{
map[string]any{
"version": "^20.4.3,!=20.4.5,!=20.4.6",
},
map[string]any{
"version": "<20.16.6",
"markers": "sys_platform == 'win32' and python_version == '3.9'",
want: pyproject.PyProject{
Tool: pyproject.Tool{
Poetry: pyproject.Poetry{
Dependencies: map[string]struct{}{
"flask": {},
"python": {},
"requests": {},
"virtualenv": {},
},
Groups: map[string]pyproject.Group{
"dev": {
Dependencies: map[string]struct{}{
"pytest": {},
},
},
"lint": {
Dependencies: map[string]struct{}{
"ruff": {},
},
},
},
},
},
},
Expand Down
7 changes: 7 additions & 0 deletions pkg/dependency/parser/python/pyproject/testdata/happy.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ virtualenv = [

[tool.poetry.dev-dependencies]

[tool.poetry.group.dev.dependencies]
pytest = "8.3.4"


[tool.poetry.group.lint.dependencies]
ruff = "0.8.3"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
14 changes: 14 additions & 0 deletions pkg/dependency/parser/python/python.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package python

import "strings"

// NormalizePkgName normalizes the package name based on pep-0426
func NormalizePkgName(name string) string {
// The package names don't use `_`, `.` or upper case, but dependency names can contain them.
// We need to normalize those names.
// cf. https://peps.python.org/pep-0426/#name
name = strings.ToLower(name) // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L819
name = strings.ReplaceAll(name, "_", "-") // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L50
name = strings.ReplaceAll(name, ".", "-") // e.g. https://github.com/python-poetry/poetry/blob/c8945eb110aeda611cc6721565d7ad0c657d453a/poetry.lock#L816
return name
}
39 changes: 39 additions & 0 deletions pkg/dependency/parser/python/python_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package python_test

import (
"testing"

"github.com/stretchr/testify/assert"

"github.com/aquasecurity/trivy/pkg/dependency/parser/python"
)

func Test_NormalizePkgName(t *testing.T) {
tests := []struct {
pkgName string
expected string
}{
{
pkgName: "SecretStorage",
expected: "secretstorage",
},
{
pkgName: "pywin32-ctypes",
expected: "pywin32-ctypes",
},
{
pkgName: "jaraco.classes",
expected: "jaraco-classes",
},
{
pkgName: "green_gdk",
expected: "green-gdk",
},
}

for _, tt := range tests {
t.Run(tt.pkgName, func(t *testing.T) {
assert.Equal(t, tt.expected, python.NormalizePkgName(tt.pkgName))
})
}
}
58 changes: 46 additions & 12 deletions pkg/fanal/analyzer/language/python/poetry/poetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func (a poetryAnalyzer) parsePoetryLock(path string, r io.Reader) (*types.Applic
func (a poetryAnalyzer) mergePyProject(fsys fs.FS, dir string, app *types.Application) error {
// Parse pyproject.toml to identify the direct dependencies
path := filepath.Join(dir, types.PyProject)
p, err := a.parsePyProject(fsys, path)
project, err := a.parsePyProject(fsys, path)
if errors.Is(err, fs.ErrNotExist) {
// Assume all the packages are direct dependencies as it cannot identify them from poetry.lock
a.logger.Debug("pyproject.toml not found", log.FilePath(path))
Expand All @@ -105,34 +105,68 @@ func (a poetryAnalyzer) mergePyProject(fsys fs.FS, dir string, app *types.Applic

// Identify the direct/transitive dependencies
for i, pkg := range app.Packages {
if _, ok := p[pkg.Name]; ok {
if _, ok := project.Tool.Poetry.Dependencies[pkg.Name]; ok {
app.Packages[i].Relationship = types.RelationshipDirect
} else {
app.Packages[i].Indirect = true
app.Packages[i].Relationship = types.RelationshipIndirect
}
}

filterProdPackages(project, app)
return nil
}

func (a poetryAnalyzer) parsePyProject(fsys fs.FS, path string) (map[string]any, error) {
func filterProdPackages(project pyproject.PyProject, app *types.Application) {
packages := lo.SliceToMap(app.Packages, func(pkg types.Package) (string, types.Package) {
return pkg.ID, pkg
})

visited := make(map[string]struct{})
deps := project.Tool.Poetry.Dependencies

for group, groupDeps := range project.Tool.Poetry.Groups {
if group == "dev" {
continue
}
deps = lo.Assign(deps, groupDeps.Dependencies)
}

for _, pkg := range packages {
if _, prodDep := deps[pkg.Name]; !prodDep {
continue
}
walkPackageDeps(pkg.ID, packages, visited)
}

app.Packages = lo.Filter(app.Packages, func(pkg types.Package, _ int) bool {
_, ok := visited[pkg.ID]
return ok
})
}

func walkPackageDeps(pkgID string, packages map[string]types.Package, visited map[string]struct{}) {
if _, ok := visited[pkgID]; ok {
return
}
visited[pkgID] = struct{}{}
for _, dep := range packages[pkgID].DependsOn {
walkPackageDeps(dep, packages, visited)
}
}

func (a poetryAnalyzer) parsePyProject(fsys fs.FS, path string) (pyproject.PyProject, error) {
// Parse pyproject.toml
f, err := fsys.Open(path)
if err != nil {
return nil, xerrors.Errorf("file open error: %w", err)
return pyproject.PyProject{}, xerrors.Errorf("file open error: %w", err)
}
defer f.Close()

parsed, err := a.pyprojectParser.Parse(f)
project, err := a.pyprojectParser.Parse(f)
if err != nil {
return nil, err
return pyproject.PyProject{}, err
}

// Packages from `pyproject.toml` can use uppercase characters, `.` and `_`.
parsed = lo.MapKeys(parsed, func(_ any, pkgName string) string {
return poetry.NormalizePkgName(pkgName)
})

return parsed, nil
return project, nil
}
Loading

0 comments on commit a034d26

Please sign in to comment.