From 2faf031a2786d750d81d8a3d882050535f7b7dcf Mon Sep 17 00:00:00 2001 From: Eric Ortega <24722023+ejortega@users.noreply.github.com> Date: Fri, 3 May 2024 14:42:56 -0500 Subject: [PATCH] Improve `go.sum` parser (#1411) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Filter out modules containing /go.mod from the go.sum * Fix warning for unused import * Update changelog * Clippy fixes * Update CHANGELOG.md Co-authored-by: Christian Dürr <102963075+cd-work@users.noreply.github.com> --------- Co-authored-by: Christian Dürr <102963075+cd-work@users.noreply.github.com> --- CHANGELOG.md | 4 ++++ cli/src/deno.rs | 4 ++-- lockfile/src/golang.rs | 6 +++--- lockfile/src/parse_depfile.rs | 4 +++- lockfile/src/parsers/go_sum.rs | 22 ++++++++++++---------- 5 files changed, 24 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 91601ec27..24d86e5ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
## Unreleased +### Changed + +- Improved `go.sum` file parsing to prevent the parser from listing unused packages + ## 6.3.0 - 2024-04-18 ### Fixed diff --git a/cli/src/deno.rs b/cli/src/deno.rs index 6a595db00..8b03f2c20 100644 --- a/cli/src/deno.rs +++ b/cli/src/deno.rs @@ -209,8 +209,8 @@ impl ModuleLoader for ExtensionsModuleLoader { if should_transpile { let transpiled = - source_mapper.transpile(module_specifier.to_string(), code, media_type)?; - code = transpiled.text.clone(); + source_mapper.transpile(module_specifier.to_string(), &code, media_type)?; + code.clone_from(&transpiled.text); } else { source_mapper.source_cache.insert(module_specifier.to_string(), code.clone()); } diff --git a/lockfile/src/golang.rs b/lockfile/src/golang.rs index 9fcf576c0..68bdaaaa3 100644 --- a/lockfile/src/golang.rs +++ b/lockfile/src/golang.rs @@ -47,7 +47,7 @@ mod tests { #[test] fn parse_go_sum() { let pkgs = GoSum.parse(include_str!("../../tests/fixtures/go.sum")).unwrap(); - assert_eq!(pkgs.len(), 1711); + assert_eq!(pkgs.len(), 674); let expected_pkgs = [ Package { @@ -56,8 +56,8 @@ mod tests { package_type: PackageType::Golang, }, Package { - name: "sourcegraph.com/sourcegraph/appdash".into(), - version: PackageVersion::FirstParty("v0.0.0-20190731080439-ebfcffb1b5c0".into()), + name: "sigs.k8s.io/yaml".into(), + version: PackageVersion::FirstParty("v1.2.0".into()), package_type: PackageType::Golang, }, ]; diff --git a/lockfile/src/parse_depfile.rs b/lockfile/src/parse_depfile.rs index 2ee8cb28d..effad99e5 100644 --- a/lockfile/src/parse_depfile.rs +++ b/lockfile/src/parse_depfile.rs @@ -1,7 +1,9 @@ //! Parse generic dependency files. 
use std::path::{Path, PathBuf}; -use anyhow::{anyhow, Context}; +#[cfg(feature = "generator")] +use anyhow::anyhow; +use anyhow::Context; use phylum_types::types::package::PackageDescriptor; use serde::{Deserialize, Serialize}; diff --git a/lockfile/src/parsers/go_sum.rs b/lockfile/src/parsers/go_sum.rs index 7e764ef46..549312b38 100644 --- a/lockfile/src/parsers/go_sum.rs +++ b/lockfile/src/parsers/go_sum.rs @@ -2,19 +2,23 @@ use nom::branch::alt; use nom::bytes::complete::{tag, take_until}; use nom::character::complete::{alphanumeric1, line_ending, space0, space1}; use nom::combinator::{opt, recognize}; -use nom::multi::many1; +use nom::multi::{many0, many1}; use nom::sequence::{preceded, tuple}; use phylum_types::types::package::PackageType; -use super::IResult; +use crate::parsers::IResult; use crate::{Package, PackageVersion}; pub fn parse(input: &str) -> IResult<&str, Vec> { - let (input, mut pkgs) = many1(package)(input)?; + let (input, pkgs) = many0(package)(input)?; - // Filter duplicate packages. - pkgs.sort_unstable(); - pkgs.dedup(); + let pkgs = pkgs + .into_iter() + .filter(|p| match &p.version { + PackageVersion::FirstParty(v) => !v.ends_with("/go.mod"), + _ => false, + }) + .collect(); Ok((input, pkgs)) } @@ -45,15 +49,13 @@ fn package_version(input: &str) -> IResult<&str, &str> { // Take away any leading whitespace. let (input, _) = space0(input)?; - // Accept all of `v[a-zA-Z0-9.+-]+` as valid version characters. + // Accept all of `v[a-zA-Z0-9.+-]+` with an optional "/go.mod" suffix. let (input, version) = recognize(tuple(( tag("v"), many1(alt((alphanumeric1, tag("."), tag("-"), tag("+")))), + opt(tag("/go.mod")), )))(input)?; - // Strip `/go.mod` suffix. - let (input, _) = opt(tag("/go.mod"))(input)?; - // Expect at least one whitespace after version. let (input, _) = space1(input)?;