diff --git a/src/audit/artipacked.rs b/src/audit/artipacked.rs index 304ace91..e9493780 100644 --- a/src/audit/artipacked.rs +++ b/src/audit/artipacked.rs @@ -48,7 +48,7 @@ impl<'a> WorkflowAudit<'a> for Artipacked<'a> { Ok(Self { config }) } - fn audit<'w>(&self, workflow: &'w Workflow) -> Result>> { + fn audit<'w>(&mut self, workflow: &'w Workflow) -> Result>> { log::debug!("audit: {} evaluating {}", Self::ident(), &workflow.filename); let mut findings = vec![]; diff --git a/src/audit/hardcoded_container_credentials.rs b/src/audit/hardcoded_container_credentials.rs index 3036adc5..197e5471 100644 --- a/src/audit/hardcoded_container_credentials.rs +++ b/src/audit/hardcoded_container_credentials.rs @@ -35,7 +35,7 @@ impl<'a> WorkflowAudit<'a> for HardcodedContainerCredentials<'a> { } fn audit<'w>( - &self, + &mut self, workflow: &'w crate::models::Workflow, ) -> anyhow::Result>> { let mut findings = vec![]; diff --git a/src/audit/impostor_commit.rs b/src/audit/impostor_commit.rs index 2a86b043..1a1f8a28 100644 --- a/src/audit/impostor_commit.rs +++ b/src/audit/impostor_commit.rs @@ -5,11 +5,14 @@ //! //! [`clank`]: https://github.com/chainguard-dev/clank -use std::ops::Deref; +use std::{ + collections::{hash_map::Entry, HashMap}, + ops::Deref, +}; use crate::{ finding::{Confidence, Finding, Severity}, - github_api::{self, ComparisonStatus}, + github_api::{self, Branch, ComparisonStatus, Tag}, models::{AuditConfig, Uses, Workflow}, }; @@ -23,21 +26,84 @@ pub const IMPOSTOR_ANNOTATION: &str = "uses a commit that doesn't belong to the pub(crate) struct ImpostorCommit<'a> { pub(crate) _config: AuditConfig<'a>, pub(crate) client: github_api::Client, + pub(crate) ref_cache: HashMap<(String, String), (Vec, Vec)>, + /// A cache of `(base_ref, head_ref) => status`. + /// + /// We don't bother disambiguating this cache by org/repo, since `head_ref` + /// is a commit ref and we expect those to be globally unique. + /// This is not technically true of Git SHAs due to SHAttered, but is + /// effectively true for SHAs on GitHub due to GitHub's collision detection. + pub(crate) ref_comparison_cache: HashMap<(String, String), bool>, } impl<'a> ImpostorCommit<'a> { + fn named_refs(&mut self, uses: Uses<'_>) -> Result<(Vec, Vec)> { + let entry = match self + .ref_cache + .entry((uses.owner.to_string(), uses.repo.to_string())) + { + Entry::Occupied(o) => o.into_mut(), + Entry::Vacant(v) => { + let branches = self.client.list_branches(uses.owner, uses.repo)?; + let tags = self.client.list_tags(uses.owner, uses.repo)?; + + v.insert((branches, tags)) + } + }; + + // Dumb: we shold be able to borrow immutably here. + Ok((entry.0.clone(), entry.1.clone())) + } + + fn named_ref_contains_commit( + &mut self, + uses: &Uses<'_>, + base_ref: &str, + head_ref: &str, + ) -> Result { + let presence = match self + .ref_comparison_cache + .entry((base_ref.to_string(), head_ref.to_string())) + { + Entry::Occupied(o) => { + log::debug!("cache hit: {base_ref}..{head_ref}"); + o.into_mut() + } + Entry::Vacant(v) => { + let presence = match self + .client + .compare_commits(uses.owner, uses.repo, base_ref, head_ref)? + { + // A base ref "contains" a commit if the base is either identical + // to the head ("identical") or the target is behind the base ("behind"). + Some(comp) => matches!( + comp.status, + ComparisonStatus::Behind | ComparisonStatus::Identical + ), + // GitHub's API returns 404 when the refs under comparison + // are completely divergent, i.e. no contains relationship is possible. + None => false, + }; + + v.insert(presence) + } + }; + + Ok(*presence) + } + /// Returns a boolean indicating whether or not this commit is an "impostor", /// i.e. resolves due to presence in GitHub's fork network but is not actually /// present in any of the specified `owner/repo`'s tags or branches. - fn impostor(&self, uses: Uses<'_>) -> Result { - let branches = self.client.list_branches(uses.owner, uses.repo)?; + fn impostor(&mut self, uses: Uses<'_>) -> Result { + let (branches, tags) = self.named_refs(uses)?; // If there's no ref or the ref is not a commit, there's nothing to impersonate. let Some(head_ref) = uses.commit_ref() else { return Ok(false); }; - for branch in &branches { + for branch in branches { if self.named_ref_contains_commit( &uses, &format!("refs/heads/{}", &branch.name), @@ -47,9 +113,7 @@ impl<'a> ImpostorCommit<'a> { } } - let tags = self.client.list_tags(uses.owner, uses.repo)?; - - for tag in &tags { + for tag in tags { if self.named_ref_contains_commit( &uses, &format!("refs/tags/{}", &tag.name), @@ -61,28 +125,13 @@ impl<'a> ImpostorCommit<'a> { // If we've made it here, the commit isn't present in any commit or tag's history, // strongly suggesting that it's an impostor. + log::warn!( + "strong impostor candidate: {head_ref} for {org}/{repo}", + org = uses.owner, + repo = uses.repo + ); Ok(true) } - - fn named_ref_contains_commit( - &self, - uses: &Uses<'_>, - base_ref: &str, - head_ref: &str, - ) -> Result { - match self - .client - .compare_commits(uses.owner, uses.repo, base_ref, head_ref)? - { - // A base ref "contains" a commit if the base is either identical - // to the head ("identical") or the target is behind the base ("behind"). - Some(comparison) => Ok(matches!( - comparison.status, - ComparisonStatus::Behind | ComparisonStatus::Identical - )), - None => Ok(false), - } - } } impl<'a> WorkflowAudit<'a> for ImpostorCommit<'a> { @@ -96,10 +145,12 @@ impl<'a> WorkflowAudit<'a> for ImpostorCommit<'a> { Ok(ImpostorCommit { _config: config, client, + ref_cache: Default::default(), + ref_comparison_cache: Default::default(), }) } - fn audit<'w>(&self, workflow: &'w Workflow) -> Result>> { + fn audit<'w>(&mut self, workflow: &'w Workflow) -> Result>> { log::debug!("audit: {} evaluating {}", Self::ident(), &workflow.filename); let mut findings = vec![]; diff --git a/src/audit/mod.rs b/src/audit/mod.rs index e8f5da43..a6d80f94 100644 --- a/src/audit/mod.rs +++ b/src/audit/mod.rs @@ -28,5 +28,5 @@ pub(crate) trait WorkflowAudit<'a> { where Self: Sized; - fn audit<'w>(&self, workflow: &'w Workflow) -> Result>>; + fn audit<'w>(&mut self, workflow: &'w Workflow) -> Result>>; } diff --git a/src/audit/pull_request_target.rs b/src/audit/pull_request_target.rs index 7c269395..3a3153b2 100644 --- a/src/audit/pull_request_target.rs +++ b/src/audit/pull_request_target.rs @@ -20,7 +20,7 @@ impl<'a> WorkflowAudit<'a> for PullRequestTarget<'a> { Ok(Self { _config: config }) } - fn audit<'w>(&self, workflow: &'w Workflow) -> Result>> { + fn audit<'w>(&mut self, workflow: &'w Workflow) -> Result>> { log::debug!("audit: {} evaluating {}", Self::ident(), &workflow.filename); let trigger = &workflow.on; diff --git a/src/audit/ref_confusion.rs b/src/audit/ref_confusion.rs index 2265e5ce..1464a4b5 100644 --- a/src/audit/ref_confusion.rs +++ b/src/audit/ref_confusion.rs @@ -70,7 +70,7 @@ impl<'a> WorkflowAudit<'a> for RefConfusion<'a> { } fn audit<'w>( - &self, + &mut self, workflow: &'w crate::models::Workflow, ) -> anyhow::Result>> { log::debug!("audit: {} evaluating {}", Self::ident(), &workflow.filename); diff --git a/src/audit/template_injection.rs b/src/audit/template_injection.rs index 0921b617..20dada62 100644 --- a/src/audit/template_injection.rs +++ b/src/audit/template_injection.rs @@ -27,7 +27,7 @@ impl<'a> TemplateInjection<'a> { for expr in iter_expressions(run) { let bare = expr.as_bare(); - if bare.starts_with("secrets.") { + if bare.starts_with("secrets.") || bare == "github.token" { // While not ideal, secret expansion is typically not exploitable. continue; } else if bare.starts_with("inputs.") { @@ -70,7 +70,7 @@ impl<'a> WorkflowAudit<'a> for TemplateInjection<'a> { } fn audit<'w>( - &self, + &mut self, workflow: &'w crate::models::Workflow, ) -> anyhow::Result>> { let mut findings = vec![]; diff --git a/src/audit/use_trusted_publishing.rs b/src/audit/use_trusted_publishing.rs index b0b8a187..cc99cbaf 100644 --- a/src/audit/use_trusted_publishing.rs +++ b/src/audit/use_trusted_publishing.rs @@ -71,7 +71,7 @@ impl<'a> WorkflowAudit<'a> for UseTrustedPublishing<'a> { } fn audit<'w>( - &self, + &mut self, workflow: &'w crate::models::Workflow, ) -> anyhow::Result>> { log::debug!("audit: {} evaluating {}", Self::ident(), &workflow.filename); diff --git a/src/github_api/mod.rs b/src/github_api/mod.rs index f0980aa0..523172df 100644 --- a/src/github_api/mod.rs +++ b/src/github_api/mod.rs @@ -87,7 +87,7 @@ impl Client { head: &str, ) -> Result> { let url = format!( - "{api_base}/repos/{owner}/{repo}/compare/{base}..{head}", + "{api_base}/repos/{owner}/{repo}/compare/{base}...{head}", api_base = self.api_base ); @@ -96,7 +96,7 @@ impl Client { StatusCode::OK => Ok(Some(resp.json()?)), StatusCode::NOT_FOUND => Ok(None), s => Err(anyhow!( - "error from GitHub API while comparing commits: {s}" + "{owner}/{repo}: error from GitHub API while comparing commits: {s}" )), } } @@ -107,7 +107,7 @@ impl Client { /// This model is intentionally incomplete. /// /// See . -#[derive(Deserialize)] +#[derive(Deserialize, Clone)] pub(crate) struct Branch { pub(crate) name: String, } @@ -115,7 +115,7 @@ pub(crate) struct Branch { /// A single tag, as returned by GitHub's tags endpoints. /// /// This model is intentionally incomplete. -#[derive(Deserialize)] +#[derive(Deserialize, Clone)] pub(crate) struct Tag { pub(crate) name: String, } diff --git a/src/main.rs b/src/main.rs index 984a4653..66818268 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,7 +2,7 @@ use std::{io::stdout, path::PathBuf}; use anyhow::{anyhow, Result}; use audit::WorkflowAudit; -use clap::Parser; +use clap::{Parser, ValueEnum}; use models::AuditConfig; mod audit; @@ -26,10 +26,22 @@ struct Args { #[arg(long, env)] gh_token: String, + /// The output format to emit. By default, plain text will be emitted + /// on an interactive terminal and JSON otherwise. + #[arg(long, value_enum)] + format: Option, + /// The workflow filename or directory to audit. input: PathBuf, } +#[derive(Debug, Copy, Clone, ValueEnum)] +pub(crate) enum OutputFormat { + Plain, + Json, + Sarif, +} + impl<'a> From<&'a Args> for AuditConfig<'a> { fn from(value: &'a Args) -> Self { Self { @@ -79,18 +91,18 @@ fn main() -> Result<()> { } let mut results = vec![]; - let audits: &[&dyn WorkflowAudit] = &[ - &audit::artipacked::Artipacked::new(config)?, - &audit::pull_request_target::PullRequestTarget::new(config)?, - &audit::impostor_commit::ImpostorCommit::new(config)?, - &audit::ref_confusion::RefConfusion::new(config)?, - &audit::use_trusted_publishing::UseTrustedPublishing::new(config)?, - &audit::template_injection::TemplateInjection::new(config)?, - &audit::hardcoded_container_credentials::HardcodedContainerCredentials::new(config)?, + let audits: &mut [&mut dyn WorkflowAudit] = &mut [ + &mut audit::artipacked::Artipacked::new(config)?, + &mut audit::pull_request_target::PullRequestTarget::new(config)?, + &mut audit::impostor_commit::ImpostorCommit::new(config)?, + &mut audit::ref_confusion::RefConfusion::new(config)?, + &mut audit::use_trusted_publishing::UseTrustedPublishing::new(config)?, + &mut audit::template_injection::TemplateInjection::new(config)?, + &mut audit::hardcoded_container_credentials::HardcodedContainerCredentials::new(config)?, ]; for workflow in workflows.iter() { // TODO: Proper abstraction for multiple audits here. - for audit in audits { + for audit in audits.iter_mut() { for finding in audit.audit(workflow)? { results.push(finding); }