Skip to content

Commit

Permalink
switch to yamlpath (#14)
Browse files Browse the repository at this point in the history
  • Loading branch information
woodruffw authored Sep 8, 2024
1 parent 507ce6a commit 62f1847
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 289 deletions.
26 changes: 22 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,4 @@ serde = { version = "1.0.208", features = ["derive"] }
serde-sarif = "0.6.5"
serde_json = "1.0.125"
serde_yaml = "0.9.34"
tree-sitter = "0.22.6"
tree-sitter-yaml = "0.6.1"
yamlpath = "0.3.0"
21 changes: 5 additions & 16 deletions src/audit/excessive_permissions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,22 +98,11 @@ impl<'a> ExcessivePermissions<'a> {
) -> Vec<(Severity, Confidence, String)> {
match permissions {
Permissions::Base(base) => match base {
// If no explicit permissions are specified, our behavior
// depends on the presence of a parent (workflow) permission
// specifier.
BasePermission::Default => match parent {
// If there's a parent permissions block, this job inherits
// from it and has nothing new to report.
Some(_) => vec![],
// If there's no parent permissions block, we're at the workflow
// level and should report the default permissions as potentially
// being too broad.
None => vec![(
Severity::Medium,
Confidence::Low,
"workflow uses default permissions, which may be excessive".into(),
)],
},
// TODO: Think more about what to do here. Flagging default
// permissions is likely to be noisy and is annoying to do,
// since it involves the *absence* of a key in the YAML
// rather than its presence.
BasePermission::Default => vec![],
BasePermission::ReadAll => vec![(
Severity::Medium,
Confidence::High,
Expand Down
271 changes: 36 additions & 235 deletions src/finding/locate.rs
Original file line number Diff line number Diff line change
@@ -1,255 +1,56 @@
//! Helpers for extracting and locating concrete features in the
//! original YAML, via `yamlpath` (formerly via `tree-sitter`).
use anyhow::{Ok, Result};
use tree_sitter::{Language, Query, QueryCursor};
use anyhow::Result;

use super::{Feature, WorkflowLocation};
use super::{ConcreteLocation, Feature, WorkflowLocation};
use crate::models::Workflow;

/// `tree-sitter` query template that captures a mapping pair by key name;
/// intended for arbitrary top-level keys within a YAML stream.
///
/// The `__KEY_NAME__` placeholder must be substituted (e.g. with
/// `str::replace`) before the query is compiled. The pair whose key
/// equals that name is captured as `@mapping` and the key scalar as
/// `@key`; the pair's value must be either a block mapping or a flow node.
///
/// NOTE(review): nothing in this pattern anchors it to the document root,
/// so it presumably also matches an identically named nested key -- confirm.
const TOP_LEVEL_KEY: &str = r#"
(
(block_mapping_pair
key: (flow_node (plain_scalar (string_scalar) @key))
value: (
[
(block_node (block_mapping))
(flow_node)
]
)
) @mapping
(#eq? @key "__KEY_NAME__")
)
"#;

/// `tree-sitter` query template that captures an arbitrary job-level key,
/// i.e. a key nested as `jobs.<job name>.<key>`.
///
/// Two placeholders must be substituted before the query is compiled:
/// `__JOB_NAME__` (the job's name under `jobs`) and `__JOB_KEY__` (the key
/// inside that job). The matching innermost pair (key and value together)
/// is captured as `@job_key_value`; its value must be either a block
/// mapping or a flow node. The remaining captures (`@jobs_key`,
/// `@job_name`, `@job_key_name`) exist only to feed the `#eq?` predicates.
const JOB_LEVEL_KEY: &str = r#"
(
(block_mapping_pair
key: (flow_node (plain_scalar (string_scalar) @jobs_key))
value: (block_node
(block_mapping
(block_mapping_pair
key: (flow_node (plain_scalar (string_scalar) @job_name))
value: (block_node
(block_mapping
(block_mapping_pair
key: (flow_node (plain_scalar (string_scalar) @job_key_name))
value: (
[
(block_node (block_mapping))
(flow_node)
]
)
) @job_key_value
)
)
)
)
)
)
(#eq? @jobs_key "jobs")
(#eq? @job_name "__JOB_NAME__")
(#eq? @job_key_name "__JOB_KEY__")
)
"#;

/// `tree-sitter` query template that captures an entire workflow job,
/// including non-step keys.
///
/// The `__JOB_NAME__` placeholder must be substituted with the job's name
/// (its key under `jobs`) before the query is compiled. Two captures
/// describe the job: `@full_job` is the whole `<job name>: ...` pair, and
/// `@job_value` is only the block mapping that forms the job's body.
/// `@jobs_key` and `@job_name` exist only to feed the `#eq?` predicates.
const ENTIRE_JOB: &str = r#"
(
(block_mapping_pair
key: (flow_node (plain_scalar (string_scalar) @jobs_key))
value: (block_node
(block_mapping
(block_mapping_pair
key: (flow_node (plain_scalar (string_scalar) @job_name))
value: (block_node (block_mapping) @job_value)
) @full_job
)
)
)
(#eq? @jobs_key "jobs")
(#eq? @job_name "__JOB_NAME__")
)
"#;

/// `tree-sitter` query template that captures the sub-list of steps in a
/// particular workflow job (`jobs.<job name>.steps`).
///
/// The `__JOB_NAME__` placeholder must be substituted with the job's name
/// before the query is compiled. The whole `block_sequence` under `steps`
/// is captured as `@steps`; the leading `.` anchor requires the sequence's
/// first item to be a block mapping.
///
/// `tree-sitter` doesn't seem to have a way to match a particular index
/// (e.g. one specific step) without capturing chains of `(.)`, so this
/// query collects all steps in a job and the caller is expected to walk
/// the captured node's children to find the step it is interested in.
const ALL_STEPS_FROM_JOB: &str = r#"
(
(block_mapping_pair
key: (flow_node (plain_scalar (string_scalar) @jobs_key))
value: (block_node (block_mapping
(block_mapping_pair
key: (flow_node (plain_scalar (string_scalar) @job_name))
value: (block_node (block_mapping
(block_mapping_pair
key: (flow_node (plain_scalar (string_scalar) @steps_key))
value: (block_node (block_sequence
. (block_sequence_item
(block_node (block_mapping))
)
) @steps)
)
))
)
))
)
(#eq? @jobs_key "jobs")
(#eq? @job_name "__JOB_NAME__")
(#eq? @steps_key "steps")
)
"#;

pub(crate) struct Locator {
language: Language,
}
pub(crate) struct Locator {}

impl Locator {
pub(crate) fn new() -> Self {
Self {
language: tree_sitter_yaml::language(),
}
Self {}
}

pub(crate) fn concretize<'w>(
&self,
workflow: &'w Workflow,
location: &WorkflowLocation,
) -> Result<Feature<'w>> {
let mut cursor = QueryCursor::new();

match &location.job {
Some(job) => match &job.step {
Some(step) => {
let steps_query = Query::new(
&self.language,
&ALL_STEPS_FROM_JOB.replace("__JOB_NAME__", job.id),
)?;
let capture_index = steps_query.capture_index_for_name("steps").unwrap();

// We expect only one capture group, so we don't bother iterating.
let (group, _) = cursor
.captures(
&steps_query,
workflow.tree.root_node(),
workflow.raw.as_bytes(),
)
.next()
.expect("horrific, embarassing tree-sitter query failure");

let cap = group
.captures
.iter()
.find(|qc| qc.index == capture_index)
.unwrap();

let children = cap.node.children(&mut cap.node.walk()).collect::<Vec<_>>();
let step_node = children[step.index];

Ok(Feature {
location: step_node.into(),
feature: step_node.utf8_text(workflow.raw.as_bytes())?,
})
}
None => match job.key {
Some(key) => {
// Job with a non-step key; capture the matching key's
// span and emit it.
let job_key_query = Query::new(
&self.language,
&JOB_LEVEL_KEY
.replace("__JOB_NAME__", job.id)
.replace("__JOB_KEY__", key),
)?;

let capture_index = job_key_query
.capture_index_for_name("job_key_value")
.unwrap();

let (group, _) = cursor
.captures(
&job_key_query,
workflow.tree.root_node(),
workflow.raw.as_bytes(),
)
.next()
.expect("horrific, embarassing tree-sitter query failure");

// NOTE(ww): Empirically the captures are sometimes out
// of order here (i.e. the list and index orders don't
// match up). I'm sure there's a good reason for this, but
// it means we have to find() instead of just indexing
// via `capture_index`.
let cap = group
.captures
.iter()
.find(|qc| qc.index == capture_index)
.unwrap();

Ok(Feature {
location: cap.node.into(),
feature: cap.node.utf8_text(workflow.raw.as_bytes())?,
})
}
None => {
// Job with no interior step and no explicit key:
// capture the entire job and emit it.
let job_query = Query::new(
&self.language,
&ENTIRE_JOB.replace("__JOB_NAME__", job.id),
)?;

let (group, _) = cursor
.captures(
&job_query,
workflow.tree.root_node(),
workflow.raw.as_bytes(),
)
.next()
.expect("horrific, embarassing tree-sitter query failure");

let cap = group.captures[0];

Ok(Feature {
location: cap.node.into(),
feature: cap.node.utf8_text(workflow.raw.as_bytes())?,
})
}
},
},
None => match &location.key {
// If we're given a top-level key to isolate, query for it.
// Otherwise, return the entire workflow.
Some(key) => {
let key_query =
Query::new(&self.language, &TOP_LEVEL_KEY.replace("__KEY_NAME__", key))?;

let (group, _) = cursor
.captures(
&key_query,
workflow.tree.root_node(),
workflow.raw.as_bytes(),
)
.next()
.expect("horrific, embarassing tree-sitter query failure");
let mut path = vec![];

if let Some(job) = &location.job {
path.extend([
yamlpath::Component::Key("jobs".into()),
yamlpath::Component::Key(job.id.into()),
]);

if let Some(step) = &job.step {
path.extend([
yamlpath::Component::Key("steps".into()),
yamlpath::Component::Index(step.index),
]);
} else if let Some(key) = &job.key {
path.push(yamlpath::Component::Key(key.to_string()));
}
} else {
// Non-job top-level key.
path.push(yamlpath::Component::Key(
location
.key
.expect("API misuse: must provide key if job is not specified")
.to_string(),
));
}

let cap = dbg!(group).captures[0];
// Infallible: we always have at least one path component above.
let query = yamlpath::Query::new(path).unwrap();
let feature = workflow.document.query(&query)?;

Ok(Feature {
location: cap.node.into(),
feature: cap.node.utf8_text(workflow.raw.as_bytes())?,
})
}
None => Ok(Feature {
location: workflow.tree.root_node().into(),
feature: &workflow.raw,
}),
},
}
Ok(Feature {
location: ConcreteLocation::from(&feature.location),
feature: workflow.document.extract(&feature),
})
}
}
Loading

0 comments on commit 62f1847

Please sign in to comment.