diff --git a/Cargo.toml b/Cargo.toml index fae319ab..e9d2965b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ default-members = [ "crates/cli", "crates/static-analysis-kernel", "crates/static-analysis-server", - "crates/secrets" + "crates/secrets", ] resolver = "2" @@ -39,3 +39,4 @@ sha2 = "0.10.7" num_cpus = "1.15.0" tracing = "0.1.40" uuid = { version = "1.6.1", features = ["v4"] } +tree-sitter = "0.22.6" diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index aebed79c..f5a551a6 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -13,6 +13,7 @@ num_cpus,https://github.com/seanmonstar/num_cpus,MIT, Copyright (c) 2015 Sean Mc percent-encoding,https://github.com/servo/rust-url/,MIT, Copyright (c) 2013-2022 The rust-url developers prettytable-rs,https://github.com/phsym/prettytable-rs/,BSD-3-Clause,Copyright (c) 2022 Pierre-Henri Symoneaux rayon,https://crates.io/crates/rayon,MIT,Copyright (c) 2010 The Rust Project Developers +pretty_yaml,https://github.com/g-plane/pretty_yaml,MIT,Copyright (c) 2024-present Pig Fang rocket,https://github.com/SergioBenitez/Rocket,Apache-2.0,Copyright 2016 Sergio Benitez tree-sitter,https://github.com/tree-sitter/tree-sitter,MIT,2014 Max Brunsfeld sarif-rs,https://github.com/psastras/sarif-rs,MIT,Copyright (c) 2021 Paul Sastrasinh diff --git a/crates/bins/Cargo.toml b/crates/bins/Cargo.toml index 496e4d30..2f4d7e5a 100644 --- a/crates/bins/Cargo.toml +++ b/crates/bins/Cargo.toml @@ -48,9 +48,11 @@ serde_json = { workspace = true } tracing = { workspace = true } uuid = { workspace = true } indexmap = { workspace = true } -num_cpus = { workspace = true} +num_cpus = { workspace = true } serde = { workspace = true } serde_yaml = { workspace = true } +tree-sitter = { workspace = true } + # other terminal-emoji = "0.4.1" getopts = "0.2.21" @@ -59,6 +61,7 @@ rayon = "1.7.0" rocket = { version = "=0.5.0", features = ["json"] } tracing-subscriber = { version = "0.3.18", features = ["fmt", "env-filter"] } thiserror = 
"1" +pretty_yaml = "0.4" # For linux and macos, we need the vendored ssl diff --git a/crates/bins/src/bin/datadog-static-analyzer-server.rs b/crates/bins/src/bin/datadog-static-analyzer-server.rs index 3bc84e9e..6f2c222e 100644 --- a/crates/bins/src/bin/datadog-static-analyzer-server.rs +++ b/crates/bins/src/bin/datadog-static-analyzer-server.rs @@ -2,5 +2,5 @@ mod datadog_static_analyzer_server; #[rocket::main] async fn main() { - datadog_static_analyzer_server::start().await + datadog_static_analyzer_server::start().await; } diff --git a/crates/bins/src/bin/datadog-static-analyzer.rs b/crates/bins/src/bin/datadog-static-analyzer.rs index 411675c6..df2a2f38 100644 --- a/crates/bins/src/bin/datadog-static-analyzer.rs +++ b/crates/bins/src/bin/datadog-static-analyzer.rs @@ -500,7 +500,7 @@ fn main() -> Result<()> { "Analyzing {}, {} files detected", language, files_for_language.len() - ) + ); } // take the relative path for the analysis diff --git a/crates/bins/src/bin/datadog_static_analyzer_server/cli.rs b/crates/bins/src/bin/datadog_static_analyzer_server/cli.rs index 51fb4cef..3d6e0690 100644 --- a/crates/bins/src/bin/datadog_static_analyzer_server/cli.rs +++ b/crates/bins/src/bin/datadog_static_analyzer_server/cli.rs @@ -8,7 +8,7 @@ use std::{env, process, thread}; use super::state::ServerState; use super::utils::get_current_timestamp_ms; -fn print_usage(program: &str, opts: Options) { +fn print_usage(program: &str, opts: &Options) { let brief = format!("Usage: {} [options]", program); print!("{}", opts.usage(&brief)); } @@ -61,7 +61,7 @@ pub fn prepare_rocket() -> (Rocket, ServerState, Sender) { } if matches.opt_present("h") { - print_usage(&program, opts); + print_usage(&program, &opts); process::exit(0); } @@ -122,7 +122,7 @@ pub fn prepare_rocket() -> (Rocket, ServerState, Sender) { if latest_timestamp > 0 && current_timestamp > latest_timestamp + timeout_ms { eprintln!("exiting because of timeout, trying to exit gracefully"); - 
shutdown_handle.clone().notify(); + shutdown_handle.notify(); // we give 10 seconds for the process to terminate // if it does not, we abort thread::sleep(Duration::from_secs(10)); diff --git a/crates/bins/src/bin/datadog_static_analyzer_server/endpoints.rs b/crates/bins/src/bin/datadog_static_analyzer_server/endpoints.rs index 080f5f00..3d1260cd 100644 --- a/crates/bins/src/bin/datadog_static_analyzer_server/endpoints.rs +++ b/crates/bins/src/bin/datadog_static_analyzer_server/endpoints.rs @@ -149,9 +149,9 @@ async fn serve_static( /// Catches all OPTION requests in order to get the CORS related Fairing triggered. #[rocket::options("/<_..>")] -fn get_options() -> String { +const fn get_options() -> String { /* Intentionally left empty */ - "".to_string() + String::new() } /// Simple ping method that will return "pong" as response. diff --git a/crates/bins/src/bin/datadog_static_analyzer_server/fairings.rs b/crates/bins/src/bin/datadog_static_analyzer_server/fairings.rs index 4bbdc817..abd247ae 100644 --- a/crates/bins/src/bin/datadog_static_analyzer_server/fairings.rs +++ b/crates/bins/src/bin/datadog_static_analyzer_server/fairings.rs @@ -96,9 +96,9 @@ impl Fairing for KeepAlive { } } -/// Provides functionality to associate a UUIDv4 per request. +/// Provides functionality to associate a `UUIDv4` per request. /// -/// Rocket 0.5.0 does not generate per-request IDs (see: https://github.com/rwf2/Rocket/issues/21) +/// Rocket 0.5.0 does not generate per-request IDs (see: [#21](https://github.com/rwf2/Rocket/issues/21)) /// Until upstream natively supports this, we use a custom [Fairing] to generate one. 
pub struct TracingFairing; @@ -122,10 +122,10 @@ impl TraceSpan { } } -/// A newtype Option representing a [Span] that is used to conform to the [Request::local_cache] API +/// A newtype Option representing a [Span] that is used to conform to the [`Request::local_cache`] API struct FairingTraceSpan(Option); -/// A newtype Option representing a [String] request ID that is used to conform to the [Request::local_cache] API +/// A newtype Option representing a [String] request ID that is used to conform to the [`Request::local_cache`] API struct FairingRequestId(Option); #[rocket::async_trait] @@ -141,8 +141,7 @@ impl Fairing for TracingFairing { let request_id = req .headers() .get_one("X-Request-Id") - .map(String::from) - .unwrap_or_else(|| Uuid::new_v4().to_string()); + .map_or_else(|| Uuid::new_v4().to_string(), String::from); let request_span = tracing::info_span!( "http_request", diff --git a/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/comment_preserver/mod.rs b/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/comment_preserver/mod.rs new file mode 100644 index 00000000..9d6e08c7 --- /dev/null +++ b/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/comment_preserver/mod.rs @@ -0,0 +1,3 @@ +mod models; +mod reconciler; +pub use reconciler::{prettify_yaml, reconcile_comments, ReconcileError}; diff --git a/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/comment_preserver/models.rs b/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/comment_preserver/models.rs new file mode 100644 index 00000000..50a1b7a8 --- /dev/null +++ b/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/comment_preserver/models.rs @@ -0,0 +1,24 @@ +#[derive(Debug, Clone, Default)] +pub struct Line { + pub row: usize, + pub content: String, +} + +impl Line { + pub const fn new(row: usize, content: String) -> Self { + Self { row, content } + } +} 
+ +#[derive(Debug, Clone)] +pub enum Comment { + Inline { + line: Line, + original_content: String, + }, + Block { + line: Line, + above_line: Option, + below_line: Option, + }, +} diff --git a/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/comment_preserver/reconciler.rs b/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/comment_preserver/reconciler.rs new file mode 100644 index 00000000..c8554613 --- /dev/null +++ b/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/comment_preserver/reconciler.rs @@ -0,0 +1,594 @@ +use std::collections::HashSet; + +use super::models::{Comment, Line}; +use kernel::analysis::tree_sitter::get_tree; +use kernel::model::common::Language; +use thiserror::Error; +use tree_sitter::Node; + +#[derive(Debug, Error)] +#[error("Reconciler error")] +pub struct ReconcileError { + #[from] + pub source: anyhow::Error, +} + +pub fn prettify_yaml(content: &str) -> Result { + let options = pretty_yaml::config::FormatOptions::default(); + pretty_yaml::format_text(content, &options).map_err(|e| ReconcileError { + source: anyhow::anyhow!("Failed to format the content: {}", e), + }) +} + +/// This is a best-effort comment reconciler, which uses a simple diff algorithm to try to locate +/// comments in the new content. +/// +/// The algorithm applies for the majority of the cases affecting the static analysis configuration files but has some limitations: +/// +/// * Repeated elements with `inline` comments may lead to false positives in some edge cases (see mode `inline_limitations` in tests for more info) +/// * If the original content uses a different syntax than the one emitted by the serializer, we may not be able to determine the location of those comments (e.g. dictionaries and list can be represented in an abbreviated form) +/// +/// +/// # Returns +/// +/// If successful, it returns a `Result` containing a `String`. 
The `String` is the reconciled configuration +/// file content. +/// +/// # Errors +/// +/// This function will return an error of type `ReconcileError` if: +/// +/// * There's an issue getting the tree-sitter tree +/// * There's an issue trying to apply the format to the reconciled yaml content +/// +pub fn reconcile_comments( + original_content: &str, + new_content: &str, + prettify: bool, +) -> Result { + let original_content = original_content.trim(); + let new_content = new_content.trim(); + // parse the original content and look for comments + let tree = get_tree(original_content, &Language::Yaml).ok_or_else(|| { + anyhow::anyhow!("Failed to parse the original content with the tree-sitter parser") + })?; + + let root_node = tree.root_node(); + + let mut comments = vec![]; + let mut visited = HashSet::new(); + extract_comments_from_node(root_node, original_content, &mut comments, &mut visited); + + let reconciled_content = reconcile(new_content, &comments); + + // make it pretty? + if prettify { + prettify_yaml(&reconciled_content) + } else { + Ok(reconciled_content) + } +} + +fn get_related_comments<'a>( + next: Option>, + source: &str, + visited: &mut HashSet>, + comment: &mut String, +) -> Option> { + next.and_then(|next| { + if next.kind() == "comment" { + // get the comment + let content = &source[next.start_byte()..next.end_byte()]; + *comment = format!("{comment}\n{content}"); + visited.insert(next); + get_related_comments(next.next_sibling(), source, visited, comment) + } else { + Some(next) + } + }) +} + +fn extract_comments_from_node<'a>( + node: Node<'a>, + source: &str, + comments: &mut Vec, + visited: &mut HashSet>, +) { + if node.kind() == "comment" { + if !visited.insert(node) { + return; + } + + let start_byte = node.start_byte(); + let end_byte = node.end_byte(); + let comment = &source[start_byte..end_byte]; + let row = node.start_position().row; + + let prev_sibling = node.prev_sibling(); + + let final_comment = if prev_sibling + .filter(|p| 
p.start_position().row == row) + .is_some() + { + Comment::Inline { + line: Line::new(row, comment.to_string()), + original_content: get_line_content(source, start_byte).trim().to_string(), + } + } else { + // this can be a multiline comment + // let's keep adding lines until next_sibling is not comment + let mut comment = comment.to_string(); + let last_node = + get_related_comments(node.next_sibling(), source, visited, &mut comment); + Comment::Block { + line: Line::new(row, comment), + above_line: prev_sibling.map(|prev| { + let content = get_line_content(source, prev.end_byte()); + Line::new(row, content.to_string()) + }), + below_line: last_node.map(|next| { + let content = get_line_content(source, next.start_byte()); + Line::new(next.start_position().row, content.to_string()) + }), + } + }; + + comments.push(final_comment); + } + + for child in node.children(&mut node.walk()) { + extract_comments_from_node(child, source, comments, visited); + } +} + +fn reconcile(modified: &str, comments: &[Comment]) -> String { + let mut lines: Vec = modified.lines().map(ToString::to_string).collect(); + + for comment in comments { + match comment { + Comment::Inline { + line, + original_content, + } => manage_inline_comment(&mut lines, line, original_content), + + Comment::Block { + line, + above_line, + below_line, + } => manage_block_comment(&mut lines, line, above_line, below_line), + } + } + // rejoin the lines again + lines.join("\n") +} + +fn starts_with_and_no_comment(text: &str, pat: &str) -> bool { + let trimmed = text.trim(); + trimmed.starts_with(pat) && !trimmed.contains(" #") +} + +fn manage_inline_comment(lines: &mut [String], line: &Line, original_content: &str) { + // for comments added to a node line, we can detect the row and the original content, and then just go to that line, + // if the content of the line is the same as the original content, we can add the comment to the end of the line. 
+ // if the content of the line is different, we have to look for the original content in the document (as it may have been moved) + // if we find it, we add the comment to the end of the line, if we don't find it, we ignore the comment. + let current_content = &lines.get(line.row); + + if current_content + .filter(|c| starts_with_and_no_comment(c, original_content)) + .is_some() + { + // line is ok, just add the comment + let comment_added = format!("{} {}", lines[line.row], line.content.clone()); + lines[line.row] = comment_added; + } else { + // content is different, try to find the original content in another line + if let Some((row, found_line)) = lines + .iter() + .enumerate() + .find(|(_, l)| starts_with_and_no_comment(l, original_content)) + { + // we found it, add the comment + let comment_added = format!("{} {}", found_line, line.content.clone()); + lines[row] = comment_added; + } else { + // ignore comment (or add it to the original line?) + } + } +} + +fn manage_block_comment( + lines: &mut Vec, + line: &Line, + above_line: &Option, + below_line: &Option, +) { + // block comment + // we check the line in the original content, if the content is the same and the line above and below are the same, we add the comment. + match (above_line, below_line) { + (Some(above_line), Some(below_line)) => { + // iterate from the start and try to find a couple of lines + let (trimmed_above, trimmed_below) = + (above_line.content.trim(), below_line.content.trim()); + let found = lines.iter().enumerate().find(|(i, l)| { + lines.get(i + 1).map_or(false, |next| { + l.trim().starts_with(trimmed_above) && next.trim().starts_with(trimmed_below) + }) + }); + if let Some(found) = found { + // add the comment in the line below + lines.insert(found.0 + 1, line.content.clone()); + } else { + // if not found, some lines may have been inserted in between. 
+ // as most usually comments are placed above the line or in the line (not usually below) + // we will test for the below line + search_and_insert_if_found(&below_line.content, lines, &line.content); + } + } + (Some(above_line), None) => { + // most probably was the last line + // start searching from the end + let trimmed = above_line.content.trim(); + let found = lines + .iter() + .enumerate() + .rev() + .find(|(_, l)| l.trim().starts_with(trimmed)); + if let Some(found) = found { + // add the comment in the line below + lines.insert(found.0 + 1, line.content.clone()); + } + } + (None, Some(below_line)) => { + // most probably was the first line + // start searching from the beginning + search_and_insert_if_found(&below_line.content, lines, &line.content); + } + (None, None) => { + // we have a block comment with no context, just add it to the line + // NOTE: potentially do nothing, this case should not happen + lines.insert(line.row, line.content.clone()); + } + } +} + +fn search_and_insert_if_found(content: &str, lines: &mut Vec, comment: &str) { + let trimmed = content.trim(); + let found = lines + .iter() + .enumerate() + .find(|(_, l)| l.trim().starts_with(trimmed)); + + if let Some(found) = found { + // add the comment in the line + lines.insert(found.0, comment.to_owned()); + } +} + +fn get_line_content(source: &str, byte_offset: usize) -> &str { + let start = source[..byte_offset].rfind('\n').map_or(0, |pos| pos + 1); + let end = source[byte_offset..]
+ .find('\n') + .map_or(source.len(), |pos| byte_offset + pos); + let content = &source[start..end]; + content + .find(" #") + .map_or(content, |index| content[..index].trim_end()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works_for_inline_comments() { + let original_content = r#" +schema-version: v1 +rulesets: + - java-security + - java-1 # inline comment for java-1 + - ruleset1: + rules: + rule2: # this is rule 2 comment + only: + - foo/bar + rule1: + ignore: + - "**" +"#; + + let modified = r#" +schema-version: v1 +rulesets: + - java-security + - java-1 + - java-2 + - ruleset1: + rules: + rule2: + only: + - foo/bar + rule3: + ignore: + - "**" +"#; + + let expected = r#" +schema-version: v1 +rulesets: + - java-security + - java-1 # inline comment for java-1 + - java-2 + - ruleset1: + rules: + rule2: # this is rule 2 comment + only: + - foo/bar + rule3: + ignore: + - "**" +"#; + + let result = reconcile_comments(original_content, modified, true).unwrap(); + assert_eq!(result.trim(), expected.trim()); + } + + #[test] + fn it_works_for_block_comments() { + let original_content = r#" +schema-version: v1 +rulesets: + # this is a comment above java-security + - java-security + - java-1 + # multi1 + # multi2 + # multi3 + # multi4 + - ruleset1: + rules: + rule2: + only: + - foo/bar + rule1: + ignore: + - "**" +"#; + + let modified = r#" +schema-version: v1 +rulesets: + - java-0 + - java-security + - java-1 + - java-2 + - ruleset1: + rules: + rule2: + only: + - foo/bar + rule3: + ignore: + - "**" +"#; + + let expected = r#" +schema-version: v1 +rulesets: + - java-0 + # this is a comment above java-security + - java-security + - java-1 + - java-2 + # multi1 + # multi2 + # multi3 + # multi4 + - ruleset1: + rules: + rule2: + only: + - foo/bar + rule3: + ignore: + - "**" +"#; + + let result = reconcile_comments(original_content, modified, true).unwrap(); + assert_eq!(result.trim(), expected.trim()); + } + + #[test] + fn it_works_mixed() { + let 
original_content = r#" +schema-version: v1 +rulesets: + # this is a comment above java-security + - java-security + - java-1 # inline comment for java-1 + # multi1 + # multi2 + # multi3 + # multi4 + - ruleset1: + rules: + rule2: # this is rule 2 comment + only: + - foo/bar + rule1: + ignore: + - "**" +"#; + + let modified = r#" +schema-version: v1 +rulesets: + - java-0 + - java-security + - java-1 + - java-2 + - ruleset1: + rules: + rule2: + only: + - foo/bar + rule3: + ignore: + - "**" +"#; + + let expected = r#" +schema-version: v1 +rulesets: + - java-0 + # this is a comment above java-security + - java-security + - java-1 # inline comment for java-1 + - java-2 + # multi1 + # multi2 + # multi3 + # multi4 + - ruleset1: + rules: + rule2: # this is rule 2 comment + only: + - foo/bar + rule3: + ignore: + - "**" +"#; + + let result = reconcile_comments(original_content, modified, true).unwrap(); + assert_eq!(result.trim(), expected.trim()); + } + + #[test] + fn it_works_for_repeated_keys_with_inline_comments_if_no_additions_before_comment_occurrence_case_1( + ) { + let original_content = r#" +schema-version: v1 +rulesets: + - rule-1 # inline 1 + - rule-1 # inline 2 +"#; + + let modified = r#" +schema-version: v1 +rulesets: + - rule-1 + - rule-1 + - rule-2 +"#; + + let expected = r#" +schema-version: v1 +rulesets: + - rule-1 # inline 1 + - rule-1 # inline 2 + - rule-2 +"#; + + let result = reconcile_comments(original_content, modified, true).unwrap(); + assert_eq!(result.trim(), expected.trim()); + } + + #[test] + fn it_works_for_repeated_keys_with_inline_comments_if_no_additions_before_comment_occurrence_case_2( + ) { + let original_content = r#" +schema-version: v1 +rulesets: + - java-1 # inline comment 1 + - java-1 + - java-1 # inline comment 2 +"#; + + let modified = r#" +schema-version: v1 +rulesets: + - java-1 + - java-1 + - java-1 + - java-2 +"#; + + let expected = r#" +schema-version: v1 +rulesets: + - java-1 # inline comment 1 + - java-1 + - java-1 # inline 
comment 2 + - java-2 +"#; + + let result = reconcile_comments(original_content, modified, true).unwrap(); + assert_eq!(result.trim(), expected.trim()); + } + + #[test] + fn it_does_work_for_repeated_keys_with_inline_comments_if_additions_before_comment_occurrence() + { + let original_content = r#" +schema-version: v1 +rulesets: + - java-1 # inline comment 1 + - java-1 # inline comment 2 +"#; + + let modified = r#" +schema-version: v1 +rulesets: + - java-0 + - java-1 + - java-1 +"#; + + let expected = r#" +schema-version: v1 +rulesets: + - java-0 + - java-1 # inline comment 1 + - java-1 # inline comment 2 +"#; + + let result = reconcile_comments(original_content, modified, true).unwrap(); + assert_eq!(result.trim(), expected.trim()); + } + + mod inline_limitations { + use super::*; + + #[test] + fn it_does_not_work_for_repeated_keys_with_inline_comments_case_1() { + let original_content = r#" +schema-version: v1 +rulesets: + - java-1 # inline comment 1 + - java-1 + - java-1 # inline comment 2 +"#; + + let modified = r#" +schema-version: v1 +rulesets: + - java-0 + - java-1 + - java-1 + - java-1 +"#; + + let expected = r#" +schema-version: v1 +rulesets: + - java-0 + - java-1 # inline comment 1 + - java-1 + - java-1 # inline comment 2 +"#; + + let result = reconcile_comments(original_content, modified, true).unwrap(); + assert_ne!(result.trim(), expected.trim()); + } + } +} diff --git a/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/error.rs b/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/error.rs index 25f26a1c..d1a1d5e2 100644 --- a/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/error.rs +++ b/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/error.rs @@ -1,3 +1,4 @@ +use super::comment_preserver::ReconcileError; use rocket::{http::ContentType, response::Responder, Response}; use serde_json::json; use thiserror::Error; @@ -12,6 +13,12 @@ pub enum ConfigFileError 
{ }, #[error("Error decoding base64 string")] Decoder { source: anyhow::Error }, + + #[error("Error reconciling comments")] + CommentReconciler { + #[from] + source: ReconcileError, + }, } impl From for ConfigFileError { @@ -33,10 +40,11 @@ impl<'r> Responder<'r, 'static> for ConfigFileError { } impl ConfigFileError { - pub fn code(&self) -> u16 { + pub const fn code(&self) -> u16 { match self { Self::Parser { .. } => 1, Self::Decoder { .. } => 2, + Self::CommentReconciler { .. } => 3, } } } diff --git a/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/mod.rs b/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/mod.rs index 2dd9ef32..027f3b90 100644 --- a/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/mod.rs +++ b/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/mod.rs @@ -1,3 +1,4 @@ +mod comment_preserver; pub mod endpoints; mod error; mod models; diff --git a/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/static_analysis_config_file.rs b/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/static_analysis_config_file.rs index 22d9f489..39882461 100644 --- a/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/static_analysis_config_file.rs +++ b/crates/bins/src/bin/datadog_static_analyzer_server/ide/configuration_file/static_analysis_config_file.rs @@ -1,5 +1,7 @@ +use super::comment_preserver::{prettify_yaml, reconcile_comments}; use super::error::ConfigFileError; use indexmap::IndexMap; +use itertools::Itertools; use kernel::config_file::config_file_to_yaml; use kernel::{ config_file::parse_config_file, @@ -10,11 +12,17 @@ use std::{borrow::Cow, fmt::Debug, ops::Deref}; use tracing::instrument; #[derive(Debug, Default, Clone, PartialEq)] -pub struct StaticAnalysisConfigFile(ConfigFile); +pub struct StaticAnalysisConfigFile { + config_file: ConfigFile, + original_content: Option, +} impl From 
for StaticAnalysisConfigFile { fn from(value: ConfigFile) -> Self { - Self(value) + Self { + config_file: value, + original_content: None, + } } } @@ -27,14 +35,17 @@ impl TryFrom for StaticAnalysisConfigFile { return Ok(Self::default()); } let config = parse_config_file(&content)?; - Ok(config.into()) + Ok(Self { + config_file: config, + original_content: Some(content), + }) } } impl Deref for StaticAnalysisConfigFile { type Target = ConfigFile; fn deref(&self) -> &Self::Target { - &self.0 + &self.config_file } } @@ -104,7 +115,7 @@ impl StaticAnalysisConfigFile { #[instrument(skip(self))] pub fn ignore_rule(&mut self, rule: Cow) { - let config = &mut self.0; + let config = &mut self.config_file; if let Some((ruleset_name, rule_name)) = rule.split_once('/') { // the ruleset may exist and contain other rules so we // can't update it blindly @@ -168,14 +179,12 @@ impl StaticAnalysisConfigFile { rulesets: &[impl AsRef + Debug], config_content_base64: Option, ) -> Result { - let mut config = if config_content_base64.is_some() { - Self::try_from(config_content_base64.unwrap()).map_err(|e| { + let mut config = config_content_base64.map_or(Ok(Self::default()), |content| { + Self::try_from(content).map_err(|e| { tracing::error!(error =?e, "Error trying to parse config file"); e }) - } else { - Ok(Self(ConfigFile::default())) - }?; + })?; config.add_rulesets(rulesets); config.to_string().map_err(|e| { @@ -186,7 +195,7 @@ impl StaticAnalysisConfigFile { #[instrument(skip(self))] pub fn add_rulesets(&mut self, rulesets: &[impl AsRef + Debug]) { - let config = &mut self.0; + let config = &mut self.config_file; for ruleset in rulesets { if !config.rulesets.contains_key(ruleset.as_ref()) { config @@ -233,11 +242,43 @@ impl StaticAnalysisConfigFile { } /// Serializes the `StaticAnalysisConfigFile` into a YAML string. + /// + /// # Returns + /// + /// This function will try to prettify/format the yaml and preserve the existing comments. 
+ /// If it fails to do so, it will return a raw yaml with the default format and without comments. + /// + /// # Errors + /// + /// Returns a `ConfigFileError` if something goes wrong. + #[instrument(skip(self))] pub fn to_string(&self) -> Result { let str = config_file_to_yaml(self)?; - // fix null maps - let fixed = str.replace(": null", ":"); - Ok(fixed) + // fix null maps, note that str will not have comments and it will be using the default serde format. + let fixed = str + .lines() + .map(|l| { + if l.ends_with(": null") { + l.replace(": null", ":") + } else { + l.to_string() + } + }) + .join("\n"); + + // reconcile and format + // NOTE: if this fails, we're going to return the content + // and swallow the error. + self.original_content + .as_ref() + .map_or_else( + || prettify_yaml(&fixed), + |original_content| reconcile_comments(original_content, &fixed, true), + ) + .or_else(|e| { + tracing::debug!(error = ?e, "Reconciliation or formatting error: {}", e.to_string()); + Ok::(fixed) + }) } } @@ -271,20 +312,20 @@ rulesets: #[test] fn it_works_complex() { let content = to_encoded_content( - r" + r#" schema-version: v1 rulesets: -- java-security -- java-1 -- ruleset1: - rules: - rule2: + - java-security + - java-1 + - ruleset1: + rules: + rule2: only: - - foo/bar - rule1: + - foo/bar + rule1: ignore: - - '**' -", + - "**" +"#, ); let rulesets = StaticAnalysisConfigFile::to_rulesets(content); assert_eq!(rulesets, vec!["java-security", "java-1", "ruleset1"]); @@ -341,9 +382,9 @@ rulesets: let expected = r" schema-version: v1 rulesets: -- ruleset1 -- ruleset2 -- a-ruleset3 + - ruleset1 + - ruleset2 + - a-ruleset3 "; assert_eq!(config.trim(), expected.trim()); } @@ -358,7 +399,7 @@ rulesets: let expected = r" schema-version: v1 rulesets: -- ruleset1 + - ruleset1 "; assert_eq!(config.trim(), expected.trim()); } @@ -382,11 +423,11 @@ rulesets: let expected = r" schema-version: v1 rulesets: -- java-security -- java-1 -- ruleset1 -- ruleset2 -- a-ruleset3 + - 
java-security + - java-1 + - ruleset1 + - ruleset2 + - a-ruleset3 "; assert_eq!(config.trim(), expected.trim()); @@ -395,20 +436,20 @@ rulesets: #[test] fn it_works_complex() { let content = to_encoded_content( - r" + r#" schema-version: v1 rulesets: -- java-security -- java-1 -- ruleset1: - rules: - rule2: - only: - - foo/bar - rule1: - ignore: - - '**' -", + - java-security + - java-1 + - ruleset1: + rules: + rule2: + only: + - foo/bar + rule1: + ignore: + - "**" +"#, ); let config = StaticAnalysisConfigFile::with_added_rulesets( &["ruleset1", "ruleset2", "a-ruleset3"], @@ -416,22 +457,22 @@ rulesets: ) .unwrap(); - let expected = r" + let expected = r#" schema-version: v1 rulesets: -- java-security -- java-1 -- ruleset1: - rules: - rule2: - only: - - foo/bar - rule1: - ignore: - - '**' -- ruleset2 -- a-ruleset3 -"; + - java-security + - java-1 + - ruleset1: + rules: + rule2: + only: + - foo/bar + rule1: + ignore: + - "**" + - ruleset2 + - a-ruleset3 +"#; assert_eq!(config.trim(), expected.trim()); } @@ -474,17 +515,17 @@ rulesets: StaticAnalysisConfigFile::with_ignored_rule("ruleset1/rule1".into(), content) .unwrap(); - let expected = r" + let expected = r#" schema-version: v1 rulesets: -- java-1 -- java-security -- ruleset1: - rules: - rule1: - ignore: - - '**' -"; + - java-1 + - java-security + - ruleset1: + rules: + rule1: + ignore: + - "**" +"#; assert_eq!(config.trim(), expected.trim()); } @@ -503,16 +544,17 @@ rulesets: StaticAnalysisConfigFile::with_ignored_rule("ruleset1/rule1".into(), content) .unwrap(); - let expected = r" + let expected = r#" schema-version: v1 rulesets: -- java-1 -- java-security -- ruleset1: - rules: - rule1: - ignore: - - '**'"; + - java-1 + - java-security + - ruleset1: + rules: + rule1: + ignore: + - "**" +"#; assert_eq!(config.trim(), expected.trim()); } @@ -536,18 +578,19 @@ rulesets: StaticAnalysisConfigFile::with_ignored_rule("ruleset1/rule1".into(), content) .unwrap(); - let expected = r" + let expected = r#" schema-version: 
v1 rulesets: -- java-1 -- java-security -- ruleset1: - rules: - rule1: - only: - - foo/bar - ignore: - - '**'"; + - java-1 + - java-security + - ruleset1: + rules: + rule1: + only: + - foo/bar + ignore: + - "**" +"#; assert_eq!(config.trim(), expected.trim()); } @@ -571,21 +614,21 @@ rulesets: StaticAnalysisConfigFile::with_ignored_rule("ruleset1/rule1".into(), content) .unwrap(); - let expected = r" + let expected = r#" schema-version: v1 rulesets: -- java-1 -- java-security -- ruleset1: - rules: - rule2: - only: - - foo/bar - rule1: - ignore: - - '**' -"; - assert!(config.trim() == expected.trim()); + - java-1 + - java-security + - ruleset1: + rules: + rule2: + only: + - foo/bar + rule1: + ignore: + - "**" +"#; + assert_eq!(config.trim(), expected.trim()); } #[test] @@ -623,19 +666,19 @@ rulesets: StaticAnalysisConfigFile::with_ignored_rule("ruleset1/rule1".into(), content) .unwrap(); - let expected = r" + let expected = r#" schema-version: v1 rulesets: -- java-security -- java-1 -- ruleset1: - rules: - rule2: - severity: ERROR - rule1: - ignore: - - '**' -"; + - java-security + - java-1 + - ruleset1: + rules: + rule2: + severity: ERROR + rule1: + ignore: + - "**" +"#; assert_eq!(config.trim(), expected.trim()); } @@ -659,18 +702,18 @@ rulesets: StaticAnalysisConfigFile::with_ignored_rule("ruleset1/rule2".into(), content) .unwrap(); - let expected = r" + let expected = r#" schema-version: v1 rulesets: -- java-security -- java-1 -- ruleset1: - rules: - rule2: - ignore: - - '**' - severity: ERROR -"; + - java-security + - java-1 + - ruleset1: + rules: + rule2: + ignore: + - "**" + severity: ERROR +"#; assert_eq!(config.trim(), expected.trim()); } @@ -773,4 +816,42 @@ rulesets: let config = super::StaticAnalysisConfigFile::try_from(String::new()).unwrap(); assert_eq!(config, expected); } + + #[test] + fn it_removes_null_on_maps_only() { + let content = to_encoded_content( + r#" +schema-version: v1 +rulesets: +- java-security +- java-1 +- ruleset1: + rules: + rule2: 
+ only: + - "foo/bar: null" +"#, + ); + let config = super::StaticAnalysisConfigFile::with_added_rulesets( + &["ruleset1", "ruleset2", "a-ruleset3"], + Some(content), + ) + .unwrap(); + + let expected = r#" +schema-version: v1 +rulesets: + - java-security + - java-1 + - ruleset1: + rules: + rule2: + only: + - "foo/bar: null" + - ruleset2 + - a-ruleset3 +"#; + + assert_eq!(config.trim(), expected.trim()); + } } diff --git a/crates/bins/src/bin/datadog_static_analyzer_server/ide/mod.rs b/crates/bins/src/bin/datadog_static_analyzer_server/ide/mod.rs index eaa5339e..b4b6d179 100644 --- a/crates/bins/src/bin/datadog_static_analyzer_server/ide/mod.rs +++ b/crates/bins/src/bin/datadog_static_analyzer_server/ide/mod.rs @@ -1,3 +1,5 @@ +#![allow(clippy::module_name_repetitions)] + mod configuration_file; use rocket::Route; diff --git a/crates/cli/src/datadog_utils.rs b/crates/cli/src/datadog_utils.rs index fc6a3438..53cb78b4 100644 --- a/crates/cli/src/datadog_utils.rs +++ b/crates/cli/src/datadog_utils.rs @@ -287,7 +287,8 @@ pub fn get_all_default_rulesets(use_staging: bool, debug: bool) -> Result bool { match &self.ignore_file { AllRules => {