Skip to content

Commit

Permalink
refactor: most_expensive_rules returns a list of ProfilingData st…
Browse files Browse the repository at this point in the history
…ructures.
  • Loading branch information
plusvic committed Oct 18, 2024
1 parent 9ad7ea2 commit 55b6bfb
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 37 deletions.
2 changes: 2 additions & 0 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ pub use modules::mods;
pub use scanner::MatchingRules;
pub use scanner::ModuleOutputs;
pub use scanner::NonMatchingRules;
#[cfg(feature = "rules-profiling")]
pub use scanner::ProfilingData;
pub use scanner::ScanError;
pub use scanner::ScanOptions;
pub use scanner::ScanResults;
Expand Down
66 changes: 37 additions & 29 deletions lib/src/scanner/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ use crate::re::fast::FastVM;
use crate::re::thompson::PikeVM;
use crate::re::Action;
use crate::scanner::matches::{Match, PatternMatches, UnconfirmedMatch};
#[cfg(feature = "rules-profiling")]
use crate::scanner::ProfilingData;
use crate::scanner::ScanError;
use crate::scanner::HEARTBEAT_COUNTER;
use crate::types::{Array, Map, Struct};
Expand Down Expand Up @@ -128,54 +130,60 @@ pub(crate) struct ScanContext<'r> {
#[cfg(feature = "rules-profiling")]
impl<'r> ScanContext<'r> {
/// Returns the top N most expensive rules.
pub fn most_expensive_rules(
&self,
n: usize,
) -> Vec<(&'r str, &'r str, Duration)> {
pub fn most_expensive_rules(&self, n: usize) -> Vec<ProfilingData> {
debug_assert_eq!(
self.compiled_rules.num_rules(),
self.time_spent_in_rule.len()
);

let mut result = Vec::with_capacity(self.compiled_rules.num_rules());

for (r, t) in iter::zip(
for (rule, condition_exec_time) in iter::zip(
self.compiled_rules.rules().iter(),
self.time_spent_in_rule.iter(),
) {
// `rule_time`` is initialized with the time spent in evaluating
// the rule's condition. This number is incremented by the amount
// spent in each pattern declared by the rule.
let mut rule_time = *t;

for (_, _, pattern_id) in r.patterns.iter() {
let mut pattern_matching_time = 0;
for (_, _, pattern_id) in rule.patterns.iter() {
if let Some(d) = self.time_spent_in_pattern.get(pattern_id) {
rule_time += *d;
pattern_matching_time += *d;
}
}

// Don't track rules that took less 100ms.
//if rule_time > 100_000_000 {
let rule_name =
self.compiled_rules.ident_pool().get(r.ident_id).unwrap();

let namespace_name = self
.compiled_rules
.ident_pool()
.get(r.namespace_ident_id)
.unwrap();

result.push((
namespace_name,
rule_name,
Duration::from_nanos(rule_time),
));
//}
if condition_exec_time + pattern_matching_time > 100_000_000 {
let namespace = self
.compiled_rules
.ident_pool()
.get(rule.namespace_ident_id)
.unwrap();

let rule = self
.compiled_rules
.ident_pool()
.get(rule.ident_id)
.unwrap();

result.push(ProfilingData {
namespace,
rule,
condition_exec_time: Duration::from_nanos(
*condition_exec_time,
),
pattern_matching_time: Duration::from_nanos(
pattern_matching_time,
),
});
}
}

// Sort the results by the time spent on each rule, in descending
// order.
result.sort_by(|a, b| b.2.cmp(&a.2));
result.sort_by(|a, b| {
let a_time = a.pattern_matching_time + a.condition_exec_time;
let b_time = b.pattern_matching_time + b.condition_exec_time;

b_time.cmp(&a_time)
});
result.truncate(n);
result
}
Expand Down
33 changes: 25 additions & 8 deletions lib/src/scanner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,19 @@ impl<'a> AsRef<[u8]> for ScannedData<'a> {
}
}

/// Contains information about the time spent on a rule.
#[cfg(feature = "rules-profiling")]
pub struct ProfilingData<'r> {
/// Rule namespace.
pub namespace: &'r str,
/// Rule name.
pub rule: &'r str,
/// Time spent executing the rule's condition.
pub condition_exec_time: Duration,
/// Time spent matching the rule's patterns.
pub pattern_matching_time: Duration,
}

/// Optional information for the scan operation.
#[derive(Debug, Default)]
pub struct ScanOptions<'a> {
Expand Down Expand Up @@ -530,10 +543,7 @@ impl<'r> Scanner<'r> {

/// Returns the top N most expensive rules.
#[cfg(feature = "rules-profiling")]
pub fn most_expensive_rules(
&self,
n: usize,
) -> Vec<(&'r str, &'r str, Duration)> {
pub fn most_expensive_rules(&self, n: usize) -> Vec<ProfilingData> {
self.wasm_store.data().most_expensive_rules(n)
}
}
Expand Down Expand Up @@ -743,10 +753,17 @@ impl<'r> Scanner<'r> {
let most_expensive_rules = self.most_expensive_rules(10);
if !most_expensive_rules.is_empty() {
log::info!("Most expensive rules:");
for r in most_expensive_rules {
log::info!("+ namespace: {}", r.0);
log::info!(" rule: {}", r.1);
log::info!(" time: {:?}", r.2);
for profiling_data in most_expensive_rules {
log::info!("+ namespace: {}", profiling_data.namespace);
log::info!(" rule: {}", profiling_data.rule);
log::info!(
" pattern matching time: {:?}",
profiling_data.pattern_matching_time
);
log::info!(
" condition execution time: {:?}",
profiling_data.condition_exec_time
);
}
}
}
Expand Down

0 comments on commit 55b6bfb

Please sign in to comment.