diff --git a/bench/src/memmem/mod.rs b/bench/src/memmem/mod.rs index c817415..8e4fe4a 100644 --- a/bench/src/memmem/mod.rs +++ b/bench/src/memmem/mod.rs @@ -106,6 +106,7 @@ pub fn all(c: &mut Criterion) { oneshot_iter(c); prebuilt_iter(c); sliceslice::all(c); + misc(c); } fn oneshot(c: &mut Criterion) { @@ -381,3 +382,114 @@ fn prebuilt_iter(c: &mut Criterion) { } } } + +use memchr::memmem::HeuristicFrequencyRank; + +fn misc(c: &mut Criterion) { + finder_construction(c); + byte_frequencies(c); +} + +fn finder_construction(c: &mut Criterion) { + // This benchmark is purely for measuring the time taken to create a `Finder`. + // It is here to prevent regressions when adding new features to the `Finder`, + // such as the ability to construct with a custom `HeuristicFrequencyRank`. + const NEEDLES: [&str; 3] = ["a", "abcd", "abcdefgh12345678"]; + + for needle in NEEDLES { + define( + c, + &format!( + "memmem/krate/misc/construct-finder/default(len={})", + needle.len() + ), + needle.as_bytes(), + Box::new(move |b| { + b.iter(|| { + memchr::memmem::FinderBuilder::new() + .build_forward(needle.as_bytes()) + }); + }), + ); + define( + c, + &format!( + "memmem/krate/misc/construct-finder/custom(len={})", + needle.len() + ), + needle.as_bytes(), + Box::new(move |b| { + b.iter(|| { + memchr::memmem::FinderBuilder::new() + .build_heuristic(needle.as_bytes(), Hfrx86) + }); + }), + ); + } +} + +fn byte_frequencies(c: &mut Criterion) { + // This benchmark exists to demonstrate a common use case for + // customizing the byte frequency table used by a `Finder` + // and the relative performance gain from using an optimal table. + // This is essentially why `HeuristicFrequencyRank` was added. 
+ + // Bytes we want to scan for that are rare in strings but common in executables + const NEEDLE: &[u8] = b"\x00\x00\xdd\xdd'"; + + // The input for the benchmark is the benchmark binary itself + let exe = std::env::args().next().unwrap(); + let corpus = std::fs::read(exe).unwrap(); + + let bin = corpus.clone(); + define( + c, + &format!("memmem/krate/misc/frequency-table/default"), + &corpus, + Box::new(move |b| { + let finder = + memchr::memmem::FinderBuilder::new().build_forward(NEEDLE); + b.iter(|| { + assert_eq!(1, finder.find_iter(&bin).count()); + }); + }), + ); + + let bin = corpus.clone(); + define( + c, + &format!("memmem/krate/misc/frequency-table/custom"), + &corpus, + Box::new(move |b| { + let finder = memchr::memmem::FinderBuilder::new() + .build_heuristic(NEEDLE, Hfrx86); + b.iter(|| { + assert_eq!(1, finder.find_iter(&bin).count()); + }); + }), + ); +} + +// A byte-frequency table that is good for scanning binary executables +struct Hfrx86; +impl HeuristicFrequencyRank for Hfrx86 { + fn rank(&self, byte: u8) -> u8 { + const TABLE: [u8; 256] = [ + 255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17, 89, + 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16, 68, 11, 9, + 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11, 9, 24, 11, 5, 5, + 5, 19, 11, 6, 17, 9, 9, 6, 8, 48, 58, 11, 14, 53, 40, 9, 9, 254, + 35, 3, 6, 52, 23, 6, 6, 27, 4, 7, 11, 14, 13, 10, 11, 11, 5, 2, + 10, 16, 12, 6, 19, 19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, + 8, 18, 20, 24, 1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5, + 51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15, 0, + 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, 12, 2, 1, 1, 3, 1, 1, 1, + 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0, 2, 2, 4, 4, 11, 6, 7, 3, 6, 9, + 4, 5, 46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13, + 3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2, 16, 3, 5, 1, 6, 1, + 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5, 8, 3, 1, 8, 6, 17, 6, + 5, 3, 8, 6, 13, 175, + ]; + 
TABLE[byte as usize] + } +} diff --git a/src/memmem/mod.rs b/src/memmem/mod.rs index e1cd1ae..cc01b87 100644 --- a/src/memmem/mod.rs +++ b/src/memmem/mod.rs @@ -454,7 +454,7 @@ impl<'n> Finder<'n> { /// Create a new finder for the given needle. #[inline] pub fn new>(needle: &'n B) -> Finder<'n> { - FinderBuilder::new().build_forward(needle) + FinderBuilder::::new().build_forward(needle) } /// Returns the index of the first occurrence of this needle in the given @@ -581,7 +581,7 @@ impl<'n> FinderRev<'n> { /// Create a new reverse finder for the given needle. #[inline] pub fn new>(needle: &'n B) -> FinderRev<'n> { - FinderBuilder::new().build_reverse(needle) + FinderBuilder::::new().build_reverse(needle) } /// Returns the index of the last occurrence of this needle in the given @@ -690,17 +690,93 @@ impl<'n> FinderRev<'n> { } } +/// This trait allows the user to customize the heuristic used to determine +/// the relative frequency of a given byte in the dataset being searched. +/// +/// The use of this trait can have a dramatic impact on performance depending on +/// the type of data being searched. The details of why are explained in the docs of +/// [`prefilter::Prefilter`]. To summarize, the core algorithm uses a prefilter +/// to quickly identify candidate matches that are later verified more slowly. +/// This prefilter is implemented in terms of trying to find `rare` bytes at specific offsets +/// that will occur less frequently in the dataset. While the concept of a `rare` byte is similar +/// for most datasets, there are some specific datasets (like binary executables) that +/// have dramatically different byte distributions. For these datasets customizing the +/// byte frequency heuristic can have a massive impact on performance, and might even +/// need to be done at runtime. +/// +/// The default implementation of `HeuristicFrequencyRank` reads from the static +/// frequency table defined in `src/memmem/byte_frequencies.rs`. 
+/// This is optimal for most inputs, so if you are unsure of the impact of using +/// a custom `HeuristicFrequencyRank` you should probably just use the default. +/// +/// Example: +/// ``` +/// use memchr::memmem::{FinderBuilder, HeuristicFrequencyRank}; +/// +/// // A byte-frequency table that is good for scanning binary executables +/// struct X86; +/// impl HeuristicFrequencyRank for X86 { +/// fn rank(&self, byte: u8) -> u8 { +/// const TABLE: [u8; 256] = [ +/// 255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17, 89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16, +/// 68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11, 9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8, +/// 48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27, 4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19, +/// 19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24, 1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5, +/// 51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15, 0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, +/// 12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0, 2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5, +/// 46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13, 3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2, +/// 16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5, 8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175, +/// ]; +/// TABLE[byte as usize] +/// } +/// } +/// // Create a new finder with the custom heuristic +/// type T = FinderBuilder; +/// let finder = T::new().heuristic(X86).build_forward(b"\x00\x00\xdd\xdd"); +/// ``` +pub trait HeuristicFrequencyRank { + /// Return the heuristical frequency rank of the given byte. A lower rank + /// means the byte is believed to occur less frequently. 
+ fn rank(&self, byte: u8) -> u8; +} + +/// The default byte frequency heuristic that is good for most inputs +pub struct DefaultHFR; +impl HeuristicFrequencyRank for DefaultHFR { + fn rank(&self, byte: u8) -> u8 { + byte_frequencies::BYTE_FREQUENCIES[byte as usize] + } +} + +// Allow passing `H` to related functions by reference OR move construction +impl<'a, H> HeuristicFrequencyRank for &'a H +where + H: HeuristicFrequencyRank, +{ + fn rank(&self, byte: u8) -> u8 { + H::rank(*self, byte) + } +} + /// A builder for constructing non-default forward or reverse memmem finders. /// /// A builder is primarily useful for configuring a substring searcher. /// Currently, the only configuration exposed is the ability to disable /// heuristic prefilters used to speed up certain searches. -#[derive(Clone, Debug, Default)] -pub struct FinderBuilder { +#[derive(Clone, Debug)] +pub struct FinderBuilder { config: SearcherConfig, + // We use `Option` to avoid `HeuristicFrequencyRank: Default` + heuristic: Option, } -impl FinderBuilder { +impl Default for FinderBuilder { + fn default() -> Self { + FinderBuilder{config: SearcherConfig::default(), heuristic: None} + } +} + +impl FinderBuilder { /// Create a new finder builder with default settings. pub fn new() -> FinderBuilder { FinderBuilder::default() @@ -709,10 +785,16 @@ impl FinderBuilder { /// Build a forward finder using the given needle from the current /// settings. pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>( - &self, + self, needle: &'n B, ) -> Finder<'n> { - Finder { searcher: Searcher::new(self.config, needle.as_ref()) } + // This can probably be improved but I stopped when I realized `FinderBuilder::new()...` was broken. + // Maybe you can figure something else out that is better. 
+ if let Some(h) = self.heuristic { + Finder { searcher: Searcher::new_heuristic(self.config, needle.as_ref(), h) } + } else { + Finder { searcher: Searcher::new(self.config, needle.as_ref()) } + } } /// Build a reverse finder using the given needle from the current @@ -728,10 +810,18 @@ impl FinderBuilder { /// /// See the documentation for [`Prefilter`] for more discussion on why /// you might want to configure this. - pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder { + pub fn prefilter(mut self, prefilter: Prefilter) -> Self { self.config.prefilter = prefilter; self } + + /// Set a custom heuristic for determining the frequency of a given byte in the dataset. + /// + /// See the documentation for [`HeuristicFrequencyRank`] for more discussion on why + /// you might want to configure this. + pub fn heuristic(self, heuristic: U) -> FinderBuilder { + FinderBuilder{config: self.config, heuristic: Some(heuristic)} + } } /// The internal implementation of a forward substring searcher. @@ -817,15 +907,46 @@ enum SearcherKind { } impl<'n> Searcher<'n> { + /// NOTE: This method is important and must not be removed! + /// The reason is kind of obscure, so I will try to explain. + /// + /// When Rust generates assembly for a regular function call, it generates a `direct call`. + /// In assembly, this looks something like `call memchr::memmem::Searcher::new`. + /// The function address is embedded into the instruction, so it is fast. + /// + /// When Rust generates assembly for a generic function call, it generates an `indirect call`. + /// In assembly, this looks something like `call qword ptr [rip + memchr::memmem::Searcher::new@GOTPCREL]`. + /// The function address must be calculated dynamically at runtime, so it is slower. 
+ /// + /// Since `FinderBuilder::new_heuristic` is a public method and it uses `H: HeuristicFrequencyRank`, then + /// the compiler cannot make assumptions about the value of `H`, so it will always generate an inefficient + /// indirect call for `Searcher::new`. This is true regardless of how you embed the type signature, + /// as long as a public method accepts a generic type that is then forwarded to other generic functions. + /// + /// To prevent an indirect call being generated for the default case where we want to use the default heuristic, + /// we need a non-generic function that will call the generic version with a hard-coded value for `H`. + /// This allows the compiler to generate a regular function that can be called directly. + /// + /// When constructing a `Finder` with a custom `HeuristicFrequencyRank`, + /// the use of an indirect call is unavoidable. fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> { + Self::new_heuristic(config, needle, DefaultHFR) + } + + fn new_heuristic( + config: SearcherConfig, + needle: &'n [u8], + heuristic: H, + ) -> Searcher<'n> { use self::SearcherKind::*; - let ninfo = NeedleInfo::new(needle); + let ninfo = NeedleInfo::new(needle, &heuristic); let mk = |kind: SearcherKind| { let prefn = prefilter::forward( &config.prefilter, &ninfo.rarebytes, needle, + heuristic, ); Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind } }; @@ -1010,9 +1131,12 @@ impl<'n> Searcher<'n> { } impl NeedleInfo { - pub(crate) fn new(needle: &[u8]) -> NeedleInfo { + pub(crate) fn new( + needle: &[u8], + heuristic: &H, + ) -> NeedleInfo { NeedleInfo { - rarebytes: RareNeedleBytes::forward(needle), + rarebytes: RareNeedleBytes::forward(needle, heuristic), nhash: NeedleHash::forward(needle), } } diff --git a/src/memmem/prefilter/fallback.rs b/src/memmem/prefilter/fallback.rs index ae1bbcc..8d3faed 100644 --- a/src/memmem/prefilter/fallback.rs +++ b/src/memmem/prefilter/fallback.rs @@ -93,7 +93,7 @@ mod tests { use super::*; 
fn freqy_find(haystack: &[u8], needle: &[u8]) -> Option { - let ninfo = NeedleInfo::new(needle); + let ninfo = NeedleInfo::new(needle, &crate::memmem::DefaultHFR); let mut prestate = PrefilterState::new(); find(&mut prestate, &ninfo, haystack, needle) } diff --git a/src/memmem/prefilter/mod.rs b/src/memmem/prefilter/mod.rs index 015d3b2..fb37d16 100644 --- a/src/memmem/prefilter/mod.rs +++ b/src/memmem/prefilter/mod.rs @@ -1,4 +1,6 @@ -use crate::memmem::{rarebytes::RareNeedleBytes, NeedleInfo}; +use crate::memmem::{ + rarebytes::RareNeedleBytes, HeuristicFrequencyRank, NeedleInfo, +}; mod fallback; #[cfg(memchr_runtime_simd)] @@ -287,10 +289,11 @@ impl PrefilterState { /// is the default). In general, we try to use an AVX prefilter, followed by /// SSE and then followed by a generic one based on memchr. #[inline(always)] -pub(crate) fn forward( +pub(crate) fn forward( config: &Prefilter, rare: &RareNeedleBytes, needle: &[u8], + heuristic: H, ) -> Option { if config.is_none() || needle.len() <= 1 { return None; @@ -327,7 +330,8 @@ pub(crate) fn forward( // Check that our rarest byte has a reasonably low rank. The main issue // here is that the fallback prefilter can perform pretty poorly if it's // given common bytes. So we try to avoid the worst cases here. - let (rare1_rank, _) = rare.as_ranks(needle); + let (rare1, _) = rare.as_rare_bytes(needle); + let rare1_rank = heuristic.rank(rare1) as usize; if rare1_rank <= MAX_FALLBACK_RANK { // SAFETY: fallback::find is safe to call in all environments. return unsafe { Some(PrefilterFn::new(fallback::find)) }; diff --git a/src/memmem/rarebytes.rs b/src/memmem/rarebytes.rs index fb33f68..6f02bf5 100644 --- a/src/memmem/rarebytes.rs +++ b/src/memmem/rarebytes.rs @@ -1,3 +1,5 @@ +use super::HeuristicFrequencyRank; + /// A heuristic frequency based detection of rare bytes for substring search. 
/// /// This detector attempts to pick out two bytes in a needle that are predicted @@ -44,7 +46,10 @@ impl RareNeedleBytes { /// Detect the leftmost offsets of the two rarest bytes in the given /// needle. - pub(crate) fn forward(needle: &[u8]) -> RareNeedleBytes { + pub(crate) fn forward( + needle: &[u8], + h: &H, + ) -> RareNeedleBytes { if needle.len() <= 1 || needle.len() > core::u8::MAX as usize { // For needles bigger than u8::MAX, our offsets aren't big enough. // (We make our offsets small to reduce stack copying.) @@ -62,17 +67,17 @@ impl RareNeedleBytes { // Find the rarest two bytes. We make them distinct by construction. let (mut rare1, mut rare1i) = (needle[0], 0); let (mut rare2, mut rare2i) = (needle[1], 1); - if rank(rare2) < rank(rare1) { + if h.rank(rare2) < h.rank(rare1) { core::mem::swap(&mut rare1, &mut rare2); core::mem::swap(&mut rare1i, &mut rare2i); } for (i, &b) in needle.iter().enumerate().skip(2) { - if rank(b) < rank(rare1) { + if h.rank(b) < h.rank(rare1) { rare2 = rare1; rare2i = rare1i; rare1 = b; rare1i = i as u8; - } else if b != rare1 && rank(b) < rank(rare2) { + } else if b != rare1 && h.rank(b) < h.rank(rare2) { rare2 = b; rare2i = i as u8; } @@ -119,18 +124,4 @@ impl RareNeedleBytes { pub(crate) fn as_rare_usize(&self) -> (usize, usize) { (self.rare1i as usize, self.rare2i as usize) } - - /// Return the byte frequency rank of each byte. The higher the rank, the - /// more frequency the byte is predicted to be. The needle given must be - /// the same one given to the RareNeedleBytes constructor. - pub(crate) fn as_ranks(&self, needle: &[u8]) -> (usize, usize) { - let (b1, b2) = self.as_rare_bytes(needle); - (rank(b1), rank(b2)) - } -} - -/// Return the heuristical frequency rank of the given byte. A lower rank -/// means the byte is believed to occur less frequently. -fn rank(b: u8) -> usize { - crate::memmem::byte_frequencies::BYTE_FREQUENCIES[b as usize] as usize }