Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Runtime customization of byte-frequency table - Default type param version #119

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions bench/src/memmem/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ pub fn all(c: &mut Criterion) {
oneshot_iter(c);
prebuilt_iter(c);
sliceslice::all(c);
misc(c);
}

fn oneshot(c: &mut Criterion) {
Expand Down Expand Up @@ -381,3 +382,114 @@ fn prebuilt_iter(c: &mut Criterion) {
}
}
}

use memchr::memmem::HeuristicFrequencyRank;

/// Registers the miscellaneous `memmem` benchmarks: first the `Finder`
/// construction benchmarks, then the byte-frequency-table comparison.
fn misc(c: &mut Criterion) {
finder_construction(c);
byte_frequencies(c);
}

/// Benchmarks only the cost of constructing a `Finder`; no search is run.
///
/// Each needle is built twice: once through the default builder path and once
/// with the custom `Hfrx86` frequency ranking. The pair exists to catch
/// construction-time regressions when features such as a custom
/// `HeuristicFrequencyRank` are added to `Finder`.
///
/// NOTE(review): `build_heuristic` is not among the `FinderBuilder` methods
/// visible next to this benchmark (those are `heuristic(..)` followed by
/// `build_forward(..)`); confirm this matches the final API.
fn finder_construction(c: &mut Criterion) {
    const NEEDLES: [&str; 3] = ["a", "abcd", "abcdefgh12345678"];

    for needle in NEEDLES {
        let bytes = needle.as_bytes();
        let len = needle.len();

        // Default heuristic.
        let default_name =
            format!("memmem/krate/misc/construct-finder/default(len={})", len);
        define(
            c,
            &default_name,
            bytes,
            Box::new(move |b| {
                b.iter(|| {
                    memchr::memmem::FinderBuilder::new().build_forward(bytes)
                });
            }),
        );

        // Custom heuristic.
        let custom_name =
            format!("memmem/krate/misc/construct-finder/custom(len={})", len);
        define(
            c,
            &custom_name,
            bytes,
            Box::new(move |b| {
                b.iter(|| {
                    memchr::memmem::FinderBuilder::new()
                        .build_heuristic(bytes, Hfrx86)
                });
            }),
        );
    }
}

/// Benchmarks searching a binary executable with the default byte-frequency
/// table versus a table tuned for executables (`Hfrx86`).
///
/// This demonstrates the use case that motivated `HeuristicFrequencyRank`:
/// the needle is made of bytes that are rare in text but common in
/// executables, so the choice of frequency table changes which needle bytes
/// the prefilter treats as "rare".
///
/// # Panics
///
/// Panics if the running benchmark executable cannot be located or read, or
/// (inside the benchmark closures) if the needle does not occur exactly once
/// in it.
fn byte_frequencies(c: &mut Criterion) {
    // Bytes we want to scan for that are rare in strings but common in
    // executables.
    // NOTE(review): the needle ends with a literal `'` byte, unlike the
    // needle in the `HeuristicFrequencyRank` doc example; confirm that this
    // is intentional.
    const NEEDLE: &[u8] = b"\x00\x00\xdd\xdd'";

    // The haystack is the benchmark binary itself. `current_exe` is used
    // rather than `args().next()` because the first argument is not
    // guaranteed to be a usable path to the running program.
    let exe = std::env::current_exe()
        .expect("failed to determine the path of the benchmark executable");
    let corpus = std::fs::read(&exe)
        .expect("failed to read the benchmark executable");

    // Each closure must own its haystack (`define` also borrows `corpus`),
    // hence one clone per benchmark.
    let bin = corpus.clone();
    define(
        c,
        "memmem/krate/misc/frequency-table/default",
        &corpus,
        Box::new(move |b| {
            let finder =
                memchr::memmem::FinderBuilder::new().build_forward(NEEDLE);
            b.iter(|| {
                assert_eq!(1, finder.find_iter(&bin).count());
            });
        }),
    );

    let bin = corpus.clone();
    define(
        c,
        "memmem/krate/misc/frequency-table/custom",
        &corpus,
        Box::new(move |b| {
            let finder = memchr::memmem::FinderBuilder::new()
                .build_heuristic(NEEDLE, Hfrx86);
            b.iter(|| {
                assert_eq!(1, finder.find_iter(&bin).count());
            });
        }),
    );
}

/// Byte-frequency ranking suited to scanning binary executables.
struct Hfrx86;

impl Hfrx86 {
    /// Rank for every possible byte value, indexed by the byte itself
    /// (16 entries per row, so each row covers one high nibble).
    const RANKS: [u8; 256] = [
        255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17, 89, //
        58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16, //
        68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, //
        30, 11, 9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8, //
        48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, //
        27, 4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19, //
        19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, //
        24, 1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5, //
        51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, //
        15, 0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0, //
        12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, //
        9, 1, 1, 0, 2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5, //
        46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, //
        13, 3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2, //
        16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, //
        16, 3, 5, 8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175, //
    ];
}

impl HeuristicFrequencyRank for Hfrx86 {
    /// Looks up the rank of `byte` in the fixed table.
    fn rank(&self, byte: u8) -> u8 {
        Self::RANKS[usize::from(byte)]
    }
}
146 changes: 135 additions & 11 deletions src/memmem/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ impl<'n> Finder<'n> {
/// Create a new finder for the given needle.
#[inline]
pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> Finder<'n> {
FinderBuilder::new().build_forward(needle)
FinderBuilder::<DefaultHFR>::new().build_forward(needle)
}

/// Returns the index of the first occurrence of this needle in the given
Expand Down Expand Up @@ -581,7 +581,7 @@ impl<'n> FinderRev<'n> {
/// Create a new reverse finder for the given needle.
#[inline]
pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'n B) -> FinderRev<'n> {
FinderBuilder::new().build_reverse(needle)
FinderBuilder::<DefaultHFR>::new().build_reverse(needle)
}

/// Returns the index of the last occurrence of this needle in the given
Expand Down Expand Up @@ -690,17 +690,93 @@ impl<'n> FinderRev<'n> {
}
}

/// A heuristic for the relative frequency of each byte in the data being
/// searched.
///
/// Implementing this trait lets the caller customize how "rare" a byte is
/// considered to be, which can have a dramatic impact on performance depending
/// on the type of data being searched. The details of why are explained in
/// the docs of [`prefilter::Prefilter`]. To summarize, the core algorithm uses
/// a prefilter to quickly identify candidate matches that are later verified
/// more slowly. This prefilter is implemented in terms of trying to find
/// `rare` bytes at specific offsets that will occur less frequently in the
/// dataset. While the concept of a `rare` byte is similar for most datasets,
/// there are some specific datasets (like binary executables) that have
/// dramatically different byte distributions. For these datasets, customizing
/// the byte frequency heuristic can have a massive impact on performance, and
/// might even need to be done at runtime.
///
/// The default implementation of `HeuristicFrequencyRank` reads from the
/// static frequency table defined in `src/memmem/byte_frequencies.rs`. This is
/// optimal for most inputs, so if you are unsure of the impact of using a
/// custom `HeuristicFrequencyRank`, you should probably just use the default.
///
/// # Examples
///
/// ```
/// use memchr::memmem::{FinderBuilder, HeuristicFrequencyRank};
///
/// // A byte-frequency table that is good for scanning binary executables
/// struct X86;
/// impl HeuristicFrequencyRank for X86 {
///     fn rank(&self, byte: u8) -> u8 {
///         const TABLE: [u8; 256] = [
///             255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17, 89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16,
///             68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11, 9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8,
///             48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27, 4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19,
///             19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24, 1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5,
///             51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15, 0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0,
///             12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0, 2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5,
///             46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13, 3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2,
///             16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5, 8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175,
///         ];
///         TABLE[byte as usize]
///     }
/// }
/// // Create a new finder with the custom heuristic.
/// // Naming the builder through a type alias makes its default type
/// // parameter apply before the heuristic is swapped in.
/// type T = FinderBuilder;
/// let finder = T::new().heuristic(X86).build_forward(b"\x00\x00\xdd\xdd");
/// ```
pub trait HeuristicFrequencyRank {
/// Return the heuristic frequency rank of the given byte. A lower rank
/// means the byte is believed to occur less frequently.
fn rank(&self, byte: u8) -> u8;
}

/// The default byte frequency heuristic, which is good for most inputs.
///
/// Ranks are read from the crate's built-in `BYTE_FREQUENCIES` table.
///
/// The derives are deliberate: `FinderBuilder<H>` derives `Clone` and `Debug`,
/// and those derived impls only exist when `H` implements the same traits.
/// Without them, the default `FinderBuilder` would lose its `Clone` and
/// `Debug` impls.
#[derive(Clone, Copy, Debug, Default)]
pub struct DefaultHFR;

impl HeuristicFrequencyRank for DefaultHFR {
    /// Returns the rank of `byte` from the built-in frequency table.
    fn rank(&self, byte: u8) -> u8 {
        byte_frequencies::BYTE_FREQUENCIES[usize::from(byte)]
    }
}

// Forwarding impl: a shared reference to a heuristic is itself a heuristic,
// so functions taking `H` accept it either by reference or by value.
impl<H: HeuristicFrequencyRank> HeuristicFrequencyRank for &H {
    fn rank(&self, byte: u8) -> u8 {
        (**self).rank(byte)
    }
}

/// A builder for constructing non-default forward or reverse memmem finders.
///
/// A builder is primarily useful for configuring a substring searcher.
/// Currently, the only configuration exposed is the ability to disable
/// heuristic prefilters used to speed up certain searches.
#[derive(Clone, Debug, Default)]
pub struct FinderBuilder {
#[derive(Clone, Debug)]
pub struct FinderBuilder<H: HeuristicFrequencyRank = DefaultHFR> {
config: SearcherConfig,
// We use `Option<H>` to avoid `HeuristicFrequencyRank: Default`
heuristic: Option<H>,
}

impl FinderBuilder {
impl<H: HeuristicFrequencyRank> Default for FinderBuilder<H> {
    /// Returns a builder with the default searcher configuration and no
    /// custom heuristic set.
    fn default() -> Self {
        let config = SearcherConfig::default();
        Self { config, heuristic: None }
    }
}

impl<H: HeuristicFrequencyRank> FinderBuilder<H> {
/// Create a new finder builder with default settings.
pub fn new() -> FinderBuilder {
FinderBuilder::default()
Expand All @@ -709,10 +785,16 @@ impl FinderBuilder {
/// Build a forward finder using the given needle from the current
/// settings.
pub fn build_forward<'n, B: ?Sized + AsRef<[u8]>>(
&self,
self,
needle: &'n B,
) -> Finder<'n> {
Finder { searcher: Searcher::new(self.config, needle.as_ref()) }
// This can probably be improved but I stopped when I realized `FinderBuilder::new()...` was broken.
// Maybe you can figure something else out that is better.
if let Some(h) = self.heuristic {
Finder { searcher: Searcher::new_heuristic(self.config, needle.as_ref(), h) }
} else {
Finder { searcher: Searcher::new(self.config, needle.as_ref()) }
}
}

/// Build a reverse finder using the given needle from the current
Expand All @@ -728,10 +810,18 @@ impl FinderBuilder {
///
/// See the documentation for [`Prefilter`] for more discussion on why
/// you might want to configure this.
pub fn prefilter(&mut self, prefilter: Prefilter) -> &mut FinderBuilder {
pub fn prefilter(mut self, prefilter: Prefilter) -> Self {
self.config.prefilter = prefilter;
self
}

/// Set a custom heuristic for determining the frequency of a given byte in
/// the dataset.
///
/// The returned builder keeps this builder's configuration and replaces any
/// previously set heuristic.
///
/// See the documentation for [`HeuristicFrequencyRank`] for more discussion
/// on why you might want to configure this.
pub fn heuristic<U>(self, heuristic: U) -> FinderBuilder<U>
where
    U: HeuristicFrequencyRank,
{
    let config = self.config;
    FinderBuilder { config, heuristic: Some(heuristic) }
}
}

/// The internal implementation of a forward substring searcher.
Expand Down Expand Up @@ -817,15 +907,46 @@ enum SearcherKind {
}

impl<'n> Searcher<'n> {
/// Create a searcher for `needle` that uses the default byte frequency
/// heuristic.
///
/// NOTE: This method is important and must not be removed!
///
/// It is a deliberately non-generic wrapper that calls the generic
/// `Searcher::new_heuristic` with `H` hard-coded to `DefaultHFR`.
///
/// The reason is kind of obscure. As recorded by the original author:
///
/// * A call to a regular function was observed to compile to a `direct call`,
///   e.g. `call memchr::memmem::Searcher::new`. The function address is
///   embedded into the instruction, so it is fast.
/// * A call to the generic constructor was observed to compile to an
///   `indirect call`, e.g.
///   `call qword ptr [rip + memchr::memmem::Searcher::new@GOTPCREL]`. The
///   function address is loaded at runtime, so it is slower.
///
/// Because a public builder method accepts `H: HeuristicFrequencyRank` and
/// forwards it to other generic functions, the slower form was what the
/// author saw for the generic path. Providing this non-generic function
/// with a hard-coded `H` lets the default case (no custom heuristic) be
/// called directly. When constructing a `Finder` with a custom
/// `HeuristicFrequencyRank`, the generic path is used instead.
///
/// NOTE(review): this rationale comes from inspecting generated assembly;
/// the exact code generation depends on the compiler version and on
/// linkage/PIC settings. Re-verify before relying on it or before removing
/// this wrapper.
fn new(config: SearcherConfig, needle: &'n [u8]) -> Searcher<'n> {
Self::new_heuristic(config, needle, DefaultHFR)
}

fn new_heuristic<H: HeuristicFrequencyRank>(
config: SearcherConfig,
needle: &'n [u8],
heuristic: H,
) -> Searcher<'n> {
use self::SearcherKind::*;

let ninfo = NeedleInfo::new(needle);
let ninfo = NeedleInfo::new(needle, &heuristic);
let mk = |kind: SearcherKind| {
let prefn = prefilter::forward(
&config.prefilter,
&ninfo.rarebytes,
needle,
heuristic,
);
Searcher { needle: CowBytes::new(needle), ninfo, prefn, kind }
};
Expand Down Expand Up @@ -1010,9 +1131,12 @@ impl<'n> Searcher<'n> {
}

impl NeedleInfo {
pub(crate) fn new(needle: &[u8]) -> NeedleInfo {
pub(crate) fn new<H: HeuristicFrequencyRank>(
needle: &[u8],
heuristic: &H,
) -> NeedleInfo {
NeedleInfo {
rarebytes: RareNeedleBytes::forward(needle),
rarebytes: RareNeedleBytes::forward(needle, heuristic),
nhash: NeedleHash::forward(needle),
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/memmem/prefilter/fallback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ mod tests {
use super::*;

fn freqy_find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
let ninfo = NeedleInfo::new(needle);
let ninfo = NeedleInfo::new(needle, &crate::memmem::DefaultHFR);
let mut prestate = PrefilterState::new();
find(&mut prestate, &ninfo, haystack, needle)
}
Expand Down
10 changes: 7 additions & 3 deletions src/memmem/prefilter/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::memmem::{rarebytes::RareNeedleBytes, NeedleInfo};
use crate::memmem::{
rarebytes::RareNeedleBytes, HeuristicFrequencyRank, NeedleInfo,
};

mod fallback;
#[cfg(memchr_runtime_simd)]
Expand Down Expand Up @@ -287,10 +289,11 @@ impl PrefilterState {
/// is the default). In general, we try to use an AVX prefilter, followed by
/// SSE and then followed by a generic one based on memchr.
#[inline(always)]
pub(crate) fn forward(
pub(crate) fn forward<H: HeuristicFrequencyRank>(
config: &Prefilter,
rare: &RareNeedleBytes,
needle: &[u8],
heuristic: H,
) -> Option<PrefilterFn> {
if config.is_none() || needle.len() <= 1 {
return None;
Expand Down Expand Up @@ -327,7 +330,8 @@ pub(crate) fn forward(
// Check that our rarest byte has a reasonably low rank. The main issue
// here is that the fallback prefilter can perform pretty poorly if it's
// given common bytes. So we try to avoid the worst cases here.
let (rare1_rank, _) = rare.as_ranks(needle);
let (rare1, _) = rare.as_rare_bytes(needle);
let rare1_rank = heuristic.rank(rare1) as usize;
if rare1_rank <= MAX_FALLBACK_RANK {
// SAFETY: fallback::find is safe to call in all environments.
return unsafe { Some(PrefilterFn::new(fallback::find)) };
Expand Down
Loading