Skip to content

Commit

Permalink
Reintroduce the reverse suffix literal optimization.
Browse files Browse the repository at this point in the history
It's too good to pass up. This time, we avoid quadratic behavior
with a simple work-around: we limit the amount of reverse searching
we do after having found a literal match. If the reverse search ends
at the beginning of its search text (whether a match or not), then we
stop the reverse suffix optimization and fall back to the standard forward
search.

This reverts commit 50d991e.

# Conflicts:
#	src/exec.rs
  • Loading branch information
BurntSushi authored and SeanRBurton committed May 20, 2016
1 parent 9fc0ac8 commit 14a8989
Show file tree
Hide file tree
Showing 6 changed files with 396 additions and 188 deletions.
2 changes: 1 addition & 1 deletion bench/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ fn main() {
.unwrap_or_else(|e| e.exit());

let mmap = Mmap::open_path(&args.arg_file, Protection::Read).unwrap();
let haystack = unsafe { str::from_utf8(mmap.as_slice()).unwrap() };
let haystack = unsafe { str::from_utf8_unchecked(mmap.as_slice()) };

println!("{}", args.count(&haystack));
}
Expand Down
8 changes: 8 additions & 0 deletions bench/src/misc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ bench_match!(long_needle2, r"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbba", {
repeat("b").take(100_000).collect::<String>() + "a"
});

// This benchmark specifically targets the "reverse suffix literal"
// optimization. In particular, it is easy for a naive implementation to
// take quadratic worst case time. This benchmark provides a case for such
// a scenario.
bench_not_match!(reverse_suffix_no_quadratic, r"[r-z].*bcdefghijklmnopq", {
repeat("bcdefghijklmnopq").take(500).collect::<String>()
});

#[cfg(feature = "re-rust")]
#[bench]
fn replace_all(b: &mut Bencher) {
Expand Down
45 changes: 33 additions & 12 deletions src/dfa.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ pub struct Fsm<'a> {
#[derive(Clone, Debug)]
pub enum Result<T> {
Match(T),
NoMatch,
NoMatch(usize),
Quit,
}

Expand All @@ -223,7 +223,28 @@ impl<T> Result<T> {
pub fn is_match(&self) -> bool {
match *self {
Result::Match(_) => true,
Result::NoMatch | Result::Quit => false,
Result::NoMatch(_) | Result::Quit => false,
}
}

/// Maps the given function onto T and returns the result.
///
/// If this is a match, `f` is applied to the matched value and its output
/// is wrapped in a new `Result::Match`. If this isn't a match, then this is
/// a no-op: `NoMatch` keeps its recorded position and `Quit` stays `Quit`.
///
/// `f` is invoked at most once, so any `FnOnce` closure is accepted (every
/// `FnMut` closure still satisfies this bound).
pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Result<U> {
    match self {
        Result::Match(t) => Result::Match(f(t)),
        // The non-match position is carried over unchanged.
        Result::NoMatch(x) => Result::NoMatch(x),
        Result::Quit => Result::Quit,
    }
}

/// Overwrites the position recorded by a non-match with `at`.
///
/// Any value other than `NoMatch` is handed back untouched.
fn set_non_match(self, at: usize) -> Result<T> {
    if let Result::NoMatch(_) = self {
        return Result::NoMatch(at);
    }
    self
}
}
Expand Down Expand Up @@ -465,7 +486,7 @@ impl<'a> Fsm<'a> {
state_flags,
) {
None => return Result::Quit,
Some(STATE_DEAD) => return Result::NoMatch,
Some(STATE_DEAD) => return Result::NoMatch(at),
Some(si) => si,
};
debug_assert!(dfa.start != STATE_UNKNOWN);
Expand Down Expand Up @@ -498,7 +519,7 @@ impl<'a> Fsm<'a> {
state_flags,
) {
None => return Result::Quit,
Some(STATE_DEAD) => return Result::NoMatch,
Some(STATE_DEAD) => return Result::NoMatch(at),
Some(si) => si,
};
debug_assert!(dfa.start != STATE_UNKNOWN);
Expand Down Expand Up @@ -532,7 +553,7 @@ impl<'a> Fsm<'a> {
state_flags,
) {
None => return Result::Quit,
Some(STATE_DEAD) => return Result::NoMatch,
Some(STATE_DEAD) => return Result::NoMatch(at),
Some(si) => si,
};
debug_assert!(dfa.start != STATE_UNKNOWN);
Expand Down Expand Up @@ -601,7 +622,7 @@ impl<'a> Fsm<'a> {
// reported as an index to the most recent byte that resulted in a
// transition to a match state and is always stored in capture slot `1`
// when searching forwards. Its maximum value is `text.len()`.
let mut result = Result::NoMatch;
let mut result = Result::NoMatch(self.at);
let (mut prev_si, mut next_si) = (self.start, self.start);
let mut at = self.at;
while at < text.len() {
Expand Down Expand Up @@ -690,7 +711,7 @@ impl<'a> Fsm<'a> {
next_si &= !STATE_START;
prev_si = next_si;
at = match self.prefix_at(text, at) {
None => return Result::NoMatch,
None => return Result::NoMatch(text.len()),
Some(i) => i,
};
} else if next_si >= STATE_UNKNOWN {
Expand All @@ -711,7 +732,7 @@ impl<'a> Fsm<'a> {
self.at = at;
next_si = match self.next_state(qcur, qnext, prev_si, byte) {
None => return Result::Quit,
Some(STATE_DEAD) => return result,
Some(STATE_DEAD) => return result.set_non_match(at),
Some(si) => si,
};
debug_assert!(next_si != STATE_UNKNOWN);
Expand All @@ -735,7 +756,7 @@ impl<'a> Fsm<'a> {
prev_si &= STATE_MAX;
prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) {
None => return Result::Quit,
Some(STATE_DEAD) => return result,
Some(STATE_DEAD) => return result.set_non_match(text.len()),
Some(si) => si & !STATE_START,
};
debug_assert!(prev_si != STATE_UNKNOWN);
Expand All @@ -762,7 +783,7 @@ impl<'a> Fsm<'a> {
// N.B. The code duplication here is regrettable. Efforts to improve
// it without sacrificing performance are welcome. ---AG
debug_assert!(self.prog.is_reverse);
let mut result = Result::NoMatch;
let mut result = Result::NoMatch(self.at);
let (mut prev_si, mut next_si) = (self.start, self.start);
let mut at = self.at;
while at > 0 {
Expand Down Expand Up @@ -816,7 +837,7 @@ impl<'a> Fsm<'a> {
self.at = at;
next_si = match self.next_state(qcur, qnext, prev_si, byte) {
None => return Result::Quit,
Some(STATE_DEAD) => return result,
Some(STATE_DEAD) => return result.set_non_match(at),
Some(si) => si,
};
debug_assert!(next_si != STATE_UNKNOWN);
Expand All @@ -837,7 +858,7 @@ impl<'a> Fsm<'a> {
// Run the DFA once more on the special EOF sentinel value.
prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) {
None => return Result::Quit,
Some(STATE_DEAD) => return result,
Some(STATE_DEAD) => return result.set_non_match(0),
Some(si) => si,
};
debug_assert!(prev_si != STATE_UNKNOWN);
Expand Down
Loading

0 comments on commit 14a8989

Please sign in to comment.