Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: minor refactor to string_search.h #20546

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 54 additions & 91 deletions src/string_search.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,11 @@

#include "node_internals.h"
#include <string.h>
#include <algorithm>

namespace node {
namespace stringsearch {


// Returns the maximum of the two parameters.
template <typename T>
T Max(T a, T b) {
return a < b ? b : a;
}


static const uint32_t kMaxOneByteCharCodeU = 0xff;

template <typename T>
Expand Down Expand Up @@ -98,7 +91,9 @@ class StringSearchBase {
template <typename Char>
class StringSearch : private StringSearchBase {
public:
explicit StringSearch(Vector<const Char> pattern)
typedef stringsearch::Vector<const Char> Vector;

explicit StringSearch(Vector pattern)
: pattern_(pattern), start_(0) {
if (pattern.length() >= kBMMaxShift) {
start_ = pattern.length() - kBMMaxShift;
Expand All @@ -108,17 +103,17 @@ class StringSearch : private StringSearchBase {
CHECK_GT(pattern_length, 0);
if (pattern_length < kBMMinPatternLength) {
if (pattern_length == 1) {
strategy_ = &SingleCharSearch;
strategy_ = &StringSearch::SingleCharSearch;
return;
}
strategy_ = &LinearSearch;
strategy_ = &StringSearch::LinearSearch;
return;
}
strategy_ = &InitialSearch;
strategy_ = &StringSearch::InitialSearch;
}

size_t Search(Vector<const Char> subject, size_t index) {
return strategy_(this, subject, index);
size_t Search(Vector subject, size_t index) {
return (this->*strategy_)(subject, index);
}

static inline int AlphabetSize() {
Expand All @@ -136,31 +131,12 @@ class StringSearch : private StringSearchBase {
}

private:
typedef size_t (*SearchFunction)(
StringSearch<Char>*,
Vector<const Char>,
size_t);

static size_t SingleCharSearch(StringSearch<Char>* search,
Vector<const Char> subject,
size_t start_index);

static size_t LinearSearch(StringSearch<Char>* search,
Vector<const Char> subject,
size_t start_index);

static size_t InitialSearch(StringSearch<Char>* search,
Vector<const Char> subject,
size_t start_index);

static size_t BoyerMooreHorspoolSearch(
StringSearch<Char>* search,
Vector<const Char> subject,
size_t start_index);

static size_t BoyerMooreSearch(StringSearch<Char>* search,
Vector<const Char> subject,
size_t start_index);
typedef size_t (StringSearch::*SearchFunction)(Vector, size_t);
size_t SingleCharSearch(Vector subject, size_t start_index);
size_t LinearSearch(Vector subject, size_t start_index);
size_t InitialSearch(Vector subject, size_t start_index);
size_t BoyerMooreHorspoolSearch(Vector subject, size_t start_index);
size_t BoyerMooreSearch(Vector subject, size_t start_index);

void PopulateBoyerMooreHorspoolTable();

Expand Down Expand Up @@ -197,7 +173,7 @@ class StringSearch : private StringSearchBase {
}

// The pattern to search for.
Vector<const Char> pattern_;
Vector pattern_;
// Pointer to implementation of the search.
SearchFunction strategy_;
// Cache value of Max(0, pattern_length() - kBMMaxShift)
Expand All @@ -213,8 +189,8 @@ inline T AlignDown(T value, U alignment) {


inline uint8_t GetHighestValueByte(uint16_t character) {
return Max(static_cast<uint8_t>(character & 0xFF),
static_cast<uint8_t>(character >> 8));
return std::max(static_cast<uint8_t>(character & 0xFF),
static_cast<uint8_t>(character >> 8));
}


Expand Down Expand Up @@ -319,11 +295,10 @@ inline size_t FindFirstCharacter(Vector<const uint8_t> pattern,

template <typename Char>
size_t StringSearch<Char>::SingleCharSearch(
StringSearch<Char>* search,
Vector<const Char> subject,
Vector subject,
size_t index) {
CHECK_EQ(1, search->pattern_.length());
return FindFirstCharacter(search->pattern_, subject, index);
CHECK_EQ(1, pattern_.length());
return FindFirstCharacter(pattern_, subject, index);
}

//---------------------------------------------------------------------
Expand All @@ -333,22 +308,19 @@ size_t StringSearch<Char>::SingleCharSearch(
// Simple linear search for short patterns. Never bails out.
template <typename Char>
size_t StringSearch<Char>::LinearSearch(
StringSearch<Char>* search,
Vector<const Char> subject,
Vector subject,
size_t index) {
Vector<const Char> pattern = search->pattern_;
CHECK_GT(pattern.length(), 1);
const size_t pattern_length = pattern.length();
const size_t n = subject.length() - pattern_length;
CHECK_GT(pattern_.length(), 1);
const size_t n = subject.length() - pattern_.length();
for (size_t i = index; i <= n; i++) {
i = FindFirstCharacter(pattern, subject, i);
i = FindFirstCharacter(pattern_, subject, i);
if (i == subject.length())
return subject.length();
CHECK_LE(i, n);

bool matches = true;
for (size_t j = 1; j < pattern_length; j++) {
if (pattern[j] != subject[i + j]) {
for (size_t j = 1; j < pattern_.length(); j++) {
if (pattern_[j] != subject[i + j]) {
matches = false;
break;
}
Expand All @@ -366,19 +338,17 @@ size_t StringSearch<Char>::LinearSearch(

template <typename Char>
size_t StringSearch<Char>::BoyerMooreSearch(
StringSearch<Char>* search,
Vector<const Char> subject,
Vector subject,
size_t start_index) {
Vector<const Char> pattern = search->pattern_;
const size_t subject_length = subject.length();
const size_t pattern_length = pattern.length();
const size_t pattern_length = pattern_.length();
// Only preprocess at most kBMMaxShift last characters of pattern.
size_t start = search->start_;
size_t start = start_;

int* bad_char_occurrence = search->bad_char_table();
int* good_suffix_shift = search->good_suffix_shift_table();
int* bad_char_occurrence = bad_char_table();
int* good_suffix_shift = good_suffix_shift_table();

Char last_char = pattern[pattern_length - 1];
Char last_char = pattern_[pattern_length - 1];
size_t index = start_index;
// Continue search from i.
while (index <= subject_length - pattern_length) {
Expand All @@ -391,7 +361,7 @@ size_t StringSearch<Char>::BoyerMooreSearch(
return subject.length();
}
}
while (pattern[j] == (c = subject[index + j])) {
while (pattern_[j] == (c = subject[index + j])) {
if (j == 0) {
return index;
}
Expand Down Expand Up @@ -420,7 +390,6 @@ size_t StringSearch<Char>::BoyerMooreSearch(
template <typename Char>
void StringSearch<Char>::PopulateBoyerMooreTable() {
const size_t pattern_length = pattern_.length();
Vector<const Char> pattern = pattern_;
// Only look at the last kBMMaxShift characters of pattern (from start_
// to pattern_length).
const size_t start = start_;
Expand Down Expand Up @@ -448,8 +417,8 @@ void StringSearch<Char>::PopulateBoyerMooreTable() {
{
size_t i = pattern_length;
while (i > start) {
Char c = pattern[i - 1];
while (suffix <= pattern_length && c != pattern[suffix - 1]) {
Char c = pattern_[i - 1];
while (suffix <= pattern_length && c != pattern_[suffix - 1]) {
if (static_cast<size_t>(shift_table[suffix]) == length) {
shift_table[suffix] = suffix - i;
}
Expand All @@ -458,7 +427,7 @@ void StringSearch<Char>::PopulateBoyerMooreTable() {
suffix_table[--i] = --suffix;
if (suffix == pattern_length) {
// No suffix to extend, so we check against last_char only.
while ((i > start) && (pattern[i - 1] != last_char)) {
while ((i > start) && (pattern_[i - 1] != last_char)) {
if (static_cast<size_t>(shift_table[pattern_length]) == length) {
shift_table[pattern_length] = pattern_length - i;
}
Expand Down Expand Up @@ -489,17 +458,15 @@ void StringSearch<Char>::PopulateBoyerMooreTable() {

template <typename Char>
size_t StringSearch<Char>::BoyerMooreHorspoolSearch(
StringSearch<Char>* search,
Vector<const Char> subject,
Vector subject,
size_t start_index) {
Vector<const Char> pattern = search->pattern_;
const size_t subject_length = subject.length();
const size_t pattern_length = pattern.length();
int* char_occurrences = search->bad_char_table();
const size_t pattern_length = pattern_.length();
int* char_occurrences = bad_char_table();
int64_t badness = -pattern_length;

// How bad we are doing without a good-suffix table.
Char last_char = pattern[pattern_length - 1];
Char last_char = pattern_[pattern_length - 1];
int last_char_shift =
pattern_length - 1 -
CharOccurrence(char_occurrences, static_cast<Char>(last_char));
Expand All @@ -519,7 +486,7 @@ size_t StringSearch<Char>::BoyerMooreHorspoolSearch(
}
}
j--;
while (pattern[j] == (subject[index + j])) {
while (pattern_[j] == (subject[index + j])) {
if (j == 0) {
return index;
}
Expand All @@ -532,9 +499,9 @@ size_t StringSearch<Char>::BoyerMooreHorspoolSearch(
// compared to reading each character exactly once.
badness += (pattern_length - j) - last_char_shift;
if (badness > 0) {
search->PopulateBoyerMooreTable();
search->strategy_ = &BoyerMooreSearch;
return BoyerMooreSearch(search, subject, index);
PopulateBoyerMooreTable();
strategy_ = &StringSearch::BoyerMooreSearch;
return BoyerMooreSearch(subject, index);
}
}
return subject.length();
Expand Down Expand Up @@ -575,11 +542,9 @@ void StringSearch<Char>::PopulateBoyerMooreHorspoolTable() {
// isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
template <typename Char>
size_t StringSearch<Char>::InitialSearch(
StringSearch<Char>* search,
Vector<const Char> subject,
Vector subject,
size_t index) {
Vector<const Char> pattern = search->pattern_;
const size_t pattern_length = pattern.length();
const size_t pattern_length = pattern_.length();
// Badness is a count of how much work we have done. When we have
// done enough work we decide it's probably worth switching to a better
// algorithm.
Expand All @@ -590,13 +555,13 @@ size_t StringSearch<Char>::InitialSearch(
for (size_t i = index, n = subject.length() - pattern_length; i <= n; i++) {
badness++;
if (badness <= 0) {
i = FindFirstCharacter(pattern, subject, i);
i = FindFirstCharacter(pattern_, subject, i);
if (i == subject.length())
return subject.length();
CHECK_LE(i, n);
size_t j = 1;
do {
if (pattern[j] != subject[i + j]) {
if (pattern_[j] != subject[i + j]) {
break;
}
j++;
Expand All @@ -606,9 +571,9 @@ size_t StringSearch<Char>::InitialSearch(
}
badness += j;
} else {
search->PopulateBoyerMooreHorspoolTable();
search->strategy_ = &BoyerMooreHorspoolSearch;
return BoyerMooreHorspoolSearch(search, subject, i);
PopulateBoyerMooreHorspoolTable();
strategy_ = &StringSearch::BoyerMooreHorspoolSearch;
return BoyerMooreHorspoolSearch(subject, i);
}
}
return subject.length();
Expand All @@ -629,7 +594,6 @@ size_t SearchString(Vector<const Char> subject,
} // namespace node

namespace node {
using node::stringsearch::Vector;

template <typename Char>
size_t SearchString(const Char* haystack,
Expand All @@ -643,9 +607,8 @@ size_t SearchString(const Char* haystack,
// code, create two vectors that are reversed views into the input strings.
// For example, v_needle[0] would return the *last* character of the needle.
// So we're searching for the first instance of rev(needle) in rev(haystack)
Vector<const Char> v_needle = Vector<const Char>(
needle, needle_length, is_forward);
Vector<const Char> v_haystack = Vector<const Char>(
stringsearch::Vector<const Char> v_needle(needle, needle_length, is_forward);
stringsearch::Vector<const Char> v_haystack(
haystack, haystack_length, is_forward);
size_t diff = haystack_length - needle_length;
size_t relative_start_index;
Expand Down