Skip to content

Commit

Permalink
Use std::string_view to return RegEx matches to avoid generating std:…
Browse files Browse the repository at this point in the history
…:string instances

- These std::string_view reference substring(view)s in the original
  std::string_view on which the matches are found.
- NOTE: As long as the source std::string_view on which the matches were
  found is valid, the matches will be valid.
  • Loading branch information
eduar-hte committed Oct 22, 2024
1 parent c441f7d commit 9bb7439
Show file tree
Hide file tree
Showing 8 changed files with 72 additions and 81 deletions.
51 changes: 27 additions & 24 deletions src/modsecurity.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#include <ctime>
#include <iostream>
#include <charconv>
#include <fmt/format.h>

#include "modsecurity/rule.h"
Expand Down Expand Up @@ -213,6 +214,11 @@ void ModSecurity::serverLog(void *data, const RuleMessage &rm) {
}
}

static inline int svtoi(std::string_view s) {
int value;
std::from_chars(s.data(), s.data() + s.size(), value);
return value;
}

int ModSecurity::processContentOffset(const char *content, size_t len,
const char *matchString, std::string *json, const char **err) {
Expand All @@ -225,9 +231,9 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
const unsigned char *buf;
size_t jsonSize;

std::list<Utils::SMatch> vars = variables.searchAll(matchString);
std::list<Utils::SMatch> ops = operators.searchAll(matchString);
std::list<Utils::SMatch> trans = transformations.searchAll(matchString);
const auto vars = variables.searchAll(matchString);
const auto ops = operators.searchAll(matchString);
const auto trans = transformations.searchAll(matchString);

g = yajl_gen_alloc(NULL);
if (g == NULL) {
Expand Down Expand Up @@ -255,27 +261,27 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
for(auto [it, pending] = std::tuple{vars.rbegin(), vars.size()}; pending > 3; pending -= 3) {
yajl_gen_map_open(g);
it++;
const std::string &startingAt = it->str(); it++;
const std::string &size = it->str(); it++;
const auto startingAt = it->str(); it++;
const auto size = it->str(); it++;
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>("startingAt"),
strlen("startingAt"));
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>(startingAt.c_str()),
reinterpret_cast<const unsigned char*>(startingAt.data()),
startingAt.size());
yajl_gen_string(g, reinterpret_cast<const unsigned char*>("size"),
strlen("size"));
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>(size.c_str()),
reinterpret_cast<const unsigned char*>(size.data()),
size.size());
yajl_gen_map_close(g);

if (stoi(startingAt) >= len) {
if (svtoi(startingAt) >= len) {
*err = "Offset is out of the content limits.";
return -1;
}

const auto value = std::string(content, stoi(startingAt), stoi(size));
const auto value = std::string(content, svtoi(startingAt), svtoi(size));
if (varValue.size() > 0) {
varValue.push_back(' ');
}
Expand All @@ -295,30 +301,27 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
varValue.size());
yajl_gen_map_close(g);

while (!trans.empty()) {
modsecurity::actions::transformations::Transformation *t;
for (const auto &match : trans) {
yajl_gen_map_open(g);
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>("transformation"),
strlen("transformation"));

yajl_gen_string(g,
reinterpret_cast<const unsigned char*>(trans.back().str().c_str()),
trans.back().str().size());
reinterpret_cast<const unsigned char*>(match.str().data()),
match.str().size());

t = modsecurity::actions::transformations::Transformation::instantiate(
trans.back().str().c_str());
auto t = std::unique_ptr<Transformation>(
modsecurity::actions::transformations::Transformation::instantiate(
std::string(match.str().data(), match.str().size())));
t->transform(varValue, nullptr);
trans.pop_back();

yajl_gen_string(g, reinterpret_cast<const unsigned char*>("value"),
strlen("value"));
yajl_gen_string(g, reinterpret_cast<const unsigned char*>(
varValue.c_str()),
varValue.size());
yajl_gen_map_close(g);

delete t;
}

yajl_gen_array_close(g);
Expand All @@ -333,30 +336,30 @@ int ModSecurity::processContentOffset(const char *content, size_t len,
strlen("highlight"));
yajl_gen_map_open(g);
it++;
const std::string &startingAt = it->str(); it++;
const std::string &size = ops.back().str(); it++;
const auto startingAt = it->str(); it++;
const auto size = ops.back().str(); it++;
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>("startingAt"),
strlen("startingAt"));
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>(startingAt.c_str()),
reinterpret_cast<const unsigned char*>(startingAt.data()),
startingAt.size());
yajl_gen_string(g, reinterpret_cast<const unsigned char*>("size"),
strlen("size"));
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>(size.c_str()),
reinterpret_cast<const unsigned char*>(size.data()),
size.size());
yajl_gen_map_close(g);

if (stoi(startingAt) >= varValue.size()) {
if (svtoi(startingAt) >= varValue.size()) {
*err = "Offset is out of the variable limits.";
return -1;
}
yajl_gen_string(g,
reinterpret_cast<const unsigned char*>("value"),
strlen("value"));

const auto value = std::string(varValue, stoi(startingAt), stoi(size));
const auto value = std::string(varValue, svtoi(startingAt), svtoi(size));

yajl_gen_string(g,
reinterpret_cast<const unsigned char*>(value.c_str()),
Expand Down
6 changes: 3 additions & 3 deletions src/operators/verify_cpf.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ bool VerifyCPF::verify(const char *cpfnumber, int len) const {

bool VerifyCPF::evaluate(Transaction *t, RuleWithActions *rule,
const std::string& input, RuleMessage &ruleMessage) {
std::list<SMatch> matches;
bool is_cpf = false;
int i;

Expand All @@ -120,9 +119,10 @@ bool VerifyCPF::evaluate(Transaction *t, RuleWithActions *rule,
}

for (i = 0; i < input.size() - 1 && is_cpf == false; i++) {
matches = m_re->searchAll(input.substr(i, input.size()));
const auto iv = std::string_view(input).substr(i, input.size());
const auto matches = m_re->searchAll(iv);
for (const auto & m : matches) {
is_cpf = verify(m.str().c_str(), m.str().size());
is_cpf = verify(m.str().data(), m.str().size());
if (is_cpf) {
logOffset(ruleMessage, m.offset(), m.str().size());
if (rule && t && rule->hasCaptureAction()) {
Expand Down
6 changes: 3 additions & 3 deletions src/operators/verify_ssn.cc
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ bool VerifySSN::verify(const char *ssnumber, int len) {

bool VerifySSN::evaluate(Transaction *t, RuleWithActions *rule,
const std::string& input, RuleMessage &ruleMessage) {
std::list<SMatch> matches;
bool is_ssn = false;
int i;

Expand All @@ -122,9 +121,10 @@ bool VerifySSN::evaluate(Transaction *t, RuleWithActions *rule,
}

for (i = 0; i < input.size() - 1 && is_ssn == false; i++) {
matches = m_re->searchAll(input.substr(i, input.size()));
const auto iv = std::string_view(input).substr(i, input.size());
const auto matches = m_re->searchAll(iv);
for (const auto & j : matches) {
is_ssn = verify(j.str().c_str(), j.str().size());
is_ssn = verify(j.str().data(), j.str().size());
if (is_ssn) {
logOffset(ruleMessage, j.offset(), j.str().size());
if (rule && t && rule->hasCaptureAction()) {
Expand Down
9 changes: 4 additions & 5 deletions src/operators/verify_svnr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,19 +80,18 @@ bool VerifySVNR::verify(const char *svnrnumber, int len) const {

bool VerifySVNR::evaluate(Transaction *t, RuleWithActions *rule,
const std::string& input, RuleMessage &ruleMessage) {
std::list<SMatch> matches;
bool is_svnr = false;
int i;

if (m_param.empty()) {
return is_svnr;
return false;
}

for (i = 0; i < input.size() - 1 && is_svnr == false; i++) {
matches = m_re->searchAll(input.substr(i, input.size()));

const auto iv = std::string_view(input).substr(i, input.size());
const auto matches = m_re->searchAll(iv);
for (const auto & j : matches) {
is_svnr = verify(j.str().c_str(), j.str().size());
is_svnr = verify(j.str().data(), j.str().size());
if (is_svnr) {
logOffset(ruleMessage, j.offset(), j.str().size());
if (rule && t && rule->hasCaptureAction()) {
Expand Down
18 changes: 9 additions & 9 deletions src/utils/regex.cc
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ Regex::~Regex() {
}


std::list<SMatch> Regex::searchAll(std::string_view s) const {
std::list<SMatch> retList;
std::vector<SMatch> Regex::searchAll(std::string_view s) const {
std::vector<SMatch> ret;
int rc = 0;
#ifdef WITH_PCRE2
PCRE2_SPTR pcre2_s = reinterpret_cast<PCRE2_SPTR>(s.data());
Expand Down Expand Up @@ -172,9 +172,9 @@ std::list<SMatch> Regex::searchAll(std::string_view s) const {
rc = -1;
break;
}
std::string match = std::string(s, start, len);
const auto match = s.substr(start, len);
offset = start + len;
retList.push_front(SMatch(match, start));
ret.push_back(SMatch(match, start));

if (len == 0) {
rc = 0;
Expand All @@ -186,7 +186,7 @@ std::list<SMatch> Regex::searchAll(std::string_view s) const {
#ifdef WITH_PCRE2
pcre2_match_data_free(match_data);
#endif
return retList;
return ret;
}

RegexResult Regex::searchOneMatch(std::string_view s, std::vector<SMatchCapture>& captures, unsigned long match_limit) const {
Expand Down Expand Up @@ -343,7 +343,7 @@ RegexResult Regex::searchGlobal(std::string_view s, std::vector<SMatchCapture>&
return RegexResult::Ok;
}

int Regex::search(std::string_view s, SMatch *match) const {
int Regex::search(std::string_view s, SMatch &match) const {
#ifdef WITH_PCRE2
PCRE2_SPTR pcre2_s = reinterpret_cast<PCRE2_SPTR>(s.data());
pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(m_pc, NULL);
Expand All @@ -358,16 +358,16 @@ int Regex::search(std::string_view s, SMatch *match) const {
0, PCRE2_NO_JIT, match_data, NULL) > 0;
}
if (ret > 0) { // match
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
const PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
#else
int ovector[OVECCOUNT];
int ret = pcre_exec(m_pc, m_pce, s.data(),
s.size(), 0, 0, ovector, OVECCOUNT) > 0;

if (ret > 0) {
#endif
*match = SMatch(
std::string(s, ovector[ret-1], ovector[ret] - ovector[ret-1]),
match = SMatch(
s.substr(ovector[ret-1], ovector[ret] - ovector[ret-1]),
0);
}

Expand Down
12 changes: 6 additions & 6 deletions src/utils/regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,15 @@ class SMatch {
m_match(),
m_offset(0) { }

SMatch(const std::string &match, size_t offset) :
SMatch(std::string_view match, size_t offset) :
m_match(match),
m_offset(offset) { }

const std::string& str() const { return m_match; }
std::string_view str() const { return m_match; }
size_t offset() const { return m_offset; }

private:
std::string m_match;
std::string_view m_match;
size_t m_offset;
};

Expand All @@ -81,10 +81,10 @@ class Regex {
bool hasError() const {
return (m_pc == NULL);
}
std::list<SMatch> searchAll(std::string_view s) const;
std::vector<SMatch> searchAll(std::string_view s) const;
RegexResult searchOneMatch(std::string_view s, std::vector<SMatchCapture>& captures, unsigned long match_limit = 0) const;
RegexResult searchGlobal(std::string_view s, std::vector<SMatchCapture>& captures, unsigned long match_limit = 0) const;
int search(std::string_view s, SMatch *match) const;
int search(std::string_view s, SMatch &match) const;
int search(std::string_view s) const;

const std::string pattern;
Expand All @@ -101,7 +101,7 @@ class Regex {
};


static inline int regex_search(const std::string& s, SMatch *match, const Regex& regex) {
static inline int regex_search(const std::string& s, SMatch &match, const Regex& regex) {
return regex.search(s, match);
}

Expand Down
2 changes: 1 addition & 1 deletion test/regression/regression.cc
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ void perform_unit_test(const ModSecurityTest<RegressionTest> &test,
SMatch match;
const auto s = context.m_modsec_rules.getParserError();

if (regex_search(s, &match, re)) {
if (regex_search(s, match, re)) {
if (test.m_automake_output) {
std::cout << ":test-result: PASS " << filename \
<< ":" << t->name << std::endl;
Expand Down
Loading

0 comments on commit 9bb7439

Please sign in to comment.