Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix/hyperscan backport 202208 #118

Merged
merged 12 commits on
Sep 3, 2022
4 changes: 4 additions & 0 deletions chimera/ch_runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,10 @@ ch_error_t catchupPcre(struct HybridContext *hyctx, unsigned int id,
} else if (cbrv == CH_CALLBACK_SKIP_PATTERN) {
DEBUG_PRINTF("user callback told us to skip this pattern\n");
pd->scanStart = hyctx->length;
if (top_id == id) {
break;
}
continue;
}

if (top_id == id) {
Expand Down
9 changes: 7 additions & 2 deletions src/compiler/compiler.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -323,7 +323,8 @@ void addExpression(NG &ng, unsigned index, const char *expression,
}

// Ensure that our pattern isn't too long (in characters).
if (strlen(expression) > cc.grey.limitPatternLength) {
size_t maxlen = cc.grey.limitPatternLength + 1;
if (strnlen(expression, maxlen) >= maxlen) {
throw CompileError("Pattern length exceeds limit.");
}

Expand Down Expand Up @@ -416,6 +417,10 @@ void addLitExpression(NG &ng, unsigned index, const char *expression,
"HS_FLAG_SOM_LEFTMOST are supported in literal API.");
}

if (!strcmp(expression, "")) {
throw CompileError("Pure literal API doesn't support empty string.");
}

// This expression must be a pure literal, we can build ue2_literal
// directly based on expression text.
ParsedLitExpression ple(index, expression, expLength, flags, id);
Expand Down
8 changes: 7 additions & 1 deletion src/hs.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -517,6 +517,12 @@ hs_error_t hs_expression_info_int(const char *expression, unsigned int flags,
return HS_COMPILER_ERROR;
}

if (flags & HS_FLAG_COMBINATION) {
*error = generateCompileError("Invalid parameter: unsupported "
"logical combination expression", -1);
return HS_COMPILER_ERROR;
}

*info = nullptr;
*error = nullptr;

Expand Down
12 changes: 3 additions & 9 deletions src/hs_compile.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -748,10 +748,7 @@ hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error);
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
* - HS_FLAG_QUIET - This flag will be ignored.
*
* @param info
* On success, a pointer to the pattern information will be returned in
Expand Down Expand Up @@ -814,10 +811,7 @@ hs_error_t HS_CDECL hs_expression_info(const char *expression,
* - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode.
* - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset
* when a match is found.
* - HS_FLAG_COMBINATION - Parse the expression in logical combination
* syntax.
* - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for
* the sub-expressions in logical combinations.
* - HS_FLAG_QUIET - This flag will be ignored.
*
* @param ext
* A pointer to a filled @ref hs_expr_ext_t structure that defines
Expand Down
6 changes: 4 additions & 2 deletions src/hs_internal.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, Intel Corporation
* Copyright (c) 2019-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -80,7 +80,9 @@ extern "C"
| HS_FLAG_PREFILTER \
| HS_FLAG_SINGLEMATCH \
| HS_FLAG_ALLOWEMPTY \
| HS_FLAG_SOM_LEFTMOST)
| HS_FLAG_SOM_LEFTMOST \
| HS_FLAG_COMBINATION \
| HS_FLAG_QUIET)

#ifdef __cplusplus
} /* extern "C" */
Expand Down
6 changes: 4 additions & 2 deletions src/nfa/mcclellancompile.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -1081,7 +1081,9 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
// Use the daddy already set for this state so long as it isn't already
// a Sherman state.
dstate_id_t daddy = currState.daddy;
if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) {
if (info.is_widestate(daddy)) {
return;
} else if (!info.is_sherman(daddy)) {
hinted.insert(currState.daddy);
} else {
// Fall back to granddaddy, which has already been processed (due
Expand Down
23 changes: 22 additions & 1 deletion src/rose/program_runtime.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2020, Intel Corporation
* Copyright (c) 2015-2021, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -3092,6 +3092,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,

const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP;
const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV;
const char skip_mpv_catchup = prog_flags & ROSE_PROG_FLAG_SKIP_MPV_CATCHUP;

const char *pc_base = getByOffset(t, programOffset);
const char *pc = pc_base;
Expand Down Expand Up @@ -3188,13 +3189,33 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t,
}
L_PROGRAM_NEXT_INSTRUCTION

L_PROGRAM_CASE(CATCH_UP_MPV) {
if (from_mpv || skip_mpv_catchup) {
DEBUG_PRINTF("skipping mpv catchup\n");
} else if (roseCatchUpMPV(t,
end - scratch->core_info.buf_offset,
scratch) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
L_PROGRAM_NEXT_INSTRUCTION

L_PROGRAM_CASE(SOM_FROM_REPORT) {
som = handleSomExternal(scratch, &ri->som, end);
DEBUG_PRINTF("som from report %u is %llu\n", ri->som.onmatch,
som);
}
L_PROGRAM_NEXT_INSTRUCTION

L_PROGRAM_CASE(TRIGGER_SUFFIX) {
if (roseTriggerSuffix(t, scratch, ri->queue, ri->event, som,
end) == HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
work_done = 1;
}
L_PROGRAM_NEXT_INSTRUCTION

L_PROGRAM_CASE(DEDUPE) {
updateSeqPoint(tctxt, end, from_mpv);
const char do_som = t->hasSom; // TODO: constant propagate
Expand Down
6 changes: 5 additions & 1 deletion src/util/alloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ class AlignedAllocator {

T *allocate(std::size_t size) const {
size_t alloc_size = size * sizeof(T);
return static_cast<T *>(aligned_malloc_internal(alloc_size, N));
T *ptr = static_cast<T *>(aligned_malloc_internal(alloc_size, N));
if (!ptr) {
throw std::bad_alloc();
}
return ptr;
}

void deallocate(T *x, std::size_t) const noexcept {
Expand Down
5 changes: 3 additions & 2 deletions tools/hscollider/sig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@

#if defined(HAVE_SIGACTION)
#include <signal.h>
#define STACK_SIZE 8192
#endif

#ifdef HAVE_BACKTRACE
Expand Down Expand Up @@ -166,7 +167,7 @@ void installSignalHandler(void) {
}

#ifdef HAVE_SIGALTSTACK
static TLS_VARIABLE char alt_stack_loc[SIGSTKSZ];
static TLS_VARIABLE char alt_stack_loc[STACK_SIZE];
#endif

void setSignalStack(void) {
Expand All @@ -178,7 +179,7 @@ void setSignalStack(void) {
stack_t alt_stack;
memset(&alt_stack, 0, sizeof(alt_stack));
alt_stack.ss_flags = 0;
alt_stack.ss_size = SIGSTKSZ;
alt_stack.ss_size = STACK_SIZE;
alt_stack.ss_sp = alt_stack_loc;
if (!sigaltstack(&alt_stack, nullptr)) {
act.sa_flags |= SA_ONSTACK;
Expand Down
2 changes: 2 additions & 0 deletions unit/internal/multi_bit_compress.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@

#include "config.h"

#include <memory>

#include "gtest/gtest.h"
#include "ue2common.h"
#include "util/compile_error.h"
Expand Down
4 changes: 2 additions & 2 deletions util/ng_corpus_editor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,12 +268,12 @@ void CorpusEditorUtf8::flip_case(vector<unichar> &corpus) {
unichar CorpusEditorUtf8::chooseCodePoint(void) {
/* We need to ensure that we don't pick a surrogate cp */
const u32 range =
MAX_UNICODE + 1 - (UNICODE_SURROGATE_MAX + UNICODE_SURROGATE_MIN + 1);
MAX_UNICODE + 1 - (UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1);
unichar raw = props.rand(0, range - 1);
if (raw < UNICODE_SURROGATE_MIN) {
return raw;
} else {
return raw + UNICODE_SURROGATE_MAX + 1;
return raw + UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1;
}
}

Expand Down
4 changes: 2 additions & 2 deletions util/ng_corpus_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -476,14 +476,14 @@ void CorpusGeneratorUtf8::generateCorpus(vector<string> &data) {
* that we've been asked for. */
unichar CorpusGeneratorUtf8::getRandomChar() {
u32 range = MAX_UNICODE + 1
- (UNICODE_SURROGATE_MAX + UNICODE_SURROGATE_MIN + 1);
- (UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1);
range = min(cProps.alphabetSize, range);
assert(range);

unichar c = 'a' + cProps.rand(0, range - 1);

if (c >= UNICODE_SURROGATE_MIN) {
c =+ UNICODE_SURROGATE_MAX + 1;
c += UNICODE_SURROGATE_MAX - UNICODE_SURROGATE_MIN + 1;
}

return c % (MAX_UNICODE + 1);
Expand Down