Skip to content

Commit

Permalink
Merge pull request #86 from VectorCamp/develop
Browse files Browse the repository at this point in the history
New release 5.4.6
  • Loading branch information
markos authored Jan 21, 2022
2 parents 1b6f37d + f9b6526 commit e6f8564
Show file tree
Hide file tree
Showing 62 changed files with 3,983 additions and 1,938 deletions.
333 changes: 200 additions & 133 deletions CMakeLists.txt

Large diffs are not rendered by default.

28 changes: 28 additions & 0 deletions benchmarks/benchmarks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,34 @@ int main(){
);
}

// Forward Vermicelli benchmark: one MicroBenchmark per entry in sizes.
for (size_t i = 0; i < std::size(sizes); i++) {
MicroBenchmark bench("Vermicelli", sizes[i]);
// Loop count is scaled down as the buffer grows (MAX_LOOPS / sizes[i]).
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], false, bench,
// First callback: prepares the benchmark state.
[&](MicroBenchmark &b) {
b.chars.set('a');
// NOTE(review): the masks written to b.lo / b.hi are not read by the
// callback below -- presumably carried over from the Truffle benchmark; confirm.
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
// Fill the whole buffer with 'b'.
memset(b.buf.data(), 'b', b.size);
},
// Second callback: the call being measured, over [buf, buf + size).
// NOTE(review): the second argument 'b' is presumably vermicelliExec's
// nocase flag (non-zero here) -- confirm this is intended.
[&](MicroBenchmark &b) {
return vermicelliExec('a', 'b', b.buf.data(), b.buf.data() + b.size);
}
);
}

// Reverse Vermicelli benchmark: one MicroBenchmark per entry in sizes.
for (size_t i = 0; i < std::size(sizes); i++) {
MicroBenchmark bench("Reverse Vermicelli", sizes[i]);
// Same arguments as the forward Vermicelli loop except the fourth one is
// true -- presumably run_benchmarks' "reverse scan" flag; confirm.
run_benchmarks(sizes[i], MAX_LOOPS / sizes[i], matches[m], true, bench,
// First callback: prepares the benchmark state.
[&](MicroBenchmark &b) {
b.chars.set('a');
// NOTE(review): the masks written to b.lo / b.hi are not read by the
// callback below -- presumably carried over from the Truffle benchmark; confirm.
ue2::truffleBuildMasks(b.chars, (u8 *)&b.lo, (u8 *)&b.hi);
// Fill the whole buffer with 'b'.
memset(b.buf.data(), 'b', b.size);
},
// Second callback: the call being measured, over [buf, buf + size).
// NOTE(review): the second argument 'b' is presumably rvermicelliExec's
// nocase flag (non-zero here) -- confirm this is intended.
[&](MicroBenchmark &b) {
return rvermicelliExec('a', 'b', b.buf.data(), b.buf.data() + b.size);
}
);
}

for (size_t i = 0; i < std::size(sizes); i++) {
//we imitate the noodle unit tests
std::string str;
Expand Down
1 change: 1 addition & 0 deletions benchmarks/benchmarks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "nfa/shufticompile.h"
#include "nfa/truffle.h"
#include "nfa/trufflecompile.h"
#include "nfa/vermicelli.hpp"
#include "hwlm/noodle_build.h"
#include "hwlm/noodle_engine.h"
#include "hwlm/noodle_internal.h"
Expand Down
36 changes: 28 additions & 8 deletions cmake/arch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ elseif (HAVE_C_INTRIN_H)
elseif (HAVE_C_ARM_NEON_H)
set (INTRIN_INC_H "arm_neon.h")
set (FAT_RUNTIME OFF)
elseif (HAVE_C_PPC64EL_ALTIVEC_H)
set (INTRIN_INC_H "altivec.h")
set (FAT_RUNTIME OFF)
else()
message (FATAL_ERROR "No intrinsics header found")
endif ()
Expand Down Expand Up @@ -85,7 +88,7 @@ if (FAT_RUNTIME)
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} ${SKYLAKE_FLAG}")
endif (BUILD_AVX512VBMI)
elseif (BUILD_AVX2)
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx")
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-avx2 -mavx2")
elseif ()
set (CMAKE_REQUIRED_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS} -march=core-i7 -mssse3")
endif ()
Expand All @@ -95,12 +98,12 @@ else (NOT FAT_RUNTIME)
endif ()

if (ARCH_IA32 OR ARCH_X86_64)
# ensure we have the minimum of SSSE3 - call a SSSE3 intrinsic
# ensure we have the minimum of SSE4.2 (note: the probe below calls _mm_shuffle_epi8, which is an SSSE3 intrinsic)
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
int main() {
__m128i a = _mm_set1_epi8(1);
(void)_mm_shuffle_epi8(a, a);
}" HAVE_SSSE3)
}" HAVE_SSE42)

# now look for AVX2
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
Expand Down Expand Up @@ -136,13 +139,26 @@ int main(){
(void)_mm512_permutexvar_epi8(idx, a);
}" HAVE_AVX512VBMI)

elseif (!ARCH_ARM32 AND !ARCH_AARCH64)

elseif (ARCH_ARM32 OR ARCH_AARCH64)
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
int main() {
int32x4_t a = vdupq_n_s32(1);
(void)a;
}" HAVE_NEON)
elseif (ARCH_PPC64EL)
CHECK_C_SOURCE_COMPILES("#include <${INTRIN_INC_H}>
int main() {
vector int a = vec_splat_s32(1);
(void)a;
}" HAVE_VSX)
else ()
message (FATAL_ERROR "Unsupported architecture")
endif ()

if (FAT_RUNTIME)
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSSE3)
message(FATAL_ERROR "SSSE3 support required to build fat runtime")
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSE42)
message(FATAL_ERROR "SSE4.2 support required to build fat runtime")
endif ()
if ((ARCH_IA32 OR ARCH_X86_64) AND BUILD_AVX2 AND NOT HAVE_AVX2)
message(FATAL_ERROR "AVX2 support required to build fat runtime")
Expand All @@ -163,12 +179,16 @@ else (NOT FAT_RUNTIME)
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_AVX512VBMI)
message(STATUS "Building without AVX512VBMI support")
endif ()
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSSE3)
message(FATAL_ERROR "A minimum of SSSE3 compiler support is required")
if ((ARCH_IA32 OR ARCH_X86_64) AND NOT HAVE_SSE42)
message(FATAL_ERROR "A minimum of SSE4.2 compiler support is required")
endif ()
if ((ARCH_ARM32 OR ARCH_AARCH64) AND NOT HAVE_NEON)
message(FATAL_ERROR "NEON support required for ARM support")
endif ()
# Power builds need VSX. The guard must test ARCH_PPC64EL, which is the
# variable the architecture probes define; a misspelt name is simply
# undefined in CMake and would silently disable this check.
if (ARCH_PPC64EL AND NOT HAVE_VSX)
    message(FATAL_ERROR "VSX support required for Power support")
endif ()

endif ()

unset (PREV_FLAGS)
Expand Down
6 changes: 6 additions & 0 deletions cmake/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
/* "Define if building for AARCH64" */
#cmakedefine ARCH_AARCH64

/* "Define if building for PPC64EL" */
#cmakedefine ARCH_PPC64EL

/* "Define if cross compiling for AARCH64" */
#cmakedefine CROSS_COMPILE_AARCH64

Expand Down Expand Up @@ -75,6 +78,9 @@
/* C compiler has arm_sve.h */
#cmakedefine HAVE_C_ARM_SVE_H

/* C compiler has altivec.h */
#cmakedefine HAVE_C_PPC64EL_ALTIVEC_H

/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
0 if you don't. */
#cmakedefine HAVE_DECL_PTHREAD_SETAFFINITY_NP
Expand Down
13 changes: 8 additions & 5 deletions cmake/platform.cmake
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# determine compiler
# MATCHES performs a regular-expression match, so any compiler ID that
# contains "Clang" (for example "AppleClang") sets CMAKE_COMPILER_IS_CLANG.
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set(CMAKE_COMPILER_IS_CLANG TRUE)
endif()

# determine the target arch

if (CROSS_COMPILE_AARCH64)
Expand All @@ -7,15 +12,13 @@ if (CROSS_COMPILE_AARCH64)
else()
# really only interested in the preprocessor here
CHECK_C_SOURCE_COMPILES("#if !(defined(__x86_64__) || defined(_M_X64))\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_X86_64)

CHECK_C_SOURCE_COMPILES("#if !(defined(__i386__) || defined(_M_IX86))\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_IA32)

CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_A64)\n#error not 64bit\n#endif\nint main(void) { return 0; }" ARCH_AARCH64)
CHECK_C_SOURCE_COMPILES("#if !defined(__ARM_ARCH_ISA_ARM)\n#error not 32bit\n#endif\nint main(void) { return 0; }" ARCH_ARM32)

if (ARCH_X86_64 OR ARCH_AARCH64)
# ppc64el means all three of: 64-bit Power, little-endian, and VSX enabled.
# The probe fails (and ARCH_PPC64EL stays unset) if any one of them is missing.
CHECK_C_SOURCE_COMPILES("#if !(defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__VSX__))\n#error not ppc64el\n#endif\nint main(void) { return 0; }" ARCH_PPC64EL)
if (ARCH_X86_64 OR ARCH_AARCH64 OR ARCH_PPC64EL)
set(ARCH_64_BIT TRUE)
else()
set(ARCH_32_BIT TRUE)
endif()
endif()
endif()
7 changes: 6 additions & 1 deletion examples/patbench.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@
*
*/

#include <random>
#include <algorithm>
#include <cstring>
#include <chrono>
Expand Down Expand Up @@ -151,6 +152,8 @@ using std::set;
using std::min;
using std::max;
using std::copy;
using std::random_device;
using std::mt19937;

enum Criterion {
CRITERION_THROUGHPUT,
Expand Down Expand Up @@ -731,7 +734,9 @@ int main(int argc, char **argv) {
count++;
cout << "." << std::flush;
vector<unsigned> sv(s.begin(), s.end());
random_shuffle(sv.begin(), sv.end());
random_device rng;
mt19937 urng(rng());
shuffle(sv.begin(), sv.end(), urng);
unsigned groups = factor_max + 1;
for (unsigned current_group = 0; current_group < groups;
current_group++) {
Expand Down
8 changes: 4 additions & 4 deletions src/fdr/teddy.c
Original file line number Diff line number Diff line change
Expand Up @@ -893,10 +893,10 @@ do { \
#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) \
do { \
if (unlikely(diff128(var, ones128()))) { \
u64a __attribute__((aligned(16))) vector[2]; \
store128(vector, var); \
u64a lo = vector[0]; \
u64a hi = vector[1]; \
u64a __attribute__((aligned(16))) vec[2]; \
store128(vec, var); \
u64a lo = vec[0]; \
u64a hi = vec[1]; \
CONF_CHUNK_64(lo, bucket, offset, reason, conf_fn); \
CONF_CHUNK_64(hi, bucket, offset + 8, reason, conf_fn); \
} \
Expand Down
2 changes: 2 additions & 0 deletions src/hs_valid_platform.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,7 @@ hs_error_t HS_CDECL hs_valid_platform(void) {
}
#elif defined(ARCH_ARM32) || defined(ARCH_AARCH64)
return HS_SUCCESS;
#elif defined(ARCH_PPC64EL)
return HS_SUCCESS;
#endif
}
2 changes: 1 addition & 1 deletion src/hwlm/hwlm.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
#include "nfa/accel.h"
#include "nfa/shufti.h"
#include "nfa/truffle.h"
#include "nfa/vermicelli.h"
#include "nfa/vermicelli.hpp"
#include <string.h>

#define MIN_ACCEL_LEN_BLOCK 16
Expand Down
21 changes: 1 addition & 20 deletions src/hwlm/noodle_engine_simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,26 +30,7 @@
/* SIMD engine agnostic noodle scan parts */

#include "util/supervector/supervector.hpp"

// AND-masks indexed by the noCase flag: entry 0 (0xff) leaves a byte
// unchanged, entry 1 (0xdf) clears bit 0x20, the bit that distinguishes
// ASCII lower-case letters from upper-case ones.
static u8 CASEMASK[] = { 0xff, 0xdf };

// Returns x ANDed with the CASEMASK entry selected by noCase
// (index 0 when false, index 1 when true).
static really_inline
u8 caseClear8(u8 x, bool noCase)
{
    const u8 keep_bits = CASEMASK[(u8)noCase];
    return static_cast<u8>(x & keep_bits);
}

// Builds a SuperVector<S> from the byte c after caseClear8 has applied the
// mask selected by noCase.
template<uint16_t S>
static really_inline SuperVector<S> getMask(u8 c, bool noCase) {
    return SuperVector<S>(caseClear8(c, noCase));
}

// Builds a SuperVector<S> from CASEMASK[1], the mask value used when
// matching case-insensitively.
template<uint16_t S>
static really_inline SuperVector<S> getCaseMask() {
    const u8 clear_case_bit = CASEMASK[1];
    return SuperVector<S>(clear_case_bit);
}

#include "util/supervector/casemask.hpp"

static really_really_inline
hwlm_error_t single_zscan(const struct noodTable *n,const u8 *d, const u8 *buf,
Expand Down
8 changes: 4 additions & 4 deletions src/hwlm/noodle_engine_sve.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ hwlm_error_t scanDoubleOnce(const struct noodTable *n, const u8 *buf,
svbool_t pg = svwhilelt_b8_s64(0, e - d);
svbool_t pg_rot = svwhilelt_b8_s64(0, e - d + 1);
svbool_t matched, matched_rot;
svbool_t any = doubleMatched(chars, d, pg, pg_rot, &matched, &matched_rot);
svbool_t any = doubleMatched(svreinterpret_u16(chars), d, pg, pg_rot, &matched, &matched_rot);
return doubleCheckMatched(n, buf, len, cbi, d, matched, matched_rot, any);
}

Expand All @@ -187,7 +187,7 @@ hwlm_error_t scanDoubleLoop(const struct noodTable *n, const u8 *buf,
for (size_t i = 0; i < loops; i++, d += svcntb()) {
DEBUG_PRINTF("d %p \n", d);
svbool_t matched, matched_rot;
svbool_t any = doubleMatched(chars, d, svptrue_b8(), svptrue_b8(),
svbool_t any = doubleMatched(svreinterpret_u16(chars), d, svptrue_b8(), svptrue_b8(),
&matched, &matched_rot);
hwlm_error_t rv = doubleCheckMatched(n, buf, len, cbi, d,
matched, matched_rot, any);
Expand Down Expand Up @@ -220,7 +220,7 @@ hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len,
}
++d;

svuint16_t chars = getCharMaskDouble(n->key0, n->key1, noCase);
svuint8_t chars = svreinterpret_u8(getCharMaskDouble(n->key0, n->key1, noCase));

if (scan_len <= svcntb()) {
return scanDoubleOnce(n, buf, len, cbi, chars, d, e);
Expand All @@ -234,4 +234,4 @@ hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len,
RETURN_IF_TERMINATED(rv);
}
return scanDoubleLoop(n, buf, len, cbi, chars, d1, e);
}
}
2 changes: 1 addition & 1 deletion src/nfa/accel.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#include "accel.h"
#include "shufti.h"
#include "truffle.h"
#include "vermicelli.h"
#include "vermicelli.hpp"
#include "ue2common.h"

const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
Expand Down
4 changes: 1 addition & 3 deletions src/nfa/arm/shufti.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
/*
* Copyright (c) 2015-2017, Intel Corporation
* Copyright (c) 2020-2021, VectorCamp PC
* Copyright (c) 2021, Arm Limited
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -30,7 +29,6 @@

/** \file
* \brief Shufti: character class acceleration.
*
*/

template <uint16_t S>
Expand Down Expand Up @@ -73,4 +71,4 @@ SuperVector<S> blockDoubleMask(SuperVector<S> mask1_lo, SuperVector<S> mask1_hi,
t.print8("t");

return !t.eq(SuperVector<S>::Ones());
}
}
Loading

0 comments on commit e6f8564

Please sign in to comment.