Skip to content

Commit

Permalink
util/crc32c: always compile FastCRC32 with SSE4.2 instructions
Browse files Browse the repository at this point in the history
The use of this function is guarded by a CPUID check. That is,
compiling this function with SSE4.2 crc32q instructions will never cause
a SIGILL, even on CPUs that don't support SSE4.2, because the function
can never be reached.

The previous solution to enable hardware support for CRC32C required
compiling all of RocksDB with `-msse4.2` or the equivalent. This gave
the compiler permission to emit SSE4.2 instructions in functions that
were not guarded by a CPUID check, resulting in SIGILLs on non-SSE4.2
CPUs.
  • Loading branch information
benesch committed Jul 11, 2017
1 parent 0403a97 commit 87c3949
Showing 1 changed file with 5 additions and 28 deletions.
33 changes: 5 additions & 28 deletions util/crc32c.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,7 @@
#include "util/crc32c.h"

#include <stdint.h>
#ifdef __SSE4_2__
#include <nmmintrin.h>
#endif
#if defined(_WIN64)
#ifdef __AVX2__
#include <nmmintrin.h>
#endif
#endif
#include "util/coding.h"

namespace rocksdb {
Expand Down Expand Up @@ -298,21 +291,12 @@ static inline uint32_t LE_LOAD32(const uint8_t *p) {
return DecodeFixed32(reinterpret_cast<const char*>(p));
}

#ifdef __SSE4_2__
#ifdef __LP64__
#if defined(__LP64__) || defined(_WIN64)
// Returns the uint64_t that DecodeFixed64 produces for the bytes starting at
// `p`. NOTE(review): presumably a little-endian 8-byte load, per the LE_
// prefix -- confirm against DecodeFixed64's definition.
static inline uint64_t LE_LOAD64(const uint8_t *p) {
  const char* bytes = reinterpret_cast<const char*>(p);
  return DecodeFixed64(bytes);
}
#endif
#endif

#if defined(_WIN64)
#ifdef __AVX2__
// Returns the uint64_t that DecodeFixed64 produces for the bytes starting at
// `p`. NOTE(review): presumably a little-endian 8-byte load, per the LE_
// prefix -- confirm against DecodeFixed64's definition.
static inline uint64_t LE_LOAD64(const uint8_t *p) {
  const char* bytes = reinterpret_cast<const char*>(p);
  return DecodeFixed64(bytes);
}
#endif
#endif
static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
uint32_t c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p));
*p += 4;
Expand All @@ -329,8 +313,8 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
table0_[c >> 24];
}

__attribute__((target("sse4.2")))
static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
#ifdef __SSE4_2__
#ifdef __LP64__
*l = _mm_crc32_u64(*l, LE_LOAD64(*p));
*p += 8;
Expand All @@ -340,16 +324,6 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
*l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
*p += 4;
#endif
#elif defined(_WIN64)
#ifdef __AVX2__
*l = _mm_crc32_u64(*l, LE_LOAD64(*p));
*p += 8;
#else
Slow_CRC32(l, p);
#endif
#else
Slow_CRC32(l, p);
#endif
}

template<void (*CRC32)(uint64_t*, uint8_t const**)>
Expand Down Expand Up @@ -411,6 +385,9 @@ static bool isSSE42() {
#endif
}

// Explicit instantiation of ExtendImpl for the Fast_CRC32 step function,
// carrying the same target("sse4.2") attribute that Fast_CRC32 is declared
// with, so this instantiation is compiled with SSE4.2 enabled.
// NOTE(review): callers are expected to reach this only after a runtime
// CPUID check (see isSSE42) -- confirm in Choose_Extend.
template __attribute__((target("sse4.2")))
uint32_t ExtendImpl<Fast_CRC32>(uint32_t, const char*, size_t);

// Pointer to a function taking (uint32_t, const char*, size_t) and returning
// uint32_t -- the same signature as the ExtendImpl instantiations.
using Function = uint32_t (*)(uint32_t, const char*, size_t);

static inline Function Choose_Extend() {
Expand Down

0 comments on commit 87c3949

Please sign in to comment.