Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement support for Intel crc32 instruction (SSE 4.2) #309

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions build_detect_platform
Original file line number Diff line number Diff line change
Expand Up @@ -71,75 +71,84 @@ if [ "$CXX" = "g++" ]; then
MEMCMP_FLAG="-fno-builtin-memcmp"
fi

# Compiler switch that makes the hardware crc32 (SSE 4.2) intrinsics available.
# -msse4.2 is used for every recognized compiler name: unlike -march=native it
# enables only that one instruction set instead of everything the build host
# happens to support, so the resulting binary does not silently depend on the
# machine it was built on.
SSE_FLAG=
case "$CXX" in
    g++|c++)
        SSE_FLAG="-msse4.2"
        ;;
esac

case "$TARGET_OS" in
CYGWIN_*)
PLATFORM=OS_LINUX
COMMON_FLAGS="$MEMCMP_FLAG -lpthread -DOS_LINUX -DCYGWIN"
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -lpthread -DOS_LINUX -DCYGWIN"
PLATFORM_LDFLAGS="-lpthread"
PORT_FILE=port/port_posix.cc
;;
Darwin)
PLATFORM=OS_MACOSX
COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX"
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -DOS_MACOSX"
PLATFORM_SHARED_EXT=dylib
[ -z "$INSTALL_PATH" ] && INSTALL_PATH=`pwd`
PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name $INSTALL_PATH/"
PORT_FILE=port/port_posix.cc
;;
Linux)
PLATFORM=OS_LINUX
COMMON_FLAGS="$MEMCMP_FLAG -pthread -DOS_LINUX"
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -pthread -DOS_LINUX"
PLATFORM_LDFLAGS="-pthread"
PORT_FILE=port/port_posix.cc
;;
SunOS)
PLATFORM=OS_SOLARIS
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_SOLARIS"
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_SOLARIS"
PLATFORM_LIBS="-lpthread -lrt"
PORT_FILE=port/port_posix.cc
;;
FreeBSD)
PLATFORM=OS_FREEBSD
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_FREEBSD"
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_FREEBSD"
PLATFORM_LIBS="-lpthread"
PORT_FILE=port/port_posix.cc
;;
NetBSD)
PLATFORM=OS_NETBSD
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_NETBSD"
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_NETBSD"
PLATFORM_LIBS="-lpthread -lgcc_s"
PORT_FILE=port/port_posix.cc
;;
OpenBSD)
PLATFORM=OS_OPENBSD
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_OPENBSD"
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_OPENBSD"
PLATFORM_LDFLAGS="-pthread"
PORT_FILE=port/port_posix.cc
;;
DragonFly)
PLATFORM=OS_DRAGONFLYBSD
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_DRAGONFLYBSD"
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_DRAGONFLYBSD"
PLATFORM_LIBS="-lpthread"
PORT_FILE=port/port_posix.cc
;;
OS_ANDROID_CROSSCOMPILE)
PLATFORM=OS_ANDROID
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX"
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_ANDROID -DLEVELDB_PLATFORM_POSIX"
PLATFORM_LDFLAGS="" # All pthread features are in the Android C library
PORT_FILE=port/port_posix.cc
CROSS_COMPILE=true
;;
HP-UX)
PLATFORM=OS_HPUX
COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_HPUX"
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_HPUX"
PLATFORM_LDFLAGS="-pthread"
PORT_FILE=port/port_posix.cc
# man ld: +h internal_name
PLATFORM_SHARED_LDFLAGS="-shared -Wl,+h -Wl,"
;;
IOS)
PLATFORM=IOS
COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX"
COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -DOS_MACOSX"
[ -z "$INSTALL_PATH" ] && INSTALL_PATH=`pwd`
PORT_FILE=port/port_posix.cc
PLATFORM_SHARED_EXT=
Expand Down
80 changes: 79 additions & 1 deletion util/crc32c.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,27 @@
#include <stdint.h>
#include "util/coding.h"

// IsSSE42Available(): runtime check for the SSE 4.2 instruction set, which
// provides the crc32 instruction. Each variant queries CPUID leaf 1 and tests
// bit 20 of ECX; compilers without a known CPUID interface report "false".
#if defined(_MSC_VER)
#include <intrin.h>
static inline bool IsSSE42Available() {
  // __cpuid fills cpu_info with {EAX, EBX, ECX, EDX}; index 2 is ECX.
  int cpu_info[4];
  __cpuid(cpu_info, 1);
  return (cpu_info[2] & (1 << 20)) != 0;
}
#elif defined(__GNUC__)
#include <cpuid.h>
#include <nmmintrin.h>
static inline bool IsSSE42Available() {
  unsigned int eax, ebx, ecx, edx;
  // __get_cpuid returns 0 and leaves the outputs untouched when the requested
  // leaf is not supported; bail out instead of reading an uninitialized ecx.
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
    return false;
  }
  return (ecx & (1u << 20)) != 0;
}
#else
static inline bool IsSSE42Available() {
  // No known way to query the CPU with this compiler.
  return false;
}
#endif

namespace leveldb {
namespace crc32c {

Expand Down Expand Up @@ -283,7 +304,12 @@ static inline uint32_t LE_LOAD32(const uint8_t *p) {
return DecodeFixed32(reinterpret_cast<const char*>(p));
}

uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
// Fetches a naturally-aligned 64-bit word stored in little endian byte-order
// at p, by handing the bytes to DecodeFixed64.
static inline uint64_t LE_LOAD64(const uint8_t *p) {
  const char* bytes = reinterpret_cast<const char*>(p);
  return DecodeFixed64(bytes);
}

static uint32_t Extend_NoSSE(uint32_t crc, const char* buf, size_t size) {
const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
const uint8_t *e = p + size;
uint32_t l = crc ^ 0xffffffffu;
Expand Down Expand Up @@ -328,5 +354,57 @@ uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
return l ^ 0xffffffffu;
}

// Extends crc over buf[0,size-1] using the SSE 4.2 crc32 instruction.
// The incoming crc is inverted before processing and the result is inverted
// again on return. Bytes are always consumed in order, so the value does not
// depend on how the buffer happens to be aligned.
//
// For further improvements see Intel publication at:
// http://download.intel.com/design/intarch/papers/323405.pdf
static uint32_t Extend_SSE42(uint32_t crc, const char* buf, size_t size) {
  const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
  const uint8_t *e = p + size;
  uint32_t l = crc ^ 0xffffffffu;

#define STEP1 do {                              \
    l = _mm_crc32_u8(l, *p++);                  \
} while (0)
#define STEP4 do {                              \
    l = _mm_crc32_u32(l, LE_LOAD32(p));         \
    p += 4;                                     \
} while (0)
#if defined(__x86_64__) || defined(_M_X64)
#define STEP8 do {                                              \
    l = static_cast<uint32_t>(_mm_crc32_u64(l, LE_LOAD64(p)));  \
    p += 8;                                                     \
} while (0)
#else
// The 64-bit form of the instruction is only available in 64-bit mode;
// two 32-bit steps over the same eight bytes give the same result.
#define STEP8 do {                              \
    STEP4;                                      \
    STEP4;                                      \
} while (0)
#endif

  if (size > 16) {
    // Consume bytes one at a time until p sits on an 8-byte boundary. The
    // number of bytes needed is the distance UP to the next boundary, not the
    // remainder itself. At most 7 bytes are consumed, so with size > 16 this
    // can never run past e.
    const uintptr_t misalignment = reinterpret_cast<uintptr_t>(p) % 8;
    for (uintptr_t i = (8 - misalignment) % 8; i; --i) {
      STEP1;
    }
    // Process 8 bytes at a time
    while ((e - p) >= 8) {
      STEP8;
    }
    // Process 4 bytes at a time (fewer than 8 bytes remain here)
    if ((e - p) >= 4) {
      STEP4;
    }
  }
  // Process the last few bytes (or the whole input when it is short)
  while (p != e) {
    STEP1;
  }
#undef STEP8
#undef STEP4
#undef STEP1
  return l ^ 0xffffffffu;
}

// Extends crc over buf[0,size-1], dispatching to the SSE 4.2 implementation
// when IsSSE42Available() reports support and to the table-free fallback
// Extend_NoSSE() otherwise.
uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
  // Probe the CPU exactly once. Initializing a function-local static directly
  // (instead of a hand-rolled "-1 means unknown" flag that every caller reads
  // and may write) leaves the one-time initialization to the compiler, which
  // guards it against concurrent first calls (guaranteed since C++11).
  static const bool use_sse42 = IsSSE42Available();
  return use_sse42
      ? Extend_SSE42(crc, buf, size)
      : Extend_NoSSE(crc, buf, size);
}

} // namespace crc32c
} // namespace leveldb