From 09fb9de179a4e6b4bf261eb3573be7937037b2cb Mon Sep 17 00:00:00 2001
From: Vadim Skipin
Date: Thu, 1 Oct 2015 12:59:58 +0300
Subject: [PATCH] Implement support for Intel crc32 instruction (SSE 4.2)

Extend() dispatches at run time to an implementation built on the SSE 4.2
crc32 instruction when CPUID reports support, and to the existing software
implementation (renamed Extend_NoSSE) otherwise.

The hardware path is compiled only for x86-64 with SSE 4.2 code generation
enabled (or MSVC x64), because _mm_crc32_u64 does not exist elsewhere.
build_detect_platform passes -msse4.2 for native x86-64 builds only and
leaves the cross-compiled targets (Android, iOS) untouched.

---
 build_detect_platform | 30 +++++++++----
 util/crc32c.cc        | 96 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 116 insertions(+), 10 deletions(-)

diff --git a/build_detect_platform b/build_detect_platform
index bb76c4f22e..8f6d8469d7 100755
--- a/build_detect_platform
+++ b/build_detect_platform
@@ -71,16 +71,28 @@ if [ "$CXX" = "g++" ]; then
     MEMCMP_FLAG="-fno-builtin-memcmp"
 fi
 
+# Use the hardware crc32 instruction (SSE 4.2).  Only native x86-64 builds get
+# the flag: other architectures reject it, and util/crc32c.cc keeps using the
+# software implementation whenever __SSE4_2__ is not defined.
+SSE_FLAG=
+case "`uname -m`" in
+    x86_64|amd64)
+        if [ "$CXX" = "g++" ] || [ "$CXX" = "c++" ]; then
+            SSE_FLAG="-msse4.2"
+        fi
+        ;;
+esac
+
 case "$TARGET_OS" in
     CYGWIN_*)
         PLATFORM=OS_LINUX
-        COMMON_FLAGS="$MEMCMP_FLAG -lpthread -DOS_LINUX -DCYGWIN"
+        COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -lpthread -DOS_LINUX -DCYGWIN"
         PLATFORM_LDFLAGS="-lpthread"
         PORT_FILE=port/port_posix.cc
         ;;
     Darwin)
         PLATFORM=OS_MACOSX
-        COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX"
+        COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -DOS_MACOSX"
         PLATFORM_SHARED_EXT=dylib
         [ -z "$INSTALL_PATH" ] && INSTALL_PATH=`pwd`
         PLATFORM_SHARED_LDFLAGS="-dynamiclib -install_name $INSTALL_PATH/"
@@ -88,37 +100,37 @@ case "$TARGET_OS" in
         ;;
     Linux)
         PLATFORM=OS_LINUX
-        COMMON_FLAGS="$MEMCMP_FLAG -pthread -DOS_LINUX"
+        COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -pthread -DOS_LINUX"
         PLATFORM_LDFLAGS="-pthread"
         PORT_FILE=port/port_posix.cc
         ;;
     SunOS)
         PLATFORM=OS_SOLARIS
-        COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_SOLARIS"
+        COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_SOLARIS"
         PLATFORM_LIBS="-lpthread -lrt"
         PORT_FILE=port/port_posix.cc
         ;;
     FreeBSD)
         PLATFORM=OS_FREEBSD
-        COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_FREEBSD"
+        COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_FREEBSD"
         PLATFORM_LIBS="-lpthread"
         PORT_FILE=port/port_posix.cc
         ;;
     NetBSD)
         PLATFORM=OS_NETBSD
-        COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_NETBSD"
+        COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_NETBSD"
         PLATFORM_LIBS="-lpthread -lgcc_s"
         PORT_FILE=port/port_posix.cc
         ;;
     OpenBSD)
         PLATFORM=OS_OPENBSD
-        COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_OPENBSD"
+        COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_OPENBSD"
         PLATFORM_LDFLAGS="-pthread"
         PORT_FILE=port/port_posix.cc
         ;;
     DragonFly)
         PLATFORM=OS_DRAGONFLYBSD
-        COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_DRAGONFLYBSD"
+        COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_DRAGONFLYBSD"
         PLATFORM_LIBS="-lpthread"
         PORT_FILE=port/port_posix.cc
         ;;
@@ -131,7 +143,7 @@ case "$TARGET_OS" in
         ;;
     HP-UX)
         PLATFORM=OS_HPUX
-        COMMON_FLAGS="$MEMCMP_FLAG -D_REENTRANT -DOS_HPUX"
+        COMMON_FLAGS="$MEMCMP_FLAG $SSE_FLAG -D_REENTRANT -DOS_HPUX"
         PLATFORM_LDFLAGS="-pthread"
         PORT_FILE=port/port_posix.cc
         # man ld: +h internal_name
diff --git a/util/crc32c.cc b/util/crc32c.cc
index 6db9e77077..42a1db2a45 100644
--- a/util/crc32c.cc
+++ b/util/crc32c.cc
@@ -10,6 +10,34 @@
 #include <stdint.h>
 #include "util/coding.h"
 
+// The hardware path relies on SSE 4.2 intrinsics (_mm_crc32_u64 exists only
+// on x86-64), so it is compiled in only where they are known to be usable.
+// LEVELDB_CRC32C_SSE42 is defined exactly when IsSSE42Available() and
+// Extend_SSE42() are available.
+#if defined(_MSC_VER) && defined(_M_X64)
+#define LEVELDB_CRC32C_SSE42 1
+#include <intrin.h>
+// Returns true if CPUID leaf 1 reports SSE 4.2 (ECX bit 20).
+static inline bool IsSSE42Available() {
+  int cpu_info[4];
+  __cpuid(cpu_info, 1);
+  return (cpu_info[2] & (1 << 20)) != 0;
+}
+#elif defined(__GNUC__) && defined(__x86_64__) && defined(__SSE4_2__)
+#define LEVELDB_CRC32C_SSE42 1
+#include <cpuid.h>
+#include <nmmintrin.h>
+// Returns true if CPUID leaf 1 reports SSE 4.2 (ECX bit 20).
+static inline bool IsSSE42Available() {
+  unsigned int eax, ebx, ecx, edx;
+  // __get_cpuid() returns 0 when the requested leaf is not supported.
+  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
+    return false;
+  }
+  return (ecx & (1 << 20)) != 0;
+}
+#endif
+
 namespace leveldb {
 namespace crc32c {
 
@@ -283,7 +311,12 @@ static inline uint32_t LE_LOAD32(const uint8_t *p) {
   return DecodeFixed32(reinterpret_cast<const char*>(p));
 }
 
-uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
+// Used to fetch a naturally-aligned 64-bit word in little endian byte-order
+static inline uint64_t LE_LOAD64(const uint8_t *p) {
+  return DecodeFixed64(reinterpret_cast<const char*>(p));
+}
+
+static uint32_t Extend_NoSSE(uint32_t crc, const char* buf, size_t size) {
   const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
   const uint8_t *e = p + size;
   uint32_t l = crc ^ 0xffffffffu;
@@ -328,5 +361,66 @@ uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
   return l ^ 0xffffffffu;
 }
 
+#if defined(LEVELDB_CRC32C_SSE42)
+// Same signature as Extend_NoSSE(), built on the SSE 4.2 crc32 instruction.
+// For further improvements see Intel publication at:
+// http://download.intel.com/design/intarch/papers/323405.pdf
+static uint32_t Extend_SSE42(uint32_t crc, const char* buf, size_t size) {
+  const uint8_t *p = reinterpret_cast<const uint8_t *>(buf);
+  const uint8_t *e = p + size;
+  uint32_t l = crc ^ 0xffffffffu;
+
+#define STEP1 do {                                              \
+    l = _mm_crc32_u8(l, *p++);                                  \
+} while (0)
+#define STEP4 do {                                              \
+    l = _mm_crc32_u32(l, LE_LOAD32(p));                         \
+    p += 4;                                                     \
+} while (0)
+#define STEP8 do {                                              \
+    l = static_cast<uint32_t>(_mm_crc32_u64(l, LE_LOAD64(p)));  \
+    p += 8;                                                     \
+} while (0)
+
+  if (size > 16) {
+    // Consume the 0-7 leading bytes needed to reach an 8-byte boundary.
+    const uintptr_t misaligned = reinterpret_cast<uintptr_t>(p) % 8;
+    for (uintptr_t i = (8 - misaligned) % 8; i; --i) {
+      STEP1;
+    }
+    // Process 8 bytes at a time
+    while ((e-p) >= 8) {
+      STEP8;
+    }
+    // Process 4 bytes at a time
+    if ((e-p) >= 4) {
+      STEP4;
+    }
+  }
+  // Process the last few bytes
+  while (p != e) {
+    STEP1;
+  }
+#undef STEP8
+#undef STEP4
+#undef STEP1
+  return l ^ 0xffffffffu;
+}
+#endif  // LEVELDB_CRC32C_SSE42
+
+// Dispatches to Extend_SSE42() when it was compiled in and CPUID reports
+// SSE 4.2 support; otherwise uses Extend_NoSSE().
+uint32_t Extend(uint32_t crc, const char* buf, size_t size) {
+#if defined(LEVELDB_CRC32C_SSE42)
+  // Probe the CPU once.  C++11 guarantees that the initialization of a
+  // function-local static is thread-safe.
+  static const bool use_sse42 = IsSSE42Available();
+  if (use_sse42) {
+    return Extend_SSE42(crc, buf, size);
+  }
+#endif
+  return Extend_NoSSE(crc, buf, size);
+}
+
 }  // namespace crc32c
 }  // namespace leveldb