From c9eb6e65a72b18c75a671e5cb9951113fc3c533b Mon Sep 17 00:00:00 2001 From: yibin Date: Thu, 29 Dec 2022 15:50:18 +0800 Subject: [PATCH] Fix big endian issue for StringHashTable (#6556) close pingcap/tiflash#6555 --- dbms/src/Common/HashTable/StringHashTable.h | 21 ++++++++++++++----- .../HashTable/TwoLevelStringHashTable.h | 20 ++++++++++++++---- dbms/src/IO/Endian.h | 5 +++++ 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index c7254111d17..3fe17250ce7 100644 --- a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -252,7 +252,6 @@ class StringHashTable : private boost::noncopyable // 1. Always memcpy 8 times bytes // 2. Use switch case extension to generate fast dispatching table // 3. Funcs are named callables that can be force_inlined - // NOTE: It relies on Little Endianness template static auto #if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) @@ -296,13 +295,19 @@ class StringHashTable : private boost::noncopyable if ((reinterpret_cast(p) & 2048) == 0) { memcpy(&n[0], p, 8); - n[0] &= -1ul >> s; + if constexpr (DB::isLittleEndian()) + n[0] &= (-1ULL >> s); + else + n[0] &= (-1ULL << s); } else { const char * lp = x.data + x.size - 8; memcpy(&n[0], lp, 8); - n[0] >>= s; + if constexpr (DB::isLittleEndian()) + n[0] >>= s; + else + n[0] <<= s; } keyHolderDiscardKey(key_holder); return func(self.m1, k8, hash(k8)); @@ -312,7 +317,10 @@ class StringHashTable : private boost::noncopyable memcpy(&n[0], p, 8); const char * lp = x.data + x.size - 8; memcpy(&n[1], lp, 8); - n[1] >>= s; + if constexpr (DB::isLittleEndian()) + n[1] >>= s; + else + n[1] <<= s; keyHolderDiscardKey(key_holder); return func(self.m2, k16, hash(k16)); } @@ -321,7 +329,10 @@ class StringHashTable : private boost::noncopyable memcpy(&n[0], p, 16); const char * lp = x.data + x.size - 8; memcpy(&n[2], lp, 8); - n[2] >>= s; + if constexpr (DB::isLittleEndian()) + n[2] >>= s; + else + n[2] <<= s; keyHolderDiscardKey(key_holder); return func(self.m3, k24, hash(k24)); } diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h index 480dfa318b4..ea5b6f68568 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -132,13 +132,19 @@ class TwoLevelStringHashTable : private boost::noncopyable if ((reinterpret_cast(p) & 2048) == 0) { memcpy(&n[0], p, 8); - n[0] &= -1ul >> s; + if constexpr (DB::isLittleEndian()) + n[0] &= (-1ULL >> s); + else + n[0] &= (-1ULL << s); } else { const char * lp = x.data + x.size - 8; memcpy(&n[0], lp, 8); - n[0] >>= s; + if constexpr (DB::isLittleEndian()) + n[0] >>= s; + else + n[0] <<= s; } auto res = hash(k8); auto buck = getBucketFromHash(res); @@ -150,7 +156,10 @@ class TwoLevelStringHashTable : private boost::noncopyable memcpy(&n[0], p, 8); const char * lp = x.data + x.size - 8; memcpy(&n[1], lp, 8); - n[1] >>= s; + if constexpr (DB::isLittleEndian()) + n[1] >>= s; + else + n[1] <<= s; auto res = hash(k16); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); @@ -161,7 +170,10 @@ class TwoLevelStringHashTable : private boost::noncopyable memcpy(&n[0], p, 16); const char * lp = x.data + x.size - 8; memcpy(&n[2], lp, 8); - n[2] >>= s; + if constexpr (DB::isLittleEndian()) + n[2] >>= s; + else + n[2] <<= s; auto res = hash(k24); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); diff --git a/dbms/src/IO/Endian.h b/dbms/src/IO/Endian.h index 4cdbd861ceb..c908cc82a8d 100644 --- a/dbms/src/IO/Endian.h +++ b/dbms/src/IO/Endian.h @@ -18,6 +18,11 @@ namespace DB { +inline constexpr bool isLittleEndian() +{ + return (boost::endian::order::native == boost::endian::order::little); +} + /// Requires: /// T is non-bool integral or scoped enumeration type template