diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index 60c07e8fe93..40b09272742 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -17,10 +17,8 @@ #include #include #include -#include #include - /// Used in the `reserve` method, when the number of rows is known, but sizes of elements are not. #define APPROX_STRING_SIZE 64 @@ -328,94 +326,54 @@ int ColumnString::compareAtWithCollationImpl(size_t n, size_t m, const IColumn & ); } -// Derived must implement function `int compare(const char *, size_t, const char *, size_t)`. -template -struct ColumnString::LessWithCollation + +template +struct ColumnString::lessWithCollation { const ColumnString & parent; - const Derived & inner; + const ICollator & collator; - LessWithCollation(const ColumnString & parent_, const Derived & inner_) + lessWithCollation(const ColumnString & parent_, const ICollator & collator_) : parent(parent_) - , inner(inner_) + , collator(collator_) {} - FLATTEN_INLINE_PURE inline bool operator()(size_t lhs, size_t rhs) const + bool operator()(size_t lhs, size_t rhs) const { - int res = inner.compare( + int res = collator.compare( reinterpret_cast(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs) - 1, // Skip last zero byte. reinterpret_cast(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs) - 1 // Skip last zero byte. ); - if constexpr (positive) - { - return (res < 0); - } - else - { - return (res > 0); - } - } -}; - -struct Utf8MB4BinCmp -{ - static FLATTEN_INLINE_PURE inline int compare(const char * s1, size_t length1, const char * s2, size_t length2) - { - return DB::BinCollatorCompare(s1, length1, s2, length2); - } -}; - -// common util functions -template <> -struct ColumnString::LessWithCollation -{ - // `CollationCmpImpl` must implement function `int compare(const char *, size_t, const char *, size_t)`. - template - static void getPermutationWithCollationImpl(const ColumnString & src, const CollationCmpImpl & collator_cmp_impl, bool reverse, size_t limit, Permutation & res) - { - size_t s = src.offsets.size(); - res.resize(s); - for (size_t i = 0; i < s; ++i) - res[i] = i; - - if (limit >= s) - limit = 0; - - if (limit) - { - if (reverse) - std::partial_sort(res.begin(), res.begin() + limit, res.end(), LessWithCollation(src, collator_cmp_impl)); - else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), LessWithCollation(src, collator_cmp_impl)); - } - else - { - if (reverse) - std::sort(res.begin(), res.end(), LessWithCollation(src, collator_cmp_impl)); - else - std::sort(res.begin(), res.end(), LessWithCollation(src, collator_cmp_impl)); - } + return positive ? (res < 0) : (res > 0); } }; void ColumnString::getPermutationWithCollationImpl(const ICollator & collator, bool reverse, size_t limit, Permutation & res) const { - using PermutationWithCollationUtils = ColumnString::LessWithCollation; + size_t s = offsets.size(); + res.resize(s); + for (size_t i = 0; i < s; ++i) + res[i] = i; - // optimize path for default collator `UTF8MB4_BIN` - if (TiDB::ITiDBCollator::getCollator(TiDB::ITiDBCollator::UTF8MB4_BIN) == &collator) + if (limit >= s) + limit = 0; + + if (limit) { - Utf8MB4BinCmp cmp_impl; - PermutationWithCollationUtils::getPermutationWithCollationImpl(*this, cmp_impl, reverse, limit, res); - /// - return; + if (reverse) + std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation(*this, collator)); + else + std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation(*this, collator)); } - + else { - PermutationWithCollationUtils::getPermutationWithCollationImpl(*this, collator, reverse, limit, res); + if (reverse) + std::sort(res.begin(), res.end(), lessWithCollation(*this, collator)); + else + std::sort(res.begin(), res.end(), lessWithCollation(*this, collator)); } } @@ -426,73 +384,35 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash, const TiDB::TiDBCollatorP if (hash.getData().size() != s) throw Exception(fmt::format("Size of WeakHash32 does not match size of column: column size is {}, hash size is {}", s, hash.getData().size()), ErrorCodes::LOGICAL_ERROR); + const UInt8 * pos = chars.data(); UInt32 * hash_data = hash.getData().data(); + Offset prev_offset = 0; if (collator != nullptr) { - if (collator->getCollatorId() == TiDB::ITiDBCollator::UTF8MB4_BIN) + for (const auto & offset : offsets) { - // Skip last zero byte. - LoopOneColumn(chars, offsets, offsets.size(), [&](const std::string_view & view, size_t) { - auto sort_key = BinCollatorSortKey(view.data(), view.size()); - *hash_data = ::updateWeakHash32(reinterpret_cast(sort_key.data), sort_key.size, *hash_data); - ++hash_data; - }); - } - else - { - // Skip last zero byte. - LoopOneColumn(chars, offsets, offsets.size(), [&](const std::string_view & view, size_t) { - auto sort_key = collator->sortKey(view.data(), view.size(), sort_key_container); - *hash_data = ::updateWeakHash32(reinterpret_cast(sort_key.data), sort_key.size, *hash_data); - ++hash_data; - }); - } - } - else - { - // Skip last zero byte. - LoopOneColumn(chars, offsets, offsets.size(), [&](const std::string_view & view, size_t) { - *hash_data = ::updateWeakHash32(reinterpret_cast(view.data()), view.size(), *hash_data); - ++hash_data; - }); - } -} + auto str_size = offset - prev_offset; + /// Skip last zero byte. + auto sort_key = collator->sortKey(reinterpret_cast(pos), str_size - 1, sort_key_container); + *hash_data = ::updateWeakHash32(reinterpret_cast(sort_key.data), sort_key.size, *hash_data); -void ColumnString::updateHashWithValues(IColumn::HashValues & hash_values, const TiDB::TiDBCollatorPtr & collator, String & sort_key_container) const -{ - if (collator != nullptr) - { - if (collator->getCollatorId() == TiDB::ITiDBCollator::UTF8MB4_BIN) - { - // Skip last zero byte. - LoopOneColumn(chars, offsets, offsets.size(), [&hash_values](const std::string_view & view, size_t i) { - auto sort_key = BinCollatorSortKey(view.data(), view.size()); - size_t string_size = sort_key.size; - hash_values[i].update(reinterpret_cast(&string_size), sizeof(string_size)); - hash_values[i].update(sort_key.data, sort_key.size); - }); - } - else - { - // Skip last zero byte. - LoopOneColumn(chars, offsets, offsets.size(), [&](const std::string_view & view, size_t i) { - auto sort_key = collator->sortKey(view.data(), view.size(), sort_key_container); - size_t string_size = sort_key.size; - hash_values[i].update(reinterpret_cast(&string_size), sizeof(string_size)); - hash_values[i].update(sort_key.data, sort_key.size); - }); + pos += str_size; + prev_offset = offset; + ++hash_data; } } else { - for (size_t i = 0; i < offsets.size(); ++i) + for (const auto & offset : offsets) { - size_t string_size = sizeAt(i); - size_t offset = offsetAt(i); + auto str_size = offset - prev_offset; + /// Skip last zero byte. + *hash_data = ::updateWeakHash32(pos, str_size - 1, *hash_data); - hash_values[i].update(reinterpret_cast(&string_size), sizeof(string_size)); - hash_values[i].update(reinterpret_cast(&chars[offset]), string_size); + pos += str_size; + prev_offset = offset; + ++hash_data; } } } diff --git a/dbms/src/Columns/ColumnString.h b/dbms/src/Columns/ColumnString.h index 2204319e090..c6326f88674 100644 --- a/dbms/src/Columns/ColumnString.h +++ b/dbms/src/Columns/ColumnString.h @@ -52,8 +52,8 @@ class ColumnString final : public COWPtrHelper template struct less; - template - struct LessWithCollation; + template + struct lessWithCollation; ColumnString() = default; @@ -118,7 +118,7 @@ class ColumnString final : public COWPtrHelper void insert(const Field & x) override { - const auto & s = DB::get(x); + const String & s = DB::get(x); const size_t old_size = chars.size(); const size_t size_to_append = s.size() + 1; const size_t new_size = old_size + size_to_append; @@ -134,7 +134,7 @@ class ColumnString final : public COWPtrHelper void insertFrom(const IColumn & src_, size_t n) override { - const auto & src = static_cast(src_); + const ColumnString & src = static_cast(src_); if (n != 0) { @@ -213,7 +213,7 @@ class ColumnString final : public COWPtrHelper if (collator != nullptr) { - // Skip last zero byte. + /// Skip last zero byte. auto sort_key = collator->sortKey(reinterpret_cast(src), string_size - 1, sort_key_container); string_size = sort_key.size; src = sort_key.data; @@ -259,7 +259,34 @@ class ColumnString final : public COWPtrHelper } } - void updateHashWithValues(IColumn::HashValues & hash_values, const TiDB::TiDBCollatorPtr & collator, String & sort_key_container) const override; + void updateHashWithValues(IColumn::HashValues & hash_values, const TiDB::TiDBCollatorPtr & collator, String & sort_key_container) const override + { + if (collator != nullptr) + { + for (size_t i = 0; i < offsets.size(); ++i) + { + size_t string_size = sizeAt(i); + size_t offset = offsetAt(i); + + /// Skip last zero byte. + auto sort_key = collator->sortKey(reinterpret_cast(&chars[offset]), string_size - 1, sort_key_container); + string_size = sort_key.size; + hash_values[i].update(reinterpret_cast(&string_size), sizeof(string_size)); + hash_values[i].update(sort_key.data, sort_key.size); + } + } + else + { + for (size_t i = 0; i < offsets.size(); ++i) + { + size_t string_size = sizeAt(i); + size_t offset = offsetAt(i); + + hash_values[i].update(reinterpret_cast(&string_size), sizeof(string_size)); + hash_values[i].update(reinterpret_cast(&chars[offset]), string_size); + } + } + } void updateWeakHash32(WeakHash32 & hash, const TiDB::TiDBCollatorPtr &, String &) const override; @@ -277,7 +304,7 @@ class ColumnString final : public COWPtrHelper int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override { - const auto & rhs = static_cast(rhs_); + const ColumnString & rhs = static_cast(rhs_); const size_t size = sizeAt(n); const size_t rhs_size = rhs.sizeAt(m); diff --git a/dbms/src/Functions/CollationOperatorOptimized.h b/dbms/src/Functions/CollationOperatorOptimized.h index 8276a41fa17..395ecc5b9eb 100644 --- a/dbms/src/Functions/CollationOperatorOptimized.h +++ b/dbms/src/Functions/CollationOperatorOptimized.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -28,6 +27,42 @@ namespace DB { +template +ALWAYS_INLINE inline int signum(T val) +{ + return (0 < val) - (val < 0); +} + +// Check equality is much faster than other comparison. +// - check size first +// - return 0 if equal else 1 +__attribute__((flatten, always_inline, pure)) inline uint8_t RawStrEqualCompare(const std::string_view & lhs, const std::string_view & rhs) +{ + return StringRef(lhs) == StringRef(rhs) ? 0 : 1; +} + +// Compare str view by memcmp +__attribute__((flatten, always_inline, pure)) inline int RawStrCompare(const std::string_view & v1, const std::string_view & v2) +{ + return signum(v1.compare(v2)); +} + +constexpr char SPACE = ' '; + +// Remove tail space +__attribute__((flatten, always_inline, pure)) inline std::string_view RightTrim(const std::string_view & v) +{ + if (likely(v.empty() || v.back() != SPACE)) + return v; + size_t end = v.find_last_not_of(SPACE); + return end == std::string_view::npos ? std::string_view{} : std::string_view(v.data(), end + 1); +} + +__attribute__((flatten, always_inline, pure)) inline int RtrimStrCompare(const std::string_view & va, const std::string_view & vb) +{ + return RawStrCompare(RightTrim(va), RightTrim(vb)); +} + // If true, only need to check equal or not. template struct IsEqualRelated @@ -48,7 +83,6 @@ struct IsEqualRelated> }; // Loop columns and invoke callback for each pair. -// Remove last zero byte. template __attribute__((flatten, always_inline)) inline void LoopTwoColumns( const ColumnString::Chars_t & a_data, @@ -58,29 +92,18 @@ __attribute__((flatten, always_inline)) inline void LoopTwoColumns( size_t size, F && func) { - ColumnString::Offset a_prev_offset = 0; - ColumnString::Offset b_prev_offset = 0; - const auto * a_ptr = reinterpret_cast(a_data.data()); - const auto * b_ptr = reinterpret_cast(b_data.data()); - for (size_t i = 0; i < size; ++i) { - auto a_size = a_offsets[i] - a_prev_offset; - auto b_size = b_offsets[i] - b_prev_offset; - - // Remove last zero byte. - func({a_ptr, a_size - 1}, {b_ptr, b_size - 1}, i); - - a_ptr += a_size; - b_ptr += b_size; + size_t a_size = StringUtil::sizeAt(a_offsets, i) - 1; + size_t b_size = StringUtil::sizeAt(b_offsets, i) - 1; + const auto * a_ptr = reinterpret_cast(&a_data[StringUtil::offsetAt(a_offsets, i)]); + const auto * b_ptr = reinterpret_cast(&b_data[StringUtil::offsetAt(b_offsets, i)]); - a_prev_offset = a_offsets[i]; - b_prev_offset = b_offsets[i]; + func({a_ptr, a_size}, {b_ptr, b_size}, i); } } // Loop one column and invoke callback for each pair. -// Remove last zero byte. template __attribute__((flatten, always_inline)) inline void LoopOneColumn( const ColumnString::Chars_t & a_data, @@ -88,18 +111,12 @@ __attribute__((flatten, always_inline)) inline void LoopOneColumn( size_t size, F && func) { - ColumnString::Offset a_prev_offset = 0; - const auto * a_ptr = reinterpret_cast(a_data.data()); - for (size_t i = 0; i < size; ++i) { - auto a_size = a_offsets[i] - a_prev_offset; - - // Remove last zero byte. - func({a_ptr, a_size - 1}, i); + size_t a_size = StringUtil::sizeAt(a_offsets, i) - 1; + const auto * a_ptr = reinterpret_cast(&a_data[StringUtil::offsetAt(a_offsets, i)]); - a_ptr += a_size; - a_prev_offset = a_offsets[i]; + func({a_ptr, a_size}, i); } } diff --git a/dbms/src/Interpreters/AggregationCommon.h b/dbms/src/Interpreters/AggregationCommon.h index a043760b9af..9f390133088 100644 --- a/dbms/src/Interpreters/AggregationCommon.h +++ b/dbms/src/Interpreters/AggregationCommon.h @@ -171,7 +171,7 @@ static inline T ALWAYS_INLINE packFixed( return key; } -/* + /// Hash a set of keys into a UInt128 value. static inline UInt128 ALWAYS_INLINE hash128( size_t i, @@ -202,7 +202,7 @@ static inline UInt128 ALWAYS_INLINE hash128( return key; } -*/ + /// Almost the same as above but it doesn't return any reference to key data. static inline UInt128 ALWAYS_INLINE hash128( @@ -212,7 +212,7 @@ static inline UInt128 ALWAYS_INLINE hash128( const TiDB::TiDBCollators & collators, std::vector & sort_key_containers) { - UInt128 key{}; + UInt128 key; SipHash hash; if (collators.empty()) @@ -252,7 +252,7 @@ static inline StringRef * ALWAYS_INLINE placeKeysInPool( return reinterpret_cast(res); } -/* + /// Copy keys to the pool. Then put into pool StringRefs to them and return the pointer to the first. static inline StringRef * ALWAYS_INLINE extractKeysAndPlaceInPool( size_t i, @@ -326,7 +326,7 @@ inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous( return {res, sum_keys_size}; } -*/ + /** Serialize keys into a continuous chunk of memory. */ diff --git a/dbms/src/Interpreters/SetVariants.h b/dbms/src/Interpreters/SetVariants.h index 58f102555b6..08ebb5d5100 100644 --- a/dbms/src/Interpreters/SetVariants.h +++ b/dbms/src/Interpreters/SetVariants.h @@ -21,7 +21,7 @@ #include #include #include - +#include namespace DB { diff --git a/dbms/src/Storages/Transaction/Collator.cpp b/dbms/src/Storages/Transaction/Collator.cpp index fc40701e3c5..d11d693a8a4 100644 --- a/dbms/src/Storages/Transaction/Collator.cpp +++ b/dbms/src/Storages/Transaction/Collator.cpp @@ -13,9 +13,9 @@ // limitations under the License. #include +#include #include #include -#include #include @@ -183,12 +183,22 @@ class BinCollator final : public ITiDBCollator int compare(const char * s1, size_t length1, const char * s2, size_t length2) const override { - return DB::BinCollatorCompare(s1, length1, s2, length2); + if constexpr (padding) + return DB::RtrimStrCompare({s1, length1}, {s2, length2}); + else + return DB::RawStrCompare({s1, length1}, {s2, length2}); } StringRef sortKey(const char * s, size_t length, std::string &) const override { - return DB::BinCollatorSortKey(s, length); + if constexpr (padding) + { + return StringRef(rtrim(s, length)); + } + else + { + return StringRef(s, length); + } } std::unique_ptr pattern() const override { return std::make_unique>>(); } @@ -580,9 +590,6 @@ TiDBCollatorPtr ITiDBCollator::getCollator(int32_t id) { switch (id) { - case ITiDBCollator::UTF8MB4_BIN: - static const auto utf8mb4_collator = UTF8MB4_BIN_TYPE(UTF8MB4_BIN); - return &utf8mb4_collator; case ITiDBCollator::BINARY: static const auto binary_collator = BinCollator(BINARY); return &binary_collator; @@ -592,6 +599,9 @@ TiDBCollatorPtr ITiDBCollator::getCollator(int32_t id) case ITiDBCollator::LATIN1_BIN: static const auto latin1_collator = BinCollator(LATIN1_BIN); return &latin1_collator; + case ITiDBCollator::UTF8MB4_BIN: + static const auto utf8mb4_collator = UTF8MB4_BIN_TYPE(UTF8MB4_BIN); + return &utf8mb4_collator; case ITiDBCollator::UTF8_BIN: static const auto utf8_collator = UTF8MB4_BIN_TYPE(UTF8_BIN); return &utf8_collator; diff --git a/dbms/src/Storages/Transaction/CollatorUtils.h b/dbms/src/Storages/Transaction/CollatorUtils.h deleted file mode 100644 index 3f318a5b700..00000000000 --- a/dbms/src/Storages/Transaction/CollatorUtils.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include - -#include - -#define FLATTEN_INLINE_PURE __attribute__((flatten, always_inline, pure)) - -namespace DB -{ - -template -ALWAYS_INLINE inline int signum(T val) -{ - return (0 < val) - (val < 0); -} - -// Check equality is much faster than other comparison. -// - check size first -// - return 0 if equal else 1 -FLATTEN_INLINE_PURE inline int RawStrEqualCompare(const std::string_view & lhs, const std::string_view & rhs) -{ - return StringRef(lhs) == StringRef(rhs) ? 0 : 1; -} - -// Compare str view by memcmp -FLATTEN_INLINE_PURE inline int RawStrCompare(const std::string_view & v1, const std::string_view & v2) -{ - return v1.compare(v2); -} - -constexpr char SPACE = ' '; - -FLATTEN_INLINE_PURE inline std::string_view RightTrimRaw(const std::string_view & v) -{ - size_t end = v.find_last_not_of(SPACE); - return end == std::string_view::npos ? std::string_view{} : std::string_view(v.data(), end + 1); -} - -// Remove tail space -FLATTEN_INLINE_PURE inline std::string_view RightTrim(const std::string_view & v) -{ - if (likely(v.empty() || v.back() != SPACE)) - return v; - return RightTrimRaw(v); -} - -FLATTEN_INLINE_PURE inline std::string_view RightTrimNoEmpty(const std::string_view & v) -{ - if (likely(v.back() != SPACE)) - return v; - return RightTrimRaw(v); -} - -FLATTEN_INLINE_PURE inline int RtrimStrCompare(const std::string_view & va, const std::string_view & vb) -{ - return RawStrCompare(RightTrim(va), RightTrim(vb)); -} - -template -FLATTEN_INLINE_PURE inline int BinCollatorCompare(const char * s1, size_t length1, const char * s2, size_t length2) -{ - if constexpr (padding) - return DB::RtrimStrCompare({s1, length1}, {s2, length2}); - else - return DB::RawStrCompare({s1, length1}, {s2, length2}); -} - -template -FLATTEN_INLINE_PURE inline StringRef BinCollatorSortKey(const char * s, size_t length) -{ - if constexpr (padding) - { - return StringRef(RightTrim({s, length})); - } - else - { - return StringRef(s, length); - } -} -} // namespace DB diff --git a/dbms/src/Storages/Transaction/tests/gtest_tidb_collator.cpp b/dbms/src/Storages/Transaction/tests/gtest_tidb_collator.cpp index 13c51dba2db..9a6dae3db08 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_tidb_collator.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_tidb_collator.cpp @@ -13,7 +13,6 @@ // limitations under the License. #include -#include #include namespace DB::tests @@ -67,45 +66,59 @@ const typename CollatorCases::SortKeyCase CollatorCases::sk_cases[] = { {"a", {PREVENT_TRUNC("\x61"), PREVENT_TRUNC("\x61"), PREVENT_TRUNC("\x00\x41"), PREVENT_TRUNC("\x61"), PREVENT_TRUNC("\x0e\x33")}}, {"A", {PREVENT_TRUNC("\x41"), PREVENT_TRUNC("\x41"), PREVENT_TRUNC("\x00\x41"), PREVENT_TRUNC("\x41"), PREVENT_TRUNC("\x0e\x33")}}, {"😃", - {PREVENT_TRUNC("\xf0\x9f\x98\x83"), PREVENT_TRUNC("\xf0\x9f\x98\x83"), PREVENT_TRUNC("\xff\xfd"), PREVENT_TRUNC("\xf0\x9f\x98\x83"), PREVENT_TRUNC("\xff\xfd")}}, + {PREVENT_TRUNC("\xf0\x9f\x98\x83"), PREVENT_TRUNC("\xf0\x9f\x98\x83"), PREVENT_TRUNC("\xff\xfd"), PREVENT_TRUNC("\xf0\x9f\x98\x83"), + PREVENT_TRUNC("\xff\xfd")}}, {"Foo © bar 𝌆 baz ☃ qux", - {PREVENT_TRUNC("\x46\x6f\x6f\x20\xc2\xa9\x20\x62\x61\x72\x20\xf0\x9d\x8c\x86\x20\x62\x61\x7a\x20\xe2\x98\x83\x20\x71\x75\x78"), - PREVENT_TRUNC("\x46\x6f\x6f\x20\xc2\xa9\x20\x62\x61\x72\x20\xf0\x9d\x8c\x86\x20\x62\x61\x7a\x20\xe2\x98\x83\x20\x71\x75\x78"), - PREVENT_TRUNC("\x00\x46\x00\x4f\x00\x4f\x00\x20\x00\xa9\x00\x20\x00\x42\x00\x41\x00\x52\x00\x20\xff\xfd\x00\x20\x00\x42\x00\x41" - "\x00\x5a\x00\x20\x26\x03\x00\x20\x00\x51\x00\x55\x00\x58"), - PREVENT_TRUNC("\x46\x6f\x6f\x20\xc2\xa9\x20\x62\x61\x72\x20\xf0\x9d\x8c\x86\x20\x62\x61\x7a\x20\xe2\x98\x83\x20\x71\x75\x78"), - PREVENT_TRUNC("\x0E\xB9\x0F\x82\x0F\x82\x02\x09\x02\xC5\x02\x09\x0E\x4A\x0E\x33\x0F\xC0\x02\x09\xFF\xFD\x02\x09\x0E\x4A\x0E\x33" - "\x10\x6A\x02\x09\x06\xFF\x02\x09\x0F\xB4\x10\x1F\x10\x5A")}}, + {PREVENT_TRUNC("\x46\x6f\x6f\x20\xc2\xa9\x20\x62\x61\x72\x20\xf0\x9d\x8c\x86\x20\x62\x61\x7a\x20\xe2\x98\x83\x20\x71\x75\x78"), + PREVENT_TRUNC("\x46\x6f\x6f\x20\xc2\xa9\x20\x62\x61\x72\x20\xf0\x9d\x8c\x86\x20\x62\x61\x7a\x20\xe2\x98\x83\x20\x71\x75\x78"), + PREVENT_TRUNC("\x00\x46\x00\x4f\x00\x4f\x00\x20\x00\xa9\x00\x20\x00\x42\x00\x41\x00\x52\x00\x20\xff\xfd\x00\x20\x00\x42\x00\x41" + "\x00\x5a\x00\x20\x26\x03\x00\x20\x00\x51\x00\x55\x00\x58"), + PREVENT_TRUNC("\x46\x6f\x6f\x20\xc2\xa9\x20\x62\x61\x72\x20\xf0\x9d\x8c\x86\x20\x62\x61\x7a\x20\xe2\x98\x83\x20\x71\x75\x78"), + PREVENT_TRUNC("\x0E\xB9\x0F\x82\x0F\x82\x02\x09\x02\xC5\x02\x09\x0E\x4A\x0E\x33\x0F\xC0\x02\x09\xFF\xFD\x02\x09\x0E\x4A\x0E\x33" + "\x10\x6A\x02\x09\x06\xFF\x02\x09\x0F\xB4\x10\x1F\x10\x5A")}}, {"a ", {PREVENT_TRUNC("\x61\x20"), PREVENT_TRUNC("\x61"), PREVENT_TRUNC("\x00\x41"), PREVENT_TRUNC("\x61"), PREVENT_TRUNC("\x0e\x33")}}, {"", {PREVENT_TRUNC(""), PREVENT_TRUNC(""), PREVENT_TRUNC(""), PREVENT_TRUNC(""), PREVENT_TRUNC("")}}, {"ß", - {PREVENT_TRUNC("\xc3\x9f"), PREVENT_TRUNC("\xc3\x9f"), PREVENT_TRUNC("\x00\x53"), PREVENT_TRUNC("\xc3\x9f"), PREVENT_TRUNC("\x0F\xEA\x0F\xEA")}}, + {PREVENT_TRUNC("\xc3\x9f"), PREVENT_TRUNC("\xc3\x9f"), PREVENT_TRUNC("\x00\x53"), PREVENT_TRUNC("\xc3\x9f"), + PREVENT_TRUNC("\x0F\xEA\x0F\xEA")}}, }; const typename CollatorCases::PatternCase CollatorCases::pattern_cases[] = { {"A", - {{"a", {false, false, true, false, true}}, {"A", {true, true, true, true, true}}, {"À", {false, false, true, false, true}}, {"", {false, false, false, false, false}}}}, + {{"a", {false, false, true, false, true}}, {"A", {true, true, true, true, true}}, {"À", {false, false, true, false, true}}, + {"", {false, false, false, false, false}}}}, {"_A", - {{"aA", {true, true, true, true, true}}, {"ÀA", {false, false, true, true, true}}, {"ÀÀ", {false, false, true, false, true}}, {"", {false, false, false, false, false}}}}, + {{"aA", {true, true, true, true, true}}, {"ÀA", {false, false, true, true, true}}, {"ÀÀ", {false, false, true, false, true}}, + {"", {false, false, false, false, false}}}}, {"%A", - {{"a", {false, false, true, false, true}}, {"ÀA", {true, true, true, true, true}}, {"À", {false, false, true, false, true}}, {"", {false, false, false, false, false}}}}, + {{"a", {false, false, true, false, true}}, {"ÀA", {true, true, true, true, true}}, {"À", {false, false, true, false, true}}, + {"", {false, false, false, false, false}}}}, {"À", - {{"a", {false, false, true, false, true}}, {"A", {false, false, true, false, true}}, {"À", {true, true, true, true, true}}, {"", {false, false, false, false, false}}}}, + {{"a", {false, false, true, false, true}}, {"A", {false, false, true, false, true}}, {"À", {true, true, true, true, true}}, + {"", {false, false, false, false, false}}}}, {"_À", - {{" À", {true, true, true, true, true}}, {"ÀA", {false, false, true, false, true}}, {"ÀÀ", {false, false, true, true, true}}, {"", {false, false, false, false, false}}}}, + {{" À", {true, true, true, true, true}}, {"ÀA", {false, false, true, false, true}}, {"ÀÀ", {false, false, true, true, true}}, + {"", {false, false, false, false, false}}}}, {"%À", - {{"À", {true, true, true, true, true}}, {"ÀÀÀ", {true, true, true, true, true}}, {"ÀA", {false, false, true, false, true}}, {"", {false, false, false, false, false}}}}, + {{"À", {true, true, true, true, true}}, {"ÀÀÀ", {true, true, true, true, true}}, {"ÀA", {false, false, true, false, true}}, + {"", {false, false, false, false, false}}}}, {"À_", - {{"À ", {true, true, true, true, true}}, {"ÀAA", {false, false, false, false, false}}, {"À", {false, false, false, false, false}}, {"", {false, false, false, false, false}}}}, + {{"À ", {true, true, true, true, true}}, {"ÀAA", {false, false, false, false, false}}, {"À", {false, false, false, false, false}}, + {"", {false, false, false, false, false}}}}, {"À%", - {{"À", {true, true, true, true, true}}, {"ÀÀÀ", {true, true, true, true, true}}, {"AÀ", {false, false, true, false, true}}, {"", {false, false, false, false, false}}}}, + {{"À", {true, true, true, true, true}}, {"ÀÀÀ", {true, true, true, true, true}}, {"AÀ", {false, false, true, false, true}}, + {"", {false, false, false, false, false}}}}, {"", - {{"À", {false, false, false, false, false}}, {"ÀÀÀ", {false, false, false, false, false}}, {"AÀ", {false, false, false, false, false}}, {"", {true, true, true, true, true}}}}, + {{"À", {false, false, false, false, false}}, {"ÀÀÀ", {false, false, false, false, false}}, + {"AÀ", {false, false, false, false, false}}, {"", {true, true, true, true, true}}}}, {"%", - {{"À", {true, true, true, true, true}}, {"ÀÀÀ", {true, true, true, true, true}}, {"AÀ", {true, true, true, true, true}}, {"", {true, true, true, true, true}}}}, + {{"À", {true, true, true, true, true}}, {"ÀÀÀ", {true, true, true, true, true}}, {"AÀ", {true, true, true, true, true}}, + {"", {true, true, true, true, true}}}}, {"a_%À", - {{"ÀÀ", {false, false, false, false, false}}, {"aÀÀ", {true, true, true, true, true}}, {"ÀÀÀÀ", {false, false, true, false, true}}, {"ÀÀÀa", {false, false, true, false, true}}}}, + {{"ÀÀ", {false, false, false, false, false}}, {"aÀÀ", {true, true, true, true, true}}, {"ÀÀÀÀ", {false, false, true, false, true}}, + {"ÀÀÀa", {false, false, true, false, true}}}}, {"À%_a", - {{"ÀÀ", {false, false, false, false, false}}, {"aÀÀ", {false, false, true, false, true}}, {"ÀÀÀa", {true, true, true, true, true}}, {"aÀÀÀ", {false, false, true, false, true}}}}, + {{"ÀÀ", {false, false, false, false, false}}, {"aÀÀ", {false, false, true, false, true}}, {"ÀÀÀa", {true, true, true, true, true}}, + {"aÀÀÀ", {false, false, true, false, true}}}}, {"___a", {{"中a", {true, true, false, false, false}}, {"中文字a", {false, false, true, true, true}}}}, {"𐐭", {{"𐐨", {false, false, true, false, false}}}}, }; @@ -120,7 +133,7 @@ void testCollator() const std::string & s2 = std::get<1>(c); int ans = std::get(std::get<2>(c)); std::cout << "Compare case (" << s1 << ", " << s2 << ", " << ans << ")" << std::endl; - ASSERT_EQ(signum((collator->compare(s1.data(), s1.length(), s2.data(), s2.length()))), ans); + ASSERT_EQ(collator->compare(s1.data(), s1.length(), s2.data(), s2.length()), ans); } for (const auto & c : CollatorCases::sk_cases) { @@ -176,29 +189,14 @@ struct UnicodeCICollator static constexpr auto collation_case = CollatorCases::UnicodeCI; }; -TEST(CollatorSuite, BinCollator) -{ - testCollator(); -} +TEST(CollatorSuite, BinCollator) { testCollator(); } -TEST(CollatorSuite, BinPaddingCollator) -{ - testCollator(); -} +TEST(CollatorSuite, BinPaddingCollator) { testCollator(); } -TEST(CollatorSuite, Utf8BinPaddingCollator) -{ - testCollator(); -} +TEST(CollatorSuite, Utf8BinPaddingCollator) { testCollator(); } -TEST(CollatorSuite, GeneralCICollator) -{ - testCollator(); -} +TEST(CollatorSuite, GeneralCICollator) { testCollator(); } -TEST(CollatorSuite, UnicodeCICollator) -{ - testCollator(); -} +TEST(CollatorSuite, UnicodeCICollator) { testCollator(); } } // namespace DB::tests