diff --git a/contrib/client-c b/contrib/client-c index 2c6c5fed8d7..36e05cb0f24 160000 --- a/contrib/client-c +++ b/contrib/client-c @@ -1 +1 @@ -Subproject commit 2c6c5fed8d7c48bcb52198f1107d4c58dd22f7e2 +Subproject commit 36e05cb0f24c085785abf367176dac2a45bfd67b diff --git a/contrib/tiflash-proxy b/contrib/tiflash-proxy index 4c10d3bcf95..7578b816399 160000 --- a/contrib/tiflash-proxy +++ b/contrib/tiflash-proxy @@ -1 +1 @@ -Subproject commit 4c10d3bcf95b288d6bb12acdf1872f41004c512e +Subproject commit 7578b8163992ce933074135f8687ad447d88ea9b diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 8797edd22ea..0ffcfa9261a 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -90,6 +90,7 @@ add_headers_and_sources(dbms src/Storages/Page/V2/mvcc) add_headers_and_sources(dbms src/Storages/Page/V2/VersionSet) add_headers_and_sources(dbms src/Storages/Page/V2/gc) add_headers_and_sources(dbms src/WindowFunctions) +add_headers_and_sources(dbms src/TiDB/Schema) if (ENABLE_V3_PAGESTORAGE) add_headers_and_sources(dbms src/Storages/Page/V3) add_headers_and_sources(dbms src/Storages/Page/V3/LogFile) diff --git a/dbms/src/Columns/ColumnNullable.cpp b/dbms/src/Columns/ColumnNullable.cpp index 6348f3bdfd0..a3ef93f1ad3 100644 --- a/dbms/src/Columns/ColumnNullable.cpp +++ b/dbms/src/Columns/ColumnNullable.cpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace DB @@ -41,10 +42,10 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn nested_column = nested_column_materialized; if (!getNestedColumn().canBeInsideNullable()) - throw Exception{getNestedColumn().getName() + " cannot be inside Nullable column", ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(fmt::format("{} cannot be inside Nullable column", getNestedColumn().getName()), ErrorCodes::ILLEGAL_COLUMN); if (null_map->isColumnConst()) - throw Exception{"ColumnNullable cannot have constant null map", ErrorCodes::ILLEGAL_COLUMN}; + throw Exception("ColumnNullable cannot have constant null map", ErrorCodes::ILLEGAL_COLUMN); } @@ -106,7 +107,7 @@ void ColumnNullable::updateWeakHash32(WeakHash32 & hash, const TiDB::TiDBCollato auto s = size(); if (hash.getData().size() != s) - throw Exception("Size of WeakHash32 does not match size of column: column size is " + std::to_string(s) + ", hash size is " + std::to_string(hash.getData().size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(fmt::format("Size of WeakHash32 does not match size of column: column size is {}, hash size is {}", s, hash.getData().size()), ErrorCodes::LOGICAL_ERROR); WeakHash32 old_hash = hash; nested_column->updateWeakHash32(hash, collator, sort_key_container); @@ -158,12 +159,12 @@ void ColumnNullable::get(size_t n, Field & res) const StringRef ColumnNullable::getDataAt(size_t /*n*/) const { - throw Exception{"Method getDataAt is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED}; + throw Exception(fmt::format("Method getDataAt is not supported for {}", getName()), ErrorCodes::NOT_IMPLEMENTED); } void ColumnNullable::insertData(const char * /*pos*/, size_t /*length*/) { - throw Exception{"Method insertData is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED}; + throw Exception(fmt::format("Method insertData is not supported for {}", getName()), ErrorCodes::NOT_IMPLEMENTED); } bool ColumnNullable::decodeTiDBRowV2Datum(size_t cursor, const String & raw_value, size_t length, bool force_decode) @@ -212,7 +213,7 @@ const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos, con void 
ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length) { - const ColumnNullable & nullable_col = static_cast(src); + const auto & nullable_col = static_cast(src); getNullMapColumn().insertRangeFrom(*nullable_col.null_map, start, length); getNestedColumn().insertRangeFrom(*nullable_col.nested_column, start, length); } @@ -233,7 +234,7 @@ void ColumnNullable::insert(const Field & x) void ColumnNullable::insertFrom(const IColumn & src, size_t n) { - const ColumnNullable & src_concrete = static_cast(src); + const auto & src_concrete = static_cast(src); getNestedColumn().insertFrom(src_concrete.getNestedColumn(), n); getNullMapData().push_back(src_concrete.getNullMapData()[n]); } @@ -285,24 +286,24 @@ std::tuple ColumnNullable::compareAtCheckNull(size_t n, size_t m, con return std::make_tuple(has_null, res); } -int ColumnNullable::compareAtWithCollation( +int ColumnNullable::compareAt( size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const ICollator & collator) const { - const ColumnNullable & nullable_rhs = static_cast(rhs_); + const auto & nullable_rhs = static_cast(rhs_); auto [has_null, res] = compareAtCheckNull(n, m, nullable_rhs, null_direction_hint); if (has_null) return res; const IColumn & nested_rhs = nullable_rhs.getNestedColumn(); - return getNestedColumn().compareAtWithCollation(n, m, nested_rhs, null_direction_hint, collator); + return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint, collator); } int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const { - const ColumnNullable & nullable_rhs = static_cast(rhs_); + const auto & nullable_rhs = static_cast(rhs_); auto [has_null, res] = compareAtCheckNull(n, m, nullable_rhs, null_direction_hint); if (has_null) return res; @@ -310,7 +311,7 @@ int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); } -void ColumnNullable::getPermutationWithCollation( +void ColumnNullable::getPermutation( const ICollator & collator, bool reverse, size_t limit, @@ -318,7 +319,7 @@ void ColumnNullable::getPermutationWithCollation( DB::IColumn::Permutation & res) const { /// Cannot pass limit because of unknown amount of NULLs. 
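/// (Illustration, not part of the patch: the nested column is sorted in full here; adjustPermutationWithNullDirection below then moves the NULL rows to the requested end, and only after that adjustment does `limit` become meaningful.)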
- getNestedColumn().getPermutationWithCollation(collator, reverse, 0, null_direction_hint, res); + getNestedColumn().getPermutation(collator, reverse, 0, null_direction_hint, res); adjustPermutationWithNullDirection(reverse, limit, null_direction_hint, res); } @@ -538,7 +539,7 @@ void ColumnNullable::applyNullMapImpl(const ColumnUInt8 & map) const NullMap & arr2 = map.getData(); if (arr1.size() != arr2.size()) - throw Exception{"Inconsistent sizes of ColumnNullable objects", ErrorCodes::LOGICAL_ERROR}; + throw Exception("Inconsistent sizes of ColumnNullable objects", ErrorCodes::LOGICAL_ERROR); for (size_t i = 0, size = arr1.size(); i < size; ++i) arr1[i] |= negative ^ arr2[i]; @@ -565,9 +566,11 @@ void ColumnNullable::applyNullMap(const ColumnNullable & other) void ColumnNullable::checkConsistency() const { if (null_map->size() != getNestedColumn().size()) - throw Exception("Logical error: Sizes of nested column and null map of Nullable column are not equal: null size is : " - + std::to_string(null_map->size()) + " column size is : " + std::to_string(getNestedColumn().size()), - ErrorCodes::SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT); + throw Exception( + fmt::format("Logical error: Sizes of nested column and null map of Nullable column are not equal: null size is : {} column size is : {}", + null_map->size(), + getNestedColumn().size()), + ErrorCodes::SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT); } diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h index 7f2ca204942..5e1ea474a92 100644 --- a/dbms/src/Columns/ColumnNullable.h +++ b/dbms/src/Columns/ColumnNullable.h @@ -84,9 +84,9 @@ class ColumnNullable final : public COWPtrHelper ColumnPtr permute(const Permutation & perm, size_t limit) const override; std::tuple compareAtCheckNull(size_t n, size_t m, const ColumnNullable & rhs, int null_direction_hint) const; int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; - int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const ICollator & collator) const override; + int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const ICollator & collator) const override; void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; - void getPermutationWithCollation(const ICollator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; + void getPermutation(const ICollator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; void adjustPermutationWithNullDirection(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const; void reserve(size_t n) override; size_t byteSize() const override; diff --git a/dbms/src/Columns/ColumnString.cpp b/dbms/src/Columns/ColumnString.cpp index de20fb7169b..54d4238616f 100644 --- a/dbms/src/Columns/ColumnString.cpp +++ b/dbms/src/Columns/ColumnString.cpp @@ -17,6 +17,7 @@ #include #include #include +#include /// Used in the `reserve` method, when the number of rows is known, but sizes of elements are not. 
#define APPROX_STRING_SIZE 64 @@ -80,7 +81,7 @@ void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t len if (length == 0) return; - const ColumnString & src_concrete = static_cast<const ColumnString &>(src); + const auto & src_concrete = static_cast<const ColumnString &>(src); if (start + length > src_concrete.offsets.size()) throw Exception("Parameter out of bound in IColumnString::insertRangeFrom method.", @@ -310,7 +311,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const int ColumnString::compareAtWithCollationImpl(size_t n, size_t m, const IColumn & rhs_, const ICollator & collator) const { - const ColumnString & rhs = static_cast<const ColumnString &>(rhs_); + const auto & rhs = static_cast<const ColumnString &>(rhs_); return collator.compare( reinterpret_cast<const char *>(&chars[offsetAt(n)]), @@ -374,7 +375,7 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash, const TiDB::TiDBCollatorP auto s = offsets.size(); if (hash.getData().size() != s) - throw Exception("Size of WeakHash32 does not match size of column: column size is " + std::to_string(s) + ", hash size is " + std::to_string(hash.getData().size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(fmt::format("Size of WeakHash32 does not match size of column: column size is {}, hash size is {}", s, hash.getData().size()), ErrorCodes::LOGICAL_ERROR); const UInt8 * pos = chars.data(); UInt32 * hash_data = hash.getData().data(); diff --git a/dbms/src/Columns/ColumnString.h b/dbms/src/Columns/ColumnString.h index 7c12ef4b719..48b02388a6c 100644 --- a/dbms/src/Columns/ColumnString.h +++ b/dbms/src/Columns/ColumnString.h @@ -315,7 +315,7 @@ class ColumnString final : public COWPtrHelper<IColumn, ColumnString> return size > rhs_size ? 1 : (size < rhs_size ? -1 : 0); } - int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const ICollator & collator) const override + int compareAt(size_t n, size_t m, const IColumn & rhs_, int, const ICollator & collator) const override { return compareAtWithCollationImpl(n, m, rhs_, collator); } @@ -324,7 +324,7 @@ class ColumnString final : public COWPtrHelper<IColumn, ColumnString> void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; - void getPermutationWithCollation(const ICollator & collator, bool reverse, size_t limit, int, Permutation & res) const override + void getPermutation(const ICollator & collator, bool reverse, size_t limit, int, Permutation & res) const override { getPermutationWithCollationImpl(collator, reverse, limit, res); } diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 3c764b1a6f8..a5a25e6e28c 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -239,9 +239,9 @@ class IColumn : public COWPtr<IColumn> */ virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; - virtual int compareAtWithCollation(size_t, size_t, const IColumn &, int, const ICollator &) const + virtual int compareAt(size_t, size_t, const IColumn &, int, const ICollator &) const { - throw Exception("Method compareAtWithCollation is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(fmt::format("Method compareAt with collation is not supported for {}", getName()), ErrorCodes::NOT_IMPLEMENTED); } /** Returns a permutation that sorts elements of this column, @@ -252,9 +252,9 @@ */ virtual void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const = 0; - virtual void getPermutationWithCollation(const ICollator &, bool, size_t, int, Permutation &) const + virtual void
getPermutation(const ICollator &, bool, size_t, int, Permutation &) const { - throw Exception("Method getPermutationWithCollation is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(fmt::format("Method getPermutation with collation is not supported for {}", getName()), ErrorCodes::NOT_IMPLEMENTED); } /** Copies each element according offsets parameter. diff --git a/dbms/src/Common/Decimal.h b/dbms/src/Common/Decimal.h index b465a27f938..4839e9be608 100644 --- a/dbms/src/Common/Decimal.h +++ b/dbms/src/Common/Decimal.h @@ -280,10 +280,6 @@ using Decimal64 = Decimal<Int64>; using Decimal128 = Decimal<Int128>; using Decimal256 = Decimal<Int256>; -static constexpr PrecType minDecimalPrecision() -{ - return 1; -} template <typename T> static constexpr PrecType maxDecimalPrecision() { @@ -310,6 +306,33 @@ constexpr PrecType maxDecimalPrecision<Decimal256>() return 65; } +template <typename T> +constexpr PrecType minDecimalPrecision() +{ + /// return an invalid value + return maxDecimalPrecision<Decimal256>() + 1; +} +template <> +constexpr PrecType minDecimalPrecision<Decimal32>() +{ + return 1; +} +template <> +constexpr PrecType minDecimalPrecision<Decimal64>() +{ + return maxDecimalPrecision<Decimal32>() + 1; +} +template <> +constexpr PrecType minDecimalPrecision<Decimal128>() +{ + return maxDecimalPrecision<Decimal64>() + 1; +} +template <> +constexpr PrecType minDecimalPrecision<Decimal256>() +{ + return maxDecimalPrecision<Decimal128>() + 1; +} + template <typename T> struct PromoteType { diff --git a/dbms/src/Core/Field.h b/dbms/src/Core/Field.h index 3d2673a3412..038a9c3fe90 100644 --- a/dbms/src/Core/Field.h +++ b/dbms/src/Core/Field.h @@ -122,7 +122,20 @@ class DecimalField } if (cnt == 0) cnt = 1; - return cnt; + return std::max(cnt, scale); + } + + /// In TiFlash there are 4 subtypes of decimal: + /// Decimal32, Decimal64, Decimal128 and Decimal256; + /// they are not compatible with each other.
So a DecimalField<Decimal32> + /// cannot be inserted into a decimal column with DecimalType<Decimal64>. + /// getPrecWithCurrentDecimalType will return the prec that fits the + /// current decimal type, that is to say, the current DecimalField can be + /// inserted into a decimal column with type `Decimal(getPrecWithCurrentDecimalType, getScale)` + UInt32 getPrecWithCurrentDecimalType() const + { + auto raw_prec = getPrec(); + return std::max(raw_prec, minDecimalPrecision<T>()); } template diff --git a/dbms/src/Core/SortCursor.h b/dbms/src/Core/SortCursor.h index b35b408de30..21c3d911958 100644 --- a/dbms/src/Core/SortCursor.h +++ b/dbms/src/Core/SortCursor.h @@ -219,7 +219,7 @@ struct SortCursorWithCollation int nulls_direction = impl->desc[i].nulls_direction; int res; if (impl->need_collation[i]) - res = impl->sort_columns[i]->compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction, *impl->desc[i].collator); + res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction, *impl->desc[i].collator); else res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction); @@ -241,7 +241,7 @@ struct SortCursorWithCollation int res; if (impl->need_collation[i]) { - res = impl->sort_columns[i]->compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction, *impl->desc[i].collator); + res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction, *impl->desc[i].collator); } else res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction); diff --git a/dbms/src/DataStreams/FilterBlockInputStream.cpp b/dbms/src/DataStreams/FilterBlockInputStream.cpp index 3739b57d82d..a7ecf12d8cb 100644 --- a/dbms/src/DataStreams/FilterBlockInputStream.cpp +++ b/dbms/src/DataStreams/FilterBlockInputStream.cpp @@ -60,7 +60,7 @@ FilterBlockInputStream::FilterBlockInputStream( Block FilterBlockInputStream::getTotals() { - if (IProfilingBlockInputStream * child = dynamic_cast<IProfilingBlockInputStream *>(&*children.back())) + if (auto * child = dynamic_cast<IProfilingBlockInputStream *>(&*children.back())) { totals = child->getTotals(); expression->executeOnTotals(totals); diff --git a/dbms/src/DataStreams/HashJoinBuildBlockInputStream.cpp b/dbms/src/DataStreams/HashJoinBuildBlockInputStream.cpp index 91fd34bfff4..61808b48c50 100644 --- a/dbms/src/DataStreams/HashJoinBuildBlockInputStream.cpp +++ b/dbms/src/DataStreams/HashJoinBuildBlockInputStream.cpp @@ -13,6 +13,7 @@ // limitations under the License.
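(Illustration, not part of the patch; the arithmetic follows the precision rules above and matches the tests added further down.)
// A Decimal64 literal with three significant digits:
//   DecimalField<Decimal64> f(123, /*scale=*/2);
//   f.getPrec()                       == std::max(3 digits, scale 2) == 3
//   minDecimalPrecision<Decimal64>()  == maxDecimalPrecision<Decimal32>() + 1 == 10
//   f.getPrecWithCurrentDecimalType() == std::max(3, 10) == 10
// so FieldToDataType now types the literal as Decimal(10, 2) rather than the old
// blanket maxDecimalPrecision<Decimal64>() == 18, i.e. Decimal(18, 2).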
+#include #include namespace DB { @@ -25,4 +26,26 @@ Block HashJoinBuildBlockInputStream::readImpl() return block; } +void HashJoinBuildBlockInputStream::appendInfo(FmtBuffer & buffer) const +{ + static const std::unordered_map<ASTTableJoin::Kind, String> join_type_map{ + {ASTTableJoin::Kind::Inner, "Inner"}, + {ASTTableJoin::Kind::Left, "Left"}, + {ASTTableJoin::Kind::Right, "Right"}, + {ASTTableJoin::Kind::Full, "Full"}, + {ASTTableJoin::Kind::Cross, "Cross"}, + {ASTTableJoin::Kind::Comma, "Comma"}, + {ASTTableJoin::Kind::Anti, "Anti"}, + {ASTTableJoin::Kind::LeftSemi, "Left_Semi"}, + {ASTTableJoin::Kind::LeftAnti, "Left_Anti"}, + {ASTTableJoin::Kind::Cross_Left, "Cross_Left"}, + {ASTTableJoin::Kind::Cross_Right, "Cross_Right"}, + {ASTTableJoin::Kind::Cross_Anti, "Cross_Anti"}, + {ASTTableJoin::Kind::Cross_LeftSemi, "Cross_LeftSemi"}, + {ASTTableJoin::Kind::Cross_LeftAnti, "Cross_LeftAnti"}}; + auto join_type_it = join_type_map.find(join->getKind()); + if (join_type_it == join_type_map.end()) + throw TiFlashException("Unknown join type", Errors::Coprocessor::Internal); + buffer.fmtAppend(", join_kind = {}", join_type_it->second); +} } // namespace DB diff --git a/dbms/src/DataStreams/HashJoinBuildBlockInputStream.h b/dbms/src/DataStreams/HashJoinBuildBlockInputStream.h index 57b505f5237..dbfc7f30310 100644 --- a/dbms/src/DataStreams/HashJoinBuildBlockInputStream.h +++ b/dbms/src/DataStreams/HashJoinBuildBlockInputStream.h @@ -41,6 +41,7 @@ class HashJoinBuildBlockInputStream : public IProfilingBlockInputStream protected: Block readImpl() override; + void appendInfo(FmtBuffer & buffer) const override; private: JoinPtr join; diff --git a/dbms/src/DataStreams/IBlockInputStream.cpp b/dbms/src/DataStreams/IBlockInputStream.cpp index 57dbe0e6ad0..a05fbf83c96 100644 --- a/dbms/src/DataStreams/IBlockInputStream.cpp +++ b/dbms/src/DataStreams/IBlockInputStream.cpp @@ -77,15 +77,17 @@ size_t IBlockInputStream::checkDepthImpl(size_t max_depth, size_t level) const return res + 1; } - void IBlockInputStream::dumpTree(FmtBuffer & buffer, size_t indent, size_t multiplier) { - // todo append getHeader().dumpStructure() buffer.fmtAppend( - "{}{}{}\n", + "{}{}{}", String(indent, ' '), getName(), multiplier > 1 ? fmt::format(" x {}", multiplier) : ""); + if (!extra_info.empty()) + buffer.fmtAppend(": <{}>", extra_info); + appendInfo(buffer); + buffer.append("\n"); ++indent; /// If the subtree is repeated several times, then we output it once with the multiplier. diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index 75fdffb3d29..472eac282d4 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -135,6 +135,7 @@ class IBlockInputStream : private boost::noncopyable */ void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } + void setExtraInfo(String info) { extra_info = info; } template <typename F> void forEachChild(F && f) @@ -176,6 +177,8 @@ class IBlockInputStream : private boost::noncopyable } } + virtual void appendInfo(FmtBuffer & /*buffer*/) const {} + protected: BlockInputStreams children; mutable std::shared_mutex children_mutex; @@ -188,6 +191,9 @@ class IBlockInputStream : private boost::noncopyable mutable std::mutex tree_id_mutex; mutable String tree_id; + /// Extra info that hints at why this input stream is needed; dumpTree prints it right after the stream name. + String extra_info; + /// Get text with names of this source and the entire subtree; this function should only be called after the /// InputStream tree is constructed.
String getTreeID() const; diff --git a/dbms/src/DataStreams/LimitBlockInputStream.cpp b/dbms/src/DataStreams/LimitBlockInputStream.cpp index 81c31fc5d77..4ec6157257c 100644 --- a/dbms/src/DataStreams/LimitBlockInputStream.cpp +++ b/dbms/src/DataStreams/LimitBlockInputStream.cpp @@ -83,4 +83,8 @@ Block LimitBlockInputStream::readImpl() return res; } +void LimitBlockInputStream::appendInfo(FmtBuffer & buffer) const +{ + buffer.fmtAppend(", limit = {}", limit); +} } // namespace DB diff --git a/dbms/src/DataStreams/LimitBlockInputStream.h b/dbms/src/DataStreams/LimitBlockInputStream.h index 21978773daf..e6a7013210b 100644 --- a/dbms/src/DataStreams/LimitBlockInputStream.h +++ b/dbms/src/DataStreams/LimitBlockInputStream.h @@ -43,6 +43,7 @@ class LimitBlockInputStream : public IProfilingBlockInputStream protected: Block readImpl() override; + void appendInfo(FmtBuffer & buffer) const override; private: size_t limit; diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp index 0975ace963a..e79426f686e 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp @@ -287,5 +287,9 @@ Block MergeSortingBlocksBlockInputStream::mergeImpl(std::priority_queue +#include +#include +#include #include #include #include @@ -275,4 +278,9 @@ void ParallelAggregatingBlockInputStream::execute() no_more_keys); } +void ParallelAggregatingBlockInputStream::appendInfo(FmtBuffer & buffer) const +{ + buffer.fmtAppend(", max_threads: {}, final: {}", max_threads, final ? "true" : "false"); +} + } // namespace DB diff --git a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h index 398c3d35bbc..41e61786370 100644 --- a/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h +++ b/dbms/src/DataStreams/ParallelAggregatingBlockInputStream.h @@ -50,7 +50,7 @@ class ParallelAggregatingBlockInputStream : public IProfilingBlockInputStream Block getHeader() const override; - virtual void collectNewThreadCountOfThisLevel(int & cnt) override + void collectNewThreadCountOfThisLevel(int & cnt) override { cnt += processor.getMaxThreads(); } @@ -62,6 +62,8 @@ class ParallelAggregatingBlockInputStream : public IProfilingBlockInputStream } Block readImpl() override; + void appendInfo(FmtBuffer & buffer) const override; + private: const LoggerPtr log; diff --git a/dbms/src/DataStreams/PartialSortingBlockInputStream.cpp b/dbms/src/DataStreams/PartialSortingBlockInputStream.cpp index 30f520fdec3..4069f3818a8 100644 --- a/dbms/src/DataStreams/PartialSortingBlockInputStream.cpp +++ b/dbms/src/DataStreams/PartialSortingBlockInputStream.cpp @@ -12,15 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
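(Illustration, not part of the patch: a hypothetical dumpTree() rendering once the extra_info/appendInfo hooks above are wired in, composed from this patch's format strings — extra_info prints as `: <...>` and appendInfo supplies the trailing details.)
// Union: <for test>
//  Limit, limit = 10
//   PartialSorting: limit = 10
//    MockTableScan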
-#include - +#include #include +#include namespace DB { - - Block PartialSortingBlockInputStream::readImpl() { Block res = children.back()->read(); @@ -28,5 +26,8 @@ Block PartialSortingBlockInputStream::readImpl() return res; } - +void PartialSortingBlockInputStream::appendInfo(FmtBuffer & buffer) const +{ + buffer.fmtAppend(": limit = {}", limit); } +} // namespace DB diff --git a/dbms/src/DataStreams/PartialSortingBlockInputStream.h b/dbms/src/DataStreams/PartialSortingBlockInputStream.h index 4a7a62474df..1b2f554ef94 100644 --- a/dbms/src/DataStreams/PartialSortingBlockInputStream.h +++ b/dbms/src/DataStreams/PartialSortingBlockInputStream.h @@ -50,6 +50,7 @@ class PartialSortingBlockInputStream : public IProfilingBlockInputStream protected: Block readImpl() override; + void appendInfo(FmtBuffer & buffer) const override; private: SortDescription description; diff --git a/dbms/src/DataStreams/TiRemoteBlockInputStream.h b/dbms/src/DataStreams/TiRemoteBlockInputStream.h index ec442ba6a29..f249bf1a0dc 100644 --- a/dbms/src/DataStreams/TiRemoteBlockInputStream.h +++ b/dbms/src/DataStreams/TiRemoteBlockInputStream.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include #include @@ -59,11 +60,11 @@ class TiRemoteBlockInputStream : public IProfilingBlockInputStream void initRemoteExecutionSummaries(tipb::SelectResponse & resp, size_t index) { - for (auto & execution_summary : resp.execution_summaries()) + for (const auto & execution_summary : resp.execution_summaries()) { if (execution_summary.has_executor_id()) { - auto & executor_id = execution_summary.executor_id(); + const auto & executor_id = execution_summary.executor_id(); execution_summaries[index][executor_id].time_processed_ns = execution_summary.time_processed_ns(); execution_summaries[index][executor_id].num_produced_rows = execution_summary.num_produced_rows(); execution_summaries[index][executor_id].num_iterations = execution_summary.num_iterations(); @@ -83,11 +84,11 @@ class TiRemoteBlockInputStream : public IProfilingBlockInputStream return; } auto & execution_summaries_map = execution_summaries[index]; - for (auto & execution_summary : resp.execution_summaries()) + for (const auto & execution_summary : resp.execution_summaries()) { if (execution_summary.has_executor_id()) { - auto & executor_id = execution_summary.executor_id(); + const auto & executor_id = execution_summary.executor_id(); if (unlikely(execution_summaries_map.find(executor_id) == execution_summaries_map.end())) { LOG_FMT_WARNING(log, "execution {} not found in execution_summaries, this should not happen", executor_id); @@ -223,12 +224,12 @@ class TiRemoteBlockInputStream : public IProfilingBlockInputStream bool isStreamingCall() const { return is_streaming_reader; } const std::vector & getConnectionProfileInfos() const { return connection_profile_infos; } - virtual void collectNewThreadCountOfThisLevel(int & cnt) override + void collectNewThreadCountOfThisLevel(int & cnt) override { remote_reader->collectNewThreadCount(cnt); } - virtual void resetNewThreadCountCompute() override + void resetNewThreadCountCompute() override { if (collected) { @@ -238,11 +239,24 @@ class TiRemoteBlockInputStream : public IProfilingBlockInputStream } protected: - virtual void readSuffixImpl() override + void readSuffixImpl() override { LOG_FMT_DEBUG(log, "finish read {} rows from remote", total_rows); remote_reader->close(); } + + void appendInfo(FmtBuffer & buffer) const override + { + buffer.append(": schema: {"); + buffer.joinStr( + sample_block.begin(), + 
sample_block.end(), + [](const auto & arg, FmtBuffer & fb) { + fb.fmtAppend("<{}, {}>", arg.name, arg.type->getName()); + }, + ", "); + buffer.append("}"); + } }; using ExchangeReceiverInputStream = TiRemoteBlockInputStream<ExchangeReceiver>; diff --git a/dbms/src/DataStreams/WindowBlockInputStream.cpp b/dbms/src/DataStreams/WindowBlockInputStream.cpp index 8d9fb13cbc5..bc63db52873 100644 --- a/dbms/src/DataStreams/WindowBlockInputStream.cpp +++ b/dbms/src/DataStreams/WindowBlockInputStream.cpp @@ -109,11 +109,11 @@ bool WindowBlockInputStream::isDifferentFromPrevPartition(UInt64 current_partiti const auto * compared_column = compared_columns[partition_column_indices[i]].get(); if (window_description.partition_by[i].collator) { - if (compared_column->compareAtWithCollation(current_partition_row, - prev_frame_start.row, - *reference_column, - 1 /* nan_direction_hint */, - *window_description.partition_by[i].collator) + if (compared_column->compareAt(current_partition_row, - prev_frame_start.row, + prev_frame_start.row, + *reference_column, + 1 /* nan_direction_hint */, + *window_description.partition_by[i].collator) != 0) { return true; @@ -278,7 +278,7 @@ bool WindowBlockInputStream::arePeers(const RowNumber & x, const RowNumber & y) const auto * column_y = inputAt(y)[order_column_indices[i]].get(); if (window_description.order_by[i].collator) { - if (column_x->compareAtWithCollation(x.row, y.row, *column_y, 1 /* nan_direction_hint */, *window_description.order_by[i].collator) != 0) + if (column_x->compareAt(x.row, y.row, *column_y, 1 /* nan_direction_hint */, *window_description.order_by[i].collator) != 0) { return false; } diff --git a/dbms/src/DataTypes/DataTypeDecimal.h b/dbms/src/DataTypes/DataTypeDecimal.h index c8f32c03117..47f6602c9a3 100644 --- a/dbms/src/DataTypes/DataTypeDecimal.h +++ b/dbms/src/DataTypes/DataTypeDecimal.h @@ -192,7 +192,7 @@ using DataTypeDecimal256 = DataTypeDecimal<Decimal256>; inline DataTypePtr createDecimal(UInt64 prec, UInt64 scale) { - if (prec < minDecimalPrecision() || prec > maxDecimalPrecision<Decimal256>()) + if (prec < minDecimalPrecision<Decimal32>() || prec > maxDecimalPrecision<Decimal256>()) throw Exception("Wrong precision:" + DB::toString(prec), ErrorCodes::ARGUMENT_OUT_OF_BOUND); if (static_cast<UInt64>(scale) > prec) diff --git a/dbms/src/DataTypes/FieldToDataType.h b/dbms/src/DataTypes/FieldToDataType.h index 9903172f860..9f4b80b7324 100644 --- a/dbms/src/DataTypes/FieldToDataType.h +++ b/dbms/src/DataTypes/FieldToDataType.h @@ -40,8 +40,7 @@ class FieldToDataType : public StaticVisitor<DataTypePtr> template <typename T> DataTypePtr operator()(const DecimalField<T> & x) const { - PrecType prec = maxDecimalPrecision<T>(); - return std::make_shared<DataTypeDecimal<T>>(prec, x.getScale()); + return std::make_shared<DataTypeDecimal<T>>(x.getPrecWithCurrentDecimalType(), x.getScale()); } }; diff --git a/dbms/src/DataTypes/tests/gtest_decimal_literal_datatype.cpp b/dbms/src/DataTypes/tests/gtest_decimal_literal_datatype.cpp new file mode 100644 index 00000000000..0ae32502679 --- /dev/null +++ b/dbms/src/DataTypes/tests/gtest_decimal_literal_datatype.cpp @@ -0,0 +1,100 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +namespace DB +{ +namespace tests +{ +TEST(DecimalLiteralDataTypeTest, getPrec) +try +{ + /// Decimal32 + ASSERT_TRUE(DecimalField<Decimal32>(0, 0).getPrec() == 1); + ASSERT_TRUE(DecimalField<Decimal32>(0, 1).getPrec() == 1); + ASSERT_TRUE(DecimalField<Decimal32>(0, 2).getPrec() == 2); + ASSERT_TRUE(DecimalField<Decimal32>(123, 0).getPrec() == 3); + ASSERT_TRUE(DecimalField<Decimal32>(123, 2).getPrec() == 3); + ASSERT_TRUE(DecimalField<Decimal32>(123, 4).getPrec() == 4); + + /// Decimal64 + ASSERT_TRUE(DecimalField<Decimal64>(0, 0).getPrec() == 1); + ASSERT_TRUE(DecimalField<Decimal64>(0, 1).getPrec() == 1); + ASSERT_TRUE(DecimalField<Decimal64>(0, 2).getPrec() == 2); + ASSERT_TRUE(DecimalField<Decimal64>(123, 0).getPrec() == 3); + ASSERT_TRUE(DecimalField<Decimal64>(123, 2).getPrec() == 3); + ASSERT_TRUE(DecimalField<Decimal64>(123, 4).getPrec() == 4); + ASSERT_TRUE(DecimalField<Decimal64>(1234567891011ll, 4).getPrec() == 13); + + /// Decimal128 + ASSERT_TRUE(DecimalField<Decimal128>(0, 0).getPrec() == 1); + ASSERT_TRUE(DecimalField<Decimal128>(0, 1).getPrec() == 1); + ASSERT_TRUE(DecimalField<Decimal128>(0, 2).getPrec() == 2); + ASSERT_TRUE(DecimalField<Decimal128>(123, 0).getPrec() == 3); + ASSERT_TRUE(DecimalField<Decimal128>(123, 2).getPrec() == 3); + ASSERT_TRUE(DecimalField<Decimal128>(123, 4).getPrec() == 4); + ASSERT_TRUE(DecimalField<Decimal128>(Int128(123123123123123ll) * 1000000, 4).getPrec() == 21); + + /// Decimal256 + ASSERT_TRUE(DecimalField<Decimal256>(Int256(0), 0).getPrec() == 1); + ASSERT_TRUE(DecimalField<Decimal256>(Int256(0), 1).getPrec() == 1); + ASSERT_TRUE(DecimalField<Decimal256>(Int256(0), 2).getPrec() == 2); + ASSERT_TRUE(DecimalField<Decimal256>(Int256(123), 0).getPrec() == 3); + ASSERT_TRUE(DecimalField<Decimal256>(Int256(123), 2).getPrec() == 3); + ASSERT_TRUE(DecimalField<Decimal256>(Int256(123), 4).getPrec() == 4); + ASSERT_TRUE(DecimalField<Decimal256>(Int256(123123123123123123ll) * Int256(1000000000ll) * Int256(100000000000000ll), 4).getPrec() == 41); +} +CATCH + +TEST(DecimalLiteralDataTypeTest, fieldToDataType) +try +{ + /// Decimal32 + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(1,0)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal32>(0, 0))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(1,1)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal32>(0, 1))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(2,2)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal32>(0, 2))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(3,0)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal32>(123, 0))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(3,2)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal32>(123, 2))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(4,4)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal32>(123, 4))))); + + /// Decimal64 + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(10,0)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal64>(0, 0))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(10,1)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal64>(0, 1))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(10,2)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal64>(0, 2))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(10,0)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal64>(123, 0))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(10,2)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal64>(123, 2))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(13,4)")->equals(*applyVisitor(FieldToDataType(),
Field(DecimalField<Decimal64>(1234567891011ll, 4))))); + + /// Decimal128 + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(19,0)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal128>(0, 0))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(19,1)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal128>(0, 1))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(19,2)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal128>(0, 2))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(19,0)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal128>(123, 0))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(19,2)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal128>(123, 2))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(21,4)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal128>(Int128(123123123123123ll) * 1000000, 4))))); + + /// Decimal256 + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(39,0)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal256>(Int256(0), 0))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(39,1)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal256>(Int256(0), 1))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(39,2)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal256>(Int256(0), 2))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(39,0)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal256>(Int256(123), 0))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(39,2)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal256>(Int256(123), 2))))); + ASSERT_TRUE(DataTypeFactory::instance().get("Decimal(41,4)")->equals(*applyVisitor(FieldToDataType(), Field(DecimalField<Decimal256>(Int256(123123123123123123ll) * Int256(1000000000ll) * Int256(100000000000000ll), 4))))); +} +CATCH +} // namespace tests +} // namespace DB diff --git a/dbms/src/Databases/test/gtest_database.cpp b/dbms/src/Databases/test/gtest_database.cpp index 72915b8644f..6b8bbc17348 100644 --- a/dbms/src/Databases/test/gtest_database.cpp +++ b/dbms/src/Databases/test/gtest_database.cpp @@ -25,11 +25,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include diff --git a/dbms/src/Debug/MockSchemaGetter.h b/dbms/src/Debug/MockSchemaGetter.h index cdbaed97223..f02699866ce 100644 --- a/dbms/src/Debug/MockSchemaGetter.h +++ b/dbms/src/Debug/MockSchemaGetter.h @@ -15,14 +15,13 @@ #pragma once #include -#include +#include namespace DB { struct MockSchemaGetter { - TiDB::DBInfoPtr getDatabase(DatabaseID db_id) { return MockTiDB::instance().getDBInfoByID(db_id); } Int64 getVersion() { return MockTiDB::instance().getVersion(); } diff --git a/dbms/src/Debug/MockSchemaNameMapper.h b/dbms/src/Debug/MockSchemaNameMapper.h index b3fabab198a..003525aad89 100644 --- a/dbms/src/Debug/MockSchemaNameMapper.h +++ b/dbms/src/Debug/MockSchemaNameMapper.h @@ -14,7 +14,7 @@ #pragma once -#include +#include namespace DB { diff --git a/dbms/src/Debug/MockTiDB.h b/dbms/src/Debug/MockTiDB.h index cb09f9e305a..36d2af90859 100644 --- a/dbms/src/Debug/MockTiDB.h +++ b/dbms/src/Debug/MockTiDB.h @@ -15,10 +15,10 @@ #pragma once #include -#include -#include #include #include +#include +#include #include diff --git a/dbms/src/Debug/astToExecutor.cpp b/dbms/src/Debug/astToExecutor.cpp index 999eb6d2e68..82f894905e6 100644 --- a/dbms/src/Debug/astToExecutor.cpp +++ b/dbms/src/Debug/astToExecutor.cpp @@ -31,6 +31,93 @@ namespace DB { +void
literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & val_field, tipb::Expr * expr, Int32 collator_id) +{ + *(expr->mutable_field_type()) = columnInfoToFieldType(ci); + expr->mutable_field_type()->set_collate(collator_id); + if (!val_field.isNull()) + { + WriteBufferFromOwnString ss; + switch (ci.tp) + { + case TiDB::TypeLongLong: + case TiDB::TypeLong: + case TiDB::TypeShort: + case TiDB::TypeTiny: + case TiDB::TypeInt24: + if (ci.hasUnsignedFlag()) + { + expr->set_tp(tipb::ExprType::Uint64); + UInt64 val = val_field.safeGet(); + encodeDAGUInt64(val, ss); + } + else + { + expr->set_tp(tipb::ExprType::Int64); + Int64 val = val_field.safeGet(); + encodeDAGInt64(val, ss); + } + break; + case TiDB::TypeFloat: + { + expr->set_tp(tipb::ExprType::Float32); + auto val = static_cast(val_field.safeGet()); + encodeDAGFloat32(val, ss); + break; + } + case TiDB::TypeDouble: + { + expr->set_tp(tipb::ExprType::Float64); + Float64 val = val_field.safeGet(); + encodeDAGFloat64(val, ss); + break; + } + case TiDB::TypeString: + { + expr->set_tp(tipb::ExprType::String); + const auto & val = val_field.safeGet(); + encodeDAGString(val, ss); + break; + } + case TiDB::TypeNewDecimal: + { + expr->set_tp(tipb::ExprType::MysqlDecimal); + encodeDAGDecimal(val_field, ss); + break; + } + case TiDB::TypeDate: + { + expr->set_tp(tipb::ExprType::MysqlTime); + UInt64 val = val_field.safeGet(); + encodeDAGUInt64(MyDate(val).toPackedUInt(), ss); + break; + } + case TiDB::TypeDatetime: + case TiDB::TypeTimestamp: + { + expr->set_tp(tipb::ExprType::MysqlTime); + UInt64 val = val_field.safeGet(); + encodeDAGUInt64(MyDateTime(val).toPackedUInt(), ss); + break; + } + case TiDB::TypeTime: + { + expr->set_tp(tipb::ExprType::MysqlDuration); + Int64 val = val_field.safeGet(); + encodeDAGInt64(val, ss); + break; + } + default: + throw Exception(fmt::format("Type {} does not support literal in function unit test", getDataTypeByColumnInfo(ci)->getName())); + } + expr->set_val(ss.releaseStr()); + } + else + { + expr->set_tp(tipb::ExprType::Null); + } +} + namespace { std::unordered_map func_name_to_sig({ @@ -112,76 +199,9 @@ DAGColumnInfo toNullableDAGColumnInfo(const DAGColumnInfo & input) void literalToPB(tipb::Expr * expr, const Field & value, uint32_t collator_id) { - WriteBufferFromOwnString ss; - switch (value.getType()) - { - case Field::Types::Which::Null: - { - expr->set_tp(tipb::Null); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeNull); - ft->set_collate(collator_id); - // Null literal expr doesn't need value. 
- break; - } - case Field::Types::Which::UInt64: - { - expr->set_tp(tipb::Uint64); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeLongLong); - ft->set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); - ft->set_collate(collator_id); - encodeDAGUInt64(value.get(), ss); - break; - } - case Field::Types::Which::Int64: - { - expr->set_tp(tipb::Int64); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeLongLong); - ft->set_flag(TiDB::ColumnFlagNotNull); - ft->set_collate(collator_id); - encodeDAGInt64(value.get(), ss); - break; - } - case Field::Types::Which::Float64: - { - expr->set_tp(tipb::Float64); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeFloat); - ft->set_flag(TiDB::ColumnFlagNotNull); - ft->set_collate(collator_id); - encodeDAGFloat64(value.get(), ss); - break; - } - case Field::Types::Which::Decimal32: - case Field::Types::Which::Decimal64: - case Field::Types::Which::Decimal128: - case Field::Types::Which::Decimal256: - { - expr->set_tp(tipb::MysqlDecimal); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeNewDecimal); - ft->set_flag(TiDB::ColumnFlagNotNull); - ft->set_collate(collator_id); - encodeDAGDecimal(value, ss); - break; - } - case Field::Types::Which::String: - { - expr->set_tp(tipb::String); - auto * ft = expr->mutable_field_type(); - ft->set_tp(TiDB::TypeString); - ft->set_flag(TiDB::ColumnFlagNotNull); - ft->set_collate(collator_id); - // TODO: Align with TiDB. - encodeDAGBytes(value.get(), ss); - break; - } - default: - throw Exception(String("Unsupported literal type: ") + value.getTypeName(), ErrorCodes::LOGICAL_ERROR); - } - expr->set_val(ss.releaseStr()); + DataTypePtr type = applyVisitor(FieldToDataType(), value); + ColumnInfo ci = reverseGetColumnInfo({"", type}, 0, Field(), true); + literalFieldToTiPBExpr(ci, value, expr, collator_id); } String getFunctionNameForConstantFolding(tipb::Expr * expr) @@ -262,15 +282,15 @@ void identifierToPB(const DAGSchema & input, ASTIdentifier * id, tipb::Expr * ex void astToPB(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, uint32_t collator_id, const Context & context) { - if (ASTIdentifier * id = typeid_cast(ast.get())) + if (auto * id = typeid_cast(ast.get())) { identifierToPB(input, id, expr, collator_id); } - else if (ASTFunction * func = typeid_cast(ast.get())) + else if (auto * func = typeid_cast(ast.get())) { functionToPB(input, func, expr, collator_id, context); } - else if (ASTLiteral * lit = typeid_cast(ast.get())) + else if (auto * lit = typeid_cast(ast.get())) { literalToPB(expr, lit->value, collator_id); } @@ -505,7 +525,7 @@ void identifierToPB(const DAGSchema & input, ASTIdentifier * id, tipb::Expr * ex void collectUsedColumnsFromExpr(const DAGSchema & input, ASTPtr ast, std::unordered_set & used_columns) { - if (ASTIdentifier * id = typeid_cast(ast.get())) + if (auto * id = typeid_cast(ast.get())) { auto column_name = splitQualifiedName(id->getColumnName()); if (!column_name.first.empty()) @@ -526,7 +546,7 @@ void collectUsedColumnsFromExpr(const DAGSchema & input, ASTPtr ast, std::unorde } } } - else if (ASTFunction * func = typeid_cast(ast.get())) + else if (auto * func = typeid_cast(ast.get())) { if (AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) { @@ -559,7 +579,7 @@ void collectUsedColumnsFromExpr(const DAGSchema & input, ASTPtr ast, std::unorde TiDB::ColumnInfo compileExpr(const DAGSchema & input, ASTPtr ast) { TiDB::ColumnInfo ci; - if (ASTIdentifier * id = typeid_cast(ast.get())) + if (auto * id 
= typeid_cast(ast.get())) { /// check column auto ft = std::find_if(input.begin(), input.end(), [&](const auto & field) { @@ -574,7 +594,7 @@ TiDB::ColumnInfo compileExpr(const DAGSchema & input, ASTPtr ast) throw Exception("No such column " + id->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); ci = ft->second; } - else if (ASTFunction * func = typeid_cast(ast.get())) + else if (auto * func = typeid_cast(ast.get())) { /// check function String func_name_lowercase = Poco::toLower(func->name); @@ -692,7 +712,7 @@ TiDB::ColumnInfo compileExpr(const DAGSchema & input, ASTPtr ast) compileExpr(input, child_ast); } } - else if (ASTLiteral * lit = typeid_cast(ast.get())) + else if (auto * lit = typeid_cast(ast.get())) { switch (lit->value.getType()) { @@ -909,7 +929,7 @@ bool TopN::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collator_id, tipb::TopN * topn = tipb_executor->mutable_topn(); for (const auto & child : order_columns) { - ASTOrderByElement * elem = typeid_cast(child.get()); + auto * elem = typeid_cast(child.get()); if (!elem) throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); tipb::ByItem * by = topn->add_order_by(); @@ -954,7 +974,7 @@ bool Aggregation::toTiPBExecutor(tipb::Executor * tipb_executor, uint32_t collat auto & input_schema = children[0]->output_schema; for (const auto & expr : agg_exprs) { - const ASTFunction * func = typeid_cast(expr.get()); + const auto * func = typeid_cast(expr.get()); if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) throw Exception("Only agg function is allowed in select for a query with aggregation", ErrorCodes::LOGICAL_ERROR); @@ -1024,7 +1044,7 @@ void Aggregation::columnPrune(std::unordered_set & used_columns) { if (used_columns.find(func->getColumnName()) != used_columns.end()) { - const ASTFunction * agg_func = typeid_cast(func.get()); + const auto * agg_func = typeid_cast(func.get()); if (agg_func != nullptr) { /// agg_func should not be nullptr, just double check @@ -1075,7 +1095,7 @@ void Aggregation::toMPPSubPlan(size_t & executor_index, const DAGProperties & pr /// re-construct agg_exprs and gby_exprs in final_agg for (size_t i = 0; i < partial_agg->agg_exprs.size(); i++) { - const ASTFunction * agg_func = typeid_cast(partial_agg->agg_exprs[i].get()); + const auto * agg_func = typeid_cast(partial_agg->agg_exprs[i].get()); ASTPtr update_agg_expr = agg_func->clone(); auto * update_agg_func = typeid_cast(update_agg_expr.get()); if (agg_func->name == "count") @@ -1368,7 +1388,7 @@ ExecutorPtr compileTopN(ExecutorPtr input, size_t & executor_index, ASTPtr order std::vector order_columns; for (const auto & child : order_exprs->children) { - ASTOrderByElement * elem = typeid_cast(child.get()); + auto * elem = typeid_cast(child.get()); if (!elem) throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); order_columns.push_back(child); @@ -1399,7 +1419,7 @@ ExecutorPtr compileAggregation(ExecutorPtr input, size_t & executor_index, ASTPt { for (const auto & expr : agg_funcs->children) { - const ASTFunction * func = typeid_cast(expr.get()); + const auto * func = typeid_cast(expr.get()); if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) { need_append_project = true; @@ -1490,7 +1510,7 @@ ExecutorPtr compileProject(ExecutorPtr input, size_t & executor_index, ASTPtr se output_schema.emplace_back(ft->first, ft->second); continue; } - const ASTFunction * func = typeid_cast(expr.get()); + const auto * func = 
typeid_cast(expr.get()); if (func && AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) { throw Exception("No such agg " + func->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); diff --git a/dbms/src/Debug/astToExecutor.h b/dbms/src/Debug/astToExecutor.h index 54839e60dc6..37d3f22b6e1 100644 --- a/dbms/src/Debug/astToExecutor.h +++ b/dbms/src/Debug/astToExecutor.h @@ -294,6 +294,8 @@ ExecutorPtr compileExchangeSender(ExecutorPtr input, size_t & executor_index, ti ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema); +void literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & field, tipb::Expr * expr, Int32 collator_id); + //TODO: add compileWindow } // namespace DB \ No newline at end of file diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index a4a1f6730c9..e9335d1e2bd 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -336,7 +336,7 @@ void dbgFuncTiDBQueryFromNaturalDag(Context & context, const ASTs & args, DBGInv if (args.size() != 1) throw Exception("Args not matched, should be: json_dag_path", ErrorCodes::BAD_ARGUMENTS); - String json_dag_path = safeGet(typeid_cast(*args[0]).value); + auto json_dag_path = safeGet(typeid_cast(*args[0]).value); auto dag = NaturalDag(json_dag_path, &Poco::Logger::get("MockDAG")); dag.init(); dag.build(context); @@ -431,7 +431,7 @@ BlockInputStreamPtr dbgFuncTiDBQuery(Context & context, const ASTs & args) if (args.empty() || args.size() > 3) throw Exception("Args not matched, should be: query[, region-id, dag_prop_string]", ErrorCodes::BAD_ARGUMENTS); - String query = safeGet(typeid_cast(*args[0]).value); + auto query = safeGet(typeid_cast(*args[0]).value); RegionID region_id = InvalidRegionID; if (args.size() >= 2) region_id = safeGet(typeid_cast(*args[1]).value); @@ -464,8 +464,8 @@ BlockInputStreamPtr dbgFuncMockTiDBQuery(Context & context, const ASTs & args) if (args.size() < 2 || args.size() > 4) throw Exception("Args not matched, should be: query, region-id[, start-ts, dag_prop_string]", ErrorCodes::BAD_ARGUMENTS); - String query = safeGet(typeid_cast(*args[0]).value); - RegionID region_id = safeGet(typeid_cast(*args[1]).value); + auto query = safeGet(typeid_cast(*args[0]).value); + auto region_id = safeGet(typeid_cast(*args[1]).value); Timestamp start_ts = DEFAULT_MAX_READ_TSO; if (args.size() >= 3) start_ts = safeGet(typeid_cast(*args[2]).value); @@ -671,14 +671,14 @@ const ASTTablesInSelectQueryElement * getJoin(ASTSelectQuery & ast_query) if (!ast_query.tables) return nullptr; - const ASTTablesInSelectQuery & tables_in_select_query = static_cast(*ast_query.tables); + const auto & tables_in_select_query = static_cast(*ast_query.tables); if (tables_in_select_query.children.empty()) return nullptr; const ASTTablesInSelectQueryElement * joined_table = nullptr; for (const auto & child : tables_in_select_query.children) { - const ASTTablesInSelectQueryElement & tables_element = static_cast(*child); + const auto & tables_element = static_cast(*child); if (tables_element.table_join) { if (!joined_table) @@ -737,7 +737,7 @@ std::pair compileQueryBlock( bool append_pk_column = false; for (const auto & expr : ast_query.select_expression_list->children) { - if (ASTIdentifier * identifier = typeid_cast(expr.get())) + if (auto * identifier = typeid_cast(expr.get())) { if (identifier->getColumnName() == MutableSupport::tidb_pk_column_name) { @@ -756,7 +756,7 @@ std::pair compileQueryBlock( String 
right_table_alias; { String database_name, table_name; - const ASTTableExpression & table_to_join = static_cast(*joined_table->table_expression); + const auto & table_to_join = static_cast(*joined_table->table_expression); if (table_to_join.database_and_table_name) { auto identifier = static_cast(*table_to_join.database_and_table_name); @@ -788,7 +788,7 @@ std::pair compileQueryBlock( bool right_append_pk_column = false; for (const auto & expr : ast_query.select_expression_list->children) { - if (ASTIdentifier * identifier = typeid_cast(expr.get())) + if (auto * identifier = typeid_cast(expr.get())) { auto names = splitQualifiedName(identifier->getColumnName()); if (names.second == MutableSupport::tidb_pk_column_name) @@ -831,7 +831,7 @@ std::pair compileQueryBlock( bool has_agg_func = false; for (const auto & child : ast_query.select_expression_list->children) { - const ASTFunction * func = typeid_cast(child.get()); + const auto * func = typeid_cast(child.get()); if (func && AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) { has_agg_func = true; diff --git a/dbms/src/Debug/dbgFuncMockTiDBTable.cpp b/dbms/src/Debug/dbgFuncMockTiDBTable.cpp index 5b5cc004b58..65d0b2eadaa 100644 --- a/dbms/src/Debug/dbgFuncMockTiDBTable.cpp +++ b/dbms/src/Debug/dbgFuncMockTiDBTable.cpp @@ -24,8 +24,8 @@ #include #include #include -#include #include +#include #include namespace DB diff --git a/dbms/src/Debug/dbgFuncRegion.cpp b/dbms/src/Debug/dbgFuncRegion.cpp index 7924c086508..b2024eac1d8 100644 --- a/dbms/src/Debug/dbgFuncRegion.cpp +++ b/dbms/src/Debug/dbgFuncRegion.cpp @@ -40,7 +40,7 @@ extern const int UNKNOWN_TABLE; // put_region(region_id, start, end, database_name, table_name[, partition-name]) void dbgFuncPutRegion(Context & context, const ASTs & args, DBGInvoker::Printer output) { - RegionID region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); bool has_partition_id = false; size_t args_size = args.size(); if (dynamic_cast(args[args_size - 1].get()) != nullptr) @@ -81,8 +81,8 @@ void dbgFuncPutRegion(Context & context, const ASTs & args, DBGInvoker::Printer } else { - HandleID start = static_cast(safeGet(typeid_cast(*args[1]).value)); - HandleID end = static_cast(safeGet(typeid_cast(*args[2]).value)); + auto start = static_cast(safeGet(typeid_cast(*args[1]).value)); + auto end = static_cast(safeGet(typeid_cast(*args[2]).value)); TMTContext & tmt = context.getTMTContext(); RegionPtr region = RegionBench::createRegion(table_id, region_id, start, end); @@ -107,7 +107,7 @@ void dbgFuncTryFlushRegion(Context & context, const ASTs & args, DBGInvoker::Pri throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); TMTContext & tmt = context.getTMTContext(); tmt.getRegionTable().tryFlushRegion(region_id); @@ -160,7 +160,7 @@ void dbgFuncDumpAllRegion(Context & context, const ASTs & args, DBGInvoker::Prin if (args.empty()) throw Exception("Args not matched, should be: table_id", ErrorCodes::BAD_ARGUMENTS); - TableID table_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto table_id = static_cast(safeGet(typeid_cast(*args[0]).value)); bool ignore_none = false; if (args.size() > 1) @@ -190,7 +190,7 @@ void dbgFuncRemoveRegion(Context & context, const ASTs & args, DBGInvoker::Print if (args.empty()) throw Exception("Args not 
matched, should be: region_id", ErrorCodes::BAD_ARGUMENTS); - RegionID region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); TMTContext & tmt = context.getTMTContext(); KVStorePtr & kvstore = tmt.getKVStore(); diff --git a/dbms/src/Debug/dbgFuncSchema.cpp b/dbms/src/Debug/dbgFuncSchema.cpp index 00ba5ab7335..8b73ddc23a3 100644 --- a/dbms/src/Debug/dbgFuncSchema.cpp +++ b/dbms/src/Debug/dbgFuncSchema.cpp @@ -22,10 +22,10 @@ #include #include #include -#include -#include #include #include +#include +#include #include #include diff --git a/dbms/src/Debug/dbgFuncSchemaName.cpp b/dbms/src/Debug/dbgFuncSchemaName.cpp index a4dac1ae050..4c2ad86bd62 100644 --- a/dbms/src/Debug/dbgFuncSchemaName.cpp +++ b/dbms/src/Debug/dbgFuncSchemaName.cpp @@ -20,9 +20,9 @@ #include #include #include -#include -#include #include +#include +#include #include #include @@ -97,7 +97,7 @@ BlockInputStreamPtr dbgFuncQueryMapped(Context & context, const ASTs & args) if (args.size() < 2 || args.size() > 3) throw Exception("Args not matched, should be: query, database-name[, table-name]", ErrorCodes::BAD_ARGUMENTS); - String query = safeGet(typeid_cast(*args[0]).value); + auto query = safeGet(typeid_cast(*args[0]).value); const String & database_name = typeid_cast(*args[1]).name; if (args.size() == 3) diff --git a/dbms/src/Encryption/FileProvider.cpp b/dbms/src/Encryption/FileProvider.cpp index b4666cf6a71..f2f96fa8568 100644 --- a/dbms/src/Encryption/FileProvider.cpp +++ b/dbms/src/Encryption/FileProvider.cpp @@ -142,8 +142,12 @@ void FileProvider::deleteRegularFile(const String & file_path_, const Encryption { throw DB::TiFlashException("File: " + data_file.path() + " is not a regular file", Errors::Encryption::Internal); } - key_manager->deleteFile(encryption_path_.full_path, true); + // Remove the file on disk before removing the encryption key. Or we may leave an encrypted file without the encryption key + // and the encrypted file can not be read. + // In the worst case that TiFlash crash between removing the file on disk and removing the encryption key, we may leave + // the encryption key not deleted. However, this is a rare case and won't cause serious problem. data_file.remove(false); + key_manager->deleteFile(encryption_path_.full_path, true); } } diff --git a/dbms/src/Encryption/FileProvider.h b/dbms/src/Encryption/FileProvider.h index 79eec8f632b..1d23af7a2db 100644 --- a/dbms/src/Encryption/FileProvider.h +++ b/dbms/src/Encryption/FileProvider.h @@ -67,6 +67,7 @@ class FileProvider // If dir_path_as_encryption_path is true, use dir_path_ as EncryptionPath // If false, use every file's path inside dir_path_ as EncryptionPath + // Note this method is not atomic, and after calling it, the files in dir_path_ cannot be read again. void deleteDirectory( const String & dir_path_, bool dir_path_as_encryption_path = false, diff --git a/dbms/src/Encryption/MockKeyManager.cpp b/dbms/src/Encryption/MockKeyManager.cpp index d125961fd06..bbaeb37848a 100644 --- a/dbms/src/Encryption/MockKeyManager.cpp +++ b/dbms/src/Encryption/MockKeyManager.cpp @@ -13,8 +13,10 @@ // limitations under the License. 
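(Illustration, not part of the patch: the ordering argument from FileProvider::deleteRegularFile above, restated.)
// New order:
//   data_file.remove(false);                                    // 1. drop the ciphertext first
//   key_manager->deleteFile(encryption_path_.full_path, true);  // 2. then drop the key
// A crash between step 1 and step 2 merely leaks an unused encryption key, which the
// patch's comment deems harmless; the old order could crash after deleting the key,
// leaving an encrypted file that can never be decrypted again.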
#include +#include #include #include +#include #include #include @@ -40,12 +42,14 @@ MockKeyManager::MockKeyManager(EncryptionMethod method_, const String & key_, co , key{key_} , iv{iv} , encryption_enabled{encryption_enabled_} + , logger(DB::Logger::get("MockKeyManager")) {} FileEncryptionInfo MockKeyManager::newFile(const String & fname) { if (encryption_enabled) { + LOG_FMT_TRACE(logger, "Create mock encryption [file={}]", fname); files.emplace_back(fname); } return getFile(fname); @@ -64,6 +68,7 @@ void MockKeyManager::deleteFile(const String & fname, bool throw_on_error) { if (*iter == fname) { + LOG_FMT_TRACE(logger, "Delete mock encryption [file={}]", fname); files.erase(iter); break; } @@ -80,6 +85,7 @@ void MockKeyManager::linkFile(const String & src_fname, const String & dst_fname { throw DB::Exception(fmt::format("Can't find file which name is {}", src_fname), DB::ErrorCodes::LOGICAL_ERROR); } + LOG_FMT_TRACE(logger, "Link mock encryption file [src_file={}] [dst_file={}]", src_fname, dst_fname); files.emplace_back(dst_fname); } } diff --git a/dbms/src/Encryption/MockKeyManager.h b/dbms/src/Encryption/MockKeyManager.h index 914e6ab1fe4..268bb00d129 100644 --- a/dbms/src/Encryption/MockKeyManager.h +++ b/dbms/src/Encryption/MockKeyManager.h @@ -20,12 +20,15 @@ namespace DB { -class MockKeyManager : public KeyManager +class Logger; +using LoggerPtr = std::shared_ptr; + +class MockKeyManager final : public KeyManager { public: - ~MockKeyManager() = default; + ~MockKeyManager() override = default; - MockKeyManager(bool encryption_enabled_ = true); + explicit MockKeyManager(bool encryption_enabled_ = true); MockKeyManager(EncryptionMethod method_, const String & key_, const String & iv, bool encryption_enabled_ = true); @@ -50,5 +53,7 @@ class MockKeyManager : public KeyManager String key; String iv; bool encryption_enabled; + + LoggerPtr logger; }; } // namespace DB diff --git a/dbms/src/Flash/BatchCoprocessorHandler.cpp b/dbms/src/Flash/BatchCoprocessorHandler.cpp index 273ceec8f08..0fd41832711 100644 --- a/dbms/src/Flash/BatchCoprocessorHandler.cpp +++ b/dbms/src/Flash/BatchCoprocessorHandler.cpp @@ -18,8 +18,8 @@ #include #include #include -#include #include +#include #include diff --git a/dbms/src/Flash/Coprocessor/DAGContext.h b/dbms/src/Flash/Coprocessor/DAGContext.h index d031ff103ff..e3e5efdcbc6 100644 --- a/dbms/src/Flash/Coprocessor/DAGContext.h +++ b/dbms/src/Flash/Coprocessor/DAGContext.h @@ -174,7 +174,7 @@ class DAGContext explicit DAGContext(const tipb::DAGRequest & dag_request_, String log_identifier, size_t concurrency) : dag_request(&dag_request_) , initialize_concurrency(concurrency) - , is_mpp_task(false) + , is_mpp_task(true) , is_root_mpp_task(false) , tunnel_set(nullptr) , log(Logger::get(log_identifier)) diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp index cffae76cb81..aa269469cdb 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.cpp @@ -1279,15 +1279,7 @@ String DAGExpressionAnalyzer::getActions(const tipb::Expr & expr, const Expressi } else if (isScalarFunctionExpr(expr)) { - const String & func_name = getFunctionName(expr); - if (DAGExpressionAnalyzerHelper::function_builder_map.count(func_name) != 0) - { - ret = DAGExpressionAnalyzerHelper::function_builder_map[func_name](this, expr, actions); - } - else - { - ret = buildFunction(expr, actions); - } + ret = DAGExpressionAnalyzerHelper::buildFunction(this, expr, 
actions); } else { @@ -1341,18 +1333,4 @@ String DAGExpressionAnalyzer::buildTupleFunctionForGroupConcat( return applyFunction(func_name, argument_names, actions, nullptr); } -String DAGExpressionAnalyzer::buildFunction( - const tipb::Expr & expr, - const ExpressionActionsPtr & actions) -{ - const String & func_name = getFunctionName(expr); - Names argument_names; - for (const auto & child : expr.children()) - { - String name = getActions(child, actions); - argument_names.push_back(name); - } - return applyFunction(func_name, argument_names, actions, getCollatorFromExpr(expr)); -} - } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h index f565e7a6348..3b7112af02d 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzer.h @@ -275,10 +275,6 @@ class DAGExpressionAnalyzer : private boost::noncopyable const ExpressionActionsPtr & actions, const String & column_name); - String buildFunction( - const tipb::Expr & expr, - const ExpressionActionsPtr & actions); - String buildFilterColumn( const ExpressionActionsPtr & actions, const std::vector & conditions); diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp index cabd88e0ba7..ee529680d28 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp +++ b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.cpp @@ -248,7 +248,7 @@ String DAGExpressionAnalyzerHelper::buildCastFunctionInternal( return result_name; FunctionBuilderPtr function_builder = FunctionFactory::instance().get(tidb_cast_name, analyzer->getContext()); - FunctionBuilderTiDBCast * function_builder_tidb_cast = dynamic_cast(function_builder.get()); + auto * function_builder_tidb_cast = dynamic_cast(function_builder.get()); function_builder_tidb_cast->setInUnion(in_union); function_builder_tidb_cast->setTiDBFieldType(field_type); @@ -401,6 +401,37 @@ String DAGExpressionAnalyzerHelper::buildRegexpFunction( return analyzer->applyFunction(func_name, argument_names, actions, collator); } +String DAGExpressionAnalyzerHelper::buildDefaultFunction( + DAGExpressionAnalyzer * analyzer, + const tipb::Expr & expr, + const ExpressionActionsPtr & actions) +{ + const String & func_name = getFunctionName(expr); + Names argument_names; + for (const auto & child : expr.children()) + { + String name = analyzer->getActions(child, actions); + argument_names.push_back(name); + } + return analyzer->applyFunction(func_name, argument_names, actions, getCollatorFromExpr(expr)); +} + +String DAGExpressionAnalyzerHelper::buildFunction( + DAGExpressionAnalyzer * analyzer, + const tipb::Expr & expr, + const ExpressionActionsPtr & actions) +{ + const String & func_name = getFunctionName(expr); + if (function_builder_map.count(func_name) != 0) + { + return function_builder_map[func_name](analyzer, expr, actions); + } + else + { + return buildDefaultFunction(analyzer, expr, actions); + } +} + DAGExpressionAnalyzerHelper::FunctionBuilderMap DAGExpressionAnalyzerHelper::function_builder_map( {{"in", DAGExpressionAnalyzerHelper::buildInFunction}, {"notIn", DAGExpressionAnalyzerHelper::buildInFunction}, diff --git a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h index c8f2b658388..fcafcc57819 100644 --- a/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h +++ 
b/dbms/src/Flash/Coprocessor/DAGExpressionAnalyzerHelper.h @@ -25,6 +25,10 @@ class DAGExpressionAnalyzer; class DAGExpressionAnalyzerHelper { public: + static String buildFunction( + DAGExpressionAnalyzer * analyzer, + const tipb::Expr & expr, + const ExpressionActionsPtr & actions); static String buildInFunction( DAGExpressionAnalyzer * analyzer, const tipb::Expr & expr, @@ -83,6 +87,11 @@ class DAGExpressionAnalyzerHelper const tipb::Expr & expr, const ExpressionActionsPtr & actions); + static String buildDefaultFunction( + DAGExpressionAnalyzer * analyzer, + const tipb::Expr & expr, + const ExpressionActionsPtr & actions); + using FunctionBuilder = std::function; using FunctionBuilderMap = std::unordered_map; diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp index 8d91b8b23e9..5fac49faaed 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.cpp @@ -33,9 +33,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -43,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -50,10 +49,6 @@ #include #include #include -#include -#include -#include - namespace DB { @@ -91,7 +86,7 @@ struct AnalysisResult Names aggregation_keys; TiDB::TiDBCollators aggregation_collators; AggregateDescriptions aggregate_descriptions; - bool is_final_agg; + bool is_final_agg = false; }; AnalysisResult analyzeExpressions( @@ -184,329 +179,130 @@ void DAGQueryBlockInterpreter::handleTableScan(const TiDBTableScan & table_scan, analyzer = std::move(storage_interpreter.analyzer); } -void DAGQueryBlockInterpreter::prepareJoin( - const google::protobuf::RepeatedPtrField & keys, - const DataTypes & key_types, - DAGPipeline & pipeline, - Names & key_names, - bool left, - bool is_right_out_join, - const google::protobuf::RepeatedPtrField & filters, - String & filter_column_name) -{ - NamesAndTypes source_columns; - for (auto const & p : pipeline.firstStream()->getHeader().getNamesAndTypesList()) - source_columns.emplace_back(p.name, p.type); - DAGExpressionAnalyzer dag_analyzer(std::move(source_columns), context); - ExpressionActionsChain chain; - if (dag_analyzer.appendJoinKeyAndJoinFilters(chain, keys, key_types, key_names, left, is_right_out_join, filters, filter_column_name)) - { - pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, chain.getLastActions(), log->identifier()); }); - } -} - -ExpressionActionsPtr DAGQueryBlockInterpreter::genJoinOtherConditionAction( - const tipb::Join & join, - NamesAndTypes & source_columns, - String & filter_column_for_other_condition, - String & filter_column_for_other_eq_condition) -{ - if (join.other_conditions_size() == 0 && join.other_eq_conditions_from_in_size() == 0) - return nullptr; - DAGExpressionAnalyzer dag_analyzer(source_columns, context); - ExpressionActionsChain chain; - std::vector condition_vector; - if (join.other_conditions_size() > 0) - { - for (const auto & c : join.other_conditions()) - { - condition_vector.push_back(&c); - } - filter_column_for_other_condition = dag_analyzer.appendWhere(chain, condition_vector); - } - if (join.other_eq_conditions_from_in_size() > 0) - { - condition_vector.clear(); - for (const auto & c : join.other_eq_conditions_from_in()) - { - condition_vector.push_back(&c); - } - filter_column_for_other_eq_condition = dag_analyzer.appendWhere(chain, condition_vector); - } - return chain.getLastActions(); -} - -/// 
ClickHouse require join key to be exactly the same type -/// TiDB only require the join key to be the same category -/// for example decimal(10,2) join decimal(20,0) is allowed in -/// TiDB and will throw exception in ClickHouse -void getJoinKeyTypes(const tipb::Join & join, DataTypes & key_types) -{ - for (int i = 0; i < join.left_join_keys().size(); i++) - { - if (!exprHasValidFieldType(join.left_join_keys(i)) || !exprHasValidFieldType(join.right_join_keys(i))) - throw TiFlashException("Join key without field type", Errors::Coprocessor::BadRequest); - DataTypes types; - types.emplace_back(getDataTypeByFieldTypeForComputingLayer(join.left_join_keys(i).field_type())); - types.emplace_back(getDataTypeByFieldTypeForComputingLayer(join.right_join_keys(i).field_type())); - DataTypePtr common_type = getLeastSupertype(types); - key_types.emplace_back(common_type); - } -} - void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & pipeline, SubqueryForSet & right_query) { - // build - static const std::unordered_map equal_join_type_map{ - {tipb::JoinType::TypeInnerJoin, ASTTableJoin::Kind::Inner}, - {tipb::JoinType::TypeLeftOuterJoin, ASTTableJoin::Kind::Left}, - {tipb::JoinType::TypeRightOuterJoin, ASTTableJoin::Kind::Right}, - {tipb::JoinType::TypeSemiJoin, ASTTableJoin::Kind::Inner}, - {tipb::JoinType::TypeAntiSemiJoin, ASTTableJoin::Kind::Anti}, - {tipb::JoinType::TypeLeftOuterSemiJoin, ASTTableJoin::Kind::LeftSemi}, - {tipb::JoinType::TypeAntiLeftOuterSemiJoin, ASTTableJoin::Kind::LeftAnti}}; - static const std::unordered_map cartesian_join_type_map{ - {tipb::JoinType::TypeInnerJoin, ASTTableJoin::Kind::Cross}, - {tipb::JoinType::TypeLeftOuterJoin, ASTTableJoin::Kind::Cross_Left}, - {tipb::JoinType::TypeRightOuterJoin, ASTTableJoin::Kind::Cross_Right}, - {tipb::JoinType::TypeSemiJoin, ASTTableJoin::Kind::Cross}, - {tipb::JoinType::TypeAntiSemiJoin, ASTTableJoin::Kind::Cross_Anti}, - {tipb::JoinType::TypeLeftOuterSemiJoin, ASTTableJoin::Kind::Cross_LeftSemi}, - {tipb::JoinType::TypeAntiLeftOuterSemiJoin, ASTTableJoin::Kind::Cross_LeftAnti}}; - - if (input_streams_vec.size() != 2) + if (unlikely(input_streams_vec.size() != 2)) { throw TiFlashException("Join query block must have 2 input streams", Errors::BroadcastJoin::Internal); } - const auto & join_type_map = join.left_join_keys_size() == 0 ? cartesian_join_type_map : equal_join_type_map; - auto join_type_it = join_type_map.find(join.join_type()); - if (join_type_it == join_type_map.end()) - throw TiFlashException("Unknown join type in dag request", Errors::Coprocessor::BadRequest); - - /// (cartesian) (anti) left semi join. - const bool is_left_semi_family = join.join_type() == tipb::JoinType::TypeLeftOuterSemiJoin || join.join_type() == tipb::JoinType::TypeAntiLeftOuterSemiJoin; - - ASTTableJoin::Kind kind = join_type_it->second; - const bool is_semi_join = join.join_type() == tipb::JoinType::TypeSemiJoin || join.join_type() == tipb::JoinType::TypeAntiSemiJoin || is_left_semi_family; - ASTTableJoin::Strictness strictness = ASTTableJoin::Strictness::All; - if (is_semi_join) - strictness = ASTTableJoin::Strictness::Any; - - /// in DAG request, inner part is the build side, however for TiFlash implementation, - /// the build side must be the right side, so need to swap the join side if needed - /// 1. for (cross) inner join, there is no problem in this swap. - /// 2. for (cross) semi/anti-semi join, the build side is always right, needn't swap. - /// 3. for non-cross left/right join, there is no problem in this swap. 
- /// 4. for cross left join, the build side is always right, needn't and can't swap. - /// 5. for cross right join, the build side is always left, so it will always swap and change to cross left join. - /// note that whatever the build side is, we can't support cross-right join now. - - bool swap_join_side; - if (kind == ASTTableJoin::Kind::Cross_Right) - swap_join_side = true; - else if (kind == ASTTableJoin::Kind::Cross_Left) - swap_join_side = false; - else - swap_join_side = join.inner_idx() == 0; + JoinInterpreterHelper::TiFlashJoin tiflash_join{join}; - DAGPipeline left_pipeline; - DAGPipeline right_pipeline; + DAGPipeline probe_pipeline; + DAGPipeline build_pipeline; + probe_pipeline.streams = input_streams_vec[1 - tiflash_join.build_side_index]; + build_pipeline.streams = input_streams_vec[tiflash_join.build_side_index]; - if (swap_join_side) - { - if (kind == ASTTableJoin::Kind::Left) - kind = ASTTableJoin::Kind::Right; - else if (kind == ASTTableJoin::Kind::Right) - kind = ASTTableJoin::Kind::Left; - else if (kind == ASTTableJoin::Kind::Cross_Right) - kind = ASTTableJoin::Kind::Cross_Left; - left_pipeline.streams = input_streams_vec[1]; - right_pipeline.streams = input_streams_vec[0]; - } - else - { - left_pipeline.streams = input_streams_vec[0]; - right_pipeline.streams = input_streams_vec[1]; - } + RUNTIME_ASSERT(!input_streams_vec[0].empty(), log, "left input streams cannot be empty"); + const Block & left_input_header = input_streams_vec[0].back()->getHeader(); - NamesAndTypes join_output_columns; - /// columns_for_other_join_filter is a vector of columns used - /// as the input columns when compiling other join filter. - /// Note the order in the column vector is very important: - /// first the columns in input_streams_vec[0], then followed - /// by the columns in input_streams_vec[1], if there are other - /// columns generated before compile other join filter, then - /// append the extra columns afterwards. In order to figure out - /// whether a given column is already in the column vector or - /// not quickly, we use another set to store the column names - NamesAndTypes columns_for_other_join_filter; - std::unordered_set column_set_for_other_join_filter; - bool make_nullable = join.join_type() == tipb::JoinType::TypeRightOuterJoin; - for (auto const & p : input_streams_vec[0][0]->getHeader().getNamesAndTypesList()) - { - join_output_columns.emplace_back(p.name, make_nullable ? makeNullable(p.type) : p.type); - columns_for_other_join_filter.emplace_back(p.name, make_nullable ? makeNullable(p.type) : p.type); - column_set_for_other_join_filter.emplace(p.name); - } - make_nullable = join.join_type() == tipb::JoinType::TypeLeftOuterJoin; - for (auto const & p : input_streams_vec[1][0]->getHeader().getNamesAndTypesList()) - { - if (!is_semi_join) - /// for semi join, the columns from right table will be ignored - join_output_columns.emplace_back(p.name, make_nullable ? makeNullable(p.type) : p.type); - /// however, when compiling join's other condition, we still need the columns from right table - columns_for_other_join_filter.emplace_back(p.name, make_nullable ? 
makeNullable(p.type) : p.type); - column_set_for_other_join_filter.emplace(p.name); - } - - bool is_tiflash_left_join = kind == ASTTableJoin::Kind::Left || kind == ASTTableJoin::Kind::Cross_Left; - /// Cross_Right join will be converted to Cross_Left join, so no need to check Cross_Right - bool is_tiflash_right_join = kind == ASTTableJoin::Kind::Right; - /// all the columns from right table should be added after join, even for the join key - NamesAndTypesList columns_added_by_join; - make_nullable = is_tiflash_left_join; - for (auto const & p : right_pipeline.streams[0]->getHeader().getNamesAndTypesList()) - { - columns_added_by_join.emplace_back(p.name, make_nullable ? makeNullable(p.type) : p.type); - } - - String match_helper_name; - if (is_left_semi_family) - { - const auto & left_block = input_streams_vec[0][0]->getHeader(); - const auto & right_block = input_streams_vec[1][0]->getHeader(); - - match_helper_name = Join::match_helper_prefix; - for (int i = 1; left_block.has(match_helper_name) || right_block.has(match_helper_name); ++i) - { - match_helper_name = Join::match_helper_prefix + std::to_string(i); - } - - columns_added_by_join.emplace_back(match_helper_name, Join::match_helper_type); - join_output_columns.emplace_back(match_helper_name, Join::match_helper_type); - } + RUNTIME_ASSERT(!input_streams_vec[1].empty(), log, "right input streams cannot be empty"); + const Block & right_input_header = input_streams_vec[1].back()->getHeader(); - DataTypes join_key_types; - getJoinKeyTypes(join, join_key_types); - TiDB::TiDBCollators collators; - size_t join_key_size = join_key_types.size(); - if (join.probe_types_size() == static_cast(join_key_size) && join.build_types_size() == join.probe_types_size()) - for (size_t i = 0; i < join_key_size; i++) - { - if (removeNullable(join_key_types[i])->isString()) - { - if (join.probe_types(i).collate() != join.build_types(i).collate()) - throw TiFlashException("Join with different collators on the join key", Errors::Coprocessor::BadRequest); - collators.push_back(getCollatorFromFieldType(join.probe_types(i))); - } - else - collators.push_back(nullptr); - } - - Names left_key_names, right_key_names; - String left_filter_column_name, right_filter_column_name; + String match_helper_name = tiflash_join.genMatchHelperName(left_input_header, right_input_header); + NamesAndTypesList columns_added_by_join = tiflash_join.genColumnsAddedByJoin(build_pipeline.firstStream()->getHeader(), match_helper_name); + NamesAndTypes join_output_columns = tiflash_join.genJoinOutputColumns(left_input_header, right_input_header, match_helper_name); /// add necessary transformation if the join key is an expression - prepareJoin( - swap_join_side ? join.right_join_keys() : join.left_join_keys(), - join_key_types, - left_pipeline, - left_key_names, + bool is_tiflash_right_join = tiflash_join.isTiFlashRightJoin(); + + // prepare probe side + auto [probe_side_prepare_actions, probe_key_names, probe_filter_column_name] = JoinInterpreterHelper::prepareJoin( + context, + probe_pipeline.firstStream()->getHeader(), + tiflash_join.getProbeJoinKeys(), + tiflash_join.join_key_types, true, is_tiflash_right_join, - swap_join_side ? join.right_conditions() : join.left_conditions(), - left_filter_column_name); - - prepareJoin( - swap_join_side ? 
join.left_join_keys() : join.right_join_keys(), - join_key_types, - right_pipeline, - right_key_names, + tiflash_join.getProbeConditions()); + RUNTIME_ASSERT(probe_side_prepare_actions, log, "probe_side_prepare_actions cannot be nullptr"); + + // prepare build side + auto [build_side_prepare_actions, build_key_names, build_filter_column_name] = JoinInterpreterHelper::prepareJoin( + context, + build_pipeline.firstStream()->getHeader(), + tiflash_join.getBuildJoinKeys(), + tiflash_join.join_key_types, false, is_tiflash_right_join, - swap_join_side ? join.left_conditions() : join.right_conditions(), - right_filter_column_name); + tiflash_join.getBuildConditions()); + RUNTIME_ASSERT(build_side_prepare_actions, log, "build_side_prepare_actions cannot be nullptr"); - String other_filter_column_name, other_eq_filter_from_in_column_name; - for (auto const & p : left_pipeline.streams[0]->getHeader().getNamesAndTypesList()) - { - if (column_set_for_other_join_filter.find(p.name) == column_set_for_other_join_filter.end()) - columns_for_other_join_filter.emplace_back(p.name, p.type); - } - for (auto const & p : right_pipeline.streams[0]->getHeader().getNamesAndTypesList()) - { - if (column_set_for_other_join_filter.find(p.name) == column_set_for_other_join_filter.end()) - columns_for_other_join_filter.emplace_back(p.name, p.type); - } - - ExpressionActionsPtr other_condition_expr - = genJoinOtherConditionAction(join, columns_for_other_join_filter, other_filter_column_name, other_eq_filter_from_in_column_name); + auto [other_condition_expr, other_filter_column_name, other_eq_filter_from_in_column_name] + = tiflash_join.genJoinOtherConditionAction(context, left_input_header, right_input_header, probe_side_prepare_actions); const Settings & settings = context.getSettingsRef(); - size_t join_build_concurrency = settings.join_concurrent_build ? std::min(max_streams, right_pipeline.streams.size()) : 1; size_t max_block_size_for_cross_join = settings.max_block_size; fiu_do_on(FailPoints::minimum_block_size_for_cross_join, { max_block_size_for_cross_join = 1; }); JoinPtr join_ptr = std::make_shared( - left_key_names, - right_key_names, + probe_key_names, + build_key_names, true, SizeLimits(settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode), - kind, - strictness, + tiflash_join.kind, + tiflash_join.strictness, log->identifier(), - join_build_concurrency, - collators, - left_filter_column_name, - right_filter_column_name, + tiflash_join.join_key_collators, + probe_filter_column_name, + build_filter_column_name, other_filter_column_name, other_eq_filter_from_in_column_name, other_condition_expr, max_block_size_for_cross_join, match_helper_name); - recordJoinExecuteInfo(swap_join_side ? 0 : 1, join_ptr); + recordJoinExecuteInfo(tiflash_join.build_side_index, join_ptr); + size_t join_build_concurrency = settings.join_concurrent_build ? 
std::min(max_streams, build_pipeline.streams.size()) : 1; + + /// build side streams + executeExpression(build_pipeline, build_side_prepare_actions, "append join key and join filters for build side"); // add a HashJoinBuildBlockInputStream to build a shared hash table - size_t concurrency_build_index = 0; - auto get_concurrency_build_index = [&concurrency_build_index, &join_build_concurrency]() { - return (concurrency_build_index++) % join_build_concurrency; - }; - right_pipeline.transform([&](auto & stream) { + auto get_concurrency_build_index = JoinInterpreterHelper::concurrencyBuildIndexGenerator(join_build_concurrency); + build_pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, join_ptr, get_concurrency_build_index(), log->identifier()); + stream->setExtraInfo( + fmt::format("join build, build_side_root_executor_id = {}", dagContext().getJoinExecuteInfoMap()[query_block.source_name].build_side_root_executor_id)); }); - executeUnion(right_pipeline, max_streams, log, /*ignore_block=*/true); + executeUnion(build_pipeline, max_streams, log, /*ignore_block=*/true, "for join"); - right_query.source = right_pipeline.firstStream(); + right_query.source = build_pipeline.firstStream(); right_query.join = join_ptr; - right_query.join->setSampleBlock(right_query.source->getHeader()); + join_ptr->init(right_query.source->getHeader(), join_build_concurrency); + /// probe side streams + executeExpression(probe_pipeline, probe_side_prepare_actions, "append join key and join filters for probe side"); NamesAndTypes source_columns; - for (const auto & p : left_pipeline.streams[0]->getHeader().getNamesAndTypesList()) + for (const auto & p : probe_pipeline.firstStream()->getHeader()) source_columns.emplace_back(p.name, p.type); DAGExpressionAnalyzer dag_analyzer(std::move(source_columns), context); ExpressionActionsChain chain; dag_analyzer.appendJoin(chain, right_query, columns_added_by_join); - pipeline.streams = left_pipeline.streams; + pipeline.streams = probe_pipeline.streams; /// add join input stream if (is_tiflash_right_join) { auto & join_execute_info = dagContext().getJoinExecuteInfoMap()[query_block.source_name]; - for (size_t i = 0; i < join_build_concurrency; i++) + size_t not_joined_concurrency = join_ptr->getNotJoinedStreamConcurrency(); + for (size_t i = 0; i < not_joined_concurrency; ++i) { - auto non_joined_stream = chain.getLastActions()->createStreamWithNonJoinedDataIfFullOrRightJoin( + auto non_joined_stream = join_ptr->createStreamWithNonJoinedRows( pipeline.firstStream()->getHeader(), i, - join_build_concurrency, + not_joined_concurrency, settings.max_block_size); + non_joined_stream->setExtraInfo("add stream with non_joined_data if full_or_right_join"); pipeline.streams_with_non_joined_data.push_back(non_joined_stream); join_execute_info.non_joined_streams.push_back(non_joined_stream); } } for (auto & stream : pipeline.streams) + { stream = std::make_shared(stream, chain.getLastActions(), log->identifier()); + stream->setExtraInfo(fmt::format("join probe, join_executor_id = {}", query_block.source_name)); + } /// add a project to remove all the useless column NamesWithAliases project_cols; @@ -516,7 +312,7 @@ void DAGQueryBlockInterpreter::handleJoin(const tipb::Join & join, DAGPipeline & /// it is guaranteed by its children query block project_cols.emplace_back(c.name, c.name); } - executeProject(pipeline, project_cols); + executeProject(pipeline, project_cols, "remove useless column after join"); analyzer = std::make_unique(std::move(join_output_columns), 
context); } @@ -530,19 +326,22 @@ void DAGQueryBlockInterpreter::recordJoinExecuteInfo(size_t build_side_index, co dagContext().getJoinExecuteInfoMap()[query_block.source_name] = std::move(join_execute_info); } -void DAGQueryBlockInterpreter::executeWhere(DAGPipeline & pipeline, const ExpressionActionsPtr & expr, String & filter_column) +void DAGQueryBlockInterpreter::executeWhere(DAGPipeline & pipeline, const ExpressionActionsPtr & expr, String & filter_column, const String & extra_info) { - pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expr, filter_column, log->identifier()); }); + pipeline.transform([&](auto & stream) { + stream = std::make_shared(stream, expr, filter_column, log->identifier()); + stream->setExtraInfo(extra_info); + }); } void DAGQueryBlockInterpreter::executeWindow( DAGPipeline & pipeline, WindowDescription & window_description) { - executeExpression(pipeline, window_description.before_window); + executeExpression(pipeline, window_description.before_window, "before window"); /// If there are several streams, we merge them into one - executeUnion(pipeline, max_streams, log); + executeUnion(pipeline, max_streams, log, false, "merge into one for window input"); assert(pipeline.streams.size() == 1); pipeline.firstStream() = std::make_shared(pipeline.firstStream(), window_description, log->identifier()); } @@ -555,7 +354,10 @@ void DAGQueryBlockInterpreter::executeAggregation( AggregateDescriptions & aggregate_descriptions, bool is_final_agg) { - pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expression_actions_ptr, log->identifier()); }); + pipeline.transform([&](auto & stream) { + stream = std::make_shared(stream, expression_actions_ptr, log->identifier()); + stream->setExtraInfo("before aggregation"); + }); Block before_agg_header = pipeline.firstStream()->getHeader(); @@ -608,11 +410,14 @@ void DAGQueryBlockInterpreter::executeAggregation( } } -void DAGQueryBlockInterpreter::executeExpression(DAGPipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr) +void DAGQueryBlockInterpreter::executeExpression(DAGPipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, const String & extra_info) { if (!expressionActionsPtr->getActions().empty()) { - pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, expressionActionsPtr, log->identifier()); }); + pipeline.transform([&](auto & stream) { + stream = std::make_shared(stream, expressionActionsPtr, log->identifier()); + stream->setExtraInfo(extra_info); + }); } } @@ -644,7 +449,7 @@ void DAGQueryBlockInterpreter::orderStreams(DAGPipeline & pipeline, SortDescript }); /// If there are several streams, we merge them into one - executeUnion(pipeline, max_streams, log); + executeUnion(pipeline, max_streams, log, false, "for partial order"); /// Merge the sorted blocks. 
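The recurring pattern in these hunks is purely observational: every transform now tags the stream it wraps with a human-readable extra_info string ("execute where", "for partial order", ...) so that runtime profiling output can tell otherwise identical stream wrappers apart. A hedged sketch of the idea, with simplified Stream/Pipeline types standing in for IBlockInputStream and DAGPipeline:

```cpp
#include <functional>
#include <memory>
#include <string>
#include <vector>

// Simplified stand-ins for IBlockInputStream and DAGPipeline.
struct Stream
{
    std::string extra_info;
    void setExtraInfo(const std::string & info) { extra_info = info; }
};
using StreamPtr = std::shared_ptr<Stream>;

struct Pipeline
{
    std::vector<StreamPtr> streams;
    void transform(const std::function<void(StreamPtr &)> & fn)
    {
        for (auto & stream : streams)
            fn(stream);
    }
};

// Mirrors the executeWhere/executeExpression pattern: wrap each stream,
// then tag the wrapper so profile output can tell "execute where" apart
// from "execute having" even though both use the same stream class.
void wrapAndTag(Pipeline & pipeline, const std::string & extra_info)
{
    pipeline.transform([&](StreamPtr & stream) {
        stream = std::make_shared<Stream>(); // stands in for FilterBlockInputStream
        stream->setExtraInfo(extra_info);
    });
}
```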
pipeline.firstStream() = std::make_shared( @@ -675,6 +480,7 @@ void DAGQueryBlockInterpreter::handleExchangeReceiver(DAGPipeline & pipeline) BlockInputStreamPtr stream = std::make_shared(it->second, log->identifier(), query_block.source_name); exchange_receiver_io_input_streams.push_back(stream); stream = std::make_shared(stream, 8192, 0, log->identifier()); + stream->setExtraInfo("squashing after exchange receiver"); pipeline.streams.push_back(stream); } NamesAndTypes source_columns; @@ -721,8 +527,11 @@ void DAGQueryBlockInterpreter::handleProjection(DAGPipeline & pipeline, const ti output_columns.emplace_back(alias, col.type); project_cols.emplace_back(col.name, alias); } - pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, chain.getLastActions(), log->identifier()); }); - executeProject(pipeline, project_cols); + pipeline.transform([&](auto & stream) { + stream = std::make_shared(stream, chain.getLastActions(), log->identifier()); + stream->setExtraInfo("before projection"); + }); + executeProject(pipeline, project_cols, "projection"); analyzer = std::make_unique(std::move(output_columns), context); } @@ -736,7 +545,7 @@ void DAGQueryBlockInterpreter::handleWindow(DAGPipeline & pipeline, const tipb:: DAGExpressionAnalyzer dag_analyzer(input_columns, context); WindowDescription window_description = dag_analyzer.buildWindowDescription(window); executeWindow(pipeline, window_description); - executeExpression(pipeline, window_description.after_window); + executeExpression(pipeline, window_description.after_window, "cast after window"); analyzer = std::make_unique(window_description.after_window_columns, context); } @@ -823,7 +632,7 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) if (res.before_where) { // execute where - executeWhere(pipeline, res.before_where, res.filter_column_name); + executeWhere(pipeline, res.before_where, res.filter_column_name, "execute where"); recordProfileStreams(pipeline, query_block.selection_name); } @@ -843,12 +652,12 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) if (res.before_having) { // execute having - executeWhere(pipeline, res.before_having, res.having_column_name); + executeWhere(pipeline, res.before_having, res.having_column_name, "execute having"); recordProfileStreams(pipeline, query_block.having_name); } if (res.before_order_and_select) { - executeExpression(pipeline, res.before_order_and_select); + executeExpression(pipeline, res.before_order_and_select, "before order and select"); } if (!res.order_columns.empty()) @@ -859,7 +668,7 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) } // execute final project action - executeProject(pipeline, final_project); + executeProject(pipeline, final_project, "final projection"); // execute limit if (query_block.limit_or_topn && query_block.limit_or_topn->tp() == tipb::TypeLimit) { @@ -879,12 +688,15 @@ void DAGQueryBlockInterpreter::executeImpl(DAGPipeline & pipeline) } } -void DAGQueryBlockInterpreter::executeProject(DAGPipeline & pipeline, NamesWithAliases & project_cols) +void DAGQueryBlockInterpreter::executeProject(DAGPipeline & pipeline, NamesWithAliases & project_cols, const String & extra_info) { if (project_cols.empty()) return; ExpressionActionsPtr project = generateProjectExpressionActions(pipeline.firstStream(), context, project_cols); - pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, project, log->identifier()); }); + pipeline.transform([&](auto & stream) { + stream = 
std::make_shared(stream, project, log->identifier()); + stream->setExtraInfo(extra_info); + }); } void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline) @@ -897,7 +709,7 @@ void DAGQueryBlockInterpreter::executeLimit(DAGPipeline & pipeline) pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, limit, 0, log->identifier(), false); }); if (pipeline.hasMoreThanOneStream()) { - executeUnion(pipeline, max_streams, log); + executeUnion(pipeline, max_streams, log, false, "for partial limit"); pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, limit, 0, log->identifier(), false); }); } } @@ -944,7 +756,7 @@ BlockInputStreams DAGQueryBlockInterpreter::execute() executeImpl(pipeline); if (!pipeline.streams_with_non_joined_data.empty()) { - executeUnion(pipeline, max_streams, log); + executeUnion(pipeline, max_streams, log, false, "final union for non_joined_data"); restorePipelineConcurrency(pipeline); } diff --git a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h index 69bac9c3ba9..e68c4f91cee 100644 --- a/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h +++ b/dbms/src/Flash/Coprocessor/DAGQueryBlockInterpreter.h @@ -61,27 +61,13 @@ class DAGQueryBlockInterpreter void handleMockTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline); void handleTableScan(const TiDBTableScan & table_scan, DAGPipeline & pipeline); void handleJoin(const tipb::Join & join, DAGPipeline & pipeline, SubqueryForSet & right_query); - void prepareJoin( - const google::protobuf::RepeatedPtrField & keys, - const DataTypes & key_types, - DAGPipeline & pipeline, - Names & key_names, - bool left, - bool is_right_out_join, - const google::protobuf::RepeatedPtrField & filters, - String & filter_column_name); void handleExchangeReceiver(DAGPipeline & pipeline); void handleMockExchangeReceiver(DAGPipeline & pipeline); void handleProjection(DAGPipeline & pipeline, const tipb::Projection & projection); void handleWindow(DAGPipeline & pipeline, const tipb::Window & window); void handleWindowOrder(DAGPipeline & pipeline, const tipb::Sort & window_sort); - ExpressionActionsPtr genJoinOtherConditionAction( - const tipb::Join & join, - NamesAndTypes & source_columns, - String & filter_column_for_other_condition, - String & filter_column_for_other_eq_condition); - void executeWhere(DAGPipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, String & filter_column); - void executeExpression(DAGPipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr); + void executeWhere(DAGPipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, String & filter_column, const String & extra_info = ""); + void executeExpression(DAGPipeline & pipeline, const ExpressionActionsPtr & expressionActionsPtr, const String & extra_info = ""); void executeWindowOrder(DAGPipeline & pipeline, SortDescription sort_desc); void orderStreams(DAGPipeline & pipeline, SortDescription order_descr, Int64 limit); void executeOrder(DAGPipeline & pipeline, const NamesAndTypes & order_columns); @@ -96,7 +82,7 @@ class DAGQueryBlockInterpreter const TiDB::TiDBCollators & collators, AggregateDescriptions & aggregate_descriptions, bool is_final_agg); - void executeProject(DAGPipeline & pipeline, NamesWithAliases & project_cols); + void executeProject(DAGPipeline & pipeline, NamesWithAliases & project_cols, const String & extra_info = ""); void handleExchangeSender(DAGPipeline & pipeline); void 
handleMockExchangeSender(DAGPipeline & pipeline); diff --git a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp index f9353b26a41..df7e504d2c4 100644 --- a/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp +++ b/dbms/src/Flash/Coprocessor/DAGStorageInterpreter.cpp @@ -33,8 +33,8 @@ #include #include #include -#include #include +#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" @@ -187,7 +187,7 @@ void setQuotaAndLimitsOnTableScan(Context & context, DAGPipeline & pipeline) QuotaForIntervals & quota = context.getQuota(); pipeline.transform([&](auto & stream) { - if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) + if (auto * p_stream = dynamic_cast(stream.get())) { p_stream->setLimits(limits); p_stream->setQuota(quota); @@ -374,8 +374,10 @@ void DAGStorageInterpreter::executePushedDownFilter( { auto & stream = pipeline.streams[i]; stream = std::make_shared(stream, before_where, filter_column_name, log->identifier()); + stream->setExtraInfo("push down filter"); // after filter, do project action to keep the schema of local streams and remote streams the same. stream = std::make_shared(stream, project_after_where, log->identifier()); + stream->setExtraInfo("projection after push down filter"); } } @@ -413,6 +415,7 @@ void DAGStorageInterpreter::executeCastAfterTableScan( { auto & stream = pipeline.streams[i++]; stream = std::make_shared(stream, extra_cast, log->identifier()); + stream->setExtraInfo("cast after local tableScan"); } // remote streams if (i < pipeline.streams.size()) @@ -425,6 +428,7 @@ void DAGStorageInterpreter::executeCastAfterTableScan( { auto & stream = pipeline.streams[i++]; stream = std::make_shared(stream, project_for_cop_read, log->identifier()); + stream->setExtraInfo("cast after remote tableScan"); } } } diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 66f5d7031d7..87f58131c8c 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -707,7 +707,16 @@ void assertBlockSchema( actual->getName())); } } - +/// used by test +std::unordered_map getFuncNameToSigMap() +{ + std::unordered_map ret; + for (const auto & element : scalar_func_map) + { + ret[element.second] = element.first; + } + return ret; +} } // namespace bool isScalarFunctionExpr(const tipb::Expr & expr) @@ -1420,5 +1429,12 @@ tipb::EncodeType analyzeDAGEncodeType(DAGContext & dag_context) return tipb::EncodeType::TypeDefault; return encode_type; } +tipb::ScalarFuncSig reverseGetFuncSigByFuncName(const String & name) +{ + static std::unordered_map func_name_sig_map = getFuncNameToSigMap(); + if (func_name_sig_map.find(name) == func_name_sig_map.end()) + throw Exception(fmt::format("Unsupported function {}", name)); + return func_name_sig_map[name]; +} } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.h b/dbms/src/Flash/Coprocessor/DAGUtils.h index 4d6a62bbe6f..5776edf0098 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.h +++ b/dbms/src/Flash/Coprocessor/DAGUtils.h @@ -104,5 +104,6 @@ class UniqueNameGenerator tipb::DAGRequest getDAGRequestFromStringWithRetry(const String & s); tipb::EncodeType analyzeDAGEncodeType(DAGContext & dag_context); +tipb::ScalarFuncSig reverseGetFuncSigByFuncName(const String & name); } // namespace DB diff --git a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp index b7c75c06e67..741aa7b5e26 100644 --- 
a/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterDAG.cpp @@ -26,7 +26,7 @@ InterpreterDAG::InterpreterDAG(Context & context_, const DAGQuerySource & dag_) , dag(dag_) { const Settings & settings = context.getSettingsRef(); - if (dagContext().isBatchCop() || dagContext().isMPPTask()) + if (dagContext().isBatchCop() || (dagContext().isMPPTask() && !dagContext().isTest())) max_streams = settings.max_threads; else if (dagContext().isTest()) max_streams = dagContext().initialize_concurrency; @@ -85,9 +85,9 @@ BlockIO InterpreterDAG::execute() /// add union to run in parallel if needed if (dagContext().isMPPTask()) /// MPPTask do not need the returned blocks. - executeUnion(pipeline, max_streams, dagContext().log, /*ignore_block=*/true); + executeUnion(pipeline, max_streams, dagContext().log, /*ignore_block=*/true, "for mpp"); else - executeUnion(pipeline, max_streams, dagContext().log); + executeUnion(pipeline, max_streams, dagContext().log, false, "for non mpp"); if (dagContext().hasSubquery()) { const Settings & settings = context.getSettingsRef(); diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp index 69060071997..c9810454218 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.cpp @@ -34,6 +34,7 @@ void restoreConcurrency( { BlockInputStreamPtr shared_query_block_input_stream = std::make_shared(concurrency * 5, pipeline.firstStream(), log->identifier()); + shared_query_block_input_stream->setExtraInfo("restore concurrency"); pipeline.streams.assign(concurrency, shared_query_block_input_stream); } } @@ -50,9 +51,15 @@ BlockInputStreamPtr combinedNonJoinedDataStream( else if (pipeline.streams_with_non_joined_data.size() > 1) { if (ignore_block) + { ret = std::make_shared(pipeline.streams_with_non_joined_data, nullptr, max_threads, log->identifier()); + ret->setExtraInfo("combine non joined(ignore block)"); + } else + { ret = std::make_shared(pipeline.streams_with_non_joined_data, nullptr, max_threads, log->identifier()); + ret->setExtraInfo("combine non joined"); + } } pipeline.streams_with_non_joined_data.clear(); return ret; @@ -62,7 +69,8 @@ void executeUnion( DAGPipeline & pipeline, size_t max_streams, const LoggerPtr & log, - bool ignore_block) + bool ignore_block, + const String & extra_info) { if (pipeline.streams.size() == 1 && pipeline.streams_with_non_joined_data.empty()) return; @@ -73,6 +81,7 @@ void executeUnion( pipeline.firstStream() = std::make_shared(pipeline.streams, non_joined_data_stream, max_streams, log->identifier()); else pipeline.firstStream() = std::make_shared(pipeline.streams, non_joined_data_stream, max_streams, log->identifier()); + pipeline.firstStream()->setExtraInfo(extra_info); pipeline.streams.resize(1); } else if (non_joined_data_stream != nullptr) diff --git a/dbms/src/Flash/Coprocessor/InterpreterUtils.h b/dbms/src/Flash/Coprocessor/InterpreterUtils.h index 91e6d483220..5c4d4721d5e 100644 --- a/dbms/src/Flash/Coprocessor/InterpreterUtils.h +++ b/dbms/src/Flash/Coprocessor/InterpreterUtils.h @@ -37,7 +37,8 @@ void executeUnion( DAGPipeline & pipeline, size_t max_streams, const LoggerPtr & log, - bool ignore_block = false); + bool ignore_block = false, + const String & extra_info = ""); ExpressionActionsPtr generateProjectExpressionActions( const BlockInputStreamPtr & stream, diff --git a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp 
new file mode 100644 index 00000000000..2582a84ac46 --- /dev/null +++ b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.cpp @@ -0,0 +1,356 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB::JoinInterpreterHelper +{ +namespace +{ +std::pair getJoinKindAndBuildSideIndex(const tipb::Join & join) +{ + static const std::unordered_map equal_join_type_map{ + {tipb::JoinType::TypeInnerJoin, ASTTableJoin::Kind::Inner}, + {tipb::JoinType::TypeLeftOuterJoin, ASTTableJoin::Kind::Left}, + {tipb::JoinType::TypeRightOuterJoin, ASTTableJoin::Kind::Right}, + {tipb::JoinType::TypeSemiJoin, ASTTableJoin::Kind::Inner}, + {tipb::JoinType::TypeAntiSemiJoin, ASTTableJoin::Kind::Anti}, + {tipb::JoinType::TypeLeftOuterSemiJoin, ASTTableJoin::Kind::LeftSemi}, + {tipb::JoinType::TypeAntiLeftOuterSemiJoin, ASTTableJoin::Kind::LeftAnti}}; + static const std::unordered_map cartesian_join_type_map{ + {tipb::JoinType::TypeInnerJoin, ASTTableJoin::Kind::Cross}, + {tipb::JoinType::TypeLeftOuterJoin, ASTTableJoin::Kind::Cross_Left}, + {tipb::JoinType::TypeRightOuterJoin, ASTTableJoin::Kind::Cross_Right}, + {tipb::JoinType::TypeSemiJoin, ASTTableJoin::Kind::Cross}, + {tipb::JoinType::TypeAntiSemiJoin, ASTTableJoin::Kind::Cross_Anti}, + {tipb::JoinType::TypeLeftOuterSemiJoin, ASTTableJoin::Kind::Cross_LeftSemi}, + {tipb::JoinType::TypeAntiLeftOuterSemiJoin, ASTTableJoin::Kind::Cross_LeftAnti}}; + + const auto & join_type_map = join.left_join_keys_size() == 0 ? cartesian_join_type_map : equal_join_type_map; + auto join_type_it = join_type_map.find(join.join_type()); + if (unlikely(join_type_it == join_type_map.end())) + throw TiFlashException("Unknown join type in dag request", Errors::Coprocessor::BadRequest); + + ASTTableJoin::Kind kind = join_type_it->second; + + /// in DAG request, inner part is the build side, however for TiFlash implementation, + /// the build side must be the right side, so need to swap the join side if needed + /// 1. for (cross) inner join, there is no problem in this swap. + /// 2. for (cross) semi/anti-semi join, the build side is always right, needn't swap. + /// 3. for non-cross left/right join, there is no problem in this swap. + /// 4. for cross left join, the build side is always right, needn't and can't swap. + /// 5. for cross right join, the build side is always left, so it will always swap and change to cross left join. + /// note that whatever the build side is, we can't support cross-right join now. + + size_t build_side_index = 0; + switch (kind) + { + case ASTTableJoin::Kind::Cross_Right: + build_side_index = 0; + break; + case ASTTableJoin::Kind::Cross_Left: + build_side_index = 1; + break; + default: + build_side_index = join.inner_idx(); + } + assert(build_side_index == 0 || build_side_index == 1); + + // should swap join side. 
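getJoinKindAndBuildSideIndex encodes the swap rules in two steps: pick the build side, then mirror the join kind when the build side ends up on index 0 (see the `if (build_side_index != 1)` block that follows). A compressed sketch of the same decision table, using a local JoinKind enum as a stand-in for ASTTableJoin::Kind:

```cpp
#include <cassert>
#include <cstddef>
#include <utility>

// Local stand-in for ASTTableJoin::Kind; only the kinds the swap touches.
enum class JoinKind { Inner, Left, Right, Cross, Cross_Left, Cross_Right };

// inner_idx is the build side requested by the DAG; TiFlash always builds
// the hash table from the right input, so build side index 0 forces a swap.
std::pair<JoinKind, size_t> resolveBuildSide(JoinKind kind, size_t inner_idx)
{
    size_t build_side_index = inner_idx;
    if (kind == JoinKind::Cross_Right)
        build_side_index = 0; // build side is always left, so it always swaps
    else if (kind == JoinKind::Cross_Left)
        build_side_index = 1; // build side is always right, never swaps
    assert(build_side_index == 0 || build_side_index == 1);

    if (build_side_index != 1)
    {
        // Swapping the inputs mirrors the join kind.
        if (kind == JoinKind::Left)
            kind = JoinKind::Right;
        else if (kind == JoinKind::Right)
            kind = JoinKind::Left;
        else if (kind == JoinKind::Cross_Right)
            kind = JoinKind::Cross_Left;
    }
    return {kind, build_side_index};
}
```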
+ if (build_side_index != 1) + { + switch (kind) + { + case ASTTableJoin::Kind::Left: + kind = ASTTableJoin::Kind::Right; + break; + case ASTTableJoin::Kind::Right: + kind = ASTTableJoin::Kind::Left; + break; + case ASTTableJoin::Kind::Cross_Right: + kind = ASTTableJoin::Kind::Cross_Left; + default:; // just `default`, for other kinds, don't need to change kind. + } + } + + return {kind, build_side_index}; +} + +DataTypes getJoinKeyTypes(const tipb::Join & join) +{ + if (unlikely(join.left_join_keys_size() != join.right_join_keys_size())) + throw TiFlashException("size of join.left_join_keys != size of join.right_join_keys", Errors::Coprocessor::BadRequest); + DataTypes key_types; + for (int i = 0; i < join.left_join_keys_size(); ++i) + { + if (unlikely(!exprHasValidFieldType(join.left_join_keys(i)) || !exprHasValidFieldType(join.right_join_keys(i)))) + throw TiFlashException("Join key without field type", Errors::Coprocessor::BadRequest); + DataTypes types; + types.emplace_back(getDataTypeByFieldTypeForComputingLayer(join.left_join_keys(i).field_type())); + types.emplace_back(getDataTypeByFieldTypeForComputingLayer(join.right_join_keys(i).field_type())); + DataTypePtr common_type = getLeastSupertype(types); + key_types.emplace_back(common_type); + } + return key_types; +} + +TiDB::TiDBCollators getJoinKeyCollators(const tipb::Join & join, const DataTypes & join_key_types) +{ + TiDB::TiDBCollators collators; + size_t join_key_size = join_key_types.size(); + if (join.probe_types_size() == static_cast(join_key_size) && join.build_types_size() == join.probe_types_size()) + for (size_t i = 0; i < join_key_size; ++i) + { + if (removeNullable(join_key_types[i])->isString()) + { + if (unlikely(join.probe_types(i).collate() != join.build_types(i).collate())) + throw TiFlashException("Join with different collators on the join key", Errors::Coprocessor::BadRequest); + collators.push_back(getCollatorFromFieldType(join.probe_types(i))); + } + else + collators.push_back(nullptr); + } + return collators; +} + +std::tuple doGenJoinOtherConditionAction( + const Context & context, + const tipb::Join & join, + const NamesAndTypes & source_columns) +{ + if (join.other_conditions_size() == 0 && join.other_eq_conditions_from_in_size() == 0) + return {nullptr, "", ""}; + + DAGExpressionAnalyzer dag_analyzer(source_columns, context); + ExpressionActionsChain chain; + + String filter_column_for_other_condition; + if (join.other_conditions_size() > 0) + { + std::vector condition_vector; + for (const auto & c : join.other_conditions()) + { + condition_vector.push_back(&c); + } + filter_column_for_other_condition = dag_analyzer.appendWhere(chain, condition_vector); + } + + String filter_column_for_other_eq_condition; + if (join.other_eq_conditions_from_in_size() > 0) + { + std::vector condition_vector; + for (const auto & c : join.other_eq_conditions_from_in()) + { + condition_vector.push_back(&c); + } + filter_column_for_other_eq_condition = dag_analyzer.appendWhere(chain, condition_vector); + } + + return {chain.getLastActions(), std::move(filter_column_for_other_condition), std::move(filter_column_for_other_eq_condition)}; +} +} // namespace + +TiFlashJoin::TiFlashJoin(const tipb::Join & join_) // NOLINT(cppcoreguidelines-pro-type-member-init) + : join(join_) + , join_key_types(getJoinKeyTypes(join_)) + , join_key_collators(getJoinKeyCollators(join_, join_key_types)) +{ + std::tie(kind, build_side_index) = getJoinKindAndBuildSideIndex(join); + strictness = isSemiJoin() ? 
ASTTableJoin::Strictness::Any : ASTTableJoin::Strictness::All; +} + +String TiFlashJoin::genMatchHelperName(const Block & header1, const Block & header2) const +{ + if (!isLeftSemiFamily()) + { + return ""; + } + + size_t i = 0; + String match_helper_name = fmt::format("{}{}", Join::match_helper_prefix, i); + while (header1.has(match_helper_name) || header2.has(match_helper_name)) + { + match_helper_name = fmt::format("{}{}", Join::match_helper_prefix, ++i); + } + return match_helper_name; +} + +NamesAndTypes TiFlashJoin::genColumnsForOtherJoinFilter( + const Block & left_input_header, + const Block & right_input_header, + const ExpressionActionsPtr & probe_prepare_join_actions) const +{ +#ifndef NDEBUG + auto is_prepare_actions_valid = [](const Block & origin_block, const ExpressionActionsPtr & prepare_actions) { + const Block & prepare_sample_block = prepare_actions->getSampleBlock(); + for (const auto & p : origin_block) + { + if (!prepare_sample_block.has(p.name)) + return false; + } + return true; + }; + if (unlikely(!is_prepare_actions_valid(build_side_index == 1 ? left_input_header : right_input_header, probe_prepare_join_actions))) + { + throw TiFlashException("probe_prepare_join_actions isn't valid", Errors::Coprocessor::Internal); + } +#endif + + /// columns_for_other_join_filter is a vector of columns used + /// as the input columns when compiling other join filter. + /// Note the order in the column vector is very important: + /// first the columns in left_input_header, then followed + /// by the columns in right_input_header, if there are other + /// columns generated before compile other join filter, then + /// append the extra columns afterwards. In order to figure out + /// whether a given column is already in the column vector or + /// not quickly, we use another set to store the column names. + + /// The order of columns must be {left_input, right_input, extra columns}, + /// because tidb requires the input schema of join to be {left_input, right_input}. + /// Extra columns are appended to prevent extra columns from being repeatedly generated. + + NamesAndTypes columns_for_other_join_filter; + std::unordered_set column_set_for_origin_columns; + + auto append_origin_columns = [&columns_for_other_join_filter, &column_set_for_origin_columns](const Block & header, bool make_nullable) { + for (const auto & p : header) + { + columns_for_other_join_filter.emplace_back(p.name, make_nullable ? makeNullable(p.type) : p.type); + column_set_for_origin_columns.emplace(p.name); + } + }; + append_origin_columns(left_input_header, join.join_type() == tipb::JoinType::TypeRightOuterJoin); + append_origin_columns(right_input_header, join.join_type() == tipb::JoinType::TypeLeftOuterJoin); + + /// append the columns generated by probe side prepare join actions. + /// the new columns are + /// - filter_column and related temporary columns + /// - join keys and related temporary columns + auto append_new_columns = [&columns_for_other_join_filter, &column_set_for_origin_columns](const Block & header, bool make_nullable) { + for (const auto & p : header) + { + if (column_set_for_origin_columns.find(p.name) == column_set_for_origin_columns.end()) + columns_for_other_join_filter.emplace_back(p.name, make_nullable ? makeNullable(p.type) : p.type); + } + }; + bool make_nullable = build_side_index == 1 + ? 
join.join_type() == tipb::JoinType::TypeRightOuterJoin + : join.join_type() == tipb::JoinType::TypeLeftOuterJoin; + append_new_columns(probe_prepare_join_actions->getSampleBlock(), make_nullable); + + return columns_for_other_join_filter; +} + +/// all the columns from build side streams should be added after join, even for the join key. +NamesAndTypesList TiFlashJoin::genColumnsAddedByJoin( + const Block & build_side_header, + const String & match_helper_name) const +{ + NamesAndTypesList columns_added_by_join; + bool make_nullable = isTiFlashLeftJoin(); + for (auto const & p : build_side_header) + { + columns_added_by_join.emplace_back(p.name, make_nullable ? makeNullable(p.type) : p.type); + } + if (!match_helper_name.empty()) + { + columns_added_by_join.emplace_back(match_helper_name, Join::match_helper_type); + } + return columns_added_by_join; +} + +NamesAndTypes TiFlashJoin::genJoinOutputColumns( + const Block & left_input_header, + const Block & right_input_header, + const String & match_helper_name) const +{ + NamesAndTypes join_output_columns; + auto append_output_columns = [&join_output_columns](const Block & header, bool make_nullable) { + for (auto const & p : header) + { + join_output_columns.emplace_back(p.name, make_nullable ? makeNullable(p.type) : p.type); + } + }; + + append_output_columns(left_input_header, join.join_type() == tipb::JoinType::TypeRightOuterJoin); + if (!isSemiJoin()) + { + /// for semi join, the columns from right table will be ignored + append_output_columns(right_input_header, join.join_type() == tipb::JoinType::TypeLeftOuterJoin); + } + + if (!match_helper_name.empty()) + { + join_output_columns.emplace_back(match_helper_name, Join::match_helper_type); + } + + return join_output_columns; +} + +std::tuple TiFlashJoin::genJoinOtherConditionAction( + const Context & context, + const Block & left_input_header, + const Block & right_input_header, + const ExpressionActionsPtr & probe_side_prepare_join) const +{ + auto columns_for_other_join_filter + = genColumnsForOtherJoinFilter( + left_input_header, + right_input_header, + probe_side_prepare_join); + + return doGenJoinOtherConditionAction(context, join, columns_for_other_join_filter); +} + +std::tuple prepareJoin( + const Context & context, + const Block & input_header, + const google::protobuf::RepeatedPtrField & keys, + const DataTypes & key_types, + bool left, + bool is_right_out_join, + const google::protobuf::RepeatedPtrField & filters) +{ + NamesAndTypes source_columns; + for (auto const & p : input_header) + source_columns.emplace_back(p.name, p.type); + DAGExpressionAnalyzer dag_analyzer(std::move(source_columns), context); + ExpressionActionsChain chain; + Names key_names; + String filter_column_name; + dag_analyzer.appendJoinKeyAndJoinFilters(chain, keys, key_types, key_names, left, is_right_out_join, filters, filter_column_name); + return {chain.getLastActions(), std::move(key_names), std::move(filter_column_name)}; +} + +std::function concurrencyBuildIndexGenerator(size_t join_build_concurrency) +{ + size_t init_value = 0; + return [init_value, join_build_concurrency]() mutable { + return (init_value++) % join_build_concurrency; + }; +} +} // namespace DB::JoinInterpreterHelper \ No newline at end of file diff --git a/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.h b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.h new file mode 100644 index 00000000000..d84c03d572d --- /dev/null +++ b/dbms/src/Flash/Coprocessor/JoinInterpreterHelper.h @@ -0,0 +1,133 @@ +// Copyright 2022 PingCAP, Ltd. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ +class Context; + +namespace JoinInterpreterHelper +{ +struct TiFlashJoin +{ + explicit TiFlashJoin(const tipb::Join & join_); + + const tipb::Join & join; + + ASTTableJoin::Kind kind; + size_t build_side_index = 0; + + DataTypes join_key_types; + TiDB::TiDBCollators join_key_collators; + + ASTTableJoin::Strictness strictness; + + /// (cartesian) (anti) left semi join. + bool isLeftSemiFamily() const { return join.join_type() == tipb::JoinType::TypeLeftOuterSemiJoin || join.join_type() == tipb::JoinType::TypeAntiLeftOuterSemiJoin; } + + bool isSemiJoin() const { return join.join_type() == tipb::JoinType::TypeSemiJoin || join.join_type() == tipb::JoinType::TypeAntiSemiJoin || isLeftSemiFamily(); } + + const google::protobuf::RepeatedPtrField & getBuildJoinKeys() const + { + return build_side_index == 1 ? join.right_join_keys() : join.left_join_keys(); + } + + const google::protobuf::RepeatedPtrField & getProbeJoinKeys() const + { + return build_side_index == 0 ? join.right_join_keys() : join.left_join_keys(); + } + + const google::protobuf::RepeatedPtrField & getBuildConditions() const + { + return build_side_index == 1 ? join.right_conditions() : join.left_conditions(); + } + + const google::protobuf::RepeatedPtrField & getProbeConditions() const + { + return build_side_index == 0 ? join.right_conditions() : join.left_conditions(); + } + + bool isTiFlashLeftJoin() const { return kind == ASTTableJoin::Kind::Left || kind == ASTTableJoin::Kind::Cross_Left; } + + /// Cross_Right join will be converted to Cross_Left join, so no need to check Cross_Right + bool isTiFlashRightJoin() const { return kind == ASTTableJoin::Kind::Right; } + + /// return a name that is unique in header1 and header2 for left semi family join, + /// return "" for everything else. 
+ String genMatchHelperName(const Block & header1, const Block & header2) const; + + /// columns_added_by_join + /// = join_output_columns - probe_side_columns + /// = build_side_columns + match_helper_name + NamesAndTypesList genColumnsAddedByJoin( + const Block & build_side_header, + const String & match_helper_name) const; + + /// The columns output by the join will be: + /// {columns of left_input, columns of right_input, match_helper_name} + NamesAndTypes genJoinOutputColumns( + const Block & left_input_header, + const Block & right_input_header, + const String & match_helper_name) const; + + /// @other_condition_expr: generates other_filter_column and other_eq_filter_from_in_column + /// @other_filter_column_name: column name of `and(other_cond1, other_cond2, ...)` + /// @other_eq_filter_from_in_column_name: column name of `and(other_eq_cond1_from_in, other_eq_cond2_from_in, ...)` + /// For example, given + /// `select * from t1 where col1 in (select col2 from t2 where t1.col2 = t2.col3)` + /// - other_filter is `t1.col2 = t2.col3` + /// - other_eq_filter_from_in_column is `t1.col1 = t2.col2` + /// + /// New columns from the build-side prepare-join actions must not be appended, + /// because the input that the other-condition filter accepts is + /// {left_input_columns, right_input_columns, new_columns_from_probe_side_prepare, match_helper_name}. + std::tuple genJoinOtherConditionAction( + const Context & context, + const Block & left_input_header, + const Block & right_input_header, + const ExpressionActionsPtr & probe_side_prepare_join) const; + + NamesAndTypes genColumnsForOtherJoinFilter( + const Block & left_input_header, + const Block & right_input_header, + const ExpressionActionsPtr & probe_prepare_join_actions) const; +}; +
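// ---- [editorial sketch; not part of the original patch] ----
// genMatchHelperName (implemented in JoinInterpreterHelper.cpp above) probes
// "<prefix>0", "<prefix>1", ... until the candidate clashes with neither
// header. The same idea over a plain set of used names; genUniqueName is a
// hypothetical stand-in:
#include <string>
#include <unordered_set>

std::string genUniqueName(const std::string & prefix, const std::unordered_set<std::string> & used)
{
    size_t i = 0;
    std::string name = prefix + std::to_string(i);
    while (used.count(name) != 0) // bump the suffix until the name is unused
        name = prefix + std::to_string(++i);
    return name;
}
// e.g. with used = {"h0", "h1"}, genUniqueName("h", used) yields "h2".
// ---- [end of editorial sketch] ----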
+/// @join_prepare_expr_actions: generates the join key columns and the join filter column +/// @key_names: column names of the keys +/// @filter_column_name: column name of `and(filters)` +std::tuple prepareJoin( + const Context & context, + const Block & input_header, + const google::protobuf::RepeatedPtrField & keys, + const DataTypes & key_types, + bool left, + bool is_right_out_join, + const google::protobuf::RepeatedPtrField & filters); + +std::function concurrencyBuildIndexGenerator(size_t join_build_concurrency); +} // namespace JoinInterpreterHelper +} // namespace DB diff --git a/dbms/src/Flash/CoprocessorHandler.cpp b/dbms/src/Flash/CoprocessorHandler.cpp index e432dd37083..3d653025b83 100644 --- a/dbms/src/Flash/CoprocessorHandler.cpp +++ b/dbms/src/Flash/CoprocessorHandler.cpp @@ -22,8 +22,8 @@ #include #include #include -#include #include +#include #include diff --git a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp index 9639771c586..f194afee31f 100644 --- a/dbms/src/Flash/Mpp/ExchangeReceiver.cpp +++ b/dbms/src/Flash/Mpp/ExchangeReceiver.cpp @@ -92,10 +92,14 @@ class AsyncRequestHandler : public UnaryCallback switch (stage) { case AsyncRequestStage::WAIT_MAKE_READER: + { + // Take the lock to ensure the reactor thread has already finished creating the reader + std::unique_lock lock(mu); if (!ok) reader.reset(); notifyReactor(); break; + } case AsyncRequestStage::WAIT_BATCH_READ: if (ok) ++read_packet_index; @@ -227,6 +231,8 @@ class AsyncRequestHandler : public UnaryCallback void start() { stage = AsyncRequestStage::WAIT_MAKE_READER; + // Hold the lock so that the async reader stays unreachable from the gRPC thread until this function returns + std::unique_lock lock(mu); rpc_context->makeAsyncReader(*request, reader, thisAsUnaryCallback()); } @@ -283,6 +289,7 @@ class AsyncRequestHandler : public UnaryCallback size_t read_packet_index = 0; Status finish_status = RPCContext::getStatusOK(); LoggerPtr log; + std::mutex mu; }; } // namespace @@ -393,10 +400,10 @@ void ExchangeReceiverBase::reactor(const std::vector & asyn MPMCQueue ready_requests(alive_async_connections * 2); std::vector waiting_for_retry_requests; - std::vector> handlers; + std::vector> handlers; handlers.reserve(alive_async_connections); for (const auto & req : async_requests) - handlers.emplace_back(&ready_requests, &msg_channel, rpc_context, req, exc_log->identifier()); + handlers.emplace_back(std::make_unique(&ready_requests, &msg_channel, rpc_context, req, exc_log->identifier())); while (alive_async_connections > 0) { diff --git a/dbms/src/Flash/tests/exchange_perftest.cpp b/dbms/src/Flash/tests/exchange_perftest.cpp index 45dbac4a7f6..c2e047bec62 100644 --- a/dbms/src/Flash/tests/exchange_perftest.cpp +++ b/dbms/src/Flash/tests/exchange_perftest.cpp @@ -462,7 +462,7 @@ struct ReceiverHelper SizeLimits(0, 0, OverflowMode::THROW), ASTTableJoin::Kind::Inner, ASTTableJoin::Strictness::All, - concurrency, + /*req_id=*/"", TiDB::TiDBCollators{nullptr}, "", "", @@ -471,7 +471,7 @@ nullptr, 65536); - join_ptr->setSampleBlock(receiver_header); + join_ptr->init(receiver_header, concurrency); for (int i = 0; i < concurrency; ++i) streams[i] = std::make_shared(streams[i], join_ptr, i, /*req_id=*/""); diff --git a/dbms/src/Flash/tests/gtest_interpreter.cpp b/dbms/src/Flash/tests/gtest_interpreter.cpp index 0d07159b8fc..aed9d9e90f9 100644 --- a/dbms/src/Flash/tests/gtest_interpreter.cpp +++ b/dbms/src/Flash/tests/gtest_interpreter.cpp @@ -47,18 +47,18 @@ try .build(context); { String expected = R"( -Union - SharedQuery x 10 - Expression - MergeSorting - Union - PartialSorting x 10 - Expression - Filter - SharedQuery - 
ParallelAggregating - Expression x 10 - Filter +Union: + SharedQuery x 10: + Expression: + MergeSorting, limit = 10 + Union: + PartialSorting x 10: limit = 10 + Expression: + Filter: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + Filter: MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } @@ -72,18 +72,18 @@ Union { String expected = R"( -Union - SharedQuery x 10 - Limit - Union - Limit x 10 - Expression - Expression - Filter - SharedQuery - ParallelAggregating - Expression x 10 - Filter +Union: + SharedQuery x 10: + Limit, limit = 10 + Union: + Limit x 10, limit = 10 + Expression: + Expression: + Filter: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + Filter: MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } @@ -100,17 +100,17 @@ try .build(context); { String expected = R"( -Union - Expression x 10 - Expression - Expression - Expression - Expression - Expression - Expression - Expression - Expression - Expression +Union: + Expression x 10: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } @@ -122,18 +122,18 @@ Union .build(context); { String expected = R"( -Union - Expression x 10 - Expression - Expression - SharedQuery - Expression - MergeSorting - Union - PartialSorting x 10 - Expression - Expression - Expression +Union: + Expression x 10: + Expression: + Expression: + SharedQuery: + Expression: + MergeSorting, limit = 10 + Union: + PartialSorting x 10: limit = 10 + Expression: + Expression: + Expression: MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } @@ -147,24 +147,24 @@ Union .build(context); { String expected = R"( -Union - Expression x 10 - Expression - Expression - Expression - SharedQuery - ParallelAggregating - Expression x 10 - Expression - Expression - SharedQuery - Expression - MergeSorting - Union - PartialSorting x 10 - Expression - Expression - Expression +Union: + Expression x 10: + Expression: + Expression: + Expression: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + Expression: + Expression: + SharedQuery: + Expression: + MergeSorting, limit = 10 + Union: + PartialSorting x 10: limit = 10 + Expression: + Expression: + Expression: MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } @@ -181,33 +181,33 @@ Union .build(context); { String expected = R"( -Union - SharedQuery x 10 - Limit - Union - Limit x 10 - Expression - Expression - Expression - Expression - Expression - Filter - Expression - Expression - Expression - SharedQuery - ParallelAggregating - Expression x 10 - Expression - Expression - SharedQuery - Expression - MergeSorting - Union - PartialSorting x 10 - Expression - Expression - Expression +Union: + SharedQuery x 10: + Limit, limit = 10 + Union: + Limit x 10, limit = 10 + Expression: + Expression: + Expression: + Expression: + Expression: + Filter: + Expression: + Expression: + Expression: + SharedQuery: + ParallelAggregating, max_threads: 10, final: true + Expression x 10: + Expression: + Expression: + SharedQuery: + Expression: + MergeSorting, limit = 10 + Union: + PartialSorting x 10: limit = 10 + Expression: + Expression: + Expression: MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } @@ -231,24 +231,24 @@ Union { String expected = R"( CreatingSets - Union - 
HashJoinBuildBlockInputStream x 10 - Expression - Expression + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: MockTableScan - Union x 2 - HashJoinBuildBlockInputStream x 10 - Expression - Expression - Expression - HashJoinProbe - Expression + Union x 2: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + Expression: + HashJoinProbe: + Expression: MockTableScan - Union - Expression x 10 - Expression - HashJoinProbe - Expression + Union: + Expression x 10: + Expression: + HashJoinProbe: + Expression: MockTableScan)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } @@ -260,17 +260,17 @@ CreatingSets .build(context); { String expected = R"( -Union - Expression x 10 - Expression - Expression - Expression - Expression - Expression - Expression - Expression - Expression - Expression +Union: + Expression x 10: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: MockExchangeReceiver)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } @@ -283,18 +283,18 @@ Union .build(context); { String expected = R"( -Union +Union: MockExchangeSender x 10 - Expression - Expression - Expression - Expression - Expression - Expression - Expression - Expression - Expression - Expression + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: + Expression: MockExchangeReceiver)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } @@ -318,24 +318,24 @@ Union { String expected = R"( CreatingSets - Union - HashJoinBuildBlockInputStream x 10 - Expression - Expression + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: MockExchangeReceiver - Union x 2 - HashJoinBuildBlockInputStream x 10 - Expression - Expression - Expression - HashJoinProbe - Expression + Union x 2: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + Expression: + HashJoinProbe: + Expression: MockExchangeReceiver - Union - Expression x 10 - Expression - HashJoinProbe - Expression + Union: + Expression x 10: + Expression: + HashJoinProbe: + Expression: MockExchangeReceiver)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } @@ -360,25 +360,25 @@ CreatingSets { String expected = R"( CreatingSets - Union - HashJoinBuildBlockInputStream x 10 - Expression - Expression + Union: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: MockExchangeReceiver - Union x 2 - HashJoinBuildBlockInputStream x 10 - Expression - Expression - Expression - HashJoinProbe - Expression + Union x 2: + HashJoinBuildBlockInputStream x 10: , join_kind = Left + Expression: + Expression: + Expression: + HashJoinProbe: + Expression: MockExchangeReceiver - Union + Union: MockExchangeSender x 10 - Expression - Expression - HashJoinProbe - Expression + Expression: + Expression: + HashJoinProbe: + Expression: MockExchangeReceiver)"; ASSERT_BLOCKINPUTSTREAM_EQAUL(expected, request, 10); } diff --git a/dbms/src/Functions/FunctionsDateTime.cpp b/dbms/src/Functions/FunctionsDateTime.cpp index dd072a00f76..607f6bc4c99 100644 --- a/dbms/src/Functions/FunctionsDateTime.cpp +++ b/dbms/src/Functions/FunctionsDateTime.cpp @@ -254,7 +254,6 @@ void registerFunctionsDateTime(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); - factory.registerFunction(); factory.registerFunction(); } diff --git 
a/dbms/src/Functions/FunctionsDateTime.h b/dbms/src/Functions/FunctionsDateTime.h index df579a1bab8..08f5a5887d4 100644 --- a/dbms/src/Functions/FunctionsDateTime.h +++ b/dbms/src/Functions/FunctionsDateTime.h @@ -3450,7 +3450,6 @@ using FunctionToTiDBDayOfYear = FunctionMyDateOrMyDateTimeToSomething; using FunctionToTiDBToSeconds = FunctionMyDateOrMyDateTimeToSomething; using FunctionToTiDBToDays = FunctionMyDateOrMyDateTimeToSomething; - using FunctionToRelativeYearNum = FunctionDateOrDateTimeToSomething; using FunctionToRelativeQuarterNum = FunctionDateOrDateTimeToSomething; using FunctionToRelativeMonthNum = FunctionDateOrDateTimeToSomething; diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp index e96d57e0370..8095fbb0e59 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.cpp +++ b/dbms/src/Interpreters/AsynchronousMetrics.cpp @@ -21,8 +21,12 @@ #include #include #include +#include #include +#include #include +#include +#include #include #include @@ -125,6 +129,26 @@ static void calculateMaxAndSum(Max & max, Sum & sum, T x) max = x; } +FileUsageStatistics AsynchronousMetrics::getPageStorageFileUsage() +{ + // Get from RegionPersister + auto & tmt = context.getTMTContext(); + auto & kvstore = tmt.getKVStore(); + FileUsageStatistics usage = kvstore->getFileUsageStatistics(); + + // Get the blob file status from all PS V3 instances + if (auto global_storage_pool = context.getGlobalStoragePool(); global_storage_pool != nullptr) + { + const auto log_usage = global_storage_pool->log_storage->getFileUsageStatistics(); + const auto meta_usage = global_storage_pool->meta_storage->getFileUsageStatistics(); + const auto data_usage = global_storage_pool->data_storage->getFileUsageStatistics(); + + usage.total_file_num += log_usage.total_file_num + meta_usage.total_file_num + data_usage.total_file_num; + usage.total_disk_size += log_usage.total_disk_size + meta_usage.total_disk_size + data_usage.total_disk_size; + usage.total_valid_size += log_usage.total_valid_size + meta_usage.total_valid_size + data_usage.total_valid_size; + } + return usage; +} void AsynchronousMetrics::update() { @@ -147,6 +171,7 @@ void AsynchronousMetrics::update() set("Uptime", context.getUptimeSeconds()); { + // Get the snapshot status from all delta tree tables auto databases = context.getDatabases(); double max_dt_stable_oldest_snapshot_lifetime = 0.0; @@ -177,6 +202,13 @@ void AsynchronousMetrics::update() set("MaxDTBackgroundTasksLength", max_dt_background_tasks_length); } + { + const FileUsageStatistics usage = getPageStorageFileUsage(); + set("BlobFileNums", usage.total_file_num); + set("BlobDiskBytes", usage.total_disk_size); + set("BlobValidBytes", usage.total_valid_size); + } + #if USE_TCMALLOC { /// tcmalloc related metrics. Remove if you switch to different allocator. diff --git a/dbms/src/Interpreters/AsynchronousMetrics.h b/dbms/src/Interpreters/AsynchronousMetrics.h index 5de328601a6..536e6a6b6f6 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.h +++ b/dbms/src/Interpreters/AsynchronousMetrics.h @@ -14,6 +14,8 @@ #pragma once +#include + #include #include #include @@ -47,6 +49,9 @@ class AsynchronousMetrics /// Returns copy of all values. 
Container getValues() const; +private: + FileUsageStatistics getPageStorageFileUsage(); + private: Context & context; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 26e950d7798..a0adef5b50d 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -59,9 +59,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -162,7 +162,7 @@ struct ContextShared PathCapacityMetricsPtr path_capacity_ptr; /// Path capacity metrics FileProviderPtr file_provider; /// File provider. IORateLimiter io_rate_limiter; - PageStorageRunMode storage_run_mode; + PageStorageRunMode storage_run_mode = PageStorageRunMode::ONLY_V3; DM::GlobalStoragePoolPtr global_storage_pool; /// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests. @@ -207,6 +207,7 @@ struct ContextShared explicit ContextShared(std::shared_ptr runtime_components_factory_) : runtime_components_factory(std::move(runtime_components_factory_)) + , storage_run_mode(PageStorageRunMode::ONLY_V3) { /// TODO: make it singleton (?) static std::atomic num_calls{0}; @@ -714,7 +715,7 @@ Dependencies Context::getDependencies(const String & database_name, const String checkDatabaseAccessRightsImpl(db); } - ViewDependencies::const_iterator iter = shared->view_dependencies.find(DatabaseAndTableName(db, table_name)); + auto iter = shared->view_dependencies.find(DatabaseAndTableName(db, table_name)); if (iter == shared->view_dependencies.end()) return {}; @@ -728,7 +729,7 @@ bool Context::isTableExist(const String & database_name, const String & table_na String db = resolveDatabase(database_name, current_database); checkDatabaseAccessRightsImpl(db); - Databases::const_iterator it = shared->databases.find(db); + auto it = shared->databases.find(db); return shared->databases.end() != it && it->second->isTableExist(*this, table_name); } @@ -754,7 +755,7 @@ void Context::assertTableExists(const String & database_name, const String & tab String db = resolveDatabase(database_name, current_database); checkDatabaseAccessRightsImpl(db); - Databases::const_iterator it = shared->databases.find(db); + auto it = shared->databases.find(db); if (shared->databases.end() == it) throw Exception(fmt::format("Database {} doesn't exist", backQuoteIfNeed(db)), ErrorCodes::UNKNOWN_DATABASE); @@ -771,7 +772,7 @@ void Context::assertTableDoesntExist(const String & database_name, const String if (check_database_access_rights) checkDatabaseAccessRightsImpl(db); - Databases::const_iterator it = shared->databases.find(db); + auto it = shared->databases.find(db); if (shared->databases.end() != it && it->second->isTableExist(*this, table_name)) throw Exception(fmt::format("Table {}.{} already exists.", backQuoteIfNeed(db), backQuoteIfNeed(table_name)), ErrorCodes::TABLE_ALREADY_EXISTS); } @@ -826,7 +827,7 @@ Tables Context::getExternalTables() const StoragePtr Context::tryGetExternalTable(const String & table_name) const { - TableAndCreateASTs::const_iterator jt = external_tables.find(table_name); + auto jt = external_tables.find(table_name); if (external_tables.end() == jt) return StoragePtr(); @@ -864,7 +865,7 @@ StoragePtr Context::getTableImpl(const String & database_name, const String & ta String db = resolveDatabase(database_name, current_database); checkDatabaseAccessRightsImpl(db); - Databases::const_iterator it = shared->databases.find(db); + auto it = shared->databases.find(db); if (shared->databases.end() == 
it) { if (exception) @@ -894,7 +895,7 @@ void Context::addExternalTable(const String & table_name, const StoragePtr & sto StoragePtr Context::tryRemoveExternalTable(const String & table_name) { - TableAndCreateASTs::const_iterator it = external_tables.find(table_name); + auto it = external_tables.find(table_name); if (external_tables.end() == it) return StoragePtr(); @@ -954,7 +955,7 @@ std::unique_ptr Context::getDDLGuardIfTableDoesntExist(const String & { auto lock = getLock(); - Databases::const_iterator it = shared->databases.find(database); + auto it = shared->databases.find(database); if (shared->databases.end() != it && it->second->isTableExist(*this, table)) return {}; @@ -993,7 +994,7 @@ ASTPtr Context::getCreateTableQuery(const String & database_name, const String & ASTPtr Context::getCreateExternalTableQuery(const String & table_name) const { - TableAndCreateASTs::const_iterator jt = external_tables.find(table_name); + auto jt = external_tables.find(table_name); if (external_tables.end() == jt) throw Exception(fmt::format("Temporary table {} doesn't exist", backQuoteIfNeed(table_name)), ErrorCodes::UNKNOWN_TABLE); @@ -1088,7 +1089,7 @@ void Context::setCurrentQueryId(const String & query_id) UInt64 a; UInt64 b; }; - } random; + } random{}; { auto lock = getLock(); @@ -1650,9 +1651,8 @@ bool Context::initializeGlobalStoragePoolIfNeed(const PathPool & path_pool) auto lock = getLock(); if (shared->global_storage_pool) { - // Can't init GlobalStoragePool twice. - // otherwise the pagestorage instances in `StoragePool` for each table won't be updated and cause unexpected problem. - throw Exception("GlobalStoragePool has already been initialized.", ErrorCodes::LOGICAL_ERROR); + // GlobalStoragePool may be initialized many times in some test cases for restore. + LOG_WARNING(shared->log, "GlobalStoragePool has already been initialized."); } CurrentMetrics::set(CurrentMetrics::GlobalStorageRunMode, static_cast(shared->storage_run_mode)); if (shared->storage_run_mode == PageStorageRunMode::MIX_MODE || shared->storage_run_mode == PageStorageRunMode::ONLY_V3) diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index 45d5293d584..8e75a64427c 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -225,7 +225,7 @@ void ExpressionAction::prepare(Block & sample_block) for (const auto & name : array_joined_columns) { ColumnWithTypeAndName & current = sample_block.getByName(name); - const DataTypeArray * array_type = typeid_cast(&*current.type); + const auto * array_type = typeid_cast(&*current.type); if (!array_type) throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); current.type = array_type->getNestedType(); @@ -354,7 +354,7 @@ void ExpressionAction::execute(Block & block) const if (ColumnPtr converted = any_array_ptr->convertToFullColumnIfConst()) any_array_ptr = converted; - const ColumnArray * any_array = typeid_cast(&*any_array_ptr); + const auto * any_array = typeid_cast(&*any_array_ptr); if (!any_array) throw Exception("ARRAY JOIN of not array: " + *array_joined_columns.begin(), ErrorCodes::TYPE_MISMATCH); @@ -461,8 +461,7 @@ void ExpressionAction::executeOnTotals(Block & block) const join->joinTotals(block); } - -std::string ExpressionAction::toString() const +String ExpressionAction::toString() const { std::stringstream ss; switch (type) @@ -496,7 +495,7 @@ std::string ExpressionAction::toString() const case ARRAY_JOIN: ss << (array_join_is_left ? 
"LEFT " : "") << "ARRAY JOIN "; - for (NameSet::const_iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it) + for (auto it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it) { if (it != array_joined_columns.begin()) ss << ", "; @@ -506,7 +505,7 @@ std::string ExpressionAction::toString() const case JOIN: ss << "JOIN "; - for (NamesAndTypesList::const_iterator it = columns_added_by_join.begin(); it != columns_added_by_join.end(); ++it) + for (auto it = columns_added_by_join.begin(); it != columns_added_by_join.end(); ++it) { if (it != columns_added_by_join.begin()) ss << ", "; @@ -529,7 +528,6 @@ std::string ExpressionAction::toString() const default: throw Exception("Unexpected Action type", ErrorCodes::LOGICAL_ERROR); } - return ss.str(); } @@ -842,9 +840,9 @@ void ExpressionActions::finalize(const Names & output_columns) if (final_columns.empty() && !input_columns.empty()) final_columns.insert(getSmallestColumn(input_columns)); - for (NamesAndTypesList::iterator it = input_columns.begin(); it != input_columns.end();) + for (auto it = input_columns.begin(); it != input_columns.end();) { - NamesAndTypesList::iterator it0 = it; + auto it0 = it; ++it; if (!needed_columns.count(it0->name)) { @@ -931,8 +929,8 @@ std::string ExpressionActions::dumpActions() const ss << "\noutput:\n"; NamesAndTypesList output_columns = sample_block.getNamesAndTypesList(); - for (NamesAndTypesList::const_iterator it = output_columns.begin(); it != output_columns.end(); ++it) - ss << it->name << " " << it->type->getName() << "\n"; + for (const auto & output_column : output_columns) + ss << output_column.name << " " << output_column.type->getName() << "\n"; return ss.str(); } diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h index 7ec54a1a8ae..e8fb48f4e3f 100644 --- a/dbms/src/Interpreters/ExpressionActions.h +++ b/dbms/src/Interpreters/ExpressionActions.h @@ -251,7 +251,7 @@ struct ExpressionActionsChain ExpressionActionsPtr actions; Names required_output; - Step(const ExpressionActionsPtr & actions_ = nullptr, const Names & required_output_ = Names()) + explicit Step(const ExpressionActionsPtr & actions_ = nullptr, const Names & required_output_ = Names()) : actions(actions_) , required_output(required_output_) {} diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index eb07e2d541e..a532ed8a8e0 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -247,16 +247,16 @@ void ExpressionAnalyzer::translateQualifiedNames() if (!select_query || !select_query->tables || select_query->tables->children.empty()) return; - ASTTablesInSelectQueryElement & element = static_cast(*select_query->tables->children[0]); + auto & element = static_cast(*select_query->tables->children[0]); if (!element.table_expression) /// This is ARRAY JOIN without a table at the left side. 
return; - ASTTableExpression & table_expression = static_cast(*element.table_expression); + auto & table_expression = static_cast(*element.table_expression); if (table_expression.database_and_table_name) { - const ASTIdentifier & identifier = static_cast(*table_expression.database_and_table_name); + const auto & identifier = static_cast(*table_expression.database_and_table_name); alias = identifier.tryGetAlias(); @@ -291,7 +291,7 @@ void ExpressionAnalyzer::translateQualifiedNames() void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const String & database_name, const String & table_name, const String & alias) { - if (ASTIdentifier * ident = typeid_cast(ast.get())) + if (auto * ident = typeid_cast(ast.get())) { if (ident->kind == ASTIdentifier::Column) { @@ -352,7 +352,7 @@ void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const String if (ast->children.size() != 1) throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR); - ASTIdentifier * ident = typeid_cast(ast->children[0].get()); + auto * ident = typeid_cast(ast->children[0].get()); if (!ident) throw Exception("Logical error: qualified asterisk must have identifier as its child", ErrorCodes::LOGICAL_ERROR); @@ -396,7 +396,7 @@ void ExpressionAnalyzer::optimizeIfWithConstantCondition() bool ExpressionAnalyzer::tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) const { /// numeric constant in condition - if (const ASTLiteral * literal = typeid_cast(condition.get())) + if (const auto * literal = typeid_cast(condition.get())) { if (literal->value.getType() == Field::Types::Int64 || literal->value.getType() == Field::Types::UInt64) { @@ -406,14 +406,14 @@ bool ExpressionAnalyzer::tryExtractConstValueFromCondition(const ASTPtr & condit } /// cast of numeric constant in condition to UInt8 - if (const ASTFunction * function = typeid_cast(condition.get())) + if (const auto * function = typeid_cast(condition.get())) { if (function->name == "CAST") { - if (ASTExpressionList * expr_list = typeid_cast(function->arguments.get())) + if (auto * expr_list = typeid_cast(function->arguments.get())) { const ASTPtr & type_ast = expr_list->children.at(1); - if (const ASTLiteral * type_literal = typeid_cast(type_ast.get())) + if (const auto * type_literal = typeid_cast(type_ast.get())) { if (type_literal->value.getType() == Field::Types::String && type_literal->value.get() == "UInt8") return tryExtractConstValueFromCondition(expr_list->children.at(0), value); @@ -432,7 +432,7 @@ void ExpressionAnalyzer::optimizeIfWithConstantConditionImpl(ASTPtr & current_as for (ASTPtr & child : current_ast->children) { - ASTFunction * function_node = typeid_cast(child.get()); + auto * function_node = typeid_cast(child.get()); if (!function_node || function_node->name != "if") { optimizeIfWithConstantConditionImpl(child, aliases); @@ -440,7 +440,7 @@ void ExpressionAnalyzer::optimizeIfWithConstantConditionImpl(ASTPtr & current_as } optimizeIfWithConstantConditionImpl(function_node->arguments, aliases); - ASTExpressionList * args = typeid_cast(function_node->arguments.get()); + auto * args = typeid_cast(function_node->arguments.get()); ASTPtr condition_expr = args->children.at(0); ASTPtr then_expr = args->children.at(1); @@ -603,13 +603,13 @@ void ExpressionAnalyzer::initGlobalSubqueries(ASTPtr & ast) /// Bottom-up actions. - if (ASTFunction * node = typeid_cast(ast.get())) + if (auto * node = typeid_cast(ast.get())) { /// For GLOBAL IN. 
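// [editorial note; not part of the original patch] For example, in
// `SELECT * FROM dist WHERE id GLOBAL IN (SELECT id FROM small)` the subquery
// result is materialized into an external temporary table and shipped to the
// remote servers, which is why argument 1 of globalIn/globalNotIn is routed
// through addExternalStorage here.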
if (do_global && (node->name == "globalIn" || node->name == "globalNotIn")) addExternalStorage(node->arguments->children.at(1)); } - else if (ASTTablesInSelectQueryElement * node = typeid_cast(ast.get())) + else if (auto * node = typeid_cast(ast.get())) { /// For GLOBAL JOIN. if (do_global && node->table_join @@ -628,7 +628,7 @@ void ExpressionAnalyzer::findExternalTables(ASTPtr & ast) /// If table type identifier StoragePtr external_storage; - if (ASTIdentifier * node = typeid_cast(ast.get())) + if (auto * node = typeid_cast(ast.get())) if (node->kind == ASTIdentifier::Table) if ((external_storage = context.tryGetExternalTable(node->name))) external_tables[node->name] = external_storage; @@ -658,8 +658,8 @@ static std::shared_ptr interpretSubquery( const Names & required_source_columns) { /// Subquery or table name. The name of the table is similar to the subquery `SELECT * FROM t`. - const ASTSubquery * subquery = typeid_cast(subquery_or_table_name.get()); - const ASTIdentifier * table = typeid_cast(subquery_or_table_name.get()); + const auto * subquery = typeid_cast(subquery_or_table_name.get()); + const auto * table = typeid_cast(subquery_or_table_name.get()); if (!subquery && !table) throw Exception("IN/JOIN supports only SELECT subqueries.", ErrorCodes::BAD_ARGUMENTS); @@ -721,9 +721,9 @@ static std::shared_ptr interpretSubquery( std::set all_column_names; std::set assigned_column_names; - if (ASTSelectWithUnionQuery * select_with_union = typeid_cast(query.get())) + if (auto * select_with_union = typeid_cast(query.get())) { - if (ASTSelectQuery * select = typeid_cast(select_with_union->list_of_selects->children.at(0).get())) + if (auto * select = typeid_cast(select_with_union->list_of_selects->children.at(0).get())) { for (auto & expr : select->select_expression_list->children) all_column_names.insert(expr->getAliasOrColumnName()); @@ -973,7 +973,7 @@ void ExpressionAnalyzer::normalizeTreeImpl( { /// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`. if (functionIsInOrGlobalInOperator(func_node->name)) - if (ASTIdentifier * right = typeid_cast(func_node->arguments->children.at(1).get())) + if (auto * right = typeid_cast(func_node->arguments->children.at(1).get())) if (!aliases.count(right->name)) right->kind = ASTIdentifier::Table; @@ -1030,7 +1030,7 @@ void ExpressionAnalyzer::normalizeTreeImpl( } } } - else if (ASTExpressionList * node = typeid_cast(ast.get())) + else if (auto * node = typeid_cast(ast.get())) { // Get hidden column names of mutable storage OrderedNameSet filtered_names; @@ -1068,14 +1068,14 @@ void ExpressionAnalyzer::normalizeTreeImpl( } } } - else if (ASTTablesInSelectQueryElement * node = typeid_cast(ast.get())) + else if (auto * node = typeid_cast(ast.get())) { if (node->table_expression) { auto & database_and_table_name = static_cast(*node->table_expression).database_and_table_name; if (database_and_table_name) { - if (ASTIdentifier * right = typeid_cast(database_and_table_name.get())) + if (auto * right = typeid_cast(database_and_table_name.get())) { right->kind = ASTIdentifier::Table; } @@ -1127,7 +1127,7 @@ void ExpressionAnalyzer::normalizeTreeImpl( } /// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced not only in children, but also in where_expression and having_expression. 
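// [editorial example; not part of the original patch] For
// `SELECT a + 1 AS s FROM t WHERE s`, where_expression is the bare alias `s`
// itself, so normalization has to rewrite that member directly as well, not
// only the nodes reachable through children.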
- if (ASTSelectQuery * select = typeid_cast(ast.get())) + if (auto * select = typeid_cast(ast.get())) { if (select->prewhere_expression) normalizeTreeImpl(select->prewhere_expression, finished_asts, current_asts, current_alias, level + 1); @@ -1211,7 +1211,7 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) * The request is sent to remote servers with already substituted constants. */ - if (ASTSubquery * subquery = typeid_cast(ast.get())) + if (auto * subquery = typeid_cast(ast.get())) { Context subquery_context = context; Settings subquery_settings = context.getSettings(); @@ -1283,7 +1283,7 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) /** Don't descend into subqueries in arguments of the IN operator. * But if an argument is not a subquery, then deeper inside it there may be scalar subqueries, and we need to descend into them. */ - ASTFunction * func = typeid_cast(ast.get()); + auto * func = typeid_cast(ast.get()); if (func && functionIsInOrGlobalInOperator(func->name)) { @@ -1424,7 +1424,7 @@ void ExpressionAnalyzer::optimizeOrderBy() for (const auto & elem : elems) { String name = elem->children.front()->getColumnName(); - const ASTOrderByElement & order_by_elem = typeid_cast(*elem); + const auto & order_by_elem = typeid_cast(*elem); if (elems_set.emplace(name, order_by_elem.collation ? order_by_elem.collation->getColumnName() : "").second) unique_elems.emplace_back(elem); @@ -1496,14 +1496,14 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node, const Block & continue; /// Don't dive into lambda functions - const ASTFunction * func = typeid_cast(child.get()); + const auto * func = typeid_cast(child.get()); if (func && func->name == "lambda") continue; makeSetsForIndexImpl(child, sample_block); } - const ASTFunction * func = typeid_cast(node.get()); + const auto * func = typeid_cast(node.get()); if (func && functionIsInOperator(func->name)) { const IAST & args = *func->arguments; @@ -1551,7 +1551,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ return; /// If it is a subquery or a table name for a SELECT. - const ASTIdentifier * identifier = typeid_cast(arg.get()); + const auto * identifier = typeid_cast(arg.get()); if (typeid_cast(arg.get()) || identifier) { /// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery. @@ -1566,7 +1566,7 @@ void ExpressionAnalyzer::makeSet(const ASTFunction * node, const Block & sample_ if (table) { - StorageSet * storage_set = dynamic_cast(table.get()); + auto * storage_set = dynamic_cast(table.get()); if (storage_set) { @@ -1650,7 +1650,7 @@ void ExpressionAnalyzer::makeExplicitSet(const ASTFunction * node, const Block & DataTypes set_element_types; const ASTPtr & left_arg = args.children.at(0); - const ASTFunction * left_arg_tuple = typeid_cast(left_arg.get()); + const auto * left_arg_tuple = typeid_cast(left_arg.get()); /** NOTE: If the tuple on the left-hand side is specified non-explicitly. * Example: identity((a, b)) IN ((1, 2), (3, 4)) @@ -1672,7 +1672,7 @@ void ExpressionAnalyzer::makeExplicitSet(const ASTFunction * node, const Block & bool single_value = false; ASTPtr elements_ast = arg; - if (ASTFunction * set_func = typeid_cast(arg.get())) + if (auto * set_func = typeid_cast(arg.get())) { if (set_func->name == "tuple") { @@ -1684,7 +1684,7 @@ void ExpressionAnalyzer::makeExplicitSet(const ASTFunction * node, const Block & else { /// Distinguish the case `(x, y) in ((1, 2), (3, 4))` from the case `(x, y) in (1, 2)`. 
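// [editorial note; not part of the original patch] Concretely, with key
// arity >= 2 the RHS is a list of set rows only when its first element is
// itself a tuple: in `(x, y) IN ((1, 2), (3, 4))` every inner tuple is one
// row, while in `(x, y) IN (1, 2)` the whole RHS is a single two-component
// value.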
- ASTFunction * any_element = typeid_cast(set_func->arguments->children.at(0).get()); + auto * any_element = typeid_cast(set_func->arguments->children.at(0).get()); if (set_element_types.size() >= 2 && (!any_element || any_element->name != "tuple")) single_value = true; else @@ -1902,7 +1902,7 @@ void ExpressionAnalyzer::getArrayJoinedColumnsImpl(const ASTPtr & ast) if (typeid_cast(ast.get())) return; - if (ASTIdentifier * node = typeid_cast(ast.get())) + if (auto * node = typeid_cast(ast.get())) { if (node->kind == ASTIdentifier::Column) { @@ -1955,7 +1955,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, && actions_stack.getSampleBlock().has(ast->getColumnName())) return; - if (ASTIdentifier * node = typeid_cast(ast.get())) + if (auto * node = typeid_cast(ast.get())) { std::string name = node->getColumnName(); if (!only_consts && !actions_stack.getSampleBlock().has(name)) @@ -1973,7 +1973,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, ErrorCodes::NOT_AN_AGGREGATE); } } - else if (ASTFunction * node = typeid_cast(ast.get())) + else if (auto * node = typeid_cast(ast.get())) { if (node->name == "lambda") throw Exception("Unexpected lambda expression", ErrorCodes::UNEXPECTED_EXPRESSION); @@ -2049,14 +2049,14 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, for (auto & child : node->arguments->children) { - ASTFunction * lambda = typeid_cast(child.get()); + auto * lambda = typeid_cast(child.get()); if (lambda && lambda->name == "lambda") { /// If the argument is a lambda expression, just remember its approximate type. if (lambda->arguments->children.size() != 2) throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - ASTFunction * lambda_args_tuple = typeid_cast(lambda->arguments->children.at(0).get()); + auto * lambda_args_tuple = typeid_cast(lambda->arguments->children.at(0).get()); if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); @@ -2126,17 +2126,17 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, { ASTPtr child = node->arguments->children[i]; - ASTFunction * lambda = typeid_cast(child.get()); + auto * lambda = typeid_cast(child.get()); if (lambda && lambda->name == "lambda") { - const DataTypeFunction * lambda_type = typeid_cast(argument_types[i].get()); - ASTFunction * lambda_args_tuple = typeid_cast(lambda->arguments->children.at(0).get()); + const auto * lambda_type = typeid_cast(argument_types[i].get()); + auto * lambda_args_tuple = typeid_cast(lambda->arguments->children.at(0).get()); ASTs lambda_arg_asts = lambda_args_tuple->arguments->children; NamesAndTypesList lambda_arguments; for (size_t j = 0; j < lambda_arg_asts.size(); ++j) { - ASTIdentifier * identifier = typeid_cast(lambda_arg_asts[j].get()); + auto * identifier = typeid_cast(lambda_arg_asts[j].get()); if (!identifier) throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); @@ -2192,7 +2192,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries, if (arguments_present) actions_stack.addAction(ExpressionAction::applyFunction(function_builder, argument_names, node->getColumnName())); } - else if (ASTLiteral * node = typeid_cast(ast.get())) + else if (auto * node = typeid_cast(ast.get())) { DataTypePtr type = applyVisitor(FieldToDataType(), node->value); @@ -2232,7 +2232,7 @@ 
void ExpressionAnalyzer::getAggregates(const ASTPtr & ast, ExpressionActionsPtr return; } - const ASTFunction * node = typeid_cast(ast.get()); + const auto * node = typeid_cast(ast.get()); if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name)) { has_aggregation = true; @@ -2276,7 +2276,7 @@ void ExpressionAnalyzer::getAggregates(const ASTPtr & ast, ExpressionActionsPtr void ExpressionAnalyzer::assertNoAggregates(const ASTPtr & ast, const char * description) { - const ASTFunction * node = typeid_cast(ast.get()); + const auto * node = typeid_cast(ast.get()); if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name)) throw Exception("Aggregate function " + node->getColumnName() @@ -2365,9 +2365,9 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty initChain(chain, source_columns); ExpressionActionsChain::Step & step = chain.steps.back(); - const ASTTablesInSelectQueryElement & join_element = static_cast(*select_query->join()); - const ASTTableJoin & join_params = static_cast(*join_element.table_join); - const ASTTableExpression & table_to_join = static_cast(*join_element.table_expression); + const auto & join_element = static_cast(*select_query->join()); + const auto & join_params = static_cast(*join_element.table_join); + const auto & table_to_join = static_cast(*join_element.table_expression); if (join_params.using_expression_list) getRootActions(join_params.using_expression_list, only_types, false, step.actions); @@ -2386,7 +2386,7 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty if (table) { - StorageJoin * storage_join = dynamic_cast(table.get()); + auto * storage_join = dynamic_cast(table.get()); if (storage_join) { @@ -2435,7 +2435,7 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty /// TODO You do not need to set this up when JOIN is only needed on remote servers. 
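// [editorial note; not part of the original patch] This patch replaces
// Join::setSampleBlock(header) with Join::init(header, concurrency): besides
// recording the right-side sample block, init also sizes the per-thread
// pools and hash maps (see setBuildConcurrencyAndInitPool in Join.cpp
// below). Judging by the two call sites in this diff, single-threaded
// callers appear to rely on a default concurrency of one:
//     join->init(source->getHeader());           // single-threaded build
//     join->init(receiver_header, concurrency);  // concurrent build (exchange_perftest.cpp)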
subquery_for_set.join = join; - subquery_for_set.join->setSampleBlock(subquery_for_set.source->getHeader()); + subquery_for_set.join->init(subquery_for_set.source->getHeader()); } addJoinAction(step.actions, false); @@ -2544,7 +2544,7 @@ bool ExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only ASTs asts = select_query->order_expression_list->children; for (const auto & i : asts) { - ASTOrderByElement * ast = typeid_cast(i.get()); + auto * ast = typeid_cast(i.get()); if (!ast || ast->children.empty()) throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); ASTPtr order_expression = ast->children.at(0); @@ -2598,7 +2598,7 @@ void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & chain) con void ExpressionAnalyzer::getActionsBeforeAggregation(const ASTPtr & ast, ExpressionActionsPtr & actions, bool no_subqueries) { - ASTFunction * node = typeid_cast(ast.get()); + auto * node = typeid_cast(ast.get()); if (node && AggregateFunctionFactory::instance().isAggregateFunctionName(node->name)) for (auto & argument : node->arguments->children) @@ -2714,7 +2714,7 @@ void ExpressionAnalyzer::collectUsedColumns() NameSet required_joined_columns; getRequiredSourceColumnsImpl(ast, available_columns, required, ignored, available_joined_columns, required_joined_columns); - for (NamesAndTypesList::iterator it = columns_added_by_join.begin(); it != columns_added_by_join.end();) + for (auto it = columns_added_by_join.begin(); it != columns_added_by_join.end();) { if (required_joined_columns.count(it->name)) ++it; @@ -2737,7 +2737,7 @@ void ExpressionAnalyzer::collectUsedColumns() NameSet unknown_required_source_columns = required; - for (NamesAndTypesList::iterator it = source_columns.begin(); it != source_columns.end();) + for (auto it = source_columns.begin(); it != source_columns.end();) { unknown_required_source_columns.erase(it->name); @@ -2777,8 +2777,8 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd if (!node) return; - const ASTTableJoin & table_join = static_cast(*node->table_join); - const ASTTableExpression & table_expression = static_cast(*node->table_expression); + const auto & table_join = static_cast(*node->table_join); + const auto & table_expression = static_cast(*node->table_expression); Block nested_result_sample; if (table_expression.database_and_table_name) @@ -2847,7 +2847,7 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, * - we put identifiers available from JOIN in required_joined_columns. 
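      * [editorial example; not part of the original patch] For
      * `SELECT t1.a, t2.b FROM t1 ANY LEFT JOIN t2 USING (k)`, the identifiers
      * `t1.a` and `k` end up among the required source columns, while `t2.b`
      * goes to required_joined_columns.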
*/ - if (ASTIdentifier * node = typeid_cast(ast.get())) + if (auto * node = typeid_cast(ast.get())) { if (node->kind == ASTIdentifier::Column && !ignored_names.count(node->name) @@ -2863,14 +2863,14 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, return; } - if (ASTFunction * node = typeid_cast(ast.get())) + if (auto * node = typeid_cast(ast.get())) { if (node->name == "lambda") { if (node->arguments->children.size() != 2) throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - ASTFunction * lambda_args_tuple = typeid_cast(node->arguments->children.at(0).get()); + auto * lambda_args_tuple = typeid_cast(node->arguments->children.at(0).get()); if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); @@ -2879,7 +2879,7 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, Names added_ignored; for (auto & child : lambda_args_tuple->arguments->children) { - ASTIdentifier * identifier = typeid_cast(child.get()); + auto * identifier = typeid_cast(child.get()); if (!identifier) throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); @@ -2926,7 +2926,7 @@ void ExpressionAnalyzer::getRequiredSourceColumnsImpl(const ASTPtr & ast, static bool hasArrayJoin(const ASTPtr & ast) { - if (const ASTFunction * function = typeid_cast(&*ast)) + if (const auto * function = typeid_cast(&*ast)) if (function->name == "arrayJoin") return true; diff --git a/dbms/src/Interpreters/IDAsPathUpgrader.cpp b/dbms/src/Interpreters/IDAsPathUpgrader.cpp index ce72625fd46..8c807b537e9 100644 --- a/dbms/src/Interpreters/IDAsPathUpgrader.cpp +++ b/dbms/src/Interpreters/IDAsPathUpgrader.cpp @@ -33,10 +33,10 @@ #include #include #include -#include #include #include -#include +#include +#include #include #include @@ -71,7 +71,7 @@ std::shared_ptr getDatabaseEngine(const FileProviderPtr & file_prov ParserCreateQuery parser; ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "in file " + filename, 0); ASTCreateQuery & ast_create_query = typeid_cast(*ast); - auto storage = ast_create_query.storage; + auto * storage = ast_create_query.storage; if (storage == nullptr || storage->engine == nullptr || storage->engine->name.empty()) { throw Exception("Can not get database engine for file: " + filename, ErrorCodes::LOGICAL_ERROR); @@ -97,7 +97,7 @@ std::pair getTableInfo(const FileProviderPtr & file_pro ParserCreateQuery parser; ASTPtr ast = parseQuery(parser, definition.data(), definition.data() + definition.size(), "in file " + table_metadata_file, 0); ASTCreateQuery & ast_create_query = typeid_cast(*ast); - auto storage = ast_create_query.storage; + auto * storage = ast_create_query.storage; if (storage == nullptr || storage->engine == nullptr || storage->engine->name.empty()) { throw Exception("Can not get table engine for file: " + table_metadata_file, ErrorCodes::LOGICAL_ERROR); @@ -105,7 +105,7 @@ std::pair getTableInfo(const FileProviderPtr & file_pro TiDB::TableInfo info; ASTFunction * engine = storage->engine; - auto * args = typeid_cast(engine->arguments.get()); + const auto * args = typeid_cast(engine->arguments.get()); if (args == nullptr) throw Exception("Can not cast table engine arguments", ErrorCodes::BAD_ARGUMENTS); @@ -399,12 +399,12 @@ String IDAsPathUpgrader::DatabaseDiskInfo::getNewMetaDirectory(const String & ro return root_path + (endsWith(root_path, "/") ? 
"" : "/") + "/metadata/" + escapeForFileName(newName()) + "/"; } // "data/" -String IDAsPathUpgrader::DatabaseDiskInfo::getNewDataDirectory(const String & root_path) const +String IDAsPathUpgrader::DatabaseDiskInfo::getNewDataDirectory(const String & root_path) { return root_path + "/data/"; } // "extra_data/" -String IDAsPathUpgrader::DatabaseDiskInfo::getNewExtraDirectory(const String & extra_root) const +String IDAsPathUpgrader::DatabaseDiskInfo::getNewExtraDirectory(const String & extra_root) { return extra_root + "/"; } @@ -457,11 +457,11 @@ IDAsPathUpgrader::IDAsPathUpgrader(Context & global_ctx_, bool is_mock_, std::un bool IDAsPathUpgrader::needUpgrade() { - const auto metadataPath = global_context.getPath() + "/metadata"; + const auto metadata_path = global_context.getPath() + "/metadata"; // For old version, we have database directories and its `.sql` file Poco::DirectoryIterator dir_end; - for (Poco::DirectoryIterator it(metadataPath); it != dir_end; ++it) + for (Poco::DirectoryIterator it(metadata_path); it != dir_end; ++it) { if (!it->isDirectory()) continue; @@ -893,7 +893,7 @@ void IDAsPathUpgrader::renameTable( args->children.emplace_back(literal); else if (args->children.size() >= 2) args->children.at(1) = literal; - } while (0); + } while (false); } const String new_tbl_meta_file = table.getNewMetaFilePath(root_path, db_info); diff --git a/dbms/src/Interpreters/IDAsPathUpgrader.h b/dbms/src/Interpreters/IDAsPathUpgrader.h index 8ef57f7f2cc..38dc37536aa 100644 --- a/dbms/src/Interpreters/IDAsPathUpgrader.h +++ b/dbms/src/Interpreters/IDAsPathUpgrader.h @@ -137,9 +137,9 @@ class IDAsPathUpgrader // "metadata/db_${id}/" String getNewMetaDirectory(const String & root_path) const; // "data/" - String getNewDataDirectory(const String & root_path) const; + static String getNewDataDirectory(const String & root_path); // "extra_data/" - String getNewExtraDirectory(const String & extra_root) const; + static String getNewExtraDirectory(const String & extra_root); private: // "metadata/${db_name}.sql" diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index a56fdb849e3..51b55f65bd4 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -55,12 +55,12 @@ #include #include #include -#include #include #include #include #include #include +#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" @@ -496,7 +496,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt { if (expressions.has_join) { - const ASTTableJoin & join = static_cast(*query.join()->table_join); + const auto & join = static_cast(*query.join()->table_join); if (join.kind == ASTTableJoin::Kind::Full || join.kind == ASTTableJoin::Kind::Right) pipeline.stream_with_non_joined_data = expressions.before_join->createStreamWithNonJoinedDataIfFullOrRightJoin( pipeline.firstStream()->getHeader(), @@ -816,7 +816,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline for (size_t i = 0; i < arr->size(); i++) { - String str = arr->getElement(i); + auto str = arr->getElement(i); ::metapb::Region region; ::google::protobuf::TextFormat::ParseFromString(str, ®ion); @@ -839,7 +839,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline } /// PARTITION SELECT only supports MergeTree family now. 
- if (const ASTSelectQuery * select_query = typeid_cast(query_info.query.get())) + if (const auto * select_query = typeid_cast(query_info.query.get())) { if (select_query->partition_expression_list) { @@ -860,7 +860,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline if (auto managed_storage = std::dynamic_pointer_cast(storage); managed_storage && managed_storage->engineType() == TiDB::StorageEngine::DT) { - if (const ASTSelectQuery * select_query = typeid_cast(query_info.query.get())) + if (const auto * select_query = typeid_cast(query_info.query.get())) { // When `no_kvstore` is true, we do not perform learner read if (likely(!select_query->no_kvstore)) @@ -910,7 +910,7 @@ QueryProcessingStage::Enum InterpreterSelectQuery::executeFetchColumns(Pipeline QuotaForIntervals & quota = context.getQuota(); pipeline.transform([&](auto & stream) { - if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) + if (auto * p_stream = dynamic_cast(stream.get())) { p_stream->setLimits(limits); @@ -1275,7 +1275,7 @@ void InterpreterSelectQuery::executeLimitBy(Pipeline & pipeline) // NOLINT for (const auto & elem : query.limit_by_expression_list->children) columns.emplace_back(elem->getColumnName()); - size_t value = safeGet(typeid_cast(*query.limit_by_value).value); + auto value = safeGet(typeid_cast(*query.limit_by_value).value); pipeline.transform([&](auto & stream) { stream = std::make_shared(stream, value, columns); @@ -1347,7 +1347,7 @@ void InterpreterSelectQuery::executeExtremes(Pipeline & pipeline) return; pipeline.transform([&](auto & stream) { - if (IProfilingBlockInputStream * p_stream = dynamic_cast(stream.get())) + if (auto * p_stream = dynamic_cast(stream.get())) p_stream->enableExtremes(); }); } diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index f1275d8e88e..820618a6e8b 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -26,10 +26,6 @@ #include #include #include -#include - -#include "executeQuery.h" - namespace DB { @@ -42,40 +38,67 @@ extern const int TYPE_MISMATCH; extern const int ILLEGAL_COLUMN; } // namespace ErrorCodes +namespace +{ /// Whether we need to use the hash table maps_*_full, in which we remember whether the row was joined. 
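// [editorial note; not part of the original patch] The predicates below
// classify ASTTableJoin::Kind for the rest of this file; for instance,
// getFullness() is true exactly for the kinds that must remember
// not-joined build-side rows:
//     getFullness(Kind::Right) == true      getFullness(Kind::Full) == true
//     getFullness(Kind::Inner) == false     getFullness(Kind::Left) == false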
-static bool getFullness(ASTTableJoin::Kind kind) +bool getFullness(ASTTableJoin::Kind kind) { return kind == ASTTableJoin::Kind::Right || kind == ASTTableJoin::Kind::Cross_Right || kind == ASTTableJoin::Kind::Full; } -static bool isLeftJoin(ASTTableJoin::Kind kind) +bool isLeftJoin(ASTTableJoin::Kind kind) { return kind == ASTTableJoin::Kind::Left || kind == ASTTableJoin::Kind::Cross_Left; } -static bool isRightJoin(ASTTableJoin::Kind kind) +bool isRightJoin(ASTTableJoin::Kind kind) { return kind == ASTTableJoin::Kind::Right || kind == ASTTableJoin::Kind::Cross_Right; } -static bool isInnerJoin(ASTTableJoin::Kind kind) +bool isInnerJoin(ASTTableJoin::Kind kind) { return kind == ASTTableJoin::Kind::Inner || kind == ASTTableJoin::Kind::Cross; } -static bool isAntiJoin(ASTTableJoin::Kind kind) +bool isAntiJoin(ASTTableJoin::Kind kind) { return kind == ASTTableJoin::Kind::Anti || kind == ASTTableJoin::Kind::Cross_Anti; } -static bool isCrossJoin(ASTTableJoin::Kind kind) +bool isCrossJoin(ASTTableJoin::Kind kind) { return kind == ASTTableJoin::Kind::Cross || kind == ASTTableJoin::Kind::Cross_Left || kind == ASTTableJoin::Kind::Cross_Right || kind == ASTTableJoin::Kind::Cross_Anti || kind == ASTTableJoin::Kind::Cross_LeftSemi || kind == ASTTableJoin::Kind::Cross_LeftAnti; } /// (cartesian) (anti) left semi join. -static bool isLeftSemiFamily(ASTTableJoin::Kind kind) +bool isLeftSemiFamily(ASTTableJoin::Kind kind) { return kind == ASTTableJoin::Kind::LeftSemi || kind == ASTTableJoin::Kind::LeftAnti || kind == ASTTableJoin::Kind::Cross_LeftSemi || kind == ASTTableJoin::Kind::Cross_LeftAnti; } +void convertColumnToNullable(ColumnWithTypeAndName & column) +{ + column.type = makeNullable(column.type); + if (column.column) + column.column = makeNullable(column.column); +} + +ColumnRawPtrs getKeyColumns(const Names & key_names, const Block & block) +{ + size_t keys_size = key_names.size(); + ColumnRawPtrs key_columns(keys_size); + + for (size_t i = 0; i < keys_size; ++i) + { + key_columns[i] = block.getByName(key_names[i]).column.get(); + + /// We will join only keys, where all components are not NULL. 
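// [editorial note; not part of the original patch] A Nullable key column
// therefore joins on its nested data: e.g. a Nullable(Int64) key is hashed
// and compared as plain Int64, and rows flagged NULL in the null-map are
// handled separately rather than matched by key.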
+ if (key_columns[i]->isColumnNullable()) + key_columns[i] = &static_cast(*key_columns[i]).getNestedColumn(); + } + + return key_columns; +} +} // namespace + const std::string Join::match_helper_prefix = "__left-semi-join-match-helper"; const DataTypePtr Join::match_helper_type = makeNullable(std::make_shared()); @@ -88,7 +111,6 @@ Join::Join( ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, const String & req_id, - size_t build_concurrency_, const TiDB::TiDBCollators & collators_, const String & left_filter_column_, const String & right_filter_column_, @@ -103,7 +125,8 @@ Join::Join( , key_names_left(key_names_left_) , key_names_right(key_names_right_) , use_nulls(use_nulls_) - , build_concurrency(std::max(1, build_concurrency_)) + , build_concurrency(0) + , build_set_exceeded(false) , collators(collators_) , left_filter_column(left_filter_column_) , right_filter_column(right_filter_column_) @@ -116,9 +139,6 @@ Join::Join( , log(Logger::get("Join", req_id)) , limits(limits) { - build_set_exceeded.store(false); - for (size_t i = 0; i < build_concurrency; i++) - pools.emplace_back(std::make_shared()); if (other_condition_ptr != nullptr) { /// if there is other_condition, then should keep all the valid rows during probe stage @@ -127,14 +147,9 @@ Join::Join( strictness = ASTTableJoin::Strictness::All; } } - if (getFullness(kind)) - { - for (size_t i = 0; i < build_concurrency; i++) - rows_not_inserted_to_map.push_back(std::make_unique()); - } - if (!left_filter_column.empty() && !isLeftJoin(kind)) + if (unlikely(!left_filter_column.empty() && !isLeftJoin(kind))) throw Exception("Not supported: non left join with left conditions"); - if (!right_filter_column.empty() && !isRightJoin(kind)) + if (unlikely(!right_filter_column.empty() && !isRightJoin(kind))) throw Exception("Not supported: non right join with right conditions"); } @@ -328,7 +343,7 @@ struct KeyGetterForType using Type = typename KeyGetterForTypeImpl::Type; }; -void Join::init(Type type_) +void Join::initMapImpl(Type type_) { type = type_; @@ -338,16 +353,16 @@ void Join::init(Type type_) if (!getFullness(kind)) { if (strictness == ASTTableJoin::Strictness::Any) - initImpl(maps_any, type, build_concurrency); + initImpl(maps_any, type, getBuildConcurrencyInternal()); else - initImpl(maps_all, type, build_concurrency); + initImpl(maps_all, type, getBuildConcurrencyInternal()); } else { if (strictness == ASTTableJoin::Strictness::Any) - initImpl(maps_any_full, type, build_concurrency); + initImpl(maps_any_full, type, getBuildConcurrencyInternal()); else - initImpl(maps_all_full, type, build_concurrency); + initImpl(maps_all_full, type, getBuildConcurrencyInternal()); } } @@ -396,37 +411,24 @@ size_t Join::getTotalByteCount() const return res; } - -static void convertColumnToNullable(ColumnWithTypeAndName & column) +void Join::setBuildConcurrencyAndInitPool(size_t build_concurrency_) { - column.type = makeNullable(column.type); - if (column.column) - column.column = makeNullable(column.column); -} + if (unlikely(build_concurrency > 0)) + throw Exception("Logical error: `setBuildConcurrencyAndInitPool` shouldn't be called more than once", ErrorCodes::LOGICAL_ERROR); + build_concurrency = std::max(1, build_concurrency_); - -void Join::setSampleBlock(const Block & block) -{ - std::unique_lock lock(rwlock); - - if (!empty()) - return; - - size_t keys_size = key_names_right.size(); - ColumnRawPtrs key_columns(keys_size); - - for (size_t i = 0; i < keys_size; ++i) + for (size_t i = 0; i < getBuildConcurrencyInternal(); ++i) 
+ pools.emplace_back(std::make_shared()); + // init for non-joined-streams. + if (getFullness(kind)) { - key_columns[i] = block.getByName(key_names_right[i]).column.get(); - - /// We will join only keys, where all components are not NULL. - if (key_columns[i]->isColumnNullable()) - key_columns[i] = &static_cast(*key_columns[i]).getNestedColumn(); + for (size_t i = 0; i < getNotJoinedStreamConcurrencyInternal(); ++i) + rows_not_inserted_to_map.push_back(std::make_unique()); } +} - /// Choose data structure to use for JOIN. - init(chooseMethod(key_columns, key_sizes)); - +void Join::setSampleBlock(const Block & block) +{ sample_block_with_columns_to_add = materializeBlock(block); /// Move from `sample_block_with_columns_to_add` key columns to `sample_block_with_keys`, keeping the order. @@ -461,6 +463,18 @@ void Join::setSampleBlock(const Block & block) sample_block_with_columns_to_add.insert(ColumnWithTypeAndName(Join::match_helper_type, match_helper_name)); } +void Join::init(const Block & sample_block, size_t build_concurrency_) +{ + std::unique_lock lock(rwlock); + if (unlikely(initialized)) + throw Exception("Logical error: Join has been initialized", ErrorCodes::LOGICAL_ERROR); + initialized = true; + setBuildConcurrencyAndInitPool(build_concurrency_); + /// Choose data structure to use for JOIN. + initMapImpl(chooseMethod(getKeyColumns(key_names_right, sample_block), key_sizes)); + setSampleBlock(sample_block); +} + namespace { @@ -725,7 +739,7 @@ void recordFilteredRows(const Block & block, const String & filter_column, Colum column = column->convertToFullColumnIfConst(); if (column->isColumnNullable()) { - const ColumnNullable & column_nullable = static_cast(*column); + const auto & column_nullable = static_cast(*column); if (!null_map_holder) { null_map_holder = column_nullable.getNullMapColumnPtr(); @@ -761,9 +775,9 @@ void recordFilteredRows(const Block & block, const String & filter_column, Colum bool Join::insertFromBlock(const Block & block) { - if (empty()) - throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); std::unique_lock lock(rwlock); + if (unlikely(!initialized)) + throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); blocks.push_back(block); Block * stored_block = &blocks.back(); return insertFromBlockInternal(stored_block, 0); @@ -772,11 +786,12 @@ bool Join::insertFromBlock(const Block & block) /// the block should be valid. 
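To make the new initialization contract concrete, here is a minimal usage sketch (hypothetical driver code, not part of this patch): `init` is called exactly once with the right-side sample block and the build concurrency, after which each build stream may concurrently call the per-stream `insertFromBlock` overload that follows.

// Hypothetical driver illustrating the init-then-build contract (sketch only).
void buildJoinConcurrently(const std::shared_ptr<Join> & join,
                           const Block & right_sample_block,
                           std::vector<BlockInputStreamPtr> & build_streams)
{
    const size_t build_concurrency = build_streams.size();
    // A second call to init() throws LOGICAL_ERROR, and insertFromBlock()
    // throws if init() has not been called yet.
    join->init(right_sample_block, build_concurrency);

    std::vector<std::thread> workers;
    for (size_t i = 0; i < build_concurrency; ++i)
        workers.emplace_back([&join, &build_streams, i] {
            while (Block block = build_streams[i]->read())
                join->insertFromBlock(block, /*stream_index=*/i);
        });
    for (auto & worker : workers)
        worker.join();
}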
void Join::insertFromBlock(const Block & block, size_t stream_index) { - assert(stream_index < build_concurrency); + std::shared_lock lock(rwlock); + assert(stream_index < getBuildConcurrencyInternal()); + assert(stream_index < getNotJoinedStreamConcurrencyInternal()); - if (empty()) + if (unlikely(!initialized)) throw Exception("Logical error: Join was not initialized", ErrorCodes::LOGICAL_ERROR); - std::shared_lock lock(rwlock); Block * stored_block = nullptr; { std::lock_guard lk(blocks_lock); @@ -872,16 +887,16 @@ bool Join::insertFromBlockInternal(Block * stored_block, size_t stream_index) if (!getFullness(kind)) { if (strictness == ASTTableJoin::Strictness::Any) - insertFromBlockImpl(type, maps_any, rows, key_columns, key_sizes, collators, stored_block, null_map, nullptr, stream_index, build_concurrency, *pools[stream_index]); + insertFromBlockImpl(type, maps_any, rows, key_columns, key_sizes, collators, stored_block, null_map, nullptr, stream_index, getBuildConcurrencyInternal(), *pools[stream_index]); else - insertFromBlockImpl(type, maps_all, rows, key_columns, key_sizes, collators, stored_block, null_map, nullptr, stream_index, build_concurrency, *pools[stream_index]); + insertFromBlockImpl(type, maps_all, rows, key_columns, key_sizes, collators, stored_block, null_map, nullptr, stream_index, getBuildConcurrencyInternal(), *pools[stream_index]); } else { if (strictness == ASTTableJoin::Strictness::Any) - insertFromBlockImpl(type, maps_any_full, rows, key_columns, key_sizes, collators, stored_block, null_map, rows_not_inserted_to_map[stream_index].get(), stream_index, build_concurrency, *pools[stream_index]); + insertFromBlockImpl(type, maps_any_full, rows, key_columns, key_sizes, collators, stored_block, null_map, rows_not_inserted_to_map[stream_index].get(), stream_index, getBuildConcurrencyInternal(), *pools[stream_index]); else - insertFromBlockImpl(type, maps_all_full, rows, key_columns, key_sizes, collators, stored_block, null_map, rows_not_inserted_to_map[stream_index].get(), stream_index, build_concurrency, *pools[stream_index]); + insertFromBlockImpl(type, maps_all_full, rows, key_columns, key_sizes, collators, stored_block, null_map, rows_not_inserted_to_map[stream_index].get(), stream_index, getBuildConcurrencyInternal(), *pools[stream_index]); } } @@ -1958,7 +1973,8 @@ class NonJoinedBlockInputStream : public IProfilingBlockInputStream , max_block_size(max_block_size_) , add_not_mapped_rows(true) { - if (step > parent.build_concurrency || index >= parent.build_concurrency) + size_t build_concurrency = parent.getBuildConcurrency(); + if (unlikely(step > build_concurrency || index >= build_concurrency)) throw Exception("The concurrency of NonJoinedBlockInputStream should not be larger than join build concurrency"); /** left_sample_block contains keys and "left" columns. 
@@ -2048,7 +2064,7 @@ class NonJoinedBlockInputStream : public IProfilingBlockInputStream MutableColumns columns_right; std::unique_ptr> position; /// type erasure - size_t current_segment; + size_t current_segment = 0; Join::RowRefList * current_not_mapped_row = nullptr; void setNextCurrentNotMappedRow() diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index 89dad0d1ca6..01916aa1dcc 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -99,7 +99,6 @@ class Join ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, const String & req_id, - size_t build_concurrency = 1, const TiDB::TiDBCollators & collators_ = TiDB::dummy_collators, const String & left_filter_column = "", const String & right_filter_column = "", @@ -109,17 +108,10 @@ class Join size_t max_block_size = 0, const String & match_helper_name = ""); - bool empty() { return type == Type::EMPTY; } - - /** Set information about structure of right hand of JOIN (joined data). + /** Call `setBuildConcurrencyAndInitPool`, `initMapImpl` and `setSampleBlock`. * You must call this method before subsequent calls to insertFromBlock. */ - void setSampleBlock(const Block & block); - - /** Add block of data from right hand of JOIN to the map. - * Returns false, if some limit was exceeded and you should not insert more data. - */ - bool insertFromBlockInternal(Block * stored_block, size_t stream_index); + void init(const Block & sample_block, size_t build_concurrency_ = 1); bool insertFromBlock(const Block & block); @@ -153,9 +145,19 @@ class Join bool useNulls() const { return use_nulls; } const Names & getLeftJoinKeys() const { return key_names_left; } - size_t getBuildConcurrency() const { return build_concurrency; } + + size_t getBuildConcurrency() const + { + std::shared_lock lock(rwlock); + return getBuildConcurrencyInternal(); + } + size_t getNotJoinedStreamConcurrency() const + { + std::shared_lock lock(rwlock); + return getNotJoinedStreamConcurrencyInternal(); + } + bool isBuildSetExceeded() const { return build_set_exceeded.load(); } - size_t getNotJoinedStreamConcurrency() const { return build_concurrency; }; enum BuildTableState { @@ -171,7 +173,7 @@ class Join const Block * block; size_t row_num; - RowRef() {} + RowRef() = default; RowRef(const Block * block_, size_t row_num_) : block(block_) , row_num(row_num_) @@ -183,7 +185,7 @@ class Join { RowRefList * next = nullptr; - RowRefList() {} + RowRefList() = default; RowRefList(const Block * block_, size_t row_num_) : RowRef(block_, row_num_) {} @@ -342,11 +344,40 @@ class Join */ mutable std::shared_mutex rwlock; - void init(Type type_); + bool initialized = false; + + size_t getBuildConcurrencyInternal() const + { + if (unlikely(build_concurrency == 0)) + throw Exception("Logical error: `setBuildConcurrencyAndInitPool` has not been called", ErrorCodes::LOGICAL_ERROR); + return build_concurrency; + } + size_t getNotJoinedStreamConcurrencyInternal() const + { + return getBuildConcurrencyInternal(); + } + + /// Initialize map implementations for various join types. + void initMapImpl(Type type_); + + /** Set information about structure of right hand of JOIN (joined data). + * You must call this method before subsequent calls to insertFromBlock. + */ + void setSampleBlock(const Block & block); + + /** Set Join build concurrency and init the memory pools. + * You must call this method before subsequent calls to insertFromBlock.
+ */ + void setBuildConcurrencyAndInitPool(size_t build_concurrency_); /// Throw an exception if blocks have different types of key columns. void checkTypesOfKeys(const Block & block_left, const Block & block_right) const; + /** Add block of data from right hand of JOIN to the map. + * Returns false, if some limit was exceeded and you should not insert more data. + */ + bool insertFromBlockInternal(Block * stored_block, size_t stream_index); + template void joinBlockImpl(Block & block, const Maps & maps) const; diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 6c96e7c22ad..96cfc0a58ae 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -480,7 +480,7 @@ void executeQuery( if (streams.in) { - const ASTQueryWithOutput * ast_query_with_output = dynamic_cast(ast.get()); + const auto * ast_query_with_output = dynamic_cast(ast.get()); WriteBuffer * out_buf = &ostr; std::optional out_file_buf; diff --git a/dbms/src/Interpreters/loadMetadata.cpp b/dbms/src/Interpreters/loadMetadata.cpp index 77c2bda6cda..54167f364e7 100644 --- a/dbms/src/Interpreters/loadMetadata.cpp +++ b/dbms/src/Interpreters/loadMetadata.cpp @@ -26,9 +26,9 @@ #include #include #include -#include -#include #include +#include +#include #include #include diff --git a/dbms/src/Interpreters/sortBlock.cpp b/dbms/src/Interpreters/sortBlock.cpp index bfb0fa3afb7..81626d83fe2 100644 --- a/dbms/src/Interpreters/sortBlock.cpp +++ b/dbms/src/Interpreters/sortBlock.cpp @@ -51,7 +51,7 @@ static inline bool needCollation(const IColumn * column, const SortColumnDescrip { if (!description.collator) return false; - auto not_null_column = column->isColumnNullable() ? typeid_cast(column)->getNestedColumnPtr().get() : column; + const auto * not_null_column = column->isColumnNullable() ? 
typeid_cast(column)->getNestedColumnPtr().get() : column; if (not_null_column->isColumnConst()) return false; @@ -73,9 +73,9 @@ struct PartialSortingLess bool operator()(size_t a, size_t b) const { - for (ColumnsWithSortDescriptions::const_iterator it = columns.begin(); it != columns.end(); ++it) + for (const auto & column : columns) { - int res = it->second.direction * it->first->compareAt(a, b, *it->first, it->second.nulls_direction); + int res = column.second.direction * column.first->compareAt(a, b, *column.first, column.second.nulls_direction); if (res < 0) return true; else if (res > 0) @@ -95,15 +95,15 @@ struct PartialSortingLessWithCollation bool operator()(size_t a, size_t b) const { - for (ColumnsWithSortDescriptions::const_iterator it = columns.begin(); it != columns.end(); ++it) + for (const auto & column : columns) { int res; - if (needCollation(it->first, it->second)) - res = it->first->compareAtWithCollation(a, b, *it->first, it->second.nulls_direction, *it->second.collator); + if (needCollation(column.first, column.second)) + res = column.first->compareAt(a, b, *column.first, column.second.nulls_direction, *column.second.collator); else - res = it->first->compareAt(a, b, *it->first, it->second.nulls_direction); + res = column.first->compareAt(a, b, *column.first, column.second.nulls_direction); - res *= it->second.direction; + res *= column.second.direction; if (res < 0) return true; else if (res > 0) @@ -130,7 +130,7 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) IColumn::Permutation perm; if (needCollation(column, description[0])) - column->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); + column->getPermutation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); else column->getPermutation(reverse, limit, description[0].nulls_direction, perm); diff --git a/dbms/src/Server/RaftConfigParser.cpp b/dbms/src/Server/RaftConfigParser.cpp index 8e146dd842e..2f0a88855cd 100644 --- a/dbms/src/Server/RaftConfigParser.cpp +++ b/dbms/src/Server/RaftConfigParser.cpp @@ -92,11 +92,7 @@ TiFlashRaftConfig TiFlashRaftConfig::parseSettings(Poco::Util::LayeredConfigurat { String snapshot_method = config.getString("raft.snapshot.method"); std::transform(snapshot_method.begin(), snapshot_method.end(), snapshot_method.begin(), [](char ch) { return std::tolower(ch); }); - if (snapshot_method == "block") - { - res.snapshot_apply_method = TiDB::SnapshotApplyMethod::Block; - } - else if (snapshot_method == "file1") + if (snapshot_method == "file1") { res.snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Directory; } diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 705b8a533f3..04676ef969d 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -65,10 +65,10 @@ #include #include #include -#include #include #include #include +#include #include #include #include @@ -464,7 +464,7 @@ struct RaftStoreProxyRunner : boost::noncopyable } RunRaftStoreProxyParms parms; - pthread_t thread; + pthread_t thread{}; Poco::Logger * log; }; @@ -477,6 +477,11 @@ void initStores(Context & global_context, Poco::Logger * log, bool lazily_init_s int err_cnt = 0; for (auto & [table_id, storage] : storages) { + // This will skip the init of storages that do not contain any data. TiFlash now syncs the schema and + // creates all tables regardless of whether the table has a TiFlash replica defined, so there may be lots + // of empty tables in TiFlash.
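As an illustration of the skip-empty-store behavior described above, the per-storage decision presumably boils down to the following sketch (assumed shape only; `storeInited`, `dataDirExist` and `getAndMaybeInitStore` are assumed helpers, and the real logic lives in `StorageDeltaMerge::initStoreIfDataDirExist`):

// Sketch only: assumed shape of initStoreIfDataDirExist().
bool StorageDeltaMerge::initStoreIfDataDirExist()
{
    if (storeInited())
        return true;
    // No stable dir on disk means this table never received any data,
    // so skip the expensive DeltaMergeStore initialization.
    if (!dataDirExist())
        return false;
    getAndMaybeInitStore();
    return true;
}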
+ // Note that we still need to init stores that contain data (i.e. the stable dir of the storage + // exists), or the data size reported to PD would be incorrect. try { init_cnt += storage->initStoreIfDataDirExist() ? 1 : 0; @@ -498,6 +503,7 @@ void initStores(Context & global_context, Poco::Logger * log, bool lazily_init_s if (lazily_init_store) { LOG_FMT_INFO(log, "Lazily init store."); + // Run the init in another thread to shorten the start time of TiFlash std::thread(do_init_stores).detach(); } else @@ -1149,7 +1155,7 @@ int Server::main(const std::vector & /*args*/) /// Try to increase limit on number of open files. { - rlimit rlim; + rlimit rlim{}; if (getrlimit(RLIMIT_NOFILE, &rlim)) throw Poco::Exception("Cannot getrlimit"); @@ -1437,6 +1443,7 @@ int Server::main(const std::vector & /*args*/) } /// This object will periodically calculate some metrics. + /// Should be initialized after `createTMTContext` because we collect some data from the TiFlash context object. AsynchronousMetrics async_metrics(*global_context); attachSystemTablesAsync(*global_context->getDatabase("system"), async_metrics); diff --git a/dbms/src/Server/tests/gtest_server_config.cpp b/dbms/src/Server/tests/gtest_server_config.cpp index 53705f1a351..cf53a8d6c18 100644 --- a/dbms/src/Server/tests/gtest_server_config.cpp +++ b/dbms/src/Server/tests/gtest_server_config.cpp @@ -371,10 +371,10 @@ dt_page_gc_low_write_prob = 0.2 std::unique_ptr path_pool = std::make_unique(global_ctx.getPathPool().withTable("test", "t1", false)); std::unique_ptr storage_pool = std::make_unique(global_ctx, /*ns_id*/ 100, *path_pool, "test.t1"); - auto verify_storage_pool_reload_config = [&global_ctx](std::unique_ptr & storage_pool) { + auto verify_storage_pool_reload_config = [&](std::unique_ptr & storage_pool) { DB::Settings & settings = global_ctx.getSettingsRef(); - auto cfg = storage_pool->data_storage_v2->getSettings(); + auto cfg = storage_pool->dataWriter()->getSettings(); EXPECT_NE(cfg.gc_min_files, settings.dt_storage_pool_data_gc_min_file_num); EXPECT_NE(cfg.gc_min_legacy_num, settings.dt_storage_pool_data_gc_min_legacy_num); EXPECT_NE(cfg.gc_min_bytes, settings.dt_storage_pool_data_gc_min_bytes); @@ -384,9 +384,9 @@ dt_page_gc_low_write_prob = 0.2 EXPECT_NE(cfg.open_file_max_idle_time, settings.dt_open_file_max_idle_seconds); EXPECT_NE(cfg.prob_do_gc_when_write_is_low, settings.dt_page_gc_low_write_prob * 1000); - storage_pool->gc(settings, DM::StoragePool::Seconds(0)); + global_ctx.getGlobalStoragePool()->gc(); - cfg = storage_pool->data_storage_v2->getSettings(); + cfg = storage_pool->dataWriter()->getSettings(); EXPECT_EQ(cfg.gc_min_files, settings.dt_storage_pool_data_gc_min_file_num); EXPECT_EQ(cfg.gc_min_legacy_num, settings.dt_storage_pool_data_gc_min_legacy_num); EXPECT_EQ(cfg.gc_min_bytes, settings.dt_storage_pool_data_gc_min_bytes); diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.cpp b/dbms/src/Storages/DeltaMerge/StoragePool.cpp index a040c5b6c6a..fa765cd9b1d 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.cpp +++ b/dbms/src/Storages/DeltaMerge/StoragePool.cpp @@ -130,7 +130,7 @@ void GlobalStoragePool::restore() bool GlobalStoragePool::gc() { - return gc(Settings(), true, DELTA_MERGE_GC_PERIOD); + return gc(global_context.getSettingsRef(), true, DELTA_MERGE_GC_PERIOD); } bool GlobalStoragePool::gc(const Settings & settings, bool immediately, const Seconds & try_gc_period) diff --git a/dbms/src/Storages/DeltaMerge/StoragePool.h b/dbms/src/Storages/DeltaMerge/StoragePool.h index
d05454a5431..77684ea46cb 100644 --- a/dbms/src/Storages/DeltaMerge/StoragePool.h +++ b/dbms/src/Storages/DeltaMerge/StoragePool.h @@ -28,6 +28,7 @@ struct Settings; class Context; class StoragePathPool; class StableDiskDelegator; +class AsynchronousMetrics; namespace DM { @@ -50,6 +51,7 @@ class GlobalStoragePool : private boost::noncopyable void restore(); friend class StoragePool; + friend class ::DB::AsynchronousMetrics; // GC immediately // Only used on dbgFuncMisc diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp index 1d0e00a5b58..35e6c3d00c6 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_delta_merge_store.cpp @@ -50,6 +50,7 @@ extern const char force_triggle_foreground_flush[]; extern const char force_set_segment_ingest_packs_fail[]; extern const char segment_merge_after_ingest_packs[]; extern const char force_set_segment_physical_split[]; +extern const char force_set_page_file_write_errno[]; } // namespace FailPoints namespace DM @@ -495,6 +496,198 @@ try } CATCH +TEST_P(DeltaMergeStoreRWTest, WriteCrashBeforeWalWithoutCache) +try +{ + const ColumnDefine col_str_define(2, "col2", std::make_shared()); + const ColumnDefine col_i8_define(3, "i8", std::make_shared()); + { + auto table_column_defines = DMTestEnv::getDefaultColumns(); + table_column_defines->emplace_back(col_str_define); + table_column_defines->emplace_back(col_i8_define); + + store = reload(table_column_defines); + } + + { + // check column structure + const auto & cols = store->getTableColumns(); + ASSERT_EQ(cols.size(), 5UL); + const auto & str_col = cols[3]; + ASSERT_EQ(str_col.name, col_str_define.name); + ASSERT_EQ(str_col.id, col_str_define.id); + ASSERT_TRUE(str_col.type->equals(*col_str_define.type)); + const auto & i8_col = cols[4]; + ASSERT_EQ(i8_col.name, col_i8_define.name); + ASSERT_EQ(i8_col.id, col_i8_define.id); + ASSERT_TRUE(i8_col.type->equals(*col_i8_define.type)); + } + + const size_t num_rows_write = 128; + { + // write to store + Block block; + { + block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); + // Add a column of col2:String for test + block.insert(DB::tests::createColumn( + createNumberStrings(0, num_rows_write), + col_str_define.name, + col_str_define.id)); + // Add a column of i8:Int8 for test + block.insert(DB::tests::createColumn( + createSignedNumbers(0, num_rows_write), + col_i8_define.name, + col_i8_define.id)); + } + db_context->getSettingsRef().dt_segment_delta_cache_limit_rows = 8; + FailPointHelper::enableFailPoint(FailPoints::force_set_page_file_write_errno); + ASSERT_THROW(store->write(*db_context, db_context->getSettingsRef(), block), DB::Exception); + try + { + store->write(*db_context, db_context->getSettingsRef(), block); + } + catch (DB::Exception & e) + { + if (e.code() != ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR) + throw; + } + } + FailPointHelper::disableFailPoint(FailPoints::force_set_page_file_write_errno); + + { + // read all columns from store + const auto & columns = store->getTableColumns(); + BlockInputStreamPtr in = store->read(*db_context, + db_context->getSettingsRef(), + columns, + {RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())}, + /* num_streams= */ 1, + /* max_version= */ std::numeric_limits::max(), + EMPTY_FILTER, + TRACING_NAME, + /* expected_block_size= */ 1024)[0]; + + size_t num_rows_read = 0; + in->readPrefix(); + while (Block 
block = in->read()) + { + num_rows_read += block.rows(); + } + in->readSuffix(); + ASSERT_EQ(num_rows_read, 0); + } +} +CATCH + +TEST_P(DeltaMergeStoreRWTest, WriteCrashBeforeWalWithCache) +try +{ + const ColumnDefine col_str_define(2, "col2", std::make_shared()); + const ColumnDefine col_i8_define(3, "i8", std::make_shared()); + { + auto table_column_defines = DMTestEnv::getDefaultColumns(); + table_column_defines->emplace_back(col_str_define); + table_column_defines->emplace_back(col_i8_define); + + store = reload(table_column_defines); + } + + { + // check column structure + const auto & cols = store->getTableColumns(); + ASSERT_EQ(cols.size(), 5UL); + const auto & str_col = cols[3]; + ASSERT_EQ(str_col.name, col_str_define.name); + ASSERT_EQ(str_col.id, col_str_define.id); + ASSERT_TRUE(str_col.type->equals(*col_str_define.type)); + const auto & i8_col = cols[4]; + ASSERT_EQ(i8_col.name, col_i8_define.name); + ASSERT_EQ(i8_col.id, col_i8_define.id); + ASSERT_TRUE(i8_col.type->equals(*col_i8_define.type)); + } + + const size_t num_rows_write = 128; + { + // write to store + Block block; + { + block = DMTestEnv::prepareSimpleWriteBlock(0, num_rows_write, false); + // Add a column of col2:String for test + block.insert(DB::tests::createColumn( + createNumberStrings(0, num_rows_write), + col_str_define.name, + col_str_define.id)); + // Add a column of i8:Int8 for test + block.insert(DB::tests::createColumn( + createSignedNumbers(0, num_rows_write), + col_i8_define.name, + col_i8_define.id)); + } + + FailPointHelper::enableFailPoint(FailPoints::force_set_page_file_write_errno); + store->write(*db_context, db_context->getSettingsRef(), block); + ASSERT_THROW(store->flushCache(*db_context, RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())), DB::Exception); + try + { + store->flushCache(*db_context, RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())); + } + catch (DB::Exception & e) + { + if (e.code() != ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR) + throw; + } + } + FailPointHelper::disableFailPoint(FailPoints::force_set_page_file_write_errno); + + { + // read all columns from store + const auto & columns = store->getTableColumns(); + BlockInputStreamPtr in = store->read(*db_context, + db_context->getSettingsRef(), + columns, + {RowKeyRange::newAll(store->isCommonHandle(), store->getRowKeyColumnSize())}, + /* num_streams= */ 1, + /* max_version= */ std::numeric_limits::max(), + EMPTY_FILTER, + TRACING_NAME, + /* expected_block_size= */ 1024)[0]; + + size_t num_rows_read = 0; + in->readPrefix(); + while (Block block = in->read()) + { + num_rows_read += block.rows(); + for (auto && iter : block) + { + auto c = iter.column; + for (Int64 i = 0; i < Int64(c->size()); ++i) + { + if (iter.name == DMTestEnv::pk_name) + { + //printf("pk:%lld\n", c->getInt(i)); + EXPECT_EQ(c->getInt(i), i); + } + else if (iter.name == col_str_define.name) + { + //printf("%s:%s\n", col_str_define.name.c_str(), c->getDataAt(i).data); + EXPECT_EQ(c->getDataAt(i), DB::toString(i)); + } + else if (iter.name == col_i8_define.name) + { + //printf("%s:%lld\n", col_i8_define.name.c_str(), c->getInt(i)); + Int64 num = i * (i % 2 == 0 ? 
-1 : 1); + EXPECT_EQ(c->getInt(i), num); + } + } + } + } + in->readSuffix(); + ASSERT_EQ(num_rows_read, num_rows_write); + } +} +CATCH + TEST_P(DeltaMergeStoreRWTest, DeleteRead) try { diff --git a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp index a26471cfe01..1cb735a2b65 100644 --- a/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp +++ b/dbms/src/Storages/DeltaMerge/tests/gtest_dm_storage_delta_merge.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -168,6 +169,8 @@ try } EXPECT_EQ(total_segment_rows, num_rows_read); storage->drop(); + // remove the storage from TiFlash context manually + storage->removeFromTMTContext(); } CATCH @@ -252,6 +255,8 @@ try ASSERT_EQ(storage->getDatabaseName(), new_db_name); storage->drop(); + // remove the storage from TiFlash context manually + storage->removeFromTMTContext(); } CATCH @@ -315,6 +320,8 @@ try ASSERT_EQ(sort_desc.front().nulls_direction, sort_desc2.front().nulls_direction); storage->drop(); + // remove the storage from TiFlash context manually + storage->removeFromTMTContext(); } CATCH @@ -609,6 +616,8 @@ try sample.insert(DB::tests::createColumn( Strings(100, "a"), "col2")); + constexpr TiDB::TableID table_id = 1; + const String table_name = fmt::format("t_{}", table_id); Context ctx = DMTestEnv::getContext(); std::shared_ptr storage; @@ -631,12 +640,11 @@ try path.remove(true); // primary_expr_ast - const String table_name = "t_1233"; ASTPtr astptr(new ASTIdentifier(table_name, ASTIdentifier::Kind::Table)); astptr->children.emplace_back(new ASTIdentifier("col1")); TiDB::TableInfo tidb_table_info; - tidb_table_info.id = 1; + tidb_table_info.id = table_id; storage = StorageDeltaMerge::create("TiFlash", /* db_name= */ "default", @@ -692,8 +700,8 @@ try { Field res; c->get(i, res); - ASSERT(!res.isNull()); - ASSERT(res.get() == 1); + ASSERT_TRUE(!res.isNull()); + ASSERT_EQ(res.get(), table_id); } } } @@ -701,6 +709,8 @@ try in->readSuffix(); ASSERT_EQ(num_rows_read, sample.rows()); storage->drop(); + // remove the storage from TiFlash context manually + storage->removeFromTMTContext(); } CATCH @@ -848,6 +858,8 @@ try ASSERT_LT(read_data(), num_rows_write); } storage->drop(); + // remove the storage from TiFlash context manually + storage->removeFromTMTContext(); } CATCH diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp b/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp index e5c7fd30f40..a6113f91d91 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp +++ b/dbms/src/Storages/DeltaMerge/tools/workload/DTWorkload.cpp @@ -233,7 +233,7 @@ void DTWorkload::verifyHandle(uint64_t r) } for (size_t i = 0; i < handle_col->size(); i++) { - // Handle must be int64 or uint64. Currently, TableGenterator would ensure this limit. + // Handle must be int64 or uint64. Currently, TableGenerator would ensure this limit. 
uint64_t h = handle_col->getInt(i); uint64_t store_ts = ts_col->getInt(i); diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp b/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp index e18a6ef30a2..f79d414f20b 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp +++ b/dbms/src/Storages/DeltaMerge/tools/workload/MainEntry.cpp @@ -124,13 +124,26 @@ void run(WorkloadOptions & opts) // Table Schema auto table_gen = TableGenerator::create(opts); auto table_info = table_gen->get(opts.table_id, opts.table_name); - // In this for loop, destory DeltaMergeStore gracefully and recreate it. - for (uint64_t i = 0; i < opts.verify_round; i++) + // In this for loop, destroy DeltaMergeStore gracefully and recreate it. + auto run_test = [&]() { + for (uint64_t i = 0; i < opts.verify_round; i++) + { + DTWorkload workload(opts, handle_table, table_info); + workload.run(i); + stats.push_back(workload.getStat()); + LOG_FMT_INFO(log, "No.{} Workload {} {}", i, opts.write_key_distribution, stats.back().toStrings()); + } + }; + run_test(); + + if (opts.ps_run_mode == DB::PageStorageRunMode::MIX_MODE) { - DTWorkload workload(opts, handle_table, table_info); - workload.run(i); - stats.push_back(workload.getStat()); - LOG_FMT_INFO(log, "No.{} Workload {} {}", i, opts.write_key_distribution, stats.back().toStrings()); + // clear the statistics collected during the DB::PageStorageRunMode::ONLY_V2 round + stats.clear(); + auto & global_context = TiFlashTestEnv::getGlobalContext(); + global_context.setPageStorageRunMode(DB::PageStorageRunMode::MIX_MODE); + global_context.initializeGlobalStoragePoolIfNeed(global_context.getPathPool()); + run_test(); } } catch (...) @@ -254,8 +267,9 @@ int DTWorkload::mainEntry(int argc, char ** argv) // or the logging in global context won't be output to // the log file init(opts); - TiFlashTestEnv::initializeGlobalContext(opts.work_dirs, opts.enable_ps_v3); + // For mixed mode, we need to run the test in ONLY_V2 mode first. + TiFlashTestEnv::initializeGlobalContext(opts.work_dirs, opts.ps_run_mode == PageStorageRunMode::ONLY_V3 ?
PageStorageRunMode::ONLY_V3 : PageStorageRunMode::ONLY_V2); if (opts.testing_type == "daily_perf") { dailyPerformanceTest(opts); @@ -277,7 +291,6 @@ int DTWorkload::mainEntry(int argc, char ** argv) runAndRandomKill(opts); } } - TiFlashTestEnv::shutdown(); return 0; } diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp b/dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp index 0d2b14d916b..1c6409f3c53 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp +++ b/dbms/src/Storages/DeltaMerge/tools/workload/Options.cpp @@ -44,7 +44,7 @@ std::string WorkloadOptions::toString(std::string seperator) const fmt::format("read_stream_count {}{}", read_stream_count, seperator) + // fmt::format("testing_type {}{}", testing_type, seperator) + // fmt::format("log_write_request {}{}", log_write_request, seperator) + // - fmt::format("enable_ps_v3 {}{}", enable_ps_v3, seperator) + // + fmt::format("ps_run_mode {}{}", ps_run_mode, seperator) + // fmt::format("bg_thread_count {}{}", bg_thread_count, seperator) + // fmt::format("table_id {}{}", table_id, seperator) + // fmt::format("table_name {}{}", table_name, seperator); @@ -88,7 +88,7 @@ std::pair WorkloadOptions::parseOptions(int argc, char * argv // ("log_write_request", value()->default_value(false), "") // // - ("enable_ps_v3", value()->default_value(true), "") // + ("ps_run_mode", value()->default_value(2, "possible value: 1(only_v2), 2(only_v3), 3(mix_mode), and note that in mix_mode, the test will run twice, first round in only_v2 mode and second round in mix_mode")) // // ("bg_thread_count", value()->default_value(4), "") // // @@ -155,8 +155,20 @@ std::pair WorkloadOptions::parseOptions(int argc, char * argv testing_type = vm["testing_type"].as(); log_write_request = vm["log_write_request"].as(); - - enable_ps_v3 = vm["enable_ps_v3"].as(); + switch (vm["ps_run_mode"].as()) + { + case static_cast(PageStorageRunMode::ONLY_V2): + ps_run_mode = PageStorageRunMode::ONLY_V2; + break; + case static_cast(PageStorageRunMode::ONLY_V3): + ps_run_mode = PageStorageRunMode::ONLY_V3; + break; + case static_cast(PageStorageRunMode::MIX_MODE): + ps_run_mode = PageStorageRunMode::MIX_MODE; + break; + default: + return {false, fmt::format("unknown ps_run_mode {}.", vm["ps_run_mode"].as())}; + } bg_thread_count = vm["bg_thread_count"].as(); diff --git a/dbms/src/Storages/DeltaMerge/tools/workload/Options.h b/dbms/src/Storages/DeltaMerge/tools/workload/Options.h index 17c7a5ba61f..f017daf2d8a 100644 --- a/dbms/src/Storages/DeltaMerge/tools/workload/Options.h +++ b/dbms/src/Storages/DeltaMerge/tools/workload/Options.h @@ -14,6 +14,8 @@ #pragma once +#include + #include #include @@ -53,7 +55,7 @@ struct WorkloadOptions bool log_write_request; - bool enable_ps_v3; + PageStorageRunMode ps_run_mode; uint64_t bg_thread_count; diff --git a/dbms/src/Storages/Page/FileUsage.h b/dbms/src/Storages/Page/FileUsage.h new file mode 100644 index 00000000000..6319f4a4acf --- /dev/null +++ b/dbms/src/Storages/Page/FileUsage.h @@ -0,0 +1,29 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include + +#include + +namespace DB +{ +struct FileUsageStatistics +{ + size_t total_disk_size = 0; + size_t total_valid_size = 0; + size_t total_file_num = 0; +}; + +} // namespace DB diff --git a/dbms/src/Storages/Page/PageDefines.h b/dbms/src/Storages/Page/PageDefines.h index 46789419fbd..7feea494eb4 100644 --- a/dbms/src/Storages/Page/PageDefines.h +++ b/dbms/src/Storages/Page/PageDefines.h @@ -74,7 +74,7 @@ using PageFileIdAndLevels = std::vector; using PageSize = UInt64; -using BlobFileId = UInt32; +using BlobFileId = UInt64; using BlobFileOffset = UInt64; static constexpr BlobFileId INVALID_BLOBFILE_ID = 0; static constexpr BlobFileOffset INVALID_BLOBFILE_OFFSET = std::numeric_limits::max(); diff --git a/dbms/src/Storages/Page/PageStorage.cpp b/dbms/src/Storages/Page/PageStorage.cpp index 6e1addae093..d8b767a6c15 100644 --- a/dbms/src/Storages/Page/PageStorage.cpp +++ b/dbms/src/Storages/Page/PageStorage.cpp @@ -66,6 +66,8 @@ class PageReaderImpl : private boost::noncopyable // Get some statistics of all living snapshots and the oldest living snapshot. virtual SnapshotsStatistics getSnapshotsStat() const = 0; + virtual FileUsageStatistics getFileUsageStatistics() const = 0; + virtual void traverse(const std::function & acceptor, bool only_v2, bool only_v3) const = 0; }; @@ -137,6 +139,11 @@ class PageReaderImplNormal : public PageReaderImpl storage->traverse(acceptor, nullptr); } + FileUsageStatistics getFileUsageStatistics() const override + { + return storage->getFileUsageStatistics(); + } + private: NamespaceId ns_id; PageStoragePtr storage; @@ -294,6 +301,11 @@ class PageReaderImplMixed : public PageReaderImpl return statistics_total; } + FileUsageStatistics getFileUsageStatistics() const override + { + return storage_v3->getFileUsageStatistics(); + } + void traverse(const std::function & acceptor, bool only_v2, bool only_v3) const override { // Used by RegionPersister::restore @@ -424,6 +436,12 @@ SnapshotsStatistics PageReader::getSnapshotsStat() const return impl->getSnapshotsStat(); } + +FileUsageStatistics PageReader::getFileUsageStatistics() const +{ + return impl->getFileUsageStatistics(); +} + void PageReader::traverse(const std::function & acceptor, bool only_v2, bool only_v3) const { impl->traverse(acceptor, only_v2, only_v3); diff --git a/dbms/src/Storages/Page/PageStorage.h b/dbms/src/Storages/Page/PageStorage.h index 479c368a585..0059c0570c1 100644 --- a/dbms/src/Storages/Page/PageStorage.h +++ b/dbms/src/Storages/Page/PageStorage.h @@ -19,10 +19,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -140,7 +142,7 @@ class PageStorage : private boost::noncopyable SettingUInt64 blob_block_alignment_bytes = 0; SettingUInt64 wal_roll_size = PAGE_META_ROLL_SIZE; - SettingUInt64 wal_recover_mode = 0; + SettingUInt64 wal_recover_mode = static_cast(WALRecoveryMode::TolerateCorruptedTailRecords); SettingUInt64 wal_max_persisted_log_files = MAX_PERSISTED_LOG_FILES; void reload(const Config & rhs) @@ -250,6 +252,12 @@ class PageStorage : private boost::noncopyable // Get some statistics of all living snapshots and the oldest living snapshot. 
virtual SnapshotsStatistics getSnapshotsStat() const = 0; + virtual FileUsageStatistics getFileUsageStatistics() const + { + // return all zeros by default + return FileUsageStatistics{}; + } + virtual size_t getNumberOfPages() = 0; virtual std::set getAliveExternalPageIds(NamespaceId ns_id) = 0; @@ -379,6 +387,8 @@ class PageReader : private boost::noncopyable // Get some statistics of all living snapshots and the oldest living snapshot. SnapshotsStatistics getSnapshotsStat() const; + FileUsageStatistics getFileUsageStatistics() const; + void traverse(const std::function & acceptor, bool only_v2 = false, bool only_v3 = false) const; private: @@ -406,7 +416,9 @@ class PageWriter : private boost::noncopyable // Only used for DATA transform data void writeIntoV3(WriteBatch && write_batch, WriteLimiterPtr write_limiter) const; +#ifndef DBMS_PUBLIC_GTEST private: +#endif void writeIntoMixMode(WriteBatch && write_batch, WriteLimiterPtr write_limiter) const; // A wrap of getSettings only used for `RegionPersister::gc` diff --git a/dbms/src/Storages/Page/Snapshot.h b/dbms/src/Storages/Page/Snapshot.h index 77e68f1b054..073fc0a2830 100644 --- a/dbms/src/Storages/Page/Snapshot.h +++ b/dbms/src/Storages/Page/Snapshot.h @@ -61,7 +61,7 @@ class PageStorageSnapshotMixed : public PageStorageSnapshot }; using PageStorageSnapshotMixedPtr = std::shared_ptr; -static inline PageStorageSnapshotMixedPtr +inline PageStorageSnapshotMixedPtr toConcreteMixedSnapshot(const PageStorageSnapshotPtr & ptr) { return std::static_pointer_cast(ptr); diff --git a/dbms/src/Storages/Page/V3/BlobStore.cpp b/dbms/src/Storages/Page/V3/BlobStore.cpp index dc5ed536f9e..d5f71841b91 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.cpp +++ b/dbms/src/Storages/Page/V3/BlobStore.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -66,7 +67,7 @@ using ChecksumClass = Digest::CRC64; * BlobStore methods * *********************/ -BlobStore::BlobStore(String storage_name, const FileProviderPtr & file_provider_, PSDiskDelegatorPtr delegator_, BlobStore::Config config_) +BlobStore::BlobStore(String storage_name, const FileProviderPtr & file_provider_, PSDiskDelegatorPtr delegator_, const BlobStore::Config & config_) : delegator(std::move(delegator_)) , file_provider(file_provider_) , config(config_) @@ -115,6 +116,38 @@ void BlobStore::registerPaths() } } +FileUsageStatistics BlobStore::getFileUsageStatistics() const +{ + FileUsageStatistics usage; + + // Get a copy of stats map to avoid the big lock on stats map + const auto stats_list = blob_stats.getStats(); + + for (const auto & [path, stats] : stats_list) + { + (void)path; + for (const auto & stat : stats) + { + // We can access these types without any locking. + if (stat->isReadOnly() || stat->isBigBlob()) + { + usage.total_disk_size += stat->sm_total_size; + usage.total_valid_size += stat->sm_valid_size; + } + else + { + // Otherwise the stat may be being updated; acquire a lock to avoid a data race.
+ auto lock = stat->lock(); + usage.total_disk_size += stat->sm_total_size; + usage.total_valid_size += stat->sm_valid_size; + } + } + usage.total_file_num += stats.size(); + } + + return usage; +} + PageEntriesEdit BlobStore::handleLargeWrite(DB::WriteBatch & wb, const WriteLimiterPtr & write_limiter) { auto ns_id = wb.getNamespaceId(); @@ -872,6 +905,7 @@ struct BlobStoreGCInfo std::vector BlobStore::getGCStats() { + // Get a copy of stats map to avoid the big lock on stats map const auto stats_list = blob_stats.getStats(); std::vector blob_need_gc; BlobStoreGCInfo blobstore_gc_info; @@ -914,7 +948,22 @@ std::vector BlobStore::getGCStats() // Avoid divide by zero if (right_margin == 0) { - LOG_FMT_TRACE(log, "Current blob is empty [blob_id={}, total size(all invalid)={}].", stat->id, stat->sm_total_size); + if (unlikely(stat->sm_valid_rate != 0)) + { + throw Exception(fmt::format("Current blob is empty, but valid rate is not 0. [blob_id={}][valid_size={}][valid_rate={}]", + stat->id, + stat->sm_valid_size, + stat->sm_valid_rate)); + } + + LOG_FMT_TRACE(log, "Current blob is empty [blob_id={}, total size(all invalid)={}] [valid_rate={}].", stat->id, stat->sm_total_size, stat->sm_valid_rate); + + // Even if the current blob is empty, the blob file on disk may not be empty, + // so we truncate the current blob file and let it be reused. + auto blobfile = getBlobFile(stat->id); + LOG_FMT_TRACE(log, "Truncate empty blob file [blob_id={}] to 0.", stat->id); + blobfile->truncate(right_margin); + blobstore_gc_info.appendToTruncatedBlob(stat->id, stat->sm_valid_rate); continue; } @@ -1196,7 +1245,7 @@ BlobStatPtr BlobStore::BlobStats::createStat(BlobFileId blob_file_id, const std: // New blob file id won't bigger than roll_id if (blob_file_id > roll_id) { - throw Exception(fmt::format("BlobStats won't create [blob_id={}], which is bigger than [RollMaxId={}]", + throw Exception(fmt::format("BlobStats won't create [blob_id={}], which is bigger than [roll_id={}]", blob_file_id, roll_id), ErrorCodes::LOGICAL_ERROR); @@ -1259,8 +1308,7 @@ BlobStatPtr BlobStore::BlobStats::createBigPageStatNotChecking(BlobFileId blob_f BlobStatPtr stat = std::make_shared( blob_file_id, SpaceMap::SpaceMapType::SMAP64_BIG, - config.file_limit_size, - BlobStatType::BIG_BLOB); + config.file_limit_size); PageFileIdAndLevel id_lvl{blob_file_id, 0}; stats_map[delegator->choosePath(id_lvl)].emplace_back(stat); @@ -1438,7 +1486,7 @@ bool BlobStore::BlobStats::BlobStat::removePosFromStat(BlobFileOffset offset, si if (!smap->markFree(offset, buf_size)) { smap->logDebugString(); - throw Exception(fmt::format("Remove postion from BlobStat failed, [offset={} , buf_size={}, blob_id={}] is invalid.", + throw Exception(fmt::format("Remove position from BlobStat failed, invalid position [offset={}] [buf_size={}] [blob_id={}]", offset, buf_size, id), @@ -1455,7 +1503,7 @@ void BlobStore::BlobStats::BlobStat::restoreSpaceMap(BlobFileOffset offset, size if (!smap->markUsed(offset, buf_size)) { smap->logDebugString(); - throw Exception(fmt::format("Restore postion from BlobStat failed, [offset={}] [buf_size={}] [blob_id={}] is used or subspan is used", + throw Exception(fmt::format("Restore position from BlobStat failed, the space/subspace is already being used [offset={}] [buf_size={}] [blob_id={}]", offset, buf_size, id), @@ -1468,7 +1516,7 @@ void BlobStore::BlobStats::BlobStat::recalculateSpaceMap() const auto & [total_size, valid_size] = smap->getSizes(); sm_total_size = total_size; sm_valid_size = valid_size; - sm_valid_rate = valid_size * 1.0 /
total_size; + sm_valid_rate = total_size == 0 ? 0.0 : valid_size * 1.0 / total_size; recalculateCapacity(); } diff --git a/dbms/src/Storages/Page/V3/BlobStore.h b/dbms/src/Storages/Page/V3/BlobStore.h index e527eb0f3bf..5a3e98400d1 100644 --- a/dbms/src/Storages/Page/V3/BlobStore.h +++ b/dbms/src/Storages/Page/V3/BlobStore.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -107,15 +108,20 @@ class BlobStore : private Allocator UInt64 sm_max_caps = 0; UInt64 sm_total_size = 0; UInt64 sm_valid_size = 0; - double sm_valid_rate = 1.0; + double sm_valid_rate = 0.0; public: - BlobStat(BlobFileId id_, SpaceMap::SpaceMapType sm_type, UInt64 sm_max_caps_, BlobStatType type_ = BlobStatType::NORMAL) + BlobStat(BlobFileId id_, SpaceMap::SpaceMapType sm_type, UInt64 sm_max_caps_) : id(id_) - , type(type_) + , type(BlobStatType::NORMAL) , smap(SpaceMap::createSpaceMap(sm_type, 0, sm_max_caps_)) , sm_max_caps(sm_max_caps_) { + if (sm_type == SpaceMap::SpaceMapType::SMAP64_BIG) + { + type = BlobStatType::BIG_BLOB; + } + // Won't create read-only blob by default. assert(type != BlobStatType::READ_ONLY); } @@ -246,10 +252,12 @@ class BlobStore : private Allocator std::map> stats_map; }; - BlobStore(String storage_name, const FileProviderPtr & file_provider_, PSDiskDelegatorPtr delegator_, BlobStore::Config config); + BlobStore(String storage_name, const FileProviderPtr & file_provider_, PSDiskDelegatorPtr delegator_, const BlobStore::Config & config); void registerPaths(); + FileUsageStatistics getFileUsageStatistics() const; + std::vector getGCStats(); PageEntriesEdit gc(std::map & entries_need_gc, diff --git a/dbms/src/Storages/Page/V3/LogFile/LogFormat.h b/dbms/src/Storages/Page/V3/LogFile/LogFormat.h index c1151d1181f..aa273167109 100644 --- a/dbms/src/Storages/Page/V3/LogFile/LogFormat.h +++ b/dbms/src/Storages/Page/V3/LogFile/LogFormat.h @@ -56,7 +56,7 @@ static constexpr UInt32 PAYLOAD_FIELD_SIZE = sizeof(UInt16); // The checksum count begin at the `type` field in Header/RecyclableHeader static constexpr size_t CHECKSUM_START_OFFSET = Format::CHECKSUM_FIELD_SIZE + Format::PAYLOAD_FIELD_SIZE; -using LogNumberType = UInt32; +using LogNumberType = UInt64; // Header is // - checksum (`CHECKSUM_FIELD_SIZE` bytes diff --git a/dbms/src/Storages/Page/V3/PageDirectory.cpp b/dbms/src/Storages/Page/V3/PageDirectory.cpp index 64a3fead674..e9b754854b8 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectory.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -796,7 +797,17 @@ PageIDAndEntryV3 PageDirectory::get(PageIdV3Internal page_id, const PageDirector } } - throw Exception(fmt::format("Fail to get entry [page_id={}] [seq={}] [resolve_id={}] [resolve_ver={}]", page_id, snap->sequence, id_to_resolve, ver_to_resolve), ErrorCodes::PS_ENTRY_NO_VALID_VERSION); + // `throw_on_not_exist` is false only in mix mode. + // In mix mode, the storage creates a snapshot that contains both V2 and V3. + // If we find a del entry in V3, we still need to look it up in V2.
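For context, a sketch of how a mixed-mode caller is expected to interpret the sentinel entry returned below (hypothetical helper, not part of this patch; the actual fallback is wired up in the mixed-mode PageReader, and the exact signature of `get` is assumed here):

// Sketch: probe V3 without throwing; a sentinel entry whose file_id is
// INVALID_BLOBFILE_ID means "not found in V3, fall back to V2".
bool existsInV3(const PS::V3::PageDirectory & dir,
                PageIdV3Internal page_id,
                const PageDirectorySnapshotPtr & snap)
{
    const auto [id, entry] = dir.get(page_id, snap, /*throw_on_not_exist=*/false);
    return entry.file_id != INVALID_BLOBFILE_ID;
}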
+ if (throw_on_not_exist) + { + throw Exception(fmt::format("Fail to get entry [page_id={}] [seq={}] [resolve_id={}] [resolve_ver={}]", page_id, snap->sequence, id_to_resolve, ver_to_resolve), ErrorCodes::PS_ENTRY_NO_VALID_VERSION); + } + else + { + return PageIDAndEntryV3{page_id, PageEntryV3{.file_id = INVALID_BLOBFILE_ID}}; + } } std::pair PageDirectory::get(const PageIdV3Internals & page_ids, const PageDirectorySnapshotPtr & snap, bool throw_on_not_exist) const @@ -845,7 +856,15 @@ std::pair PageDirectory::get(const PageIdV3Internal break; // continue the resolving } } - throw Exception(fmt::format("Fail to get entry [page_id={}] [ver={}] [resolve_id={}] [resolve_ver={}] [idx={}]", page_id, init_ver_to_resolve, id_to_resolve, ver_to_resolve, idx), ErrorCodes::PS_ENTRY_NO_VALID_VERSION); + + if (throw_on_not_exist) + { + throw Exception(fmt::format("Fail to get entry [page_id={}] [ver={}] [resolve_id={}] [resolve_ver={}] [idx={}]", page_id, init_ver_to_resolve, id_to_resolve, ver_to_resolve, idx), ErrorCodes::PS_ENTRY_NO_VALID_VERSION); + } + else + { + return false; + } }; PageIDAndEntriesV3 id_entries; @@ -1191,16 +1210,31 @@ PageDirectory::getEntriesByBlobIds(const std::vector & blob_ids) con return std::make_pair(std::move(blob_versioned_entries), total_page_size); } -bool PageDirectory::tryDumpSnapshot(const WriteLimiterPtr & write_limiter) +bool PageDirectory::tryDumpSnapshot(const ReadLimiterPtr & read_limiter, const WriteLimiterPtr & write_limiter) { bool done_any_io = false; // In order not to make read amplification too high, only apply compact logs when ... auto files_snap = wal->getFilesSnapshot(); if (files_snap.needSave(max_persisted_log_files)) { + // To prevent writes from affecting dumping snapshot (and vice versa), old log files + // are read from disk and a temporary PageDirectory is generated for dumping snapshot. + // The main reason writes affect dumping the snapshot is that we cannot get a read-only + // `being_ref_count` via the function `createSnapshot()`. + assert(!files_snap.persisted_log_files.empty()); // should not be empty when `needSave` returns true + auto log_num = files_snap.persisted_log_files.rbegin()->log_num; + auto identifier = fmt::format("{}_dump_{}", wal->name(), log_num); + auto snapshot_reader = wal->createReaderForFiles(identifier, files_snap.persisted_log_files, read_limiter); + PageDirectoryFactory factory; + // We only use the `collapsed_dir` to dump the edit of the snapshot, and should never call functions like `apply` that + // persist new logs into disk. So we pass `nullptr` as `wal` to the factory.
+ PageDirectoryPtr collapsed_dir = factory.createFromReader( + identifier, + std::move(snapshot_reader), + /*wal=*/nullptr); // The records persisted in `files_snap` are older than or equal to all records in `edit` - auto edit = dumpSnapshotToEdit(); - done_any_io = wal->saveSnapshot(std::move(files_snap), std::move(edit), write_limiter); + auto edit_from_disk = collapsed_dir->dumpSnapshotToEdit(); + done_any_io = wal->saveSnapshot(std::move(files_snap), std::move(edit_from_disk), write_limiter); } return done_any_io; } diff --git a/dbms/src/Storages/Page/V3/PageDirectory.h b/dbms/src/Storages/Page/V3/PageDirectory.h index a3c6b079fee..39b5a05a40a 100644 --- a/dbms/src/Storages/Page/V3/PageDirectory.h +++ b/dbms/src/Storages/Page/V3/PageDirectory.h @@ -358,7 +358,7 @@ class PageDirectory void gcApply(PageEntriesEdit && migrated_edit, const WriteLimiterPtr & write_limiter = nullptr); - bool tryDumpSnapshot(const WriteLimiterPtr & write_limiter = nullptr); + bool tryDumpSnapshot(const ReadLimiterPtr & read_limiter = nullptr, const WriteLimiterPtr & write_limiter = nullptr); PageEntriesV3 gcInMemEntries(); diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp index 9d20e0a64ab..483c5073ab5 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.cpp @@ -31,7 +31,12 @@ namespace PS::V3 PageDirectoryPtr PageDirectoryFactory::create(String storage_name, FileProviderPtr & file_provider, PSDiskDelegatorPtr & delegator, WALStore::Config config) { auto [wal, reader] = WALStore::create(storage_name, file_provider, delegator, config); - PageDirectoryPtr dir = std::make_unique(std::move(storage_name), std::move(wal), config.max_persisted_log_files); + return createFromReader(storage_name, reader, std::move(wal)); +} + +PageDirectoryPtr PageDirectoryFactory::createFromReader(String storage_name, WALStoreReaderPtr reader, WALStorePtr wal) +{ + PageDirectoryPtr dir = std::make_unique(storage_name, std::move(wal)); loadFromDisk(dir, std::move(reader)); // Reset the `sequence` to the maximum of persisted. @@ -40,7 +45,7 @@ PageDirectoryPtr PageDirectoryFactory::create(String storage_name, FileProviderP // After restoring from the disk, we need to clean up all invalid entries in memory, or it will // try to run GC again on some entries that are already marked as invalid in BlobStore.
dir->gcInMemEntries(); - LOG_FMT_INFO(DB::Logger::get("PageDirectoryFactory"), "PageDirectory restored [max_page_id={}] [max_applied_ver={}]", dir->getMaxId(), dir->sequence); + LOG_FMT_INFO(DB::Logger::get("PageDirectoryFactory", storage_name), "PageDirectory restored [max_page_id={}] [max_applied_ver={}]", dir->getMaxId(), dir->sequence); if (blob_stats) { diff --git a/dbms/src/Storages/Page/V3/PageDirectoryFactory.h b/dbms/src/Storages/Page/V3/PageDirectoryFactory.h index 185e8fd19a5..a922db3b497 100644 --- a/dbms/src/Storages/Page/V3/PageDirectoryFactory.h +++ b/dbms/src/Storages/Page/V3/PageDirectoryFactory.h @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB { @@ -47,6 +48,8 @@ class PageDirectoryFactory PageDirectoryPtr create(String storage_name, FileProviderPtr & file_provider, PSDiskDelegatorPtr & delegator, WALStore::Config config); + PageDirectoryPtr createFromReader(String storage_name, WALStoreReaderPtr reader, WALStorePtr wal); + // just for test PageDirectoryPtr createFromEdit(String storage_name, FileProviderPtr & file_provider, PSDiskDelegatorPtr & delegator, const PageEntriesEdit & edit); diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp index 58fe4b4dd4c..a568bb5087f 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.cpp +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.cpp @@ -80,6 +80,11 @@ DB::PageStorage::SnapshotPtr PageStorageImpl::getSnapshot(const String & tracing return page_directory->createSnapshot(tracing_id); } +FileUsageStatistics PageStorageImpl::getFileUsageStatistics() const +{ + return blob_store.getFileUsageStatistics(); +} + SnapshotsStatistics PageStorageImpl::getSnapshotsStat() const { return page_directory->getSnapshotsStat(); @@ -289,7 +294,7 @@ bool PageStorageImpl::gcImpl(bool /*not_skip*/, const WriteLimiterPtr & write_li // 1. Do the MVCC gc, clean up expired snapshot. // And get the expired entries. - if (page_directory->tryDumpSnapshot(write_limiter)) + if (page_directory->tryDumpSnapshot(read_limiter, write_limiter)) { GET_METRIC(tiflash_storage_page_gc_count, type_v3_mvcc_dumped).Increment(); } diff --git a/dbms/src/Storages/Page/V3/PageStorageImpl.h b/dbms/src/Storages/Page/V3/PageStorageImpl.h index 082adb8df34..f49601ce2ad 100644 --- a/dbms/src/Storages/Page/V3/PageStorageImpl.h +++ b/dbms/src/Storages/Page/V3/PageStorageImpl.h @@ -34,7 +34,7 @@ class PageStorageImpl : public DB::PageStorage const Config & config_, const FileProviderPtr & file_provider_); - ~PageStorageImpl(); + ~PageStorageImpl() override; static BlobStore::Config parseBlobConfig(const Config & config) { @@ -54,8 +54,8 @@ class PageStorageImpl : public DB::PageStorage WALStore::Config wal_config; wal_config.roll_size = config.wal_roll_size; - wal_config.wal_recover_mode = config.wal_recover_mode; wal_config.max_persisted_log_files = config.wal_max_persisted_log_files; + wal_config.setRecoverMode(config.wal_recover_mode); return wal_config; } @@ -72,6 +72,8 @@ class PageStorageImpl : public DB::PageStorage SnapshotsStatistics getSnapshotsStat() const override; + FileUsageStatistics getFileUsageStatistics() const override; + size_t getNumberOfPages() override; std::set getAliveExternalPageIds(NamespaceId ns_id) override; diff --git a/dbms/src/Storages/Page/V3/WALStore.cpp b/dbms/src/Storages/Page/V3/WALStore.cpp index 1f1eaf3bc33..c7f11ee8b3c 100644 --- a/dbms/src/Storages/Page/V3/WALStore.cpp +++ b/dbms/src/Storages/Page/V3/WALStore.cpp @@ -13,6 +13,7 @@ // limitations under the License. 
#include +#include #include #include #include @@ -46,7 +47,7 @@ std::pair WALStore::create( auto reader = WALStoreReader::create(storage_name, provider, delegator, - static_cast(config.wal_recover_mode.get())); + config.getRecoverMode()); // Create a new LogFile for writing new logs auto last_log_num = reader->lastLogNum() + 1; // TODO reuse old file return { @@ -54,17 +55,23 @@ std::pair WALStore::create( reader}; } +WALStoreReaderPtr WALStore::createReaderForFiles(const String & identifier, const LogFilenameSet & log_filenames, const ReadLimiterPtr & read_limiter) +{ + return WALStoreReader::create(identifier, provider, log_filenames, config.getRecoverMode(), read_limiter); +} + WALStore::WALStore( - String storage_name, + String storage_name_, const PSDiskDelegatorPtr & delegator_, const FileProviderPtr & provider_, Format::LogNumberType last_log_num_, WALStore::Config config_) - : delegator(delegator_) + : storage_name(std::move(storage_name_)) + , delegator(delegator_) , provider(provider_) , last_log_num(last_log_num_) , wal_paths_index(0) - , logger(Logger::get("WALStore", std::move(storage_name))) + , logger(Logger::get("WALStore", storage_name)) , config(config_) { } @@ -186,7 +193,7 @@ bool WALStore::saveSnapshot(FilesSnapshot && files_snap, PageEntriesEdit && dire LOG_FMT_INFO(logger, "Saving directory snapshot"); - // Use {largest_log_num + 1, 1} to save the `edit` + // Use {largest_log_num, 1} to save the `edit` const auto log_num = files_snap.persisted_log_files.rbegin()->log_num; // Create a temporary file for saving directory snapshot auto [compact_log, log_filename] = createLogWriter({log_num, 1}, /*manual_flush*/ true); @@ -212,25 +219,11 @@ bool WALStore::saveSnapshot(FilesSnapshot && files_snap, PageEntriesEdit && dire true); LOG_FMT_INFO(logger, "Rename log file to normal done [fullname={}]", normal_fullname); - // #define ARCHIVE_COMPACTED_LOGS // keep for debug - // Remove compacted log files. for (const auto & filename : files_snap.persisted_log_files) { - if (auto f = Poco::File(filename.fullname(LogFileStage::Normal)); f.exists()) - { -#ifndef ARCHIVE_COMPACTED_LOGS - f.remove(); -#else - const Poco::Path archive_path(delegator->defaultPath(), "archive"); - Poco::File archive_dir(archive_path); - if (!archive_dir.exists()) - archive_dir.createDirectory(); - auto dest = archive_path.toString() + "/" + filename.filename(LogFileStage::Normal); - f.moveTo(dest); - LOG_FMT_INFO(logger, "archive {} to {}", filename.fullname(LogFileStage::Normal), dest); -#endif - } + const auto log_fullname = filename.fullname(LogFileStage::Normal); + provider->deleteRegularFile(log_fullname, EncryptionPath(log_fullname, "")); } FmtBuffer fmt_buf; diff --git a/dbms/src/Storages/Page/V3/WALStore.h b/dbms/src/Storages/Page/V3/WALStore.h index 039903a8608..f1ea00d3562 100644 --- a/dbms/src/Storages/Page/V3/WALStore.h +++ b/dbms/src/Storages/Page/V3/WALStore.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -34,45 +35,7 @@ class PSDiskDelegator; using PSDiskDelegatorPtr = std::shared_ptr; namespace PS::V3 { -enum class WALRecoveryMode : UInt8 -{ - // Original levelDB recovery - // - // We tolerate the last record in any log to be incomplete due to a crash - // while writing it. Zeroed bytes from preallocation are also tolerated in the - // trailing data of any log. - // - // Use case: Applications for which updates, once applied, must not be rolled - // back even after a crash-recovery. 
In this recovery mode, RocksDB guarantees - // this as long as `WritableFile::Append()` writes are durable. In case the - // user needs the guarantee in more situations (e.g., when - // `WritableFile::Append()` writes to page cache, but the user desires this - // guarantee in face of power-loss crash-recovery), RocksDB offers various - // mechanisms to additionally invoke `WritableFile::Sync()` in order to - // strengthen the guarantee. - // - // This differs from `kPointInTimeRecovery` in that, in case a corruption is - // detected during recovery, this mode will refuse to open the DB. Whereas, - // `kPointInTimeRecovery` will stop recovery just before the corruption since - // that is a valid point-in-time to which to recover. - TolerateCorruptedTailRecords = 0x00, - // Recover from clean shutdown - // We don't expect to find any corruption in the WAL - // Use case : This is ideal for unit tests and rare applications that - // can require high consistency guarantee - AbsoluteConsistency = 0x01, - // Recover to point-in-time consistency (default) - // We stop the WAL playback on discovering WAL inconsistency - // Use case : Ideal for systems that have disk controller cache like - // hard disk, SSD without super capacitor that store related data - PointInTimeRecovery = 0x02, - // Recovery after a disaster - // We ignore any corruption in the WAL and try to salvage as much data as - // possible - // Use case : Ideal for last ditch effort to recover data or systems that - // operate with low grade unrelated data - SkipAnyCorruptedRecords = 0x03, -}; + class WALStore; using WALStorePtr = std::unique_ptr<WALStore>; @@ -86,30 +49,56 @@ class WALStore struct Config { SettingUInt64 roll_size = PAGE_META_ROLL_SIZE; - SettingUInt64 wal_recover_mode = 0; SettingUInt64 max_persisted_log_files = MAX_PERSISTED_LOG_FILES; + + private: + SettingUInt64 wal_recover_mode = 0; + + public: + void setRecoverMode(UInt64 recover_mode) + { + if (unlikely(recover_mode != static_cast<UInt64>(WALRecoveryMode::TolerateCorruptedTailRecords) + && recover_mode != static_cast<UInt64>(WALRecoveryMode::AbsoluteConsistency) + && recover_mode != static_cast<UInt64>(WALRecoveryMode::PointInTimeRecovery) + && recover_mode != static_cast<UInt64>(WALRecoveryMode::SkipAnyCorruptedRecords))) + { + throw Exception(fmt::format("Unknown recover mode [num={}]", recover_mode)); + } + wal_recover_mode = recover_mode; + } + + WALRecoveryMode getRecoverMode() + { + return static_cast<WALRecoveryMode>(wal_recover_mode.get()); + } }; constexpr static const char * wal_folder_prefix = "/wal"; static std::pair<WALStorePtr, WALStoreReaderPtr> create( - String storage_name, + String storage_name_, FileProviderPtr & provider, PSDiskDelegatorPtr & delegator, WALStore::Config config); + WALStoreReaderPtr createReaderForFiles(const String & identifier, const LogFilenameSet & log_filenames, const ReadLimiterPtr & read_limiter); + void apply(PageEntriesEdit & edit, const PageVersion & version, const WriteLimiterPtr & write_limiter = nullptr); void apply(const PageEntriesEdit & edit, const WriteLimiterPtr & write_limiter = nullptr); struct FilesSnapshot { Format::LogNumberType current_writting_log_num; + // The log files to generate the snapshot from. Sorted by <log number, log level>. + // If the WAL log files have not been initialized, this is an empty set. LogFilenameSet persisted_log_files; - bool needSave(const size_t & max_size) const + // Note that persisted_log_files must not be empty when needSave() == true, + // because we use the largest log num in persisted_log_files as the new + // file name.
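+ // Worked example (hypothetical file names): with persisted_log_files =
+ // {log_1_0, log_2_0, log_5_0} and max_size = 2, needSave() returns true and
+ // saveSnapshot() writes the collapsed edit to {5, 1} (i.e. "log_5_1") before
+ // deleting the three compacted files.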
+ bool needSave(const size_t max_size) const { - // TODO: Make it configurable and check the reasonable of this number return persisted_log_files.size() > max_size; } }; @@ -121,6 +110,8 @@ class WALStore PageEntriesEdit && directory_snap, const WriteLimiterPtr & write_limiter = nullptr); + const String & name() { return storage_name; } + private: WALStore( String storage_name, @@ -134,6 +125,8 @@ class WALStore const std::pair & new_log_lvl, bool manual_flush); +private: + const String storage_name; PSDiskDelegatorPtr delegator; FileProviderPtr provider; mutable std::mutex log_file_mutex; diff --git a/dbms/src/Storages/Page/V3/tests/entries_helper.h b/dbms/src/Storages/Page/V3/tests/entries_helper.h index cce59919ec8..19e42755dae 100644 --- a/dbms/src/Storages/Page/V3/tests/entries_helper.h +++ b/dbms/src/Storages/Page/V3/tests/entries_helper.h @@ -25,6 +25,7 @@ #include #include #include +#include namespace DB { @@ -221,7 +222,9 @@ inline ::testing::AssertionResult getEntryNotExist( String error; try { - auto id_entry = dir->get(page_id, snap); + auto id_entry = dir->getOrNull(page_id, snap); + if (!id_entry.second.isValid()) + return ::testing::AssertionSuccess(); error = fmt::format( "Expect entry [id={}] from {} with snap{} not exist, but got <{}.{}, {}>", page_id_expr, diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp index 151b3b50657..6e2b0efa1ea 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_directory.cpp @@ -50,12 +50,17 @@ class PageDirectoryTest : public DB::base::TiFlashStorageTestBasic { auto path = getTemporaryPath(); dropDataOnDisk(path); + dir = restoreFromDisk(); + } + static PageDirectoryPtr restoreFromDisk() + { + auto path = getTemporaryPath(); auto ctx = DB::tests::TiFlashTestEnv::getContext(); FileProviderPtr provider = ctx.getFileProvider(); PSDiskDelegatorPtr delegator = std::make_shared(path); PageDirectoryFactory factory; - dir = factory.create("PageDirectoryTest", provider, delegator, WALStore::Config()); + return factory.create("PageDirectoryTest", provider, delegator, WALStore::Config()); } protected: @@ -1286,6 +1291,60 @@ class PageDirectoryGCTest : public PageDirectoryTest dir->apply(std::move(edit)); \ } +TEST_F(PageDirectoryGCTest, ManyEditsAndDumpSnapshot) +{ + PageId page_id0 = 50; + PageId page_id1 = 51; + PageId page_id2 = 52; + PageId page_id3 = 53; + + PageEntryV3 last_entry_for_0; + constexpr size_t num_edits_test = 50000; + for (size_t i = 0; i < num_edits_test; ++i) + { + { + INSERT_ENTRY(page_id0, i); + last_entry_for_0 = entry_vi; + } + { + INSERT_ENTRY(page_id1, i); + } + } + INSERT_DELETE(page_id1); + EXPECT_TRUE(dir->tryDumpSnapshot()); + dir.reset(); + + dir = restoreFromDisk(); + { + auto snap = dir->createSnapshot(); + ASSERT_SAME_ENTRY(dir->get(page_id0, snap).second, last_entry_for_0); + EXPECT_ENTRY_NOT_EXIST(dir, page_id1, snap); + } + + PageEntryV3 last_entry_for_2; + for (size_t i = 0; i < num_edits_test; ++i) + { + { + INSERT_ENTRY(page_id2, i); + last_entry_for_2 = entry_vi; + } + { + INSERT_ENTRY(page_id3, i); + } + } + INSERT_DELETE(page_id3); + EXPECT_TRUE(dir->tryDumpSnapshot()); + + dir = restoreFromDisk(); + { + auto snap = dir->createSnapshot(); + ASSERT_SAME_ENTRY(dir->get(page_id0, snap).second, last_entry_for_0); + EXPECT_ENTRY_NOT_EXIST(dir, page_id1, snap); + ASSERT_SAME_ENTRY(dir->get(page_id2, snap).second, last_entry_for_2); + EXPECT_ENTRY_NOT_EXIST(dir, page_id3, 
snap); + } +} + TEST_F(PageDirectoryGCTest, GCPushForward) try { @@ -1931,7 +1990,6 @@ try auto s0 = dir->createSnapshot(); auto edit = dir->dumpSnapshotToEdit(s0); - edit.size(); auto restore_from_edit = [](const PageEntriesEdit & edit) { auto deseri_edit = DB::PS::V3::ser::deserializeFrom(DB::PS::V3::ser::serializeTo(edit)); auto ctx = DB::tests::TiFlashTestEnv::getContext(); @@ -2214,6 +2272,36 @@ try } CATCH +TEST_F(PageDirectoryGCTest, CleanAfterDecreaseRef) +try +{ + PageEntryV3 entry_50_1{.file_id = 1, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + PageEntryV3 entry_50_2{.file_id = 2, .size = 7890, .tag = 0, .offset = 0x123, .checksum = 0x4567}; + + auto restore_from_edit = [](const PageEntriesEdit & edit) { + auto ctx = ::DB::tests::TiFlashTestEnv::getContext(); + auto provider = ctx.getFileProvider(); + auto path = getTemporaryPath(); + PSDiskDelegatorPtr delegator = std::make_shared(path); + PageDirectoryFactory factory; + auto d = factory.createFromEdit(getCurrentTestName(), provider, delegator, edit); + return d; + }; + + { + PageEntriesEdit edit; + edit.put(50, entry_50_1); + edit.put(50, entry_50_2); + edit.ref(51, 50); + edit.del(50); + edit.del(51); + auto restored_dir = restore_from_edit(edit); + auto page_ids = restored_dir->getAllPageIds(); + ASSERT_EQ(page_ids.size(), 0); + } +} +CATCH + #undef INSERT_ENTRY_TO #undef INSERT_ENTRY #undef INSERT_ENTRY_ACQ_SNAP diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp index 498fd4124e5..f7ba33c46c8 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage.cpp @@ -1380,5 +1380,67 @@ try } CATCH +TEST_F(PageStorageTest, CleanAfterDecreaseRef) +try +{ + // Make it in log_1_0 + { + WriteBatch batch; + batch.putExternal(1, 0); + page_storage->write(std::move(batch)); + } + + page_storage = reopenWithConfig(config); + + // Make it in log_2_0 + { + WriteBatch batch; + batch.putExternal(1, 0); + batch.putRefPage(2, 1); + batch.delPage(1); + batch.delPage(2); + page_storage->write(std::move(batch)); + } + page_storage = reopenWithConfig(config); + + auto alive_ids = page_storage->getAliveExternalPageIds(TEST_NAMESPACE_ID); + ASSERT_EQ(alive_ids.size(), 0); +} +CATCH + +TEST_F(PageStorageTest, TruncateBlobFile) +try +{ + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + + { + WriteBatch batch; + batch.putPage(1, 0, std::make_shared(c_buff, buf_sz), buf_sz, {}); + page_storage->write(std::move(batch)); + } + + auto blob_file = Poco::File(getTemporaryPath() + "/blobfile_1"); + + page_storage = reopenWithConfig(config); + EXPECT_GT(blob_file.getSize(), 0); + + { + WriteBatch batch; + batch.delPage(1); + page_storage->write(std::move(batch)); + } + page_storage = reopenWithConfig(config); + page_storage->gc(/*not_skip*/ false, nullptr, nullptr); + EXPECT_EQ(blob_file.getSize(), 0); +} +CATCH + + } // namespace PS::V3::tests } // namespace DB diff --git a/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp b/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp index 98d84989dd9..5517539b898 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_page_storage_mix_mode.cpp @@ -44,17 +44,16 @@ class PageStorageMixedTest : public DB::base::TiFlashStorageTestBasic storage_path_pool_v3 = std::make_unique(Strings{path}, Strings{path}, 
Strings{}, std::make_shared(0, paths, caps, Strings{}, caps), global_context.getFileProvider(), true); global_context.setPageStorageRunMode(PageStorageRunMode::MIX_MODE); - if (!global_context.getGlobalStoragePool()) - global_context.initializeGlobalStoragePoolIfNeed(*storage_path_pool_v3); } void SetUp() override { + auto & global_context = DB::tests::TiFlashTestEnv::getGlobalContext(); + global_context.setPageStorageRunMode(PageStorageRunMode::MIX_MODE); TiFlashStorageTestBasic::SetUp(); const auto & path = getTemporaryPath(); createIfNotExist(path); - auto & global_context = DB::tests::TiFlashTestEnv::getGlobalContext(); std::vector caps = {}; Strings paths = {path}; @@ -76,7 +75,7 @@ class PageStorageMixedTest : public DB::base::TiFlashStorageTestBasic PageStorageRunMode reloadMixedStoragePool() { - DB::tests::TiFlashTestEnv::getContext().setPageStorageRunMode(PageStorageRunMode::MIX_MODE); + db_context->setPageStorageRunMode(PageStorageRunMode::MIX_MODE); PageStorageRunMode run_mode = storage_pool_mix->restore(); page_writer_mix = storage_pool_mix->logWriter(); page_reader_mix = storage_pool_mix->logReader(); @@ -85,7 +84,7 @@ class PageStorageMixedTest : public DB::base::TiFlashStorageTestBasic void reloadV2StoragePool() { - DB::tests::TiFlashTestEnv::getContext().setPageStorageRunMode(PageStorageRunMode::ONLY_V2); + db_context->setPageStorageRunMode(PageStorageRunMode::ONLY_V2); storage_pool_v2->restore(); page_writer_v2 = storage_pool_v2->logWriter(); page_reader_v2 = storage_pool_v2->logReader(); @@ -491,6 +490,318 @@ try } CATCH +// v2 put 1, v2 ref 2->1, get snapshot s1, v3 del 1, read s1 +TEST_F(PageStorageMixedTest, RefWithSnapshot) +try +{ + const size_t buf_sz = 1024; + char c_buff[buf_sz] = {0}; + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, 0, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + ASSERT_EQ(page_reader_mix->getNormalPageId(2), 1); + } + + auto snapshot_mix_mode = page_reader_mix->getSnapshot("ReadWithSnapshotAfterDelOrigin"); + { + auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_mix_mode); + const auto & page1 = page_reader_mix_with_snap.read(1); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 1); + } + + { + WriteBatch batch; + batch.delPage(1); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_mix_mode); + const auto & page1 = page_reader_mix_with_snap.read(1); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 1); + } +} +CATCH + +// v2 put 1, v2 ref 2->1, get snapshot s1, v3 del 1, v3 del 2, read s1 +TEST_F(PageStorageMixedTest, RefWithDelSnapshot) +try +{ + const size_t buf_sz = 1024; + char c_buff[buf_sz] = {0}; + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, 0, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + ASSERT_EQ(page_reader_mix->getNormalPageId(2), 1); + } + + auto snapshot_mix_mode = page_reader_mix->getSnapshot("ReadWithSnapshotAfterDelOrigin"); + { + auto page_reader_mix_with_snap = 
storage_pool_mix->newLogReader(nullptr, snapshot_mix_mode); + const auto & page1 = page_reader_mix_with_snap.read(1); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 1); + } + + { + WriteBatch batch; + batch.delPage(1); + batch.delPage(2); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_mix_mode); + const auto & page1 = page_reader_mix_with_snap.read(1); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 1); + } +} +CATCH + +// v2 put 1, v2 ref 2->1, v3 del 1, get snapshot s1, v3 del 2, use s1 read 2 +TEST_F(PageStorageMixedTest, RefWithDelSnapshot2) +try +{ + const size_t buf_sz = 1024; + char c_buff[buf_sz] = {0}; + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, 0, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + ASSERT_EQ(page_reader_mix->getNormalPageId(2), 1); + } + + { + WriteBatch batch; + batch.delPage(1); + page_writer_mix->write(std::move(batch), nullptr); + } + + auto snapshot_mix_mode = page_reader_mix->getSnapshot("ReadWithSnapshotAfterDelOrigin"); + { + auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_mix_mode); + const auto & page1 = page_reader_mix_with_snap.read(2); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 2); + } + + { + WriteBatch batch; + batch.delPage(2); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_mix_mode); + const auto & page1 = page_reader_mix_with_snap.read(2); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 2); + } +} +CATCH + +// v2 put 1, v2 del 1, v3 put 2, v3 del 2 +TEST_F(PageStorageMixedTest, GetMaxId) +try +{ + const size_t buf_sz = 1024; + char c_buff[buf_sz] = {0}; + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, 0, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.delPage(1); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(2, 0, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + ASSERT_EQ(storage_pool_mix->newLogPageId(), 3); + } + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(3, 0, buff, buf_sz); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.delPage(3); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + ASSERT_EQ(storage_pool_mix->newLogPageId(), 4); + } +} +CATCH + + +TEST_F(PageStorageMixedTest, ReuseV2ID) +try +{ + const size_t buf_sz = 1024; + char c_buff[buf_sz] = {0}; + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared(c_buff, sizeof(c_buff)); + batch.putPage(1, 0, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.delPage(1); + page_writer_v2->write(std::move(batch), nullptr); + } + + { + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::ONLY_V3); + ASSERT_EQ(storage_pool_mix->newLogPageId(), 1); + } + 
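+ // With every v2 page deleted, restore() reports ONLY_V3 and the id allocator
+ // starts over, so id 1 (originally allocated through v2) is handed out again
+ // and is safely rewritten below.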
+ { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared<ReadBufferFromMemory>(c_buff, sizeof(c_buff)); + batch.putPage(1, 0, buff, buf_sz); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + WriteBatch batch; + batch.delPage(1); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::ONLY_V3); + ASSERT_EQ(storage_pool_mix->newLogPageId(), 2); + } +} +CATCH + +// v2 put 1, v3 ref 2->1, reload, check max id, get snapshot s1, v3 del 1, get snapshot s2, v3 del 2, get snapshot s3, check snapshots +TEST_F(PageStorageMixedTest, V3RefV2WithSnapshot) +try +{ + const size_t buf_sz = 1024; + char c_buff[buf_sz] = {0}; + + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared<ReadBufferFromMemory>(c_buff, sizeof(c_buff)); + batch.putPage(1, 0, buff, buf_sz); + page_writer_v2->write(std::move(batch), nullptr); + } + + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + + { + WriteBatch batch; + batch.putRefPage(2, 1); + page_writer_mix->write(std::move(batch), nullptr); + } + + { + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + ASSERT_EQ(page_reader_mix->getNormalPageId(2), 1); + ASSERT_EQ(storage_pool_mix->newLogPageId(), 3); + } + + auto snapshot_before_del = page_reader_mix->getSnapshot("ReadWithSnapshotBeforeDelOrigin"); + + { + WriteBatch batch; + batch.delPage(1); + page_writer_mix->write(std::move(batch), nullptr); + } + + auto snapshot_after_del_origin = page_reader_mix->getSnapshot("ReadWithSnapshotAfterDelOrigin"); + + { + auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_before_del); + const auto & page1 = page_reader_mix_with_snap.read(1); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 1); + } + + { + WriteBatch batch; + batch.delPage(2); + page_writer_mix->write(std::move(batch), nullptr); + } + + auto snapshot_after_del_all = page_reader_mix->getSnapshot("ReadWithSnapshotAfterDelAll"); + + { + auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_after_del_origin); + const auto & page1 = page_reader_mix_with_snap.read(2); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 2); + } + + { + auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_after_del_origin); + const auto & page1 = page_reader_mix_with_snap.read(2); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 2); + } + + { + auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_after_del_all); + ASSERT_ANY_THROW(page_reader_mix_with_snap.read(2)); + } +} +CATCH TEST_F(PageStorageMixedTest, MockDTIngest) try @@ -568,6 +879,82 @@ try } CATCH +TEST_F(PageStorageMixedTest, ReadWithSnapshotAfterMergeDelta) +try +{ + UInt64 tag = 0; + const size_t buf_sz = 1024; + char c_buff[buf_sz]; + for (size_t i = 0; i < buf_sz; ++i) + { + c_buff[i] = i % 0xff; + } + { + WriteBatch batch; + ReadBufferPtr buff = std::make_shared<ReadBufferFromMemory>(c_buff, sizeof(c_buff)); + batch.putPage(1, tag, buff, buf_sz); + buff = std::make_shared<ReadBufferFromMemory>(c_buff, sizeof(c_buff)); + batch.putPage(2, tag, buff, buf_sz, {20, 120, 400, 200, 15, 75, 170, 24}); + page_writer_v2->write(std::move(batch), nullptr); + } + ASSERT_EQ(reloadMixedStoragePool(), PageStorageRunMode::MIX_MODE); + const size_t buf_sz2 = 2048; + char c_buff2[buf_sz2] = {0}; + { + WriteBatch batch; + ReadBufferPtr buff2 = std::make_shared<ReadBufferFromMemory>(c_buff2, sizeof(c_buff2)); + batch.putPage(3, tag, buff2, buf_sz2); + page_writer_mix->write(std::move(batch), nullptr); + } + // Thread A creates a snapshot for read + auto snapshot_mix_before_merge_delta =
page_reader_mix->getSnapshot("ReadWithSnapshotAfterMergeDelta"); + { + auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_mix_before_merge_delta); + const auto & page1 = page_reader_mix_with_snap.read(1); + const auto & page2 = page_reader_mix_with_snap.read(2); + const auto & page3 = page_reader_mix_with_snap.read(3); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 1); + ASSERT_PAGE_EQ(c_buff, buf_sz, page2, 2); + ASSERT_PAGE_EQ(c_buff2, buf_sz2, page3, 3); + } + { + auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, true, "ReadWithSnapshotAfterMergeDelta"); + const auto & page1 = page_reader_mix_with_snap.read(1); + const auto & page2 = page_reader_mix_with_snap.read(2); + const auto & page3 = page_reader_mix_with_snap.read(3); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 1); + ASSERT_PAGE_EQ(c_buff, buf_sz, page2, 2); + ASSERT_PAGE_EQ(c_buff2, buf_sz2, page3, 3); + } + // Thread B applies merge delta, creates page 4, and deletes the original pages 1 and 3 + { + WriteBatch batch; + ReadBufferPtr buff2 = std::make_shared<ReadBufferFromMemory>(c_buff2, sizeof(c_buff2)); + batch.putPage(4, tag, buff2, buf_sz2); + batch.delPage(1); + batch.delPage(3); + page_writer_mix->write(std::move(batch), nullptr); + } + // Thread A continues to read 1, 3 + { + auto page_reader_mix_with_snap = storage_pool_mix->newLogReader(nullptr, snapshot_mix_before_merge_delta); + // reading 1, 3 with the snapshot should succeed + const auto & page1 = page_reader_mix_with_snap.read(1); + const auto & page3 = page_reader_mix_with_snap.read(3); + ASSERT_PAGE_EQ(c_buff, buf_sz, page1, 1); + ASSERT_PAGE_EQ(c_buff2, buf_sz2, page3, 3); + ASSERT_THROW(page_reader_mix_with_snap.read(4), DB::Exception); + } + { + // Revert v3 + WriteBatch batch; + batch.delPage(3); + batch.delPage(4); + page_writer_mix->write(std::move(batch), nullptr); + } +} +CATCH + } // namespace PS::V3::tests } // namespace DB diff --git a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp index 89c4e54f7e7..6d47adabbc5 100644 --- a/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp +++ b/dbms/src/Storages/Page/V3/tests/gtest_wal_store.cpp @@ -596,11 +596,12 @@ TEST_P(WALStoreTest, ManyEdits) try { auto ctx = DB::tests::TiFlashTestEnv::getContext(); - auto provider = ctx.getFileProvider(); + auto enc_key_manager = std::make_shared<MockKeyManager>(/*encryption_enabled_=*/true); + auto enc_provider = std::make_shared<FileProvider>(enc_key_manager, true); auto path = getTemporaryPath(); // Stage 1.
empty - auto [wal, reader] = WALStore::create(getCurrentTestName(), provider, delegator, config); + auto [wal, reader] = WALStore::create(getCurrentTestName(), enc_provider, delegator, config); ASSERT_NE(wal, nullptr); std::mt19937 rd; @@ -633,7 +634,7 @@ try size_t num_edits_read = 0; size_t num_pages_read = 0; - std::tie(wal, reader) = WALStore::create(getCurrentTestName(), provider, delegator, config); + std::tie(wal, reader) = WALStore::create(getCurrentTestName(), enc_provider, delegator, config); while (reader->remained()) { auto [ok, edit] = reader->next(); @@ -653,8 +654,7 @@ try LOG_FMT_INFO(&Poco::Logger::get("WALStoreTest"), "Done test for {} persist pages in {} edits", num_pages_read, num_edits_test); // Test for save snapshot (with encryption) - auto enc_key_manager = std::make_shared(/*encryption_enabled_=*/true); - auto enc_provider = std::make_shared(enc_key_manager, true); + LogFilenameSet persisted_log_files = WALStoreReader::listAllFiles(delegator, log); WALStore::FilesSnapshot file_snap{.current_writting_log_num = 100, // just a fake value .persisted_log_files = persisted_log_files}; diff --git a/dbms/src/Storages/Page/WALRecoveryMode.h b/dbms/src/Storages/Page/WALRecoveryMode.h new file mode 100644 index 00000000000..740c9ed37a5 --- /dev/null +++ b/dbms/src/Storages/Page/WALRecoveryMode.h @@ -0,0 +1,61 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include + +namespace DB +{ + +enum class WALRecoveryMode : UInt8 +{ + // Original levelDB recovery + // + // We tolerate the last record in any log to be incomplete due to a crash + // while writing it. Zeroed bytes from preallocation are also tolerated in the + // trailing data of any log. + // + // Use case: Applications for which updates, once applied, must not be rolled + // back even after a crash-recovery. In this recovery mode, RocksDB guarantees + // this as long as `WritableFile::Append()` writes are durable. In case the + // user needs the guarantee in more situations (e.g., when + // `WritableFile::Append()` writes to page cache, but the user desires this + // guarantee in face of power-loss crash-recovery), RocksDB offers various + // mechanisms to additionally invoke `WritableFile::Sync()` in order to + // strengthen the guarantee. + // + // This differs from `kPointInTimeRecovery` in that, in case a corruption is + // detected during recovery, this mode will refuse to open the DB. Whereas, + // `kPointInTimeRecovery` will stop recovery just before the corruption since + // that is a valid point-in-time to which to recover. 
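+ //
+ // In TiFlash the mode is selected via WALStore::Config::setRecoverMode()
+ // (see WALStore.h in this diff), which rejects values outside this enum.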
+ TolerateCorruptedTailRecords = 0x00, + // Recover from clean shutdown + // We don't expect to find any corruption in the WAL + // Use case : This is ideal for unit tests and rare applications that + // can require high consistency guarantee + AbsoluteConsistency = 0x01, + // Recover to point-in-time consistency (default) + // We stop the WAL playback on discovering WAL inconsistency + // Use case : Ideal for systems that have disk controller cache like + // hard disk, SSD without super capacitor that store related data + PointInTimeRecovery = 0x02, + // Recovery after a disaster + // We ignore any corruption in the WAL and try to salvage as much data as + // possible + // Use case : Ideal for last ditch effort to recover data or systems that + // operate with low grade unrelated data + SkipAnyCorruptedRecords = 0x03, +}; + +} // namespace DB diff --git a/dbms/src/Storages/StorageDeltaMerge.cpp b/dbms/src/Storages/StorageDeltaMerge.cpp index 38a947a027f..fc73e28e23a 100644 --- a/dbms/src/Storages/StorageDeltaMerge.cpp +++ b/dbms/src/Storages/StorageDeltaMerge.cpp @@ -44,10 +44,10 @@ #include #include #include -#include #include #include #include +#include #include #include #include diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp index 47907b3e94e..4ca3e79a7ab 100644 --- a/dbms/src/Storages/StorageJoin.cpp +++ b/dbms/src/Storages/StorageJoin.cpp @@ -52,7 +52,7 @@ StorageJoin::StorageJoin( /// NOTE StorageJoin doesn't use join_use_nulls setting. join = std::make_shared(key_names, key_names, false /* use_nulls */, SizeLimits(), kind, strictness, /*req_id=*/""); - join->setSampleBlock(getSampleBlock().sortColumns()); + join->init(getSampleBlock().sortColumns()); restore(); } @@ -87,7 +87,7 @@ void registerStorageJoin(StorageFactory & factory) "Storage Join requires at least 3 parameters: Join(ANY|ALL, LEFT|INNER, keys...).", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - const ASTIdentifier * strictness_id = typeid_cast(engine_args[0].get()); + const auto * strictness_id = typeid_cast(engine_args[0].get()); if (!strictness_id) throw Exception("First parameter of storage Join must be ANY or ALL (without quotes).", ErrorCodes::BAD_ARGUMENTS); @@ -100,7 +100,7 @@ void registerStorageJoin(StorageFactory & factory) else throw Exception("First parameter of storage Join must be ANY or ALL (without quotes).", ErrorCodes::BAD_ARGUMENTS); - const ASTIdentifier * kind_id = typeid_cast(engine_args[1].get()); + const auto * kind_id = typeid_cast(engine_args[1].get()); if (!kind_id) throw Exception("Second parameter of storage Join must be LEFT or INNER (without quotes).", ErrorCodes::BAD_ARGUMENTS); @@ -121,7 +121,7 @@ void registerStorageJoin(StorageFactory & factory) key_names.reserve(engine_args.size() - 2); for (size_t i = 2, size = engine_args.size(); i < size; ++i) { - const ASTIdentifier * key = typeid_cast(engine_args[i].get()); + const auto * key = typeid_cast(engine_args[i].get()); if (!key) throw Exception("Parameter №" + toString(i + 1) + " of storage Join don't look like column name.", ErrorCodes::BAD_ARGUMENTS); diff --git a/dbms/src/Storages/System/StorageSystemDTSegments.cpp b/dbms/src/Storages/System/StorageSystemDTSegments.cpp index 37f2dbe1b99..f84a19a005c 100644 --- a/dbms/src/Storages/System/StorageSystemDTSegments.cpp +++ b/dbms/src/Storages/System/StorageSystemDTSegments.cpp @@ -23,12 +23,13 @@ #include #include #include -#include #include +#include namespace DB { -StorageSystemDTSegments::StorageSystemDTSegments(const std::string & name_) : 
name(name_) +StorageSystemDTSegments::StorageSystemDTSegments(const std::string & name_) + : name(name_) { setColumns(ColumnsDescription({ {"database", std::make_shared()}, @@ -61,11 +62,11 @@ StorageSystemDTSegments::StorageSystemDTSegments(const std::string & name_) : na } BlockInputStreams StorageSystemDTSegments::read(const Names & column_names, - const SelectQueryInfo &, - const Context & context, - QueryProcessingStage::Enum & processed_stage, - const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const SelectQueryInfo &, + const Context & context, + QueryProcessingStage::Enum & processed_stage, + const size_t /*max_block_size*/, + const unsigned /*num_streams*/) { check(column_names); processed_stage = QueryProcessingStage::FetchColumns; @@ -78,19 +79,19 @@ BlockInputStreams StorageSystemDTSegments::read(const Names & column_names, for (const auto & d : databases) { String database_name = d.first; - auto & database = d.second; + const auto & database = d.second; const DatabaseTiFlash * db_tiflash = typeid_cast(database.get()); auto it = database->getIterator(context); for (; it->isValid(); it->next()) { - auto & table_name = it->name(); + const auto & table_name = it->name(); auto & storage = it->table(); if (storage->getName() != MutableSupport::delta_tree_storage_name) continue; auto dm_storage = std::dynamic_pointer_cast(storage); - auto & table_info = dm_storage->getTableInfo(); + const auto & table_info = dm_storage->getTableInfo(); auto table_id = table_info.id; auto segment_stats = dm_storage->getStore()->getSegmentStats(); for (auto & stat : segment_stats) diff --git a/dbms/src/Storages/System/StorageSystemDTTables.cpp b/dbms/src/Storages/System/StorageSystemDTTables.cpp index b3f9cf5b29e..b700cfb5324 100644 --- a/dbms/src/Storages/System/StorageSystemDTTables.cpp +++ b/dbms/src/Storages/System/StorageSystemDTTables.cpp @@ -23,7 +23,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/dbms/src/Storages/System/StorageSystemDatabases.cpp b/dbms/src/Storages/System/StorageSystemDatabases.cpp index 6c665fec900..5ba249a0aba 100644 --- a/dbms/src/Storages/System/StorageSystemDatabases.cpp +++ b/dbms/src/Storages/System/StorageSystemDatabases.cpp @@ -21,16 +21,17 @@ #include #include #include -#include #include #include +#include namespace DB { -StorageSystemDatabases::StorageSystemDatabases(const std::string & name_) : name(name_) +StorageSystemDatabases::StorageSystemDatabases(const std::string & name_) + : name(name_) { setColumns(ColumnsDescription({ {"name", std::make_shared()}, @@ -45,11 +46,11 @@ StorageSystemDatabases::StorageSystemDatabases(const std::string & name_) : name BlockInputStreams StorageSystemDatabases::read(const Names & column_names, - const SelectQueryInfo &, - const Context & context, - QueryProcessingStage::Enum & processed_stage, - const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const SelectQueryInfo &, + const Context & context, + QueryProcessingStage::Enum & processed_stage, + const size_t /*max_block_size*/, + const unsigned /*num_streams*/) { check(column_names); processed_stage = QueryProcessingStage::FetchColumns; @@ -79,7 +80,7 @@ BlockInputStreams StorageSystemDatabases::read(const Names & column_names, res_columns[j++]->insert(Int64(database_id)); res_columns[j++]->insert(database.second->getEngineName()); - res_columns[j++]->insert((UInt64)tombstone); + res_columns[j++]->insert(static_cast(tombstone)); res_columns[j++]->insert(database.second->getDataPath()); 
res_columns[j++]->insert(database.second->getMetadataPath()); } diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index dd24d426e80..4abf0044c87 100644 --- a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -27,10 +27,10 @@ #include #include #include -#include #include #include #include +#include namespace DB { @@ -71,7 +71,8 @@ NameAndTypePair tryGetColumn(const ColumnsWithTypeAndName & columns, const Strin struct VirtualColumnsProcessor { explicit VirtualColumnsProcessor(const ColumnsWithTypeAndName & all_virtual_columns_) - : all_virtual_columns(all_virtual_columns_), virtual_columns_mask(all_virtual_columns_.size(), 0) + : all_virtual_columns(all_virtual_columns_) + , virtual_columns_mask(all_virtual_columns_.size(), 0) {} /// Separates real and virtual column names, returns real ones @@ -131,7 +132,8 @@ struct VirtualColumnsProcessor } // namespace -StorageSystemTables::StorageSystemTables(const std::string & name_) : name(name_) +StorageSystemTables::StorageSystemTables(const std::string & name_) + : name(name_) { setColumns(ColumnsDescription({ {"database", std::make_shared()}, @@ -147,7 +149,8 @@ StorageSystemTables::StorageSystemTables(const std::string & name_) : name(name_ })); virtual_columns = {{std::make_shared(), "metadata_modification_time"}, - {std::make_shared(), "create_table_query"}, {std::make_shared(), "engine_full"}}; + {std::make_shared(), "create_table_query"}, + {std::make_shared(), "engine_full"}}; } @@ -164,11 +167,11 @@ static ColumnPtr getFilteredDatabases(const ASTPtr & query, const Context & cont BlockInputStreams StorageSystemTables::read(const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum & processed_stage, - const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum & processed_stage, + const size_t /*max_block_size*/, + const unsigned /*num_streams*/) { processed_stage = QueryProcessingStage::FetchColumns; @@ -226,7 +229,7 @@ BlockInputStreams StorageSystemTables::read(const Names & column_names, { if (db_tiflash) tidb_database_name = mapper.displayDatabaseName(db_tiflash->getDatabaseInfo()); - auto & table_info = managed_storage->getTableInfo(); + const auto & table_info = managed_storage->getTableInfo(); tidb_table_name = mapper.displayTableName(table_info); table_id = table_info.id; tombstone = managed_storage->getTombstone(); @@ -279,7 +282,7 @@ BlockInputStreams StorageSystemTables::read(const Names & column_names, { Tables external_tables = context.getSessionContext().getExternalTables(); - for (auto table : external_tables) + for (const auto & table : external_tables) { size_t j = 0; res_columns[j++]->insertDefault(); diff --git a/dbms/src/Storages/Transaction/ApplySnapshot.cpp b/dbms/src/Storages/Transaction/ApplySnapshot.cpp index 6106dda6f4b..2df95fead93 100644 --- a/dbms/src/Storages/Transaction/ApplySnapshot.cpp +++ b/dbms/src/Storages/Transaction/ApplySnapshot.cpp @@ -28,8 +28,8 @@ #include #include #include -#include #include +#include #include @@ -261,75 +261,6 @@ void KVStore::onSnapshot(const RegionPtrWrap & new_region_wrap, RegionPtr old_re extern RegionPtrWithBlock::CachePtr GenRegionPreDecodeBlockData(const RegionPtr &, Context &); -/// `preHandleSnapshotToBlock` read data from SSTFiles and predoced the data as a block -RegionPreDecodeBlockDataPtr 
KVStore::preHandleSnapshotToBlock( - RegionPtr new_region, - const SSTViewVec snaps, - uint64_t /*index*/, - uint64_t /*term*/, - TMTContext & tmt) -{ - RegionPreDecodeBlockDataPtr cache{nullptr}; - { - decltype(bg_gc_region_data)::value_type tmp; - std::lock_guard lock(bg_gc_region_data_mutex); - if (!bg_gc_region_data.empty()) - { - tmp.swap(bg_gc_region_data.back()); - bg_gc_region_data.pop_back(); - } - } - - Stopwatch watch; - auto & ctx = tmt.getContext(); - SCOPE_EXIT({ GET_METRIC(tiflash_raft_command_duration_seconds, type_apply_snapshot_predecode).Observe(watch.elapsedSeconds()); }); - - { - LOG_FMT_INFO(log, "Pre-handle snapshot {} with {} TiKV sst files", new_region->toString(false), snaps.len); - // Iterator over all SST files and insert key-values into `new_region` - for (UInt64 i = 0; i < snaps.len; ++i) - { - const auto & snapshot = snaps.views[i]; - auto sst_reader = SSTReader{proxy_helper, snapshot}; - - uint64_t kv_size = 0; - while (sst_reader.remained()) - { - auto key = sst_reader.key(); - auto value = sst_reader.value(); - new_region->insert(snaps.views[i].type, TiKVKey(key.data, key.len), TiKVValue(value.data, value.len)); - ++kv_size; - sst_reader.next(); - } - - LOG_FMT_INFO(log, - "Decode {} got [cf: {}, kv size: {}]", - std::string_view(snapshot.path.data, snapshot.path.len), - CFToName(snapshot.type), - kv_size); - // Note that number of keys in different cf will be aggregated into one metrics - GET_METRIC(tiflash_raft_process_keys, type_apply_snapshot).Increment(kv_size); - } - { - LOG_FMT_INFO(log, "Start to pre-decode {} into block", new_region->toString()); - auto block_cache = GenRegionPreDecodeBlockData(new_region, ctx); - if (block_cache) - { - std::stringstream ss; - block_cache->toString(ss); - LOG_FMT_INFO(log, "Got pre-decode block cache {}", ss.str()); - } - else - LOG_FMT_INFO(log, "Got empty pre-decode block cache"); - - cache = std::move(block_cache); - } - LOG_FMT_INFO(log, "Pre-handle snapshot {} cost {}ms", new_region->toString(false), watch.elapsedMilliseconds()); - } - - return cache; -} - std::vector KVStore::preHandleSnapshotToFiles( RegionPtr new_region, const SSTViewVec snaps, @@ -473,8 +404,8 @@ void KVStore::handlePreApplySnapshot(const RegionPtrWrap & new_region, TMTContex LOG_FMT_INFO(log, "{} apply snapshot success", new_region->toString(false)); } -template void KVStore::handlePreApplySnapshot(const RegionPtrWithBlock &, TMTContext &); template void KVStore::handlePreApplySnapshot(const RegionPtrWithSnapshotFiles &, TMTContext &); + template void KVStore::checkAndApplySnapshot(const RegionPtrWithBlock &, TMTContext &); template void KVStore::checkAndApplySnapshot(const RegionPtrWithSnapshotFiles &, TMTContext &); template void KVStore::onSnapshot(const RegionPtrWithBlock &, RegionPtr, UInt64, TMTContext &); @@ -521,10 +452,7 @@ void KVStore::handleApplySnapshot( TMTContext & tmt) { auto new_region = genRegionPtr(std::move(region), peer_id, index, term); - if (snapshot_apply_method == TiDB::SnapshotApplyMethod::Block) - handlePreApplySnapshot(RegionPtrWithBlock{new_region, preHandleSnapshotToBlock(new_region, snaps, index, term, tmt)}, tmt); - else - handlePreApplySnapshot(RegionPtrWithSnapshotFiles{new_region, preHandleSnapshotToFiles(new_region, snaps, index, term, tmt)}, tmt); + handlePreApplySnapshot(RegionPtrWithSnapshotFiles{new_region, preHandleSnapshotToFiles(new_region, snaps, index, term, tmt)}, tmt); } EngineStoreApplyRes KVStore::handleIngestSST(UInt64 region_id, const SSTViewVec snaps, UInt64 index, UInt64 term, 
TMTContext & tmt) @@ -543,15 +471,12 @@ EngineStoreApplyRes KVStore::handleIngestSST(UInt64 region_id, const SSTViewVec fiu_do_on(FailPoints::force_set_sst_decode_rand, { static int num_call = 0; - switch (num_call++ % 3) + switch (num_call++ % 2) { case 0: - snapshot_apply_method = TiDB::SnapshotApplyMethod::Block; - break; - case 1: snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Directory; break; - case 2: + case 1: snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Single; break; default: @@ -576,15 +501,6 @@ EngineStoreApplyRes KVStore::handleIngestSST(UInt64 region_id, const SSTViewVec } }; - if (snapshot_apply_method == TiDB::SnapshotApplyMethod::Block) - { - // try to flush remain data in memory. - func_try_flush(); - region->handleIngestSSTInMemory(snaps, index, term); - // after `handleIngestSSTInMemory`, all data are stored in `region`, try to flush committed data into storage - func_try_flush(); - } - else { // try to flush remain data in memory. func_try_flush(); diff --git a/dbms/src/Storages/Transaction/KVStore.h b/dbms/src/Storages/Transaction/KVStore.h index 9d30f249e60..8673cae3ff3 100644 --- a/dbms/src/Storages/Transaction/KVStore.h +++ b/dbms/src/Storages/Transaction/KVStore.h @@ -109,12 +109,7 @@ class KVStore final : private boost::noncopyable EngineStoreApplyRes handleWriteRaftCmd(const WriteCmdsView & cmds, UInt64 region_id, UInt64 index, UInt64 term, TMTContext & tmt); void handleApplySnapshot(metapb::Region && region, uint64_t peer_id, const SSTViewVec, uint64_t index, uint64_t term, TMTContext & tmt); - RegionPreDecodeBlockDataPtr preHandleSnapshotToBlock( - RegionPtr new_region, - const SSTViewVec, - uint64_t index, - uint64_t term, - TMTContext & tmt); + std::vector /* */ preHandleSnapshotToFiles( RegionPtr new_region, const SSTViewVec, @@ -162,6 +157,11 @@ class KVStore final : private boost::noncopyable ~KVStore(); + FileUsageStatistics getFileUsageStatistics() const + { + return region_persister.getFileUsageStatistics(); + } + private: friend class MockTiDB; friend struct MockTiDBTable; diff --git a/dbms/src/Storages/Transaction/PartitionStreams.cpp b/dbms/src/Storages/Transaction/PartitionStreams.cpp index 9142aad5358..ada792c80f7 100644 --- a/dbms/src/Storages/Transaction/PartitionStreams.cpp +++ b/dbms/src/Storages/Transaction/PartitionStreams.cpp @@ -26,10 +26,10 @@ #include #include #include -#include #include #include #include +#include #include namespace DB @@ -353,60 +353,6 @@ void RegionTable::writeBlockByRegion( data_list_to_remove = std::move(*data_list_read); } -RegionTable::ReadBlockByRegionRes RegionTable::readBlockByRegion(const TiDB::TableInfo & table_info, - const ColumnsDescription & columns [[maybe_unused]], - const Names & column_names_to_read, - const RegionPtr & region, - RegionVersion region_version, - RegionVersion conf_version, - bool resolve_locks, - Timestamp start_ts, - const std::unordered_set * bypass_lock_ts, - RegionScanFilterPtr scan_filter) -{ - if (!region) - throw Exception(std::string(__PRETTY_FUNCTION__) + ": region is null", ErrorCodes::LOGICAL_ERROR); - - // Tiny optimization for queries that need only handle, tso, delmark. - bool need_value = column_names_to_read.size() != 3; - auto region_data_lock = resolveLocksAndReadRegionData( - table_info.id, - region, - start_ts, - bypass_lock_ts, - region_version, - conf_version, - resolve_locks, - need_value); - - return std::visit(variant_op::overloaded{ - [&](RegionDataReadInfoList & data_list_read) -> ReadBlockByRegionRes { - /// Read region data as block. 
- Block block; - // FIXME: remove this deprecated function - assert(0); - { - auto reader = RegionBlockReader(nullptr); - bool ok = reader.setStartTs(start_ts) - .setFilter(scan_filter) - .read(block, data_list_read, /*force_decode*/ true); - if (!ok) - // TODO: Enrich exception message. - throw Exception("Read region " + std::to_string(region->id()) + " of table " - + std::to_string(table_info.id) + " failed", - ErrorCodes::LOGICAL_ERROR); - } - return block; - }, - [&](LockInfoPtr & lock_value) -> ReadBlockByRegionRes { - assert(lock_value); - throw LockException(region->id(), std::move(lock_value)); - }, - [](RegionException::RegionReadStatus & s) -> ReadBlockByRegionRes { return s; }, - }, - region_data_lock); -} - RegionTable::ResolveLocksAndWriteRegionRes RegionTable::resolveLocksAndWriteRegion(TMTContext & tmt, const TiDB::TableID table_id, const RegionPtr & region, diff --git a/dbms/src/Storages/Transaction/ProxyFFI.cpp b/dbms/src/Storages/Transaction/ProxyFFI.cpp index 58e7f5ad2e5..cc7d1e10a49 100644 --- a/dbms/src/Storages/Transaction/ProxyFFI.cpp +++ b/dbms/src/Storages/Transaction/ProxyFFI.cpp @@ -304,25 +304,13 @@ RawRustPtrWrap::~RawRustPtrWrap() RustGcHelper::instance().gcRustPtr(ptr, type); } RawRustPtrWrap::RawRustPtrWrap(RawRustPtrWrap && src) + : RawRustPtr() { RawRustPtr & tar = (*this); tar = src; src.ptr = nullptr; } -struct PreHandledSnapshotWithBlock -{ - ~PreHandledSnapshotWithBlock() { CurrentMetrics::sub(CurrentMetrics::RaftNumSnapshotsPendingApply); } - PreHandledSnapshotWithBlock(const RegionPtr & region_, RegionPtrWithBlock::CachePtr && cache_) - : region(region_) - , cache(std::move(cache_)) - { - CurrentMetrics::add(CurrentMetrics::RaftNumSnapshotsPendingApply); - } - RegionPtr region; - RegionPtrWithBlock::CachePtr cache; -}; - struct PreHandledSnapshotWithFiles { ~PreHandledSnapshotWithFiles() { CurrentMetrics::sub(CurrentMetrics::RaftNumSnapshotsPendingApply); } @@ -362,13 +350,6 @@ RawCppPtr PreHandleSnapshot( switch (kvstore->applyMethod()) { - case TiDB::SnapshotApplyMethod::Block: - { - // Pre-decode as a block - auto new_region_block_cache = kvstore->preHandleSnapshotToBlock(new_region, snaps, index, term, tmt); - auto * res = new PreHandledSnapshotWithBlock{new_region, std::move(new_region_block_cache)}; - return GenRawCppPtr(res, RawCppPtrTypeImpl::PreHandledSnapshotWithBlock); - } case TiDB::SnapshotApplyMethod::DTFile_Directory: case TiDB::SnapshotApplyMethod::DTFile_Single: { @@ -391,18 +372,12 @@ RawCppPtr PreHandleSnapshot( template void ApplyPreHandledSnapshot(EngineStoreServerWrap * server, PreHandledSnapshot * snap) { - static_assert( - std::is_same_v || std::is_same_v, - "Unknown pre-handled snapshot type"); + static_assert(std::is_same_v, "Unknown pre-handled snapshot type"); try { auto & kvstore = server->tmt->getKVStore(); - if constexpr (std::is_same_v) - { - kvstore->handlePreApplySnapshot(RegionPtrWithBlock{snap->region, std::move(snap->cache)}, *server->tmt); - } - else if constexpr (std::is_same_v) + if constexpr (std::is_same_v) { kvstore->handlePreApplySnapshot(RegionPtrWithSnapshotFiles{snap->region, std::move(snap->ingest_ids)}, *server->tmt); } @@ -418,12 +393,6 @@ void ApplyPreHandledSnapshot(EngineStoreServerWrap * server, RawVoidPtr res, Raw { switch (static_cast(type)) { - case RawCppPtrTypeImpl::PreHandledSnapshotWithBlock: - { - auto * snap = reinterpret_cast(res); - ApplyPreHandledSnapshot(server, snap); - break; - } case RawCppPtrTypeImpl::PreHandledSnapshotWithFiles: { auto * snap = reinterpret_cast(res); @@ -445,9 
+414,6 @@ void GcRawCppPtr(RawVoidPtr ptr, RawCppPtrType type) case RawCppPtrTypeImpl::String: delete reinterpret_cast(ptr); break; - case RawCppPtrTypeImpl::PreHandledSnapshotWithBlock: - delete reinterpret_cast(ptr); - break; case RawCppPtrTypeImpl::PreHandledSnapshotWithFiles: delete reinterpret_cast(ptr); break; diff --git a/dbms/src/Storages/Transaction/ProxyFFI.h b/dbms/src/Storages/Transaction/ProxyFFI.h index 5d87af94f30..e1c01599275 100644 --- a/dbms/src/Storages/Transaction/ProxyFFI.h +++ b/dbms/src/Storages/Transaction/ProxyFFI.h @@ -56,7 +56,6 @@ enum class RawCppPtrTypeImpl : RawCppPtrType { None = 0, String, - PreHandledSnapshotWithBlock, PreHandledSnapshotWithFiles, WakerNotifier, }; diff --git a/dbms/src/Storages/Transaction/Region.cpp b/dbms/src/Storages/Transaction/Region.cpp index e021de3d978..aa75eabb4b9 100644 --- a/dbms/src/Storages/Transaction/Region.cpp +++ b/dbms/src/Storages/Transaction/Region.cpp @@ -720,47 +720,6 @@ EngineStoreApplyRes Region::handleWriteRaftCmd(const WriteCmdsView & cmds, UInt6 return EngineStoreApplyRes::None; } -void Region::handleIngestSSTInMemory(const SSTViewVec snaps, UInt64 index, UInt64 term) -{ - if (index <= appliedIndex()) - return; - - { - std::unique_lock lock(mutex); - - for (UInt64 i = 0; i < snaps.len; ++i) - { - const auto & snapshot = snaps.views[i]; - auto sst_reader = SSTReader{proxy_helper, snapshot}; - - LOG_FMT_INFO(log, - "{} begin to ingest sst of cf {} at [term: {}, index: {}]", - this->toString(false), - CFToName(snapshot.type), - term, - index); - - uint64_t kv_size = 0; - while (sst_reader.remained()) - { - auto key = sst_reader.key(); - auto value = sst_reader.value(); - doInsert(snaps.views[i].type, TiKVKey(key.data, key.len), TiKVValue(value.data, value.len)); - ++kv_size; - sst_reader.next(); - } - - LOG_FMT_INFO(log, - "{} finish to ingest sst of kv count {}", - this->toString(false), - kv_size); - GET_METRIC(tiflash_raft_process_keys, type_ingest_sst).Increment(kv_size); - } - meta.setApplied(index, term); - } - meta.notifyAll(); -} - void Region::finishIngestSSTByDTFile(RegionPtr && rhs, UInt64 index, UInt64 term) { if (index <= appliedIndex()) diff --git a/dbms/src/Storages/Transaction/Region.h b/dbms/src/Storages/Transaction/Region.h index b31ae0cdc49..06b18de379a 100644 --- a/dbms/src/Storages/Transaction/Region.h +++ b/dbms/src/Storages/Transaction/Region.h @@ -191,7 +191,6 @@ class Region : public std::enable_shared_from_this TableID getMappedTableID() const; EngineStoreApplyRes handleWriteRaftCmd(const WriteCmdsView & cmds, UInt64 index, UInt64 term, TMTContext & tmt); - void handleIngestSSTInMemory(const SSTViewVec snaps, UInt64 index, UInt64 term); void finishIngestSSTByDTFile(RegionPtr && rhs, UInt64 index, UInt64 term); UInt64 getSnapshotEventFlag() const { return snapshot_event_flag; } diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.cpp b/dbms/src/Storages/Transaction/RegionBlockReader.cpp index 32be7302775..af351f4a6b0 100644 --- a/dbms/src/Storages/Transaction/RegionBlockReader.cpp +++ b/dbms/src/Storages/Transaction/RegionBlockReader.cpp @@ -58,7 +58,7 @@ bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & d const auto & pk_column_ids = schema_snapshot->pk_column_ids; const auto & pk_pos_map = schema_snapshot->pk_pos_map; - SortedColumnIDWithPosConstIter column_ids_iter = read_column_ids.begin(); + auto column_ids_iter = read_column_ids.begin(); size_t next_column_pos = 0; /// every table in tiflash must have an extra handle column, it either @@ -112,25 
+112,6 @@ bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & d size_t index = 0; for (const auto & [pk, write_type, commit_ts, value_ptr] : data_list) { - // Ignore data after the start_ts. - if (commit_ts > start_ts) - continue; - - bool should_skip = false; - if constexpr (pk_type != TMTPKType::STRING) - { - if constexpr (pk_type == TMTPKType::UINT64) - { - should_skip = scan_filter != nullptr && scan_filter->filter(static_cast(pk)); - } - else - { - should_skip = scan_filter != nullptr && scan_filter->filter(static_cast(pk)); - } - } - if (should_skip) - continue; - /// set delmark and version column delmark_data.emplace_back(write_type == Region::DelFlag); version_data.emplace_back(commit_ts); @@ -186,7 +167,7 @@ bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & d { // The pk_type must be Int32/Uint32 or more narrow type // so cannot tell its' exact type here, just use `insert(Field)` - HandleID handle_value(static_cast(pk)); + auto handle_value(static_cast(pk)); raw_pk_column->insert(Field(handle_value)); if (unlikely(raw_pk_column->getInt(index) != handle_value)) { diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.h b/dbms/src/Storages/Transaction/RegionBlockReader.h index 860e0d149e6..ec633e805c0 100644 --- a/dbms/src/Storages/Transaction/RegionBlockReader.h +++ b/dbms/src/Storages/Transaction/RegionBlockReader.h @@ -37,79 +37,12 @@ using ManageableStoragePtr = std::shared_ptr; struct ColumnsDescription; class Block; -class RegionScanFilter -{ - bool is_full_range_scan; - std::vector> int64_ranges; - std::vector> uint64_ranges; - - bool isValidHandle(UInt64 handle) - { - for (const auto & range : uint64_ranges) - { - if (handle >= range.first && handle < range.second) - { - return true; - } - } - return false; - } - bool isValidHandle(Int64 handle) - { - for (const auto & range : int64_ranges) - { - if (handle >= range.first && handle < range.second) - { - return true; - } - } - return false; - } - -public: - RegionScanFilter( - bool is_full_range_scan_, - std::vector> int64_ranges_, - std::vector> uint64_ranges_) - : is_full_range_scan(is_full_range_scan_) - , int64_ranges(std::move(int64_ranges_)) - , uint64_ranges(std::move(uint64_ranges_)) - {} - bool filter(UInt64 handle) { return !is_full_range_scan && !isValidHandle(handle); } - bool filter(Int64 handle) { return !is_full_range_scan && !isValidHandle(handle); } - bool isFullRangeScan() { return is_full_range_scan; } - const std::vector> & getUInt64Ranges() { return uint64_ranges; } - const std::vector> & getInt64Ranges() { return int64_ranges; } -}; - -using RegionScanFilterPtr = std::shared_ptr; - /// The Reader to read the region data in `data_list` and decode based on the given table_info and columns, as a block. class RegionBlockReader : private boost::noncopyable { - RegionScanFilterPtr scan_filter; - Timestamp start_ts = std::numeric_limits::max(); - public: RegionBlockReader(DecodingStorageSchemaSnapshotConstPtr schema_snapshot_); - inline RegionBlockReader & setFilter(RegionScanFilterPtr filter) - { - scan_filter = std::move(filter); - return *this; - } - - /// Set the `start_ts` for reading data. The `start_ts` is `Timestamp::max` if not set. - /// - /// Data with commit_ts > start_ts will be ignored. This is for the sake of decode safety on read, - /// i.e. as data keeps being synced to region cache while the schema for a specific read is fixed, - /// we'll always have newer data than schema, only ignoring them can guarantee the decode safety. 
- inline RegionBlockReader & setStartTs(Timestamp tso) - { - start_ts = tso; - return *this; - } - /// Read `data_list` as a block. /// /// On decode error, i.e. column number/type mismatch, will do force apply schema, @@ -117,7 +50,7 @@ class RegionBlockReader : private boost::noncopyable /// Moreover, exception will be thrown if we see fatal decode error meanwhile `force_decode` is true. /// /// `RegionBlockReader::read` is the common routine used by both 'flush' and 'read' processes of TXN engine (Delta-Tree, TXN-MergeTree), - /// each of which will use carefully adjusted 'start_ts' and 'force_decode' with appropriate error handling/retry to get what they want. + /// each of which will use carefully adjusted 'force_decode' with appropriate error handling/retry to get what they want. bool read(Block & block, const RegionDataReadInfoList & data_list, bool force_decode); private: diff --git a/dbms/src/Storages/Transaction/RegionPersister.cpp b/dbms/src/Storages/Transaction/RegionPersister.cpp index c3db88daece..7ce52c6caa1 100644 --- a/dbms/src/Storages/Transaction/RegionPersister.cpp +++ b/dbms/src/Storages/Transaction/RegionPersister.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -379,4 +380,9 @@ bool RegionPersister::gc() return stable_page_storage->gc(); } +FileUsageStatistics RegionPersister::getFileUsageStatistics() const +{ + return page_reader->getFileUsageStatistics(); +} + } // namespace DB diff --git a/dbms/src/Storages/Transaction/RegionPersister.h b/dbms/src/Storages/Transaction/RegionPersister.h index f2828add202..a6b400345f8 100644 --- a/dbms/src/Storages/Transaction/RegionPersister.h +++ b/dbms/src/Storages/Transaction/RegionPersister.h @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -57,6 +58,8 @@ class RegionPersister final : private boost::noncopyable PageStorage::Config getPageStorageSettings() const; + FileUsageStatistics getFileUsageStatistics() const; + #ifndef DBMS_PUBLIC_GTEST private: #endif diff --git a/dbms/src/Storages/Transaction/RegionTable.cpp b/dbms/src/Storages/Transaction/RegionTable.cpp index 8b5ca5746f0..c855d5b3226 100644 --- a/dbms/src/Storages/Transaction/RegionTable.cpp +++ b/dbms/src/Storages/Transaction/RegionTable.cpp @@ -20,9 +20,9 @@ #include #include #include -#include #include #include +#include namespace DB { diff --git a/dbms/src/Storages/Transaction/RegionTable.h b/dbms/src/Storages/Transaction/RegionTable.h index b30a905541a..717b1cd568f 100644 --- a/dbms/src/Storages/Transaction/RegionTable.h +++ b/dbms/src/Storages/Transaction/RegionTable.h @@ -146,21 +146,6 @@ class RegionTable : private boost::noncopyable Poco::Logger * log, bool lock_region = true); - /// Read the data of the given region into block, take good care of learner read and locks. - /// Assuming that the schema has been properly synced by outer, i.e. being new enough to decode data before start_ts, - /// we directly ask RegionBlockReader::read to perform a read with the given start_ts and force_decode being true. 
- using ReadBlockByRegionRes = std::variant; - static ReadBlockByRegionRes readBlockByRegion(const TiDB::TableInfo & table_info, - const ColumnsDescription & columns, - const Names & column_names_to_read, - const RegionPtr & region, - RegionVersion region_version, - RegionVersion conf_version, - bool resolve_locks, - Timestamp start_ts, - const std::unordered_set * bypass_lock_ts, - RegionScanFilterPtr scan_filter = nullptr); - /// Check transaction locks in region, and write committed data in it into storage engine if check passed. Otherwise throw an LockException. /// The write logic is the same as #writeBlockByRegion, with some extra checks about region version and conf_version. using ResolveLocksAndWriteRegionRes = std::variant; diff --git a/dbms/src/Storages/Transaction/StorageEngineType.h b/dbms/src/Storages/Transaction/StorageEngineType.h index f202d15a769..3d103ca60c1 100644 --- a/dbms/src/Storages/Transaction/StorageEngineType.h +++ b/dbms/src/Storages/Transaction/StorageEngineType.h @@ -33,7 +33,7 @@ enum class StorageEngine enum class SnapshotApplyMethod : std::int32_t { - Block = 1, + DEPRECATED_Block = 1, // Invalid if the storage engine is not DeltaTree DTFile_Directory, DTFile_Single, @@ -43,14 +43,12 @@ inline const std::string applyMethodToString(SnapshotApplyMethod method) { switch (method) { - case SnapshotApplyMethod::Block: - return "block"; - case SnapshotApplyMethod::DTFile_Directory: - return "file1"; - case SnapshotApplyMethod::DTFile_Single: - return "file2"; - default: - return "unknown(" + std::to_string(static_cast(method)) + ")"; + case SnapshotApplyMethod::DTFile_Directory: + return "file1"; + case SnapshotApplyMethod::DTFile_Single: + return "file2"; + default: + return "unknown(" + std::to_string(static_cast(method)) + ")"; } return "unknown"; } diff --git a/dbms/src/Storages/Transaction/TMTContext.cpp b/dbms/src/Storages/Transaction/TMTContext.cpp index 006be9d7a92..3c7468cbd64 100644 --- a/dbms/src/Storages/Transaction/TMTContext.cpp +++ b/dbms/src/Storages/Transaction/TMTContext.cpp @@ -22,9 +22,9 @@ #include #include #include -#include #include -#include +#include +#include #include namespace DB @@ -36,6 +36,8 @@ extern const uint64_t DEFAULT_WAIT_INDEX_TIMEOUT_MS = 5 * 60 * 1000; const int64_t DEFAULT_WAIT_REGION_READY_TIMEOUT_SEC = 20 * 60; +const int64_t DEFAULT_READ_INDEX_WORKER_TICK_MS = 10; + TMTContext::TMTContext(Context & context_, const TiFlashRaftConfig & raft_config, const pingcap::ClusterConfig & cluster_config) : context(context_) , kvstore(std::make_shared(context, raft_config.snapshot_apply_method)) @@ -55,6 +57,8 @@ TMTContext::TMTContext(Context & context_, const TiFlashRaftConfig & raft_config , engine(raft_config.engine) , replica_read_max_thread(1) , batch_read_index_timeout_ms(DEFAULT_BATCH_READ_INDEX_TIMEOUT_MS) + , wait_index_timeout_ms(DEFAULT_WAIT_INDEX_TIMEOUT_MS) + , read_index_worker_tick_ms(DEFAULT_READ_INDEX_WORKER_TICK_MS) , wait_region_ready_timeout_sec(DEFAULT_WAIT_REGION_READY_TIMEOUT_SEC) {} @@ -147,12 +151,6 @@ SchemaSyncerPtr TMTContext::getSchemaSyncer() const return schema_syncer; } -void TMTContext::setSchemaSyncer(SchemaSyncerPtr rhs) -{ - std::lock_guard lock(mutex); - schema_syncer = rhs; -} - pingcap::pd::ClientPtr TMTContext::getPDClient() const { return cluster->pd_client; @@ -192,7 +190,7 @@ void TMTContext::reloadConfig(const Poco::Util::AbstractConfiguration & config) t = t >= 0 ? 
                    t : std::numeric_limits::max(); // set -1 to wait infinitely
                    t;
                });
-        read_index_worker_tick_ms = config.getUInt64(READ_INDEX_WORKER_TICK_MS, 10 /*10ms*/);
+        read_index_worker_tick_ms = config.getUInt64(READ_INDEX_WORKER_TICK_MS, DEFAULT_READ_INDEX_WORKER_TICK_MS);
    }
    {
        LOG_FMT_INFO(
diff --git a/dbms/src/Storages/Transaction/TMTContext.h b/dbms/src/Storages/Transaction/TMTContext.h
index c84ada2eec0..8e26c0da88c 100644
--- a/dbms/src/Storages/Transaction/TMTContext.h
+++ b/dbms/src/Storages/Transaction/TMTContext.h
@@ -78,7 +78,6 @@ class TMTContext : private boost::noncopyable
    explicit TMTContext(Context & context_, const TiFlashRaftConfig & raft_config, const pingcap::ClusterConfig & cluster_config_);
    SchemaSyncerPtr getSchemaSyncer() const;
-    void setSchemaSyncer(SchemaSyncerPtr);
    pingcap::pd::ClientPtr getPDClient() const;
diff --git a/dbms/src/Storages/Transaction/TiDB.cpp b/dbms/src/Storages/Transaction/TiDB.cpp
index 763dcac39fc..15bf2a3fb58 100644
--- a/dbms/src/Storages/Transaction/TiDB.cpp
+++ b/dbms/src/Storages/Transaction/TiDB.cpp
@@ -23,8 +23,10 @@
 #include
 #include
 #include
-#include
 #include
+#include
+
+#include <cmath>
namespace DB
{
@@ -110,14 +112,28 @@ Field ColumnInfo::defaultValueToField() const
    }
    switch (tp)
    {
-    // TODO: Consider unsigned?
    // Integer Type.
    case TypeTiny:
    case TypeShort:
    case TypeLong:
    case TypeLongLong:
    case TypeInt24:
-        return value.convert<Int64>();
+    {
+        // In C++, casting an unsigned integer to a signed integer does not change the value:
+        // e.g. for 9223372036854775808, which is larger than the maximum value of Int64,
+        // static_cast<UInt64>(static_cast<Int64>(9223372036854775808)) == 9223372036854775808,
+        // so we don't need to consider unsigned here.
+        try
+        {
+            return value.convert<Int64>();
+        }
+        catch (...)
+        {
+            // Due to https://github.com/pingcap/tidb/issues/34881,
+            // we do this to avoid exceptions thrown by older versions of TiDB.
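+            // For example, an older TiDB may serialize an integer default value
+            // as a float-formatted string such as "129.0"; the strict Int64
+            // conversion above rejects it, while converting through double and
+            // rounding recovers the intended integer.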
+            return static_cast<Int64>(std::llround(value.convert<double>()));
+        }
+    }
    case TypeBit:
    {
        // TODO: We shall use something like `orig_default_bit`, which will never change once created,
@@ -615,6 +631,8 @@ catch (const Poco::Exception & e)
///////////////////////
IndexColumnInfo::IndexColumnInfo(Poco::JSON::Object::Ptr json)
+    : offset(0)
+    , length(0)
{
    deserialize(json);
}
@@ -664,6 +682,13 @@ catch (const Poco::Exception & e)
///////////////////////
IndexInfo::IndexInfo(Poco::JSON::Object::Ptr json)
+    : id(0)
+    , state(TiDB::SchemaState::StateNone)
+    , index_type(0)
+    , is_unique(true)
+    , is_primary(true)
+    , is_invisible(true)
+    , is_global(true)
{
    deserialize(json);
}
diff --git a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp
index e93a117cc1c..f0cafce3914 100644
--- a/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp
+++ b/dbms/src/Storages/Transaction/tests/gtest_kvstore.cpp
@@ -47,8 +47,7 @@ RegionPtr makeRegion(UInt64 id, const std::string start_key, const std::string e
class RegionKVStoreTest : public ::testing::Test
{
public:
-    RegionKVStoreTest()
-        = default;
+    RegionKVStoreTest() = default;
    static void SetUpTestCase() {}
    static void testBasic();
@@ -311,10 +310,13 @@ void RegionKVStoreTest::testRaftMergeRollback(KVStore & kvs, TMTContext & tmt)
        }
    }
    region->setStateApplying();
+
    try
    {
-        kvs.handleAdminRaftCmd(std::move(request),
-                               std::move(response),
+        raft_cmdpb::AdminRequest first_request = request;
+        raft_cmdpb::AdminResponse first_response = response;
+        kvs.handleAdminRaftCmd(std::move(first_request),
+                               std::move(first_response),
                               region_id,
                               32,
                               6,
@@ -926,12 +928,14 @@ void RegionKVStoreTest::testKVStore()
            TiKVValue lock_value = RecordKVFormat::encodeLockCfValue(Region::DelFlag, "pk", 77, 0);
            RegionBench::setupDelRequest(request.add_requests(), ColumnFamilyName::Lock, lock_key);
        }
-        ASSERT_EQ(kvs.handleWriteRaftCmd(std::move(request), 1, 7, 6, ctx.getTMTContext()),
+        raft_cmdpb::RaftCmdRequest first_request = request;
+        ASSERT_EQ(kvs.handleWriteRaftCmd(std::move(first_request), 1, 7, 6, ctx.getTMTContext()),
                  EngineStoreApplyRes::None);
        RegionBench::setupDelRequest(request.add_requests(), ColumnFamilyName::Write, TiKVKey("illegal key"));
        // index <= appliedIndex(), ignore
-        ASSERT_EQ(kvs.handleWriteRaftCmd(std::move(request), 1, 7, 6, ctx.getTMTContext()),
+        raft_cmdpb::RaftCmdRequest second_request;
+        ASSERT_EQ(kvs.handleWriteRaftCmd(std::move(second_request), 1, 7, 6, ctx.getTMTContext()),
                  EngineStoreApplyRes::None);
        try
        {
@@ -973,13 +977,24 @@ void RegionKVStoreTest::testKVStore()
        request.mutable_compact_log();
        request.set_cmd_type(::raft_cmdpb::AdminCmdType::CompactLog);
-        ASSERT_EQ(kvs.handleAdminRaftCmd(std::move(request), std::move(response), 7, 22, 6, ctx.getTMTContext()), EngineStoreApplyRes::Persist);
-        ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{request}, std::move(response), 7, 23, 6, ctx.getTMTContext()), EngineStoreApplyRes::None);
+        raft_cmdpb::AdminRequest first_request = request;
+        raft_cmdpb::AdminResponse first_response = response;
+
+        ASSERT_EQ(kvs.handleAdminRaftCmd(std::move(first_request), std::move(first_response), 7, 22, 6, ctx.getTMTContext()), EngineStoreApplyRes::Persist);
+
+        raft_cmdpb::AdminResponse second_response = response;
+        ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{request}, std::move(second_response), 7, 23, 6, ctx.getTMTContext()), EngineStoreApplyRes::None);
        request.set_cmd_type(::raft_cmdpb::AdminCmdType::ComputeHash);
-
ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{request}, std::move(response), 7, 24, 6, ctx.getTMTContext()), EngineStoreApplyRes::None); + + raft_cmdpb::AdminResponse third_response = response; + ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{request}, std::move(third_response), 7, 24, 6, ctx.getTMTContext()), EngineStoreApplyRes::None); request.set_cmd_type(::raft_cmdpb::AdminCmdType::VerifyHash); - ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{request}, std::move(response), 7, 25, 6, ctx.getTMTContext()), EngineStoreApplyRes::None); - ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{request}, std::move(response), 8192, 5, 6, ctx.getTMTContext()), EngineStoreApplyRes::NotFound); + + raft_cmdpb::AdminResponse fourth_response = response; + ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{request}, std::move(fourth_response), 7, 25, 6, ctx.getTMTContext()), EngineStoreApplyRes::None); + + raft_cmdpb::AdminResponse fifth_response = response; + ASSERT_EQ(kvs.handleAdminRaftCmd(raft_cmdpb::AdminRequest{request}, std::move(fifth_response), 8192, 5, 6, ctx.getTMTContext()), EngineStoreApplyRes::NotFound); { kvs.setRegionCompactLogConfig(0, 0, 0); request.set_cmd_type(::raft_cmdpb::AdminCmdType::CompactLog); @@ -995,62 +1010,12 @@ void RegionKVStoreTest::testKVStore() } { auto ori_snapshot_apply_method = kvs.snapshot_apply_method; - kvs.snapshot_apply_method = TiDB::SnapshotApplyMethod::Block; + kvs.snapshot_apply_method = TiDB::SnapshotApplyMethod::DTFile_Single; SCOPE_EXIT({ kvs.snapshot_apply_method = ori_snapshot_apply_method; }); - { - { - auto region = makeRegion(22, RecordKVFormat::genKey(1, 55), RecordKVFormat::genKey(1, 65)); - kvs.checkAndApplySnapshot(region, ctx.getTMTContext()); - } - try - { - auto region = makeRegion(20, RecordKVFormat::genKey(1, 55), RecordKVFormat::genKey(1, 65)); - kvs.checkAndApplySnapshot(region, ctx.getTMTContext()); // overlap, but not tombstone - ASSERT_TRUE(false); - } - catch (Exception & e) - { - ASSERT_EQ(e.message(), "range of region 20 is overlapped with 22, state: region { id: 22 }"); - } - { - const auto * ori_ptr = proxy_helper.proxy_ptr.inner; - proxy_helper.proxy_ptr.inner = nullptr; - SCOPE_EXIT({ - proxy_helper.proxy_ptr.inner = ori_ptr; - }); - try - { - auto region = makeRegion(20, RecordKVFormat::genKey(1, 55), RecordKVFormat::genKey(1, 65)); - kvs.checkAndApplySnapshot(region, ctx.getTMTContext()); - ASSERT_TRUE(false); - } - catch (Exception & e) - { - ASSERT_EQ(e.message(), "getRegionLocalState meet internal error: RaftStoreProxyPtr is none"); - } - } - - { - proxy_instance.getRegion(22)->setSate(({ - raft_serverpb::RegionLocalState s; - s.set_state(::raft_serverpb::PeerState::Tombstone); - s; - })); - auto region = makeRegion(20, RecordKVFormat::genKey(1, 55), RecordKVFormat::genKey(1, 65)); - kvs.checkAndApplySnapshot(region, ctx.getTMTContext()); // overlap, tombstone, remove previous one - ASSERT_EQ(nullptr, kvs.getRegion(22)); - ASSERT_NE(nullptr, kvs.getRegion(20)); - - auto state = proxy_helper.getRegionLocalState(8192); - ASSERT_EQ(state.state(), raft_serverpb::PeerState::Tombstone); - } - - kvs.handleDestroy(20, ctx.getTMTContext()); - } auto region_id = 19; auto region = makeRegion(region_id, RecordKVFormat::genKey(1, 50), RecordKVFormat::genKey(1, 60)); auto region_id_str = std::to_string(19); @@ -1077,7 +1042,7 @@ void RegionKVStoreTest::testKVStore() 8, 5, ctx.getTMTContext()); - ASSERT_EQ(kvs.getRegion(19)->dataInfo(), "[default 2 ]"); + 
ASSERT_EQ(kvs.getRegion(19)->checkIndex(8), true); try { kvs.handleApplySnapshot( @@ -1092,22 +1057,85 @@ void RegionKVStoreTest::testKVStore() catch (Exception & e) { ASSERT_EQ(e.message(), "[region 19] already has newer apply-index 8 than 6, should not happen"); - ASSERT_EQ(kvs.getRegion(19)->dataInfo(), "[default 2 ]"); // apply-snapshot do not work } - kvs.handleApplySnapshot( - region->getMetaRegion(), - 2, - {}, // empty - 8, // same index - 5, - ctx.getTMTContext()); - ASSERT_EQ(kvs.getRegion(19)->dataInfo(), "[default 2 ]"); // apply-snapshot do not work - region = makeRegion(19, RecordKVFormat::genKey(1, 50), RecordKVFormat::genKey(1, 60)); - region->handleWriteRaftCmd({}, 10, 10, ctx.getTMTContext()); - kvs.checkAndApplySnapshot(region, ctx.getTMTContext()); - ASSERT_EQ(kvs.getRegion(19)->dataInfo(), "[]"); + } + + { + { + auto region = makeRegion(22, RecordKVFormat::genKey(55, 50), RecordKVFormat::genKey(55, 100)); + auto ingest_ids = kvs.preHandleSnapshotToFiles( + region, + {}, + 9, + 5, + ctx.getTMTContext()); + kvs.checkAndApplySnapshot(RegionPtrWithSnapshotFiles{region, std::move(ingest_ids)}, ctx.getTMTContext()); + } + try + { + auto region = makeRegion(20, RecordKVFormat::genKey(55, 50), RecordKVFormat::genKey(55, 100)); + auto ingest_ids = kvs.preHandleSnapshotToFiles( + region, + {}, + 9, + 5, + ctx.getTMTContext()); + kvs.checkAndApplySnapshot(RegionPtrWithSnapshotFiles{region, std::move(ingest_ids)}, ctx.getTMTContext()); // overlap, but not tombstone + ASSERT_TRUE(false); + } + catch (Exception & e) + { + ASSERT_EQ(e.message(), "range of region 20 is overlapped with 22, state: region { id: 22 }"); + } + + { + const auto * ori_ptr = proxy_helper.proxy_ptr.inner; + proxy_helper.proxy_ptr.inner = nullptr; + SCOPE_EXIT({ + proxy_helper.proxy_ptr.inner = ori_ptr; + }); + + try + { + auto region = makeRegion(20, RecordKVFormat::genKey(55, 50), RecordKVFormat::genKey(55, 100)); + auto ingest_ids = kvs.preHandleSnapshotToFiles( + region, + {}, + 10, + 5, + ctx.getTMTContext()); + kvs.checkAndApplySnapshot(RegionPtrWithSnapshotFiles{region, std::move(ingest_ids)}, ctx.getTMTContext()); + ASSERT_TRUE(false); + } + catch (Exception & e) + { + ASSERT_EQ(e.message(), "getRegionLocalState meet internal error: RaftStoreProxyPtr is none"); + } + } + + { + proxy_instance.getRegion(22)->setSate(({ + raft_serverpb::RegionLocalState s; + s.set_state(::raft_serverpb::PeerState::Tombstone); + s; + })); + auto region = makeRegion(20, RecordKVFormat::genKey(55, 50), RecordKVFormat::genKey(55, 100)); + auto ingest_ids = kvs.preHandleSnapshotToFiles( + region, + {}, + 10, + 5, + ctx.getTMTContext()); + kvs.checkAndApplySnapshot(RegionPtrWithSnapshotFiles{region, std::move(ingest_ids)}, ctx.getTMTContext()); // overlap, tombstone, remove previous one + + auto state = proxy_helper.getRegionLocalState(8192); + ASSERT_EQ(state.state(), raft_serverpb::PeerState::Tombstone); + } + + kvs.handleDestroy(20, ctx.getTMTContext()); } } + { auto region_id = 19; auto region_id_str = std::to_string(19); @@ -1126,11 +1154,6 @@ void RegionKVStoreTest::testKVStore() RegionMockTest mock_test(ctx.getTMTContext().getKVStore(), region); { - auto ori_snapshot_apply_method = kvs.snapshot_apply_method; - kvs.snapshot_apply_method = TiDB::SnapshotApplyMethod::Block; - SCOPE_EXIT({ - kvs.snapshot_apply_method = ori_snapshot_apply_method; - }); // Mocking ingest a SST for column family "Write" std::vector sst_views; sst_views.push_back(SSTView{ @@ -1143,9 +1166,10 @@ void RegionKVStoreTest::testKVStore() 100, 1, 
                                ctx.getTMTContext());
-            ASSERT_EQ(kvs.getRegion(19)->dataInfo(), "[default 2 ]");
+            ASSERT_EQ(kvs.getRegion(19)->checkIndex(100), true);
        }
    }
+
    {
        raft_cmdpb::AdminRequest request;
        raft_cmdpb::AdminResponse response;
@@ -1155,7 +1179,7 @@
        try
        {
-            kvs.handleAdminRaftCmd(std::move(request), std::move(response), 19, 110, 6, ctx.getTMTContext());
+            kvs.handleAdminRaftCmd(std::move(request), std::move(response), 1, 110, 6, ctx.getTMTContext());
            ASSERT_TRUE(false);
        }
        catch (Exception & e)
@@ -1372,12 +1396,30 @@ void RegionKVStoreTest::testBasic()
    }
}
-TEST_F(RegionKVStoreTest, run)
+TEST_F(RegionKVStoreTest, Basic)
 try
 {
     testBasic();
+}
+CATCH
+
+TEST_F(RegionKVStoreTest, KVStore)
+try
+{
     testKVStore();
+}
+CATCH
+
+TEST_F(RegionKVStoreTest, Region)
+try
+{
     testRegion();
+}
+CATCH
+
+TEST_F(RegionKVStoreTest, ReadIndex)
+try
+{
     testReadIndex();
 }
 CATCH
diff --git a/dbms/src/Storages/Transaction/tests/gtest_region_persister.cpp b/dbms/src/Storages/Transaction/tests/gtest_region_persister.cpp
index 16a35f42da1..963e3a3571d 100644
--- a/dbms/src/Storages/Transaction/tests/gtest_region_persister.cpp
+++ b/dbms/src/Storages/Transaction/tests/gtest_region_persister.cpp
@@ -65,6 +65,11 @@ class RegionPersister_test : public ::testing::Test
    String dir_path;
    DB::Timestamp tso = 0;
+
+    String getPageStorageV3MetaPath(String & path)
+    {
+        return path + "/page/kvstore/wal/log_1_0";
+    }
};
static ::testing::AssertionResult PeerCompare(
@@ -251,7 +256,7 @@ try
    }
    // If we truncate the page data file, an exception will be thrown instead of dropping the last region.
-    auto meta_path = path + "/kvstore/page_1_0/meta"; // First page
+    auto meta_path = getPageStorageV3MetaPath(path); // First page
    Poco::File meta_file(meta_path);
    size_t size = meta_file.getSize();
    int rt = ::truncate(meta_path.c_str(), size - 1); // Remove the last byte
@@ -288,9 +293,13 @@ try
{
    std::string path = dir_path + "/compatible_mode";
+    auto current_storage_run_mode = TiFlashTestEnv::getGlobalContext().getPageStorageRunMode();
    // Force to run in compatible mode for the default region persister
    FailPointHelper::enableFailPoint(FailPoints::force_enable_region_persister_compatible_mode);
-    SCOPE_EXIT({ FailPointHelper::disableFailPoint(FailPoints::force_enable_region_persister_compatible_mode); });
+    SCOPE_EXIT(
+        { FailPointHelper::disableFailPoint(FailPoints::force_enable_region_persister_compatible_mode);
+          TiFlashTestEnv::getGlobalContext().setPageStorageRunMode(current_storage_run_mode); });
+    TiFlashTestEnv::getGlobalContext().setPageStorageRunMode(PageStorageRunMode::ONLY_V2);
    auto ctx = TiFlashTestEnv::getContext(DB::Settings(),
                                          Strings{
                                              path,
diff --git a/dbms/src/Storages/Transaction/tests/gtest_rename_resolver.cpp b/dbms/src/Storages/Transaction/tests/gtest_rename_resolver.cpp
index f149fd40e3a..4d1afe2653c 100644
--- a/dbms/src/Storages/Transaction/tests/gtest_rename_resolver.cpp
+++ b/dbms/src/Storages/Transaction/tests/gtest_rename_resolver.cpp
@@ -12,9 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include -#include #include +#include +#include namespace DB::tests { @@ -138,10 +138,10 @@ inline ::testing::AssertionResult ColumnNameWithIDPairsCompare( // return ::testing::AssertionSuccess(); else return ::testing::internal::EqFailure(lhs_expr, - rhs_expr, - "<" + lhs.first.toString() + "," + lhs.second.toString() + ">", - "<" + rhs.first.toString() + "," + rhs.second.toString() + ">", - false); + rhs_expr, + "<" + lhs.first.toString() + "," + lhs.second.toString() + ">", + "<" + rhs.first.toString() + "," + rhs.second.toString() + ">", + false); } #define ASSERT_COLUMN_NAME_ID_PAIR_EQ(val1, val2) ASSERT_PRED_FORMAT2(::DB::tests::ColumnNameWithIDPairsCompare, val1, val2) diff --git a/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp b/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp index 0c9747cc24c..516a173b151 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_table_info.cpp @@ -18,10 +18,10 @@ #include #include #include -#include #include #include #include +#include using TableInfo = TiDB::TableInfo; @@ -65,20 +65,20 @@ try R"json({"id":45,"name":{"O":"t","L":"t"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"t","L":"t"},"offset":0,"origin_default":"\u0000\u00124","origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":254,"Flag":129,"Flen":4,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":null,"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":false,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":1,"max_idx_id":0,"max_cst_id":0,"update_timestamp":418683341902184450,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":3,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null}})json", [](const TableInfo & table_info) { ASSERT_EQ(table_info.columns[0].defaultValueToField().get(), - Field(String("\0\x12" - "4\0", - 4)) - .get()); + Field(String("\0\x12" + "4\0", + 4)) + .get()); }}, // Test binary default value with exact length having the full content. 
ParseCase{ R"json({"id":45,"name":{"O":"t","L":"t"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"t","L":"t"},"offset":0,"origin_default":"\u0000\u00124","origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":254,"Flag":129,"Flen":3,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":null,"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":false,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":1,"max_idx_id":0,"max_cst_id":0,"update_timestamp":418683341902184450,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":3,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null}})json", [](const TableInfo & table_info) { ASSERT_EQ(table_info.columns[0].defaultValueToField().get(), - Field(String("\0\x12" - "4", - 3)) - .get()); + Field(String("\0\x12" + "4", + 3)) + .get()); }}, }; @@ -141,47 +141,47 @@ try { auto cases = // {StmtCase{ - 1145, // + 1145, // R"json({"id":1939,"db_name":{"O":"customer","L":"customer"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // R"json({"id":1145,"name":{"O":"customerdebt","L":"customerdebt"},"cols":[{"id":1,"name":{"O":"id","L":"id"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"type":{"Tp":8,"Flag":515,"Flen":20,"Decimal":0},"state":5,"comment":"i\"d"}],"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"负债信息","partition":null})json", // R"stmt(CREATE TABLE `customer`.`customerdebt`(`id` Int64) Engine = DeltaMerge((`id`), '{"cols":[{"comment":"i\\"d","default":null,"default_bit":null,"id":1,"name":{"L":"id","O":"id"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":null,"Collate":null,"Decimal":0,"Elems":null,"Flag":515,"Flen":20,"Tp":8}}],"comment":"\\u8D1F\\u503A\\u4FE1\\u606F","id":1145,"index_info":[],"is_common_handle":false,"name":{"L":"customerdebt","O":"customerdebt"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":0}'))stmt", // }, - StmtCase{ - 2049, // - R"json({"id":1939,"db_name":{"O":"customer","L":"customer"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":2049,"name":{"O":"customerdebt","L":"customerdebt"},"cols":[{"id":1,"name":{"O":"id","L":"id"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"type":{"Tp":8,"Flag":515,"Flen":20,"Decimal":0},"state":5,"comment":"i\"d"}],"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"负债信息","update_timestamp":404545295996944390,"partition":null})json", // - R"stmt(CREATE TABLE `customer`.`customerdebt`(`id` Int64) Engine = DeltaMerge((`id`), '{"cols":[{"comment":"i\\"d","default":null,"default_bit":null,"id":1,"name":{"L":"id","O":"id"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":null,"Collate":null,"Decimal":0,"Elems":null,"Flag":515,"Flen":20,"Tp":8}}],"comment":"\\u8D1F\\u503A\\u4FE1\\u606F","id":2049,"index_info":[],"is_common_handle":false,"name":{"L":"customerdebt","O":"customerdebt"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545295996944390}'))stmt", // - }, - 
StmtCase{ - 31, // - R"json({"id":1,"db_name":{"O":"db1","L":"db1"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":31,"name":{"O":"simple_t","L":"simple_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545295996944390,"ShardRowIDBits":0,"partition":null})json", // - R"stmt(CREATE TABLE `db1`.`simple_t`(`i` Nullable(Int32), `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":0,"Flen":11,"Tp":3}}],"comment":"","id":31,"index_info":[],"is_common_handle":false,"name":{"L":"simple_t","O":"simple_t"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545295996944390}'))stmt", // - }, - StmtCase{ - 33, // - R"json({"id":2,"db_name":{"O":"db2","L":"db2"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":33,"name":{"O":"pk_t","L":"pk_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":3,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545312978108418,"ShardRowIDBits":0,"partition":null})json", // - R"stmt(CREATE TABLE `db2`.`pk_t`(`i` Int32) Engine = DeltaMerge((`i`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":3,"Flen":11,"Tp":3}}],"comment":"","id":33,"index_info":[],"is_common_handle":false,"name":{"L":"pk_t","O":"pk_t"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545312978108418}'))stmt", // - }, - StmtCase{ - 35, // - R"json({"id":1,"db_name":{"O":"db1","L":"db1"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":35,"name":{"O":"not_null_t","L":"not_null_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4097,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545324922961926,"ShardRowIDBits":0,"partition":null})json", // - R"stmt(CREATE TABLE `db1`.`not_null_t`(`i` Int32, `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), 
'{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":4097,"Flen":11,"Tp":3}}],"comment":"","id":35,"index_info":[],"is_common_handle":false,"name":{"L":"not_null_t","O":"not_null_t"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545324922961926}'))stmt", // - }, - StmtCase{ - 37, // - R"json({"id":2,"db_name":{"O":"db2","L":"db2"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", - R"json({"id":37,"name":{"O":"mytable","L":"mytable"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"mycol","L":"mycol"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":15,"Flag":4099,"Flen":256,"Decimal":0,"Charset":"utf8","Collate":"utf8_bin","Elems":null},"state":5,"comment":""}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"mycol","L":"mycol"},"offset":0,"length":-1}],"is_unique":true,"is_primary":true,"state":5,"comment":"","index_type":1}],"fk_info":null,"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":1,"update_timestamp":404566455285710853,"ShardRowIDBits":0,"partition":null})json", // - R"stmt(CREATE TABLE `db2`.`mytable`(`mycol` String) Engine = DeltaMerge((`mycol`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"mycol","O":"mycol"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"utf8","Collate":"utf8_bin","Decimal":0,"Elems":null,"Flag":4099,"Flen":256,"Tp":15}}],"comment":"","id":37,"index_info":[],"is_common_handle":false,"name":{"L":"mytable","O":"mytable"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404566455285710853}'))stmt", // - }, - StmtCase{ - 32, // - R"json({"id":1,"db_name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // - R"json({"id":31,"name":{"O":"range_part_t","L":"range_part_t"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","version":0}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":407445773801488390,"ShardRowIDBits":0,"partition":{"type":1,"expr":"`i`","columns":null,"enable":true,"definitions":[{"id":32,"name":{"O":"p0","L":"p0"},"less_than":["0"]},{"id":33,"name":{"O":"p1","L":"p1"},"less_than":["100"]}],"num":0},"compression":"","version":1})json", // - R"stmt(CREATE TABLE `test`.`range_part_t_32`(`i` Nullable(Int32), `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), 
'{"belonging_table_id":31,"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":0,"Flen":11,"Tp":3}}],"comment":"","id":32,"index_info":[],"is_common_handle":false,"is_partition_sub_table":true,"name":{"L":"range_part_t_32","O":"range_part_t_32"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":407445773801488390}'))stmt", // - }}; + StmtCase{ + 2049, // + R"json({"id":1939,"db_name":{"O":"customer","L":"customer"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // + R"json({"id":2049,"name":{"O":"customerdebt","L":"customerdebt"},"cols":[{"id":1,"name":{"O":"id","L":"id"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"type":{"Tp":8,"Flag":515,"Flen":20,"Decimal":0},"state":5,"comment":"i\"d"}],"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"负债信息","update_timestamp":404545295996944390,"partition":null})json", // + R"stmt(CREATE TABLE `customer`.`customerdebt`(`id` Int64) Engine = DeltaMerge((`id`), '{"cols":[{"comment":"i\\"d","default":null,"default_bit":null,"id":1,"name":{"L":"id","O":"id"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":null,"Collate":null,"Decimal":0,"Elems":null,"Flag":515,"Flen":20,"Tp":8}}],"comment":"\\u8D1F\\u503A\\u4FE1\\u606F","id":2049,"index_info":[],"is_common_handle":false,"name":{"L":"customerdebt","O":"customerdebt"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545295996944390}'))stmt", // + }, + StmtCase{ + 31, // + R"json({"id":1,"db_name":{"O":"db1","L":"db1"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // + R"json({"id":31,"name":{"O":"simple_t","L":"simple_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545295996944390,"ShardRowIDBits":0,"partition":null})json", // + R"stmt(CREATE TABLE `db1`.`simple_t`(`i` Nullable(Int32), `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":0,"Flen":11,"Tp":3}}],"comment":"","id":31,"index_info":[],"is_common_handle":false,"name":{"L":"simple_t","O":"simple_t"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545295996944390}'))stmt", // + }, + StmtCase{ + 33, // + R"json({"id":2,"db_name":{"O":"db2","L":"db2"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // + 
R"json({"id":33,"name":{"O":"pk_t","L":"pk_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":3,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545312978108418,"ShardRowIDBits":0,"partition":null})json", // + R"stmt(CREATE TABLE `db2`.`pk_t`(`i` Int32) Engine = DeltaMerge((`i`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":3,"Flen":11,"Tp":3}}],"comment":"","id":33,"index_info":[],"is_common_handle":false,"name":{"L":"pk_t","O":"pk_t"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545312978108418}'))stmt", // + }, + StmtCase{ + 35, // + R"json({"id":1,"db_name":{"O":"db1","L":"db1"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // + R"json({"id":35,"name":{"O":"not_null_t","L":"not_null_t"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4097,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":""}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":404545324922961926,"ShardRowIDBits":0,"partition":null})json", // + R"stmt(CREATE TABLE `db1`.`not_null_t`(`i` Int32, `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":4097,"Flen":11,"Tp":3}}],"comment":"","id":35,"index_info":[],"is_common_handle":false,"name":{"L":"not_null_t","O":"not_null_t"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404545324922961926}'))stmt", // + }, + StmtCase{ + 37, // + R"json({"id":2,"db_name":{"O":"db2","L":"db2"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", + R"json({"id":37,"name":{"O":"mytable","L":"mytable"},"charset":"","collate":"","cols":[{"id":1,"name":{"O":"mycol","L":"mycol"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":15,"Flag":4099,"Flen":256,"Decimal":0,"Charset":"utf8","Collate":"utf8_bin","Elems":null},"state":5,"comment":""}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"mycol","L":"mycol"},"offset":0,"length":-1}],"is_unique":true,"is_primary":true,"state":5,"comment":"","index_type":1}],"fk_info":null,"state":5,"pk_is_handle":true,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":1,"update_timestamp":404566455285710853,"ShardRowIDBits":0,"partition":null})json", // + R"stmt(CREATE TABLE `db2`.`mytable`(`mycol` String) Engine = 
DeltaMerge((`mycol`), '{"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"mycol","O":"mycol"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"utf8","Collate":"utf8_bin","Decimal":0,"Elems":null,"Flag":4099,"Flen":256,"Tp":15}}],"comment":"","id":37,"index_info":[],"is_common_handle":false,"name":{"L":"mytable","O":"mytable"},"partition":null,"pk_is_handle":true,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":404566455285710853}'))stmt", // + }, + StmtCase{ + 32, // + R"json({"id":1,"db_name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","state":5})json", // + R"json({"id":31,"name":{"O":"range_part_t","L":"range_part_t"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"i","L":"i"},"offset":0,"origin_default":null,"default":null,"default_bit":null,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","version":0}],"index_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"schema_version":-1,"comment":"","auto_inc_id":0,"max_col_id":1,"max_idx_id":0,"update_timestamp":407445773801488390,"ShardRowIDBits":0,"partition":{"type":1,"expr":"`i`","columns":null,"enable":true,"definitions":[{"id":32,"name":{"O":"p0","L":"p0"},"less_than":["0"]},{"id":33,"name":{"O":"p1","L":"p1"},"less_than":["100"]}],"num":0},"compression":"","version":1})json", // + R"stmt(CREATE TABLE `test`.`range_part_t_32`(`i` Nullable(Int32), `_tidb_rowid` Int64) Engine = DeltaMerge((`_tidb_rowid`), '{"belonging_table_id":31,"cols":[{"comment":"","default":null,"default_bit":null,"id":1,"name":{"L":"i","O":"i"},"offset":0,"origin_default":null,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Elems":null,"Flag":0,"Flen":11,"Tp":3}}],"comment":"","id":32,"index_info":[],"is_common_handle":false,"is_partition_sub_table":true,"name":{"L":"range_part_t_32","O":"range_part_t_32"},"partition":null,"pk_is_handle":false,"schema_version":-1,"state":5,"tiflash_replica":{"Count":0},"update_timestamp":407445773801488390}'))stmt", // + }}; for (auto & c : cases) { diff --git a/dbms/src/Storages/tests/gtest_filter_parser.cpp b/dbms/src/Storages/tests/gtest_filter_parser.cpp index a027ea71cfc..8820c05d2da 100644 --- a/dbms/src/Storages/tests/gtest_filter_parser.cpp +++ b/dbms/src/Storages/tests/gtest_filter_parser.cpp @@ -25,10 +25,10 @@ #include #include #include -#include -#include #include #include +#include +#include #include #include diff --git a/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp new file mode 100644 index 00000000000..2c3bf243176 --- /dev/null +++ b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp @@ -0,0 +1,202 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
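+
+// Test-only helpers: translate in-memory columns into a tipb::Expr tree so
+// that function unit tests can exercise the same DAG expression-building path
+// as plans sent from TiDB (see executeFunction with raw_function_test = false
+// in FunctionTestUtils).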
+ +#include +#include +#include +#include + +namespace DB +{ +namespace tests +{ +namespace +{ +void columnToTiPBExpr(tipb::Expr * expr, const ColumnWithTypeAndName column, size_t index) +{ + ColumnInfo ci = reverseGetColumnInfo({column.name, column.type}, 0, Field(), true); + bool is_const = false; + if (column.column != nullptr) + { + is_const = column.column->isColumnConst(); + if (!is_const) + { + if (column.column->isColumnNullable()) + { + auto [col, null_map] = removeNullable(column.column.get()); + is_const = col->isColumnConst(); + } + } + } + if (is_const) + { + Field val_field; + column.column->get(0, val_field); + literalFieldToTiPBExpr(ci, val_field, expr, 0); + } + else + { + *(expr->mutable_field_type()) = columnInfoToFieldType(ci); + expr->set_tp(tipb::ExprType::ColumnRef); + WriteBufferFromOwnString ss; + encodeDAGInt64(index, ss); + expr->set_val(ss.releaseStr()); + } +} +void columnsToTiPBExprForRegExp( + tipb::Expr * expr, + const String &, + const ColumnNumbers & argument_column_number, + const ColumnsWithTypeAndName & columns, + const TiDB::TiDBCollatorPtr & collator) +{ + expr->set_tp(tipb::ExprType::ScalarFunc); + if (collator == nullptr || !collator->isBinary()) + expr->set_sig(tipb::ScalarFuncSig::RegexpUTF8Sig); + else + expr->set_sig(tipb::ScalarFuncSig::RegexpSig); + for (size_t i = 0; i < argument_column_number.size(); ++i) + { + auto * argument_expr = expr->add_children(); + columnToTiPBExpr(argument_expr, columns[argument_column_number[i]], i); + } + /// since we don't know the type, just set a fake one + expr->mutable_field_type()->set_tp(TiDB::TypeLongLong); + if (collator != nullptr) + expr->mutable_field_type()->set_collate(-collator->getCollatorId()); +} +void columnsToTiPBExprForTiDBCast( + tipb::Expr * expr, + const String & func_name, + const ColumnNumbers & argument_column_number, + const ColumnsWithTypeAndName & columns, + const TiDB::TiDBCollatorPtr & collator) +{ + expr->set_tp(tipb::ExprType::ScalarFunc); + expr->set_sig(reverseGetFuncSigByFuncName(func_name)); + assert(argument_column_number.size() == 2); + const auto & type_column = columns[argument_column_number[1]]; + bool is_const = false; + if (type_column.column != nullptr) + { + is_const = type_column.column->isColumnConst(); + if (!is_const) + { + if (type_column.column->isColumnNullable()) + { + auto [col, null_map] = removeNullable(type_column.column.get()); + is_const = col->isColumnConst(); + } + } + } + assert(is_const && removeNullable(type_column.type)->isString()); + Field val; + type_column.column->get(0, val); + String type_string = val.safeGet(); + DataTypePtr target_type = DataTypeFactory::instance().get(type_string); + auto * argument_expr = expr->add_children(); + columnToTiPBExpr(argument_expr, columns[argument_column_number[0]], 0); + ColumnInfo ci = reverseGetColumnInfo({type_string, target_type}, 0, Field(), true); + *(expr->mutable_field_type()) = columnInfoToFieldType(ci); + if (collator != nullptr) + expr->mutable_field_type()->set_collate(-collator->getCollatorId()); +} + +const std::unordered_map date_add_sub_map({ + {"addDays", "DAY"}, + {"addWeeks", "WEEK"}, + {"addMonths", "MONTH"}, + {"addYears", "YEAR"}, + {"addHours", "HOUR"}, + {"addMinutes", "MINUTE"}, + {"addSeconds", "SECOND"}, + {"subtractDays", "DAY"}, + {"subtractWeeks", "WEEK"}, + {"subtractMonths", "MONTH"}, + {"subtractYears", "YEAR"}, + {"subtractHours", "HOUR"}, + {"subtractMinutes", "MINUTE"}, + {"subtractSeconds", "SECOND"}, +}); + +void columnsToTiPBExprForDateAddSub( + tipb::Expr * expr, + 
const String & func_name, + const ColumnNumbers & argument_column_number, + const ColumnsWithTypeAndName & columns, + const TiDB::TiDBCollatorPtr & collator) +{ + String name = func_name.substr(0, 3) == "add" ? "date_add" : "date_sub"; + expr->set_tp(tipb::ExprType::ScalarFunc); + expr->set_sig(reverseGetFuncSigByFuncName(name)); + for (size_t i = 0; i < argument_column_number.size(); ++i) + { + auto * argument_expr = expr->add_children(); + columnToTiPBExpr(argument_expr, columns[argument_column_number[i]], i); + } + String unit = date_add_sub_map.find(func_name)->second; + *(expr->add_children()) = constructStringLiteralTiExpr(unit); + /// since we don't know the type, just set a fake one + expr->mutable_field_type()->set_tp(TiDB::TypeLongLong); + if (collator != nullptr) + expr->mutable_field_type()->set_collate(-collator->getCollatorId()); +} +void columnsToTiPBExpr( + tipb::Expr * expr, + const String & func_name, + const ColumnNumbers & argument_column_number, + const ColumnsWithTypeAndName & columns, + const TiDB::TiDBCollatorPtr & collator) +{ + if (func_name == "tidb_cast") + { + columnsToTiPBExprForTiDBCast(expr, func_name, argument_column_number, columns, collator); + } + else if (func_name == "regexp") + { + columnsToTiPBExprForRegExp(expr, func_name, argument_column_number, columns, collator); + } + else if (date_add_sub_map.find(func_name) != date_add_sub_map.end()) + { + columnsToTiPBExprForDateAddSub(expr, func_name, argument_column_number, columns, collator); + } + else + { + expr->set_tp(tipb::ExprType::ScalarFunc); + expr->set_sig(reverseGetFuncSigByFuncName(func_name)); + for (size_t i = 0; i < argument_column_number.size(); ++i) + { + auto * argument_expr = expr->add_children(); + columnToTiPBExpr(argument_expr, columns[argument_column_number[i]], i); + } + /// since we don't know the type, just set a fake one + expr->mutable_field_type()->set_tp(TiDB::TypeLongLong); + if (collator != nullptr) + expr->mutable_field_type()->set_collate(-collator->getCollatorId()); + } +} +} // namespace + +tipb::Expr columnsToTiPBExpr( + const String & func_name, + const ColumnNumbers & argument_column_number, + const ColumnsWithTypeAndName & columns, + const TiDB::TiDBCollatorPtr & collator) +{ + tipb::Expr ret; + columnsToTiPBExpr(&ret, func_name, argument_column_number, columns, collator); + return ret; +} +} // namespace tests +} // namespace DB diff --git a/dbms/src/TestUtils/ColumnsToTiPBExpr.h b/dbms/src/TestUtils/ColumnsToTiPBExpr.h new file mode 100644 index 00000000000..e7a2e81d59e --- /dev/null +++ b/dbms/src/TestUtils/ColumnsToTiPBExpr.h @@ -0,0 +1,35 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
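+
+// Test-only declaration: build a tipb::Expr that applies `func_name` to the
+// given columns; when `collator` is non-null, it is attached to the resulting
+// field type.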
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace tests +{ +tipb::Expr columnsToTiPBExpr( + const String & func_name, + const ColumnNumbers & argument_column_number, + const ColumnsWithTypeAndName & columns, + const TiDB::TiDBCollatorPtr & collator); +} // namespace tests +} // namespace DB diff --git a/dbms/src/TestUtils/FunctionTestUtils.cpp b/dbms/src/TestUtils/FunctionTestUtils.cpp index 6aa7541ee59..dae07f7123b 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.cpp +++ b/dbms/src/TestUtils/FunctionTestUtils.cpp @@ -12,10 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include +#include +#include +#include #include #include +#include #include #include #include @@ -115,60 +120,113 @@ void blockEqual( } } +std::pair buildFunction( + Context & context, + const String & func_name, + const ColumnNumbers & argument_column_numbers, + const ColumnsWithTypeAndName & columns, + const TiDB::TiDBCollatorPtr & collator) +{ + tipb::Expr tipb_expr = columnsToTiPBExpr(func_name, argument_column_numbers, columns, collator); + NamesAndTypes source_columns; + for (size_t index : argument_column_numbers) + source_columns.emplace_back(columns[index].name, columns[index].type); + DAGExpressionAnalyzer analyzer(source_columns, context); + ExpressionActionsChain chain; + auto & last_step = analyzer.initAndGetLastStep(chain); + auto result_name = DB::DAGExpressionAnalyzerHelper::buildFunction(&analyzer, tipb_expr, last_step.actions); + last_step.required_output.push_back(result_name); + chain.finalize(); + return std::make_pair(last_step.actions, result_name); +} -ColumnWithTypeAndName executeFunction(Context & context, const String & func_name, const ColumnsWithTypeAndName & columns, const TiDB::TiDBCollatorPtr & collator) +ColumnsWithTypeAndName toColumnsWithUniqueName(const ColumnsWithTypeAndName & columns) { - auto & factory = FunctionFactory::instance(); + ColumnsWithTypeAndName columns_with_distinct_name = columns; + std::string base_name = "col"; + for (size_t i = 0; i < columns.size(); ++i) + { + columns_with_distinct_name[i].name = fmt::format("{}_{}", base_name, i); + } + return columns_with_distinct_name; +} - Block block(columns); - ColumnNumbers cns; +ColumnWithTypeAndName executeFunction( + Context & context, + const String & func_name, + const ColumnsWithTypeAndName & columns, + const TiDB::TiDBCollatorPtr & collator, + bool raw_function_test) +{ + ColumnNumbers argument_column_numbers; for (size_t i = 0; i < columns.size(); ++i) - cns.push_back(i); - - auto bp = factory.tryGet(func_name, context); - if (!bp) - throw TiFlashTestException(fmt::format("Function {} not found!", func_name)); - auto func = bp->build(columns, collator); - block.insert({nullptr, func->getReturnType(), "res"}); - func->execute(block, cns, columns.size()); - return block.getByPosition(columns.size()); + argument_column_numbers.push_back(i); + return executeFunction(context, func_name, argument_column_numbers, columns, collator, raw_function_test); } -ColumnWithTypeAndName executeFunction(Context & context, const String & func_name, const ColumnNumbers & argument_column_numbers, const ColumnsWithTypeAndName & columns) +ColumnWithTypeAndName executeFunction( + Context & context, + const String & func_name, + const ColumnNumbers & argument_column_numbers, + const ColumnsWithTypeAndName & columns, + const TiDB::TiDBCollatorPtr & collator, + bool raw_function_test) { - auto 
& factory = FunctionFactory::instance(); - Block block(columns); - ColumnsWithTypeAndName arguments; - for (size_t i = 0; i < argument_column_numbers.size(); ++i) - arguments.push_back(columns.at(i)); - auto bp = factory.tryGet(func_name, context); - if (!bp) - throw TiFlashTestException(fmt::format("Function {} not found!", func_name)); - auto func = bp->build(arguments); - block.insert({nullptr, func->getReturnType(), "res"}); - func->execute(block, argument_column_numbers, columns.size()); - return block.getByPosition(columns.size()); + if (raw_function_test) + { + auto & factory = FunctionFactory::instance(); + Block block(columns); + ColumnsWithTypeAndName arguments; + for (size_t i = 0; i < argument_column_numbers.size(); ++i) + arguments.push_back(columns.at(i)); + auto bp = factory.tryGet(func_name, context); + if (!bp) + throw TiFlashTestException(fmt::format("Function {} not found!", func_name)); + auto func = bp->build(arguments, collator); + block.insert({nullptr, func->getReturnType(), "res"}); + func->execute(block, argument_column_numbers, columns.size()); + return block.getByPosition(columns.size()); + } + auto columns_with_unique_name = toColumnsWithUniqueName(columns); + auto [actions, result_name] = buildFunction(context, func_name, argument_column_numbers, columns_with_unique_name, collator); + Block block(columns_with_unique_name); + actions->execute(block); + return block.getByName(result_name); } DataTypePtr getReturnTypeForFunction( Context & context, const String & func_name, const ColumnsWithTypeAndName & columns, - const TiDB::TiDBCollatorPtr & collator) + const TiDB::TiDBCollatorPtr & collator, + bool raw_function_test) { - auto & factory = FunctionFactory::instance(); - - Block block(columns); - ColumnNumbers cns; - for (size_t i = 0; i < columns.size(); ++i) - cns.push_back(i); - - auto bp = factory.tryGet(func_name, context); - if (!bp) - throw TiFlashTestException(fmt::format("Function {} not found!", func_name)); - auto func = bp->build(columns, collator); - return func->getReturnType(); + if (raw_function_test) + { + auto & factory = FunctionFactory::instance(); + + Block block(columns); + ColumnNumbers cns; + for (size_t i = 0; i < columns.size(); ++i) + cns.push_back(i); + + auto bp = factory.tryGet(func_name, context); + if (!bp) + throw TiFlashTestException(fmt::format("Function {} not found!", func_name)); + auto func = bp->build(columns, collator); + return func->getReturnType(); + } + else + { + ColumnNumbers argument_column_numbers; + for (size_t i = 0; i < columns.size(); ++i) + argument_column_numbers.push_back(i); + auto columns_with_unique_name = toColumnsWithUniqueName(columns); + auto [actions, result_name] = buildFunction(context, func_name, argument_column_numbers, columns_with_unique_name, collator); + return actions->getSampleBlock().getByName(result_name).type; + } } + ColumnWithTypeAndName createOnlyNullColumnConst(size_t size, const String & name) { DataTypePtr data_type = std::make_shared(std::make_shared()); diff --git a/dbms/src/TestUtils/FunctionTestUtils.h b/dbms/src/TestUtils/FunctionTestUtils.h index e88f33a5ca7..615a58ebda5 100644 --- a/dbms/src/TestUtils/FunctionTestUtils.h +++ b/dbms/src/TestUtils/FunctionTestUtils.h @@ -535,13 +535,16 @@ ColumnWithTypeAndName executeFunction( Context & context, const String & func_name, const ColumnsWithTypeAndName & columns, - const TiDB::TiDBCollatorPtr & collator = nullptr); + const TiDB::TiDBCollatorPtr & collator = nullptr, + bool raw_function_test = true); ColumnWithTypeAndName 
executeFunction( Context & context, const String & func_name, const ColumnNumbers & argument_column_numbers, - const ColumnsWithTypeAndName & columns); + const ColumnsWithTypeAndName & columns, + const TiDB::TiDBCollatorPtr & collator = nullptr, + bool raw_function_test = true); template ColumnWithTypeAndName executeFunction( @@ -558,7 +561,8 @@ DataTypePtr getReturnTypeForFunction( Context & context, const String & func_name, const ColumnsWithTypeAndName & columns, - const TiDB::TiDBCollatorPtr & collator = nullptr); + const TiDB::TiDBCollatorPtr & collator = nullptr, + bool raw_function_test = true); template ColumnWithTypeAndName createNullableColumn(InferredDataVector init_vec, const std::vector & null_map, const String name = "") @@ -679,9 +683,13 @@ class FunctionTest : public ::testing::Test context.setDAGContext(dag_context_ptr.get()); } - ColumnWithTypeAndName executeFunction(const String & func_name, const ColumnsWithTypeAndName & columns, const TiDB::TiDBCollatorPtr & collator = nullptr) + ColumnWithTypeAndName executeFunction( + const String & func_name, + const ColumnsWithTypeAndName & columns, + const TiDB::TiDBCollatorPtr & collator = nullptr, + bool raw_function_test = true) { - return DB::tests::executeFunction(context, func_name, columns, collator); + return DB::tests::executeFunction(context, func_name, columns, collator, raw_function_test); } template @@ -691,9 +699,14 @@ class FunctionTest : public ::testing::Test return executeFunction(func_name, vec); } - ColumnWithTypeAndName executeFunction(const String & func_name, const ColumnNumbers & argument_column_numbers, const ColumnsWithTypeAndName & columns) + ColumnWithTypeAndName executeFunction( + const String & func_name, + const ColumnNumbers & argument_column_numbers, + const ColumnsWithTypeAndName & columns, + const TiDB::TiDBCollatorPtr & collator = nullptr, + bool raw_function_test = true) { - return DB::tests::executeFunction(context, func_name, argument_column_numbers, columns); + return DB::tests::executeFunction(context, func_name, argument_column_numbers, columns, collator, raw_function_test); } template diff --git a/dbms/src/TestUtils/TiFlashTestEnv.cpp b/dbms/src/TestUtils/TiFlashTestEnv.cpp index 34355d43775..cbd42b57550 100644 --- a/dbms/src/TestUtils/TiFlashTestEnv.cpp +++ b/dbms/src/TestUtils/TiFlashTestEnv.cpp @@ -28,7 +28,7 @@ namespace DB::tests { std::unique_ptr TiFlashTestEnv::global_context = nullptr; -void TiFlashTestEnv::initializeGlobalContext(Strings testdata_path, bool enable_ps_v3) +void TiFlashTestEnv::initializeGlobalContext(Strings testdata_path, PageStorageRunMode ps_run_mode) { // set itself as global context global_context = std::make_unique(DB::Context::createGlobal()); @@ -68,7 +68,7 @@ void TiFlashTestEnv::initializeGlobalContext(Strings testdata_path, bool enable_ global_context->getPathCapacity(), global_context->getFileProvider()); - global_context->setPageStorageRunMode(enable_ps_v3 ? PageStorageRunMode::ONLY_V3 : PageStorageRunMode::ONLY_V2); + global_context->setPageStorageRunMode(ps_run_mode); global_context->initializeGlobalStoragePoolIfNeed(global_context->getPathPool()); LOG_FMT_INFO(Logger::get("TiFlashTestEnv"), "Storage mode : {}", static_cast(global_context->getPageStorageRunMode())); @@ -88,12 +88,13 @@ Context TiFlashTestEnv::getContext(const DB::Settings & settings, Strings testda Context context = *global_context; context.setGlobalContext(*global_context); // Load `testdata_path` as path if it is set. - const String root_path = testdata_path.empty() ? 
getTemporaryPath() : testdata_path[0]; + const String root_path = testdata_path.empty() ? (DB::toString(getpid()) + "/" + getTemporaryPath()) : testdata_path[0]; if (testdata_path.empty()) testdata_path.push_back(root_path); context.setPath(root_path); auto paths = getPathPool(testdata_path); context.setPathPool(paths.first, paths.second, Strings{}, true, context.getPathCapacity(), context.getFileProvider()); + global_context->initializeGlobalStoragePoolIfNeed(context.getPathPool()); context.getSettingsRef() = settings; return context; } diff --git a/dbms/src/TestUtils/TiFlashTestEnv.h b/dbms/src/TestUtils/TiFlashTestEnv.h index 65dad63d937..dafecf6e1de 100644 --- a/dbms/src/TestUtils/TiFlashTestEnv.h +++ b/dbms/src/TestUtils/TiFlashTestEnv.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -88,7 +89,7 @@ class TiFlashTestEnv static Context getContext(const DB::Settings & settings = DB::Settings(), Strings testdata_path = {}); - static void initializeGlobalContext(Strings testdata_path = {}, bool enable_ps_v3 = false); + static void initializeGlobalContext(Strings testdata_path = {}, PageStorageRunMode ps_run_mode = PageStorageRunMode::ONLY_V3); static Context & getGlobalContext() { return *global_context; } static void shutdown(); diff --git a/dbms/src/Storages/Transaction/SchemaBuilder-internal.h b/dbms/src/TiDB/Schema/SchemaBuilder-internal.h similarity index 100% rename from dbms/src/Storages/Transaction/SchemaBuilder-internal.h rename to dbms/src/TiDB/Schema/SchemaBuilder-internal.h diff --git a/dbms/src/Storages/Transaction/SchemaBuilder.cpp b/dbms/src/TiDB/Schema/SchemaBuilder.cpp similarity index 99% rename from dbms/src/Storages/Transaction/SchemaBuilder.cpp rename to dbms/src/TiDB/Schema/SchemaBuilder.cpp index 1ed8b33d415..64d118eec3e 100644 --- a/dbms/src/Storages/Transaction/SchemaBuilder.cpp +++ b/dbms/src/TiDB/Schema/SchemaBuilder.cpp @@ -35,11 +35,11 @@ #include #include #include -#include -#include -#include #include #include +#include +#include +#include #include #include @@ -1078,7 +1078,7 @@ void SchemaBuilder::applyCreatePhysicalTable(DBInfoPtr db_in ParserCreateQuery parser; ASTPtr ast = parseQuery(parser, stmt.data(), stmt.data() + stmt.size(), "from syncSchema " + table_info->name, 0); - ASTCreateQuery * ast_create_query = typeid_cast(ast.get()); + auto * ast_create_query = typeid_cast(ast.get()); ast_create_query->attach = true; ast_create_query->if_not_exists = true; ast_create_query->database = name_mapper.mapDatabaseName(*db_info); diff --git a/dbms/src/Storages/Transaction/SchemaBuilder.h b/dbms/src/TiDB/Schema/SchemaBuilder.h similarity index 98% rename from dbms/src/Storages/Transaction/SchemaBuilder.h rename to dbms/src/TiDB/Schema/SchemaBuilder.h index fcfba7db57b..8446765f74a 100644 --- a/dbms/src/Storages/Transaction/SchemaBuilder.h +++ b/dbms/src/TiDB/Schema/SchemaBuilder.h @@ -15,8 +15,8 @@ #pragma once #include -#include #include +#include namespace DB { diff --git a/dbms/src/Storages/Transaction/SchemaGetter.cpp b/dbms/src/TiDB/Schema/SchemaGetter.cpp similarity index 83% rename from dbms/src/Storages/Transaction/SchemaGetter.cpp rename to dbms/src/TiDB/Schema/SchemaGetter.cpp index a8dbf8befb7..7f52f9301b1 100644 --- a/dbms/src/Storages/Transaction/SchemaGetter.cpp +++ b/dbms/src/TiDB/Schema/SchemaGetter.cpp @@ -14,7 +14,7 @@ #include #include -#include +#include #include namespace DB @@ -103,14 +103,14 @@ struct TxnStructure } public: - static String Get(pingcap::kv::Snapshot & snap, const String & key) + static String 
get(pingcap::kv::Snapshot & snap, const String & key) { String encode_key = encodeStringDataKey(key); String value = snap.Get(encode_key); return value; } - static String HGet(pingcap::kv::Snapshot & snap, const String & key, const String & field) + static String hGet(pingcap::kv::Snapshot & snap, const String & key, const String & field) { String encode_key = encodeHashDataKey(key, field); String value = snap.Get(encode_key); @@ -118,7 +118,7 @@ struct TxnStructure } // For convinient, we only return values. - static std::vector> HGetAll(pingcap::kv::Snapshot & snap, const String & key) + static std::vector> hGetAll(pingcap::kv::Snapshot & snap, const String & key) { auto tikv_key_prefix = hashDataKeyPrefix(key); String tikv_key_end = pingcap::kv::prefixNext(tikv_key_prefix); @@ -137,7 +137,14 @@ struct TxnStructure } }; -AffectedOption::AffectedOption(Poco::JSON::Object::Ptr json) { deserialize(json); } +AffectedOption::AffectedOption(Poco::JSON::Object::Ptr json) + : schema_id(0) + , table_id(0) + , old_table_id(0) + , old_schema_id(0) +{ + deserialize(json); +} void AffectedOption::deserialize(Poco::JSON::Object::Ptr json) { @@ -175,19 +182,22 @@ void SchemaDiff::deserialize(const String & data) Int64 SchemaGetter::getVersion() { - String ver = TxnStructure::Get(snap, schemaVersionKey); - if (ver == "") + String ver = TxnStructure::get(snap, schemaVersionKey); + if (ver.empty()) return 0; return std::stoll(ver); } -String SchemaGetter::getSchemaDiffKey(Int64 ver) { return std::string(schemaDiffPrefix) + ":" + std::to_string(ver); } +String SchemaGetter::getSchemaDiffKey(Int64 ver) +{ + return std::string(schemaDiffPrefix) + ":" + std::to_string(ver); +} SchemaDiff SchemaGetter::getSchemaDiff(Int64 ver) { String key = getSchemaDiffKey(ver); - String data = TxnStructure::Get(snap, key); - if (data == "") + String data = TxnStructure::get(snap, key); + if (data.empty()) { throw TiFlashException("cannot find schema diff for version: " + std::to_string(ver), Errors::Table::SyncError); } @@ -196,16 +206,22 @@ SchemaDiff SchemaGetter::getSchemaDiff(Int64 ver) return diff; } -String SchemaGetter::getDBKey(DatabaseID db_id) { return String(DBPrefix) + ":" + std::to_string(db_id); } +String SchemaGetter::getDBKey(DatabaseID db_id) +{ + return String(DBPrefix) + ":" + std::to_string(db_id); +} -String SchemaGetter::getTableKey(TableID table_id) { return String(TablePrefix) + ":" + std::to_string(table_id); } +String SchemaGetter::getTableKey(TableID table_id) +{ + return String(TablePrefix) + ":" + std::to_string(table_id); +} TiDB::DBInfoPtr SchemaGetter::getDatabase(DatabaseID db_id) { String key = getDBKey(db_id); - String json = TxnStructure::HGet(snap, DBs, key); + String json = TxnStructure::hGet(snap, DBs, key); - if (json == "") + if (json.empty()) return nullptr; LOG_DEBUG(log, "Get DB Info from TiKV : " + json); @@ -221,8 +237,8 @@ TiDB::TableInfoPtr SchemaGetter::getTableInfo(DatabaseID db_id, TableID table_id throw Exception(); } String table_key = getTableKey(table_id); - String table_info_json = TxnStructure::HGet(snap, db_key, table_key); - if (table_info_json == "") + String table_info_json = TxnStructure::hGet(snap, db_key, table_key); + if (table_info_json.empty()) return nullptr; LOG_DEBUG(log, "Get Table Info from TiKV : " + table_info_json); TiDB::TableInfoPtr table_info = std::make_shared(table_info_json); @@ -232,8 +248,8 @@ TiDB::TableInfoPtr SchemaGetter::getTableInfo(DatabaseID db_id, TableID table_id std::vector SchemaGetter::listDBs() { std::vector res; - auto pairs = 
TxnStructure::HGetAll(snap, DBs); - for (auto pair : pairs) + auto pairs = TxnStructure::hGetAll(snap, DBs); + for (const auto & pair : pairs) { auto db_info = std::make_shared(pair.second); res.push_back(db_info); @@ -243,8 +259,8 @@ std::vector SchemaGetter::listDBs() bool SchemaGetter::checkDBExists(const String & key) { - String value = TxnStructure::HGet(snap, DBs, key); - return value.size() > 0; + String value = TxnStructure::hGet(snap, DBs, key); + return !value.empty(); } std::vector SchemaGetter::listTables(DatabaseID db_id) @@ -257,9 +273,9 @@ std::vector SchemaGetter::listTables(DatabaseID db_id) std::vector res; - auto kv_pairs = TxnStructure::HGetAll(snap, db_key); + auto kv_pairs = TxnStructure::hGetAll(snap, db_key); - for (auto kv_pair : kv_pairs) + for (const auto & kv_pair : kv_pairs) { const String & key = kv_pair.first; if (key.rfind(TablePrefix, 0) != 0) diff --git a/dbms/src/Storages/Transaction/SchemaGetter.h b/dbms/src/TiDB/Schema/SchemaGetter.h similarity index 96% rename from dbms/src/Storages/Transaction/SchemaGetter.h rename to dbms/src/TiDB/Schema/SchemaGetter.h index efcac11f626..cfa5e1c6335 100644 --- a/dbms/src/Storages/Transaction/SchemaGetter.h +++ b/dbms/src/TiDB/Schema/SchemaGetter.h @@ -139,13 +139,13 @@ struct SchemaGetter SchemaDiff getSchemaDiff(Int64 ver); - String getSchemaDiffKey(Int64 ver); + static String getSchemaDiffKey(Int64 ver); bool checkDBExists(const String & key); - String getDBKey(DatabaseID db_id); + static String getDBKey(DatabaseID db_id); - String getTableKey(TableID table_id); + static String getTableKey(TableID table_id); TiDB::DBInfoPtr getDatabase(DatabaseID db_id); diff --git a/dbms/src/Storages/Transaction/SchemaNameMapper.h b/dbms/src/TiDB/Schema/SchemaNameMapper.h similarity index 100% rename from dbms/src/Storages/Transaction/SchemaNameMapper.h rename to dbms/src/TiDB/Schema/SchemaNameMapper.h diff --git a/dbms/src/Storages/Transaction/SchemaSyncService.cpp b/dbms/src/TiDB/Schema/SchemaSyncService.cpp similarity index 98% rename from dbms/src/Storages/Transaction/SchemaSyncService.cpp rename to dbms/src/TiDB/Schema/SchemaSyncService.cpp index a22578b51fc..92eb700766b 100644 --- a/dbms/src/Storages/Transaction/SchemaSyncService.cpp +++ b/dbms/src/TiDB/Schema/SchemaSyncService.cpp @@ -19,10 +19,10 @@ #include #include #include -#include -#include -#include #include +#include +#include +#include #include namespace DB diff --git a/dbms/src/Storages/Transaction/SchemaSyncService.h b/dbms/src/TiDB/Schema/SchemaSyncService.h similarity index 100% rename from dbms/src/Storages/Transaction/SchemaSyncService.h rename to dbms/src/TiDB/Schema/SchemaSyncService.h diff --git a/dbms/src/Storages/Transaction/SchemaSyncer.h b/dbms/src/TiDB/Schema/SchemaSyncer.h similarity index 100% rename from dbms/src/Storages/Transaction/SchemaSyncer.h rename to dbms/src/TiDB/Schema/SchemaSyncer.h diff --git a/dbms/src/Storages/Transaction/TiDBSchemaSyncer.h b/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h similarity index 99% rename from dbms/src/Storages/Transaction/TiDBSchemaSyncer.h rename to dbms/src/TiDB/Schema/TiDBSchemaSyncer.h index 9b017dae0dd..e7d25c304e7 100644 --- a/dbms/src/Storages/Transaction/TiDBSchemaSyncer.h +++ b/dbms/src/TiDB/Schema/TiDBSchemaSyncer.h @@ -18,8 +18,8 @@ #include #include #include -#include #include +#include #include #include diff --git a/docs/design/2022-05-19-compact-table-via-sql.md b/docs/design/2022-05-19-compact-table-via-sql.md new file mode 100644 index 00000000000..c2f9aeff22f --- /dev/null +++ 
b/docs/design/2022-05-19-compact-table-via-sql.md
@@ -0,0 +1,224 @@
+# Compact Table via SQL
+
+- Author(s): [Wish](http://github.com/breezewish)
+
+## Introduction
+
+This RFC introduces a compaction SQL command in TiDB. The command triggers a
+compaction on TiFlash replicas, which can be used to:
+
+1. Migrate from PageStorage v2 to PageStorage v3
+2. Optimize performance by better organizing the data
+
+## Motivation or Background
+
+Recently the PageStorage v3 engine was introduced to TiFlash. By allowing
+users to manually trigger a compaction, the migration from PageStorage v2 to
+PageStorage v3 can be done easily: a compaction command will merge all delta
+layer data (which was stored in v2) into the stable layer and clear it, while
+new delta layer data will be stored in PageStorage v3.
+
+As a bonus, even when the delta layer is already stored in PageStorage v3,
+users can still benefit from this manual compaction command, since a
+compaction rewrites the stored data into a better organized state.
+
+## Product Behavior
+
+New SQL syntax:
+
+```sql
+ALTER TABLE table_name COMPACT [engine_type REPLICA]
+-- engine_type could be either TIKV or TIFLASH
+```
+
+Sample SQLs:
+
+```sql
+ALTER TABLE `users` COMPACT; -- Implicit: Not recommended
+ALTER TABLE `users` COMPACT TIFLASH REPLICA; -- Explicit: Recommended
+```
+
+- The compaction is triggered immediately and runs in the foreground. The SQL
+  won’t return until the compaction is done.
+- When a table is already being compacted, a new compaction SQL command
+  involving this table exits immediately, producing a “canceled” warning.
+- Compact SQL commands can be executed on different tables at the same time,
+  resulting in multiple tables being compacted simultaneously.
+- When `engine_type` is specified as `TIKV`, an “unsupported” error is
+  returned. When `engine_type` is not specified, the SQL compacts TiFlash
+  replicas only. This behavior will change once TiKV compaction is supported
+  in the future.
+- When the table contains multiple partitions, all partitions are compacted.
+- The compact command exits in one of the following ways:
+  1. The user kills the SQL via `KILL [TIDB] `: the task stops immediately.
+  2. TiDB is stopped: the compaction running on TiFlash should be stopped.
+     There are no retries after TiDB is restarted.
+  3. TiFlash is stopped: the compaction command should stop and return an
+     error immediately. There are no retries after TiFlash is restarted.
+  4. The compaction finishes.
+
+## Detailed Design
+
+### Protocol
+
+New APIs will be added to TiFlash:
+
+```protobuf
+// Pseudo code
+
+message Error {
+  oneof {
+    ErrorCompactInProgress,
+    ErrorTooManyPendingTasks,
+    // More errors can be added in future
+  }
+}
+
+message ErrorCompactInProgress {}
+
+message ErrorTooManyPendingTasks {}
+
+message CompactRequest {
+  bytes id
+  bytes start_key          // Optional
+  bytes max_end_key        // Optional
+  int64 physical_table_id  // Used to locate the TiFlash table
+}
+
+message CompactResponse {
+  optional Error error
+  bool has_remaining
+  bytes compacted_start_key
+  bytes compacted_end_key
+}
+```
+
+### General Flow
+
+![](./images/2022-05-19-compact-table-via-sql-1.png)
+
+TiDB sends `CompactRequest` to one TiFlash instance in series. Each request
+compacts one or multiple Segments in the TiFlash instance. The segments may
+change (e.g. split or merge) during the SQL execution process, so TiFlash
+responds with the end key of each request, which is then used as the
+`start_key` of the next request sent from TiDB.
+
+When there are multiple TiFlash instances, TiDB talks with each TiFlash
+concurrently. However, for each TiFlash connection, requests are still sent
+in series. TiFlash instances newly added during a compaction SQL execution
+are discarded.
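+The per-instance loop can be sketched as follows. This is an illustrative
+sketch only, not the committed implementation: the real driver lives in TiDB
+(which is written in Go), and the struct layouts and the `send` callback here
+are assumptions made for the sketch:
+
+```cpp
+#include <cstdint>
+#include <functional>
+#include <string>
+
+struct CompactRequest
+{
+    std::string id;
+    std::string start_key;
+    std::string max_end_key;
+    int64_t physical_table_id = 0;
+};
+
+struct CompactResponse
+{
+    bool has_error = false;
+    bool has_remaining = false;
+    std::string compacted_end_key;
+};
+
+// The RPC call is abstracted behind a callback so the sketch stays self-contained.
+using SendFn = std::function<CompactResponse(const CompactRequest &)>;
+
+// Drives one TiFlash instance: requests go out strictly in series, and the
+// compacted end key of each response becomes the start key of the next request.
+void compactOneInstance(const SendFn & send, int64_t table_id, const std::string & id)
+{
+    std::string start_key; // empty: begin from the start of the table
+    while (true)
+    {
+        CompactResponse resp = send(CompactRequest{id, start_key, /*max_end_key=*/"", table_id});
+        if (resp.has_error)
+            break; // stop driving this instance; other instances continue
+        if (!resp.has_remaining)
+            break; // the whole range on this instance has been compacted
+        start_key = resp.compacted_end_key;
+    }
+}
+```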
+### Interrupt Execution (Kill)
+
+![](./images/2022-05-19-compact-table-via-sql-2.png)
+
+When the user executes `KILL [TIDB] ` to interrupt the compaction command,
+TiDB simply stops the execution by not sending further `CompactRequest`s.
+There may be `CompactRequest`s running in TiFlash instances at the moment the
+user initiates the `KILL`. These running "small" compaction tasks are left
+untouched and keep running until finished. There is no way to stop a running
+`CompactRequest` in TiFlash.
+
+### On TiDB Restart
+
+Similar to Kill, TiDB does not need to do anything extra after a restart.
+TiFlash returns to idle after the currently running `CompactRequest`s are
+finished.
+
+### On Request Error
+
+A `CompactRequest` from TiDB to TiFlash may fail for several reasons, for
+example, network failures or errors returned from TiFlash. In such cases, no
+further `CompactRequest`s are sent to the failed TiFlash instance during the
+compaction command. Requests continue to be sent to the other TiFlash
+instances.
+
+![](./images/2022-05-19-compact-table-via-sql-3.png)
+
+### Service Endpoint
+
+A new gRPC endpoint will be added to the tikv gRPC service:
+
+```protobuf
+service Tikv {
+  // Existing endpoints
+  rpc KvGet(kvrpcpb.GetRequest) returns (kvrpcpb.GetResponse) {}
+  rpc KvScan(kvrpcpb.ScanRequest) returns (kvrpcpb.ScanResponse) {}
+  rpc KvPrewrite(kvrpcpb.PrewriteRequest) returns (kvrpcpb.PrewriteResponse) {}
+  ...
+
+  // Newly added management endpoint
+  rpc Compact(managementpb.CompactRequest) returns (managementpb.CompactResponse) {}
+}
+```
+
+### Handling CompactRequest in TiFlash
+
+![](./images/2022-05-19-compact-table-via-sql-4.png)
+
+`CompactRequest`s are processed one by one in a new thread pool with 1 worker
+thread. When a request is being processed, TiFlash locates the Segment
+according to `start_key` and then performs a foreground Delta Merge.
+
+The number of worker threads can be configured in case users want more
+table-compaction concurrency. Note that compaction is always stepped serially
+for a single table, even if there is more than one worker thread.
+
+### Compact Multiple Segments in One Request
+
+When a Delta Merge takes a short time, TiFlash repeats the process to compact
+more Segments, until either:
+
+- 1 minute (configurable) has elapsed since receiving the request, or
+- there are no more Segments in `[start_key, max_end_key)`.
+
+This can speed up the compaction process when there are many already-compacted
+segments, by reducing round trips.
+
+![](./images/2022-05-19-compact-table-via-sql-5.png)
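+A minimal C++ sketch of this time-budgeted loop follows, reusing the
+`CompactRequest` and `CompactResponse` structs from the driver-loop sketch
+above. `locateSegment` and `foregroundDeltaMerge` are hypothetical stand-ins
+for the storage layer, not the actual TiFlash API:
+
+```cpp
+#include <chrono>
+#include <cstdint>
+#include <optional>
+#include <string>
+
+struct Segment
+{
+    std::string start_key;
+    std::string end_key;
+};
+
+// Stand-ins for the real storage-layer calls; stubbed so the sketch compiles.
+std::optional<Segment> locateSegment(int64_t /*table_id*/, const std::string & /*key*/)
+{
+    return std::nullopt; // the real implementation looks up the segment map in DeltaTree
+}
+
+void foregroundDeltaMerge(int64_t /*table_id*/, const Segment & /*segment*/)
+{
+    // the real implementation merges the segment's delta layer into its stable layer
+}
+
+CompactResponse handleCompact(const CompactRequest & req)
+{
+    using Clock = std::chrono::steady_clock;
+    const auto deadline = Clock::now() + std::chrono::minutes(1); // the configurable budget
+
+    CompactResponse resp;
+    std::string current_key = req.start_key;
+    while (true)
+    {
+        auto segment = locateSegment(req.physical_table_id, current_key);
+        // Stop when no segment is left in [start_key, max_end_key).
+        if (!segment || (!req.max_end_key.empty() && segment->start_key >= req.max_end_key))
+        {
+            resp.has_remaining = false;
+            break;
+        }
+        foregroundDeltaMerge(req.physical_table_id, *segment);
+        current_key = segment->end_key;
+        // Stop early once the time budget is used up; TiDB resumes from the end key.
+        if (Clock::now() >= deadline)
+        {
+            resp.has_remaining = true;
+            break;
+        }
+    }
+    resp.compacted_end_key = current_key;
+    return resp;
+}
+```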
+### Multiple Compaction Command from Same Table
+
+Compacting the same table concurrently doesn't make sense and leads to extra
+resource costs, so we would like to avoid concurrent compactions of the same
+table, allowing only one to be executed.
+
+In order to detect such cases, an `ID` field is attached to the
+`CompactRequest`, which is set to the Table ID in TiDB. TiFlash adds the ID
+to a map when the request is received, and removes it from the map when the
+response is about to be returned. If the ID already exists in the map,
+`ErrorCompactInProgress` is returned immediately, without processing the
+request in the thread pool at all.
+
+![](./images/2022-05-19-compact-table-via-sql-6.png)
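+A sketch of this guard in C++ (the class and function names are illustrative,
+not the actual TiFlash implementation):
+
+```cpp
+#include <mutex>
+#include <string>
+#include <unordered_set>
+
+// Tracks the set of compaction IDs that are currently being processed.
+class CompactionGuard
+{
+public:
+    // Returns false if a compaction with the same ID is already in progress.
+    bool tryAdd(const std::string & id)
+    {
+        std::lock_guard<std::mutex> lock(mutex);
+        return running_ids.insert(id).second;
+    }
+
+    void remove(const std::string & id)
+    {
+        std::lock_guard<std::mutex> lock(mutex);
+        running_ids.erase(id);
+    }
+
+private:
+    std::mutex mutex;
+    std::unordered_set<std::string> running_ids;
+};
+
+// Intended usage, before the request is queued into the worker pool:
+//   if (!guard.tryAdd(req.id))
+//       return errorResponse(ErrorCompactInProgress); // rejected immediately
+//   ... process the request, then call guard.remove(req.id) just before
+//   returning the response.
+```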
+### Multiple Compaction Command from Different Tables
+
+As there is only one worker thread, when the user invokes multiple compaction
+commands concurrently, these compactions are stepped evenly, instead of
+following a FIFO order, as demonstrated below:
+
+![](./images/2022-05-19-compact-table-via-sql-7.png)
+
+If there are too many queued requests (>N), new requests are rejected and
+`ErrorTooManyPendingTasks` is returned. This effectively means that the
+number of concurrently running compact commands is limited to at most N.
+
+## Investigation & Alternatives
+
+- The Compact API could be placed in the TiKV Debug Service (`debugpb`).
+  There is even an existing `debugpb.CompactRequest`. However,
+  - All TiKV Debug Service endpoints are currently used only by tikv-ctl, not
+    by TiDB or any other clients.
+  - `debugpb.CompactRequest` is not suitable for TiFlash: it contains too
+    many TiKV-specific fields and lacks fields needed by TiFlash. It would
+    also be hard to modify while keeping compatibility clean.
+- The Compact API could be provided via `DBGInvoker`, which would make it
+  available via ClickHouse client SQL or the TiFlash HTTP service. This is
+  also the home of most management and debug APIs of TiFlash. However,
+  - Currently TiDB does not talk to TiFlash in this way.
+  - We require this API to be stable for TiDB to use. All APIs in DBGInvoker
+    are human-facing and do not yet produce machine-readable, stable output.
+
+## Unresolved Questions
+
+None.
diff --git a/docs/design/images/2022-05-19-compact-table-via-sql-1.png b/docs/design/images/2022-05-19-compact-table-via-sql-1.png
new file mode 100644
index 00000000000..7c07de7e433
Binary files /dev/null and b/docs/design/images/2022-05-19-compact-table-via-sql-1.png differ
diff --git a/docs/design/images/2022-05-19-compact-table-via-sql-2.png b/docs/design/images/2022-05-19-compact-table-via-sql-2.png
new file mode 100644
index 00000000000..1712e78493e
Binary files /dev/null and b/docs/design/images/2022-05-19-compact-table-via-sql-2.png differ
diff --git a/docs/design/images/2022-05-19-compact-table-via-sql-3.png b/docs/design/images/2022-05-19-compact-table-via-sql-3.png
new file mode 100644
index 00000000000..779cbedb930
Binary files /dev/null and b/docs/design/images/2022-05-19-compact-table-via-sql-3.png differ
diff --git a/docs/design/images/2022-05-19-compact-table-via-sql-4.png b/docs/design/images/2022-05-19-compact-table-via-sql-4.png
new file mode 100644
index 00000000000..5ab899c566f
Binary files /dev/null and b/docs/design/images/2022-05-19-compact-table-via-sql-4.png differ
diff --git a/docs/design/images/2022-05-19-compact-table-via-sql-5.png b/docs/design/images/2022-05-19-compact-table-via-sql-5.png
new file mode 100644
index 00000000000..88dd5ef0918
Binary files /dev/null and b/docs/design/images/2022-05-19-compact-table-via-sql-5.png differ
diff --git a/docs/design/images/2022-05-19-compact-table-via-sql-6.png b/docs/design/images/2022-05-19-compact-table-via-sql-6.png
new file mode 100644
index 00000000000..96c71231c44
Binary files /dev/null and b/docs/design/images/2022-05-19-compact-table-via-sql-6.png differ
diff --git a/docs/design/images/2022-05-19-compact-table-via-sql-7.png b/docs/design/images/2022-05-19-compact-table-via-sql-7.png
new file mode 100644
index 00000000000..64f8025c725
Binary files /dev/null and b/docs/design/images/2022-05-19-compact-table-via-sql-7.png differ
diff --git a/etc/config-template.toml b/etc/config-template.toml
index cad45dc8105..f56a6a095d4 100644
--- a/etc/config-template.toml
+++ b/etc/config-template.toml
@@ -130,10 +130,6 @@
 # pd_addr = "pd0:2379"
 # specify which storage engine we use. tmt or dt TODO: Remove deprecated tmt engine
 # storage_engine = "dt"
-[raft.snapshot]
-# The way to apply snapshot data
-# The value is one of "block" / "file1"
-# method = "file1"
 [status]
 # The port through which Prometheus pulls metrics information.
diff --git a/metrics/grafana/tiflash_summary.json b/metrics/grafana/tiflash_summary.json index a6d1abac46f..f899a47ed10 100644 --- a/metrics/grafana/tiflash_summary.json +++ b/metrics/grafana/tiflash_summary.json @@ -33,12 +33,6 @@ "id": "prometheus", "name": "Prometheus", "version": "1.0.0" - }, - { - "type": "panel", - "id": "timeseries", - "name": "Time series", - "version": "" } ], "annotations": { @@ -58,7 +52,7 @@ "gnetId": null, "graphTooltip": 1, "id": null, - "iteration": 1652861766192, + "iteration": 1653635389238, "links": [], "panels": [ { @@ -5336,14 +5330,30 @@ "align": false, "alignLevel": null } - }, + } + ], + "repeat": null, + "title": "Storage", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 119, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The states of BlobStore (an internal component of storage engine)", + "description": "The states of BlobStore (an internal component of PageStorage)", "fieldConfig": { "defaults": {}, "overrides": [] @@ -5354,20 +5364,20 @@ "h": 8, "w": 12, "x": 0, - "y": 69 + "y": 6 }, "hiddenSeries": false, "id": 85, "legend": { - "alignAsTable": false, + "alignAsTable": true, "avg": false, - "current": false, + "current": true, "max": false, "min": false, - "rightSide": false, + "rightSide": true, "show": true, "total": false, - "values": false + "values": true }, "lines": true, "linewidth": 1, @@ -5383,11 +5393,11 @@ "renderer": "flot", "seriesOverrides": [ { - "alias": "/^BlobAllocated/", + "alias": "/^allocated/", "yaxis": 1 }, { - "alias": "/^BlobExpandRate/", + "alias": "/^expand_rate/", "yaxis": 2 } ], @@ -5402,7 +5412,7 @@ "hide": false, "interval": "", "intervalFactor": 2, - "legendFormat": "BlobAllocated-{{instance}}", + "legendFormat": "allocated-{{instance}}", "refId": "A" }, { @@ -5412,7 +5422,7 @@ "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "BlobExpandRate-{{instance}}", + "legendFormat": "expand_rate-{{instance}}", "refId": "B" } ], @@ -5420,7 +5430,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "BlobStore Status", + "title": "PageStorage Blob Status", "tooltip": { "shared": true, "sort": 0, @@ -5457,23 +5467,7 @@ "align": false, "alignLevel": null } - } - ], - "repeat": null, - "title": "Storage", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "id": 64, - "panels": [ + }, { "aliasColors": {}, "bars": false, @@ -5481,42 +5475,40 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "The throughput of write and delta's background management", + "description": "The disk usage of PageStorage instances in each TiFlash node", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, + "grid": {}, "gridPos": { - "h": 9, - "w": 24, - "x": 0, + "h": 8, + "w": 12, + "x": 12, "y": 6 }, - "height": "", "hiddenSeries": false, - "id": 70, + "id": 128, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, + "max": false, "min": false, "rightSide": true, "show": true, "sideWidth": null, - "sort": "current", - "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as 
zero", "options": { "alertThreshold": true }, @@ -5525,11 +5517,14 @@ "pointradius": 5, "points": false, "renderer": "flot", - "repeatedByRow": true, "seriesOverrides": [ { - "alias": "/total/", + "alias": "/^valid_rate/", "yaxis": 2 + }, + { + "alias": "/size/", + "linewidth": 3 } ], "spaceLength": 10, @@ -5538,47 +5533,51 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"}[1m]))", - "format": "time_series", + "expr": "tiflash_system_asynchronous_metric_BlobDiskBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "throughput_write+ingest", - "refId": "A", - "step": 10 + "intervalFactor": 2, + "legendFormat": "disk_size-{{instance}}", + "refId": "A" }, { - "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"write|ingest\"}[1m]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "throughput_delta-management", + "exemplar": true, + "expr": "sum(tiflash_system_asynchronous_metric_BlobValidBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "valid_size-{{instance}}", "refId": "B" }, { "exemplar": true, - "expr": "sum(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"})", - "format": "time_series", + "expr": "sum((tiflash_system_asynchronous_metric_BlobValidBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) / (tiflash_system_asynchronous_metric_BlobDiskBytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"})) by (instance)", + "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "total_write+ingest", + "legendFormat": "valid_rate-{{instance}}", "refId": "C" }, { - "expr": "sum(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"write|ingest\"})", + "exemplar": true, + "expr": "sum(tiflash_system_asynchronous_metric_BlobFileNums{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "total_delta-management", - "refId": "D" + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "num_file-{{instance}}", + "refId": "E", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Write & Delta Management Throughput", + "title": "PageStorage Disk Usage", "tooltip": { + "msResolution": false, "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -5591,7 +5590,7 @@ }, "yaxes": [ { - "format": "binBps", + "format": "bytes", "label": null, "logBase": 1, "max": null, @@ -5599,11 +5598,11 @@ "show": true }, { - "format": "bytes", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, - "min": null, + "max": "1.1", + "min": "0", "show": true } ], @@ -5618,29 +5617,34 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The stall duration of write and delete range", + "decimals": 
1, + "description": "The number of files of PageStorage instances in each TiFlash node", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, + "grid": {}, "gridPos": { "h": 8, - "w": 24, + "w": 12, "x": 0, - "y": 15 + "y": 14 }, "hiddenSeries": false, - "id": 62, + "id": 129, "legend": { "alignAsTable": true, "avg": false, "current": true, - "max": true, + "max": false, "min": false, "rightSide": true, "show": true, + "sideWidth": null, "total": false, "values": true }, @@ -5656,40 +5660,32 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "99-delta_merge", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_write_stall_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, instance))", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "99-{{type}}-{{instance}}", - "refId": "B" - }, - { - "expr": "histogram_quantile(1, sum(rate(tiflash_storage_write_stall_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, instance))", + "exemplar": true, + "expr": "sum(tiflash_system_asynchronous_metric_BlobFileNums{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", - "intervalFactor": 1, - "legendFormat": "max-{{type}}-{{instance}}", - "refId": "A" + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "num_file-{{instance}}", + "refId": "A", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Write Stall Duration", + "title": "PageStorage File Num", "tooltip": { + "msResolution": false, "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -5702,7 +5698,7 @@ }, "yaxes": [ { - "format": "s", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -5710,10 +5706,10 @@ "show": true }, { - "format": "s", + "format": "percentunit", "label": null, "logBase": 1, - "max": null, + "max": "1.1", "min": "0", "show": true } @@ -5730,29 +5726,29 @@ "dashes": false, "datasource": "${DS_TEST-CLUSTER}", "decimals": 1, - "description": "The throughput of write by instance", + "description": "The number of tables running under different mode in DeltaTree", + "editable": true, + "error": false, "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 1, + "fill": 0, "fillGradient": 0, + "grid": {}, "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 23 + "h": 8, + "w": 12, + "x": 12, + "y": 14 }, - "height": "", "hiddenSeries": false, - "id": 89, + "id": 123, "legend": { "alignAsTable": true, "avg": false, "current": true, - "hideEmpty": false, - "hideZero": false, - "max": true, + "max": false, "min": false, "rightSide": true, "show": true, @@ -5765,7 +5761,7 @@ "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null", + "nullPointMode": "null as zero", "options": { "alertThreshold": true }, @@ -5774,43 +5770,45 @@ "pointradius": 5, "points": false, "renderer": "flot", - "repeatedByRow": true, - "seriesOverrides": [ - { - "alias": "/total/", - "yaxis": 2 - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "exemplar": 
true, - "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write\"}[1m])) by (instance)", + "expr": "sum(tiflash_system_current_metric_StoragePoolV2Only{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "format": "time_series", - "hide": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "throughput_write-{{instance}}", + "intervalFactor": 2, + "legendFormat": "{{instance}}-OnlyV2", "refId": "A", "step": 10 }, { "exemplar": true, - "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"ingest\"}[1m])) by (instance)", + "expr": "sum(tiflash_system_current_metric_StoragePoolV3Only{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", "hide": false, "interval": "", - "legendFormat": "throughput_ingest-{{instance}}", + "legendFormat": "{{instance}}-OnlyV3", "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(tiflash_system_current_metric_StoragePoolMixMode{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "{{instance}}-MixMode", + "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Write Throughput By Instance", + "title": "StoragePool Runmode", "tooltip": { + "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -5825,7 +5823,7 @@ }, "yaxes": [ { - "format": "binBps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -5833,7 +5831,7 @@ "show": true }, { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -5845,43 +5843,65 @@ "align": false, "alignLevel": null } - }, + } + ], + "title": "StoragePool", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 64, + "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The total count of different kinds of commands received", + "decimals": 1, + "description": "The throughput of write and delta's background management", "fieldConfig": { "defaults": {}, "overrides": [] }, - "fill": 0, + "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 24, "x": 0, - "y": 32 + "y": 71 }, + "height": "", "hiddenSeries": false, - "id": 90, + "id": 70, "legend": { "alignAsTable": true, "avg": false, "current": true, + "hideEmpty": false, + "hideZero": false, "max": true, "min": false, "rightSide": true, "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], - "nullPointMode": "null as zero", + "nullPointMode": "null", "options": { "alertThreshold": true }, @@ -5890,9 +5910,10 @@ "pointradius": 5, "points": false, "renderer": "flot", + "repeatedByRow": true, "seriesOverrides": [ { - "alias": "/delete_range|ingest/", + "alias": "/total/", "yaxis": 2 } ], @@ -5901,8 +5922,372 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tiflash_system_profile_event_DMWriteBlock{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", - "format": "time_series", + "exemplar": true, + "expr": 
"sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"}[1m]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "throughput_write+ingest", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"write|ingest\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "throughput_delta-management", + "refId": "B" + }, + { + "exemplar": true, + "expr": "sum(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write|ingest\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "total_write+ingest", + "refId": "C" + }, + { + "expr": "sum(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type!~\"write|ingest\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "total_delta-management", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Write & Delta Management Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The stall duration of write and delete range", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 80 + }, + "hiddenSeries": false, + "id": 62, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "99-delta_merge", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(tiflash_storage_write_stall_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, instance))", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "99-{{type}}-{{instance}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(1, sum(rate(tiflash_storage_write_stall_duration_seconds_bucket{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (le, type, instance))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "max-{{type}}-{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], 
+ "timeShift": null, + "title": "Write Stall Duration", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The throughput of write by instance", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 88 + }, + "height": "", + "hiddenSeries": false, + "id": 89, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeatedByRow": true, + "seriesOverrides": [ + { + "alias": "/total/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"write\"}[1m])) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "throughput_write-{{instance}}", + "refId": "A", + "step": 10 + }, + { + "exemplar": true, + "expr": "sum(rate(tiflash_storage_throughput_bytes{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", type=~\"ingest\"}[1m])) by (instance)", + "hide": false, + "interval": "", + "legendFormat": "throughput_ingest-{{instance}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Write Throughput By Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "binBps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The total count of different kinds of commands received", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 97 + }, + "hiddenSeries": false, + "id": 90, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + 
"links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/delete_range|ingest/", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(tiflash_system_profile_event_DMWriteBlock{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type)", + "format": "time_series", "hide": false, "intervalFactor": 1, "legendFormat": "write block-{{instance}}", @@ -5970,7 +6355,7 @@ "h": 1, "w": 24, "x": 0, - "y": 6 + "y": 7 }, "id": 34, "panels": [ @@ -5990,7 +6375,7 @@ "h": 7, "w": 12, "x": 0, - "y": 7 + "y": 8 }, "hiddenSeries": false, "id": 35, @@ -6088,7 +6473,7 @@ "h": 7, "w": 12, "x": 12, - "y": 7 + "y": 8 }, "hiddenSeries": false, "id": 36, @@ -6206,7 +6591,7 @@ "h": 7, "w": 12, "x": 0, - "y": 14 + "y": 15 }, "hiddenSeries": false, "id": 37, @@ -6340,7 +6725,7 @@ "h": 7, "w": 12, "x": 12, - "y": 14 + "y": 15 }, "hiddenSeries": false, "id": 75, @@ -6444,7 +6829,7 @@ "h": 7, "w": 24, "x": 0, - "y": 21 + "y": 22 }, "hiddenSeries": false, "id": 82, @@ -6597,7 +6982,7 @@ "h": 7, "w": 12, "x": 0, - "y": 28 + "y": 29 }, "heatmap": {}, "hideZeroBuckets": true, @@ -6667,7 +7052,7 @@ "h": 7, "w": 12, "x": 12, - "y": 28 + "y": 29 }, "heatmap": {}, "hideZeroBuckets": true, @@ -6737,7 +7122,7 @@ "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 36 }, "heatmap": {}, "hideZeroBuckets": true, @@ -6807,7 +7192,7 @@ "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 36 }, "heatmap": {}, "hideZeroBuckets": true, @@ -6871,7 +7256,7 @@ "h": 7, "w": 24, "x": 0, - "y": 42 + "y": 43 }, "height": "", "hiddenSeries": false, @@ -6985,7 +7370,7 @@ "h": 7, "w": 12, "x": 0, - "y": 49 + "y": 50 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7054,7 +7439,7 @@ "h": 7, "w": 12, "x": 12, - "y": 49 + "y": 50 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7124,7 +7509,7 @@ "h": 7, "w": 12, "x": 0, - "y": 56 + "y": 57 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7194,7 +7579,7 @@ "h": 7, "w": 12, "x": 12, - "y": 56 + "y": 57 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7264,7 +7649,7 @@ "h": 7, "w": 12, "x": 0, - "y": 63 + "y": 64 }, "heatmap": {}, "hideZeroBuckets": true, @@ -7330,7 +7715,7 @@ "h": 7, "w": 12, "x": 12, - "y": 63 + "y": 64 }, "hiddenSeries": false, "id": 91, @@ -7453,7 +7838,7 @@ "h": 1, "w": 24, "x": 0, - "y": 7 + "y": 8 }, "id": 95, "panels": [ @@ -7675,290 +8060,9 @@ ], "title": "Rough Set Filter Rate Histogram", "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 119, - "panels": [ - { - "datasource": "${DS_TEST-CLUSTER}", - "description": "The Global StoragePool and KVStore Runmode", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMax": 5, - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "graph": false, - "legend": false, - "tooltip": false - }, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 11, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false - }, - "decimals": 0, - "mappings": [ - { - "from": "", - "id": 1, - "text": "ONLY_V2", - "to": "", - "type": 1, - "value": "1" - }, - { 
- "from": "", - "id": 2, - "text": "ONLY_V3", - "to": "", - "type": 1, - "value": "2" - }, - { - "from": "", - "id": 3, - "text": "MIX_MODE", - "to": "", - "type": 1, - "value": "3" - }, - { - "from": "", - "id": 4, - "text": " ", - "to": "", - "type": 1, - "value": "4" - }, - { - "from": "", - "id": 5, - "text": " ", - "to": "", - "type": 1, - "value": "5" - } - ], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "id": 126, - "links": [], - "options": { - "graph": {}, - "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "right" - }, - "tooltipOptions": { - "mode": "multi" - } - }, - "pluginVersion": "7.5.11", - "targets": [ - { - "exemplar": true, - "expr": "tiflash_system_current_metric_GlobalStorageRunMode{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}-GlobalRunMode", - "refId": "A", - "step": 10 - }, - { - "exemplar": false, - "expr": "tiflash_system_current_metric_RegionPersisterRunMode{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}", - "hide": false, - "interval": "", - "legendFormat": "{{instance}}-KVStoreRunMode", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Global Runmode", - "type": "timeseries" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${DS_TEST-CLUSTER}", - "decimals": 1, - "description": "The StoragePool Runmode in DeltaMerge Storage", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 0, - "fillGradient": 0, - "grid": {}, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "hiddenSeries": false, - "id": 123, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": null, - "sort": "current", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null as zero", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.11", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum(tiflash_system_current_metric_StoragePoolV2Only{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}-OnlyV2", - "refId": "A", - "step": 10 - }, - { - "exemplar": true, - "expr": "sum(tiflash_system_current_metric_StoragePoolV3Only{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "hide": false, - "interval": "", - "legendFormat": "{{instance}}-OnlyV3", - "refId": "B" - }, - { - "exemplar": true, - "expr": "sum(tiflash_system_current_metric_StoragePoolMixMode{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}) by (instance)", - "hide": false, - "interval": "", - "legendFormat": "{{instance}}-MixMode", - "refId": "C" - } - ], - 
"thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "StoragePool Runmode", - "tooltip": { - "msResolution": false, - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "title": "StoragePool", - "type": "row" } ], - "refresh": false, + "refresh": "30s", "schemaVersion": 27, "style": "dark", "tags": [], diff --git a/tests/docker/config/tics_dt.toml b/tests/docker/config/tics_dt.toml index 89147f80c7d..56bef659cb7 100644 --- a/tests/docker/config/tics_dt.toml +++ b/tests/docker/config/tics_dt.toml @@ -13,25 +13,16 @@ # limitations under the License. tmp_path = "/tmp/tiflash/data/tmp" -display_name = "TiFlash" -# specify paths used for store data, multiple path should be seperated by comma path = "/tmp/tiflash/data/db" -capacity = "107374182400" -# multi-paths example -# path = "/tmp/tiflash/data/db,/tmp/tiflash1,/tmp/tiflash2" -# capacity = "0,0,0" +capacity = "107374182400" # 100GB mark_cache_size = 5368709120 minmax_index_cache_size = 5368709120 tcp_port = 9000 http_port = 8123 + [logger] count = 10 errorlog = "/tmp/tiflash/log/error.log" size = "1000M" log = "/tmp/tiflash/log/server.log" level = "trace" -[application] -runAsDaemon = true -[raft] -# specify which storage engine we use. tmt or dt -storage_engine = "dt" diff --git a/tests/docker/config/tiflash_dt_async_grpc.toml b/tests/docker/config/tiflash_dt_async_grpc.toml index 3c67c37db33..bf31c61cfa8 100644 --- a/tests/docker/config/tiflash_dt_async_grpc.toml +++ b/tests/docker/config/tiflash_dt_async_grpc.toml @@ -13,71 +13,15 @@ # limitations under the License. tmp_path = "/tmp/tiflash/data/tmp" -display_name = "TiFlash" -## Deprecated storage path setting style. Check [storage] section for new style. path = "/tmp/tiflash/data/db" capacity = "10737418240" -## Deprecated storage path setting style of multi-disks. Check [storage] section for new style. -# path = "/tmp/tiflash/data/db,/tmp/tiflash1,/tmp/tiflash2" -# capacity = "0" mark_cache_size = 5368709120 minmax_index_cache_size = 5368709120 tcp_port = 9000 http_port = 8123 -## Storage paths settings. -# [storage] -## The storage format version in storage engine. Valid values: 1, 2 (experimental). -## format_version = 1 - -## If there are multiple SSD disks on the machine, -## specify the path list on `storage.main.dir` can improve TiFlash performance. - -## If there are multiple disks with different IO metrics (e.g. one SSD and some HDDs) -## on the machine, -## set `storage.latest.dir` to store the latest data on SSD (disks with higher IOPS metrics) -## set `storage.main.dir` to store the main data on HDD (disks with lower IOPS metrics) -## can improve TiFlash performance. - -# [storage.main] -## The path to store main data. -# e.g. -# dir = [ "/data0/tiflash" ] -# or -# dir = [ "/data0/tiflash", "/data1/tiflash" ] - -## Store capacity of each path, i.e. max data size allowed. -## If it is not set, or is set to 0s, the actual disk capacity is used. -## Note that we don't support human-readable big numbers(like "10GB") yet. -## Please set in the specified number of bytes. -# e.g. 
diff --git a/tests/docker/config/tiflash_dt_async_grpc.toml b/tests/docker/config/tiflash_dt_async_grpc.toml
index 3c67c37db33..bf31c61cfa8 100644
--- a/tests/docker/config/tiflash_dt_async_grpc.toml
+++ b/tests/docker/config/tiflash_dt_async_grpc.toml
@@ -13,71 +13,15 @@
 # limitations under the License.
 
 tmp_path = "/tmp/tiflash/data/tmp"
-display_name = "TiFlash"
-## Deprecated storage path setting style. Check [storage] section for new style.
 path = "/tmp/tiflash/data/db"
 capacity = "10737418240"
-## Deprecated storage path setting style of multi-disks. Check [storage] section for new style.
-# path = "/tmp/tiflash/data/db,/tmp/tiflash1,/tmp/tiflash2"
-# capacity = "0"
 mark_cache_size = 5368709120
 minmax_index_cache_size = 5368709120
 tcp_port = 9000
 http_port = 8123
 
-## Storage paths settings.
-# [storage]
-## The storage format version in storage engine. Valid values: 1, 2 (experimental).
-## format_version = 1
-
-## If there are multiple SSD disks on the machine,
-## specify the path list on `storage.main.dir` can improve TiFlash performance.
-
-## If there are multiple disks with different IO metrics (e.g. one SSD and some HDDs)
-## on the machine,
-## set `storage.latest.dir` to store the latest data on SSD (disks with higher IOPS metrics)
-## set `storage.main.dir` to store the main data on HDD (disks with lower IOPS metrics)
-## can improve TiFlash performance.
-
-# [storage.main]
-## The path to store main data.
-# e.g.
-# dir = [ "/data0/tiflash" ]
-# or
-# dir = [ "/data0/tiflash", "/data1/tiflash" ]
-
-## Store capacity of each path, i.e. max data size allowed.
-## If it is not set, or is set to 0s, the actual disk capacity is used.
-## Note that we don't support human-readable big numbers(like "10GB") yet.
-## Please set in the specified number of bytes.
-# e.g.
-# capacity = [ 10737418240, 10737418240 ]
-
-# [storage.latest]
-## The path(s) to store latest data.
-## If not set, it will be the same with `storage.main.dir`.
-# dir = [ ]
-
-## Store capacity of each path, i.e. max data size allowed.
-## If it is not set, or is set to 0s, the actual disk capacity is used.
-# e.g.
-# capacity = [ 10737418240, 10737418240 ]
-
-# [storage.raft]
-## The path(s) to store Raft data.
-## If not set, it will be the paths in `storage.latest.dir` appended with "/kvstore".
-# dir = [ ]
-
-# [storage.io_rate_limit]
-## The max I/O bandwith. Default value is 0 and I/O rate limit is disabled.
-# max_bytes_per_sec = 268435456
-## max_read_bytes_per_sec and max_write_bytes_per_sec are the same meaning as max_bytes_per_sec,
-## but for disk that read bandwidth and write bandwith are calculated separatly, such as GCP's persistent disks.
-# max_read_bytes_per_sec = 0
-# max_write_bytes_per_sec = 0
-
 [flash]
 tidb_status_addr = "tidb0:10080"
 service_addr = "0.0.0.0:3930"
@@ -100,22 +44,9 @@ size = "1000M"
 log = "/tmp/tiflash/log/server.log"
 level = "trace"
 
-[application]
-runAsDaemon = true
-
 [raft]
 pd_addr = "pd0:2379"
 ignore_databases = "system,default"
-# specify which storage engine we use. tmt or dt
-storage_engine = "dt"
-# Deprecated Raft data storage path setting style. Check [storage.raft] section for new style.
-# If it is not set, it will be the first path of "path" appended with "/kvstore".
-# kvstore_path = ""
-
-[raft.snapshot]
-# The way to apply snapshot data
-# The value is one of "block" / "file1" / "file2".
-# method = "file1"
 
 [profiles]
 [profiles.default]
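The same cleanup recurs in each test config: the commented-out `[storage]` documentation plus the obsolete `[application]`, `storage_engine`, `kvstore_path`, and `[raft.snapshot]` settings are dropped. A checker along the following lines could keep the deprecated keys from creeping back into tests/docker/config; this is a sketch assuming Python 3.11+ for `tomllib`, not tooling that ships with this patch.

```python
# Sketch: flag deprecated settings that this patch removes from the test configs.
import pathlib
import tomllib

DEPRECATED_TABLES = ["application", "raft.snapshot"]
DEPRECATED_KEYS = [("raft", "storage_engine"), ("raft", "kvstore_path")]

for path in sorted(pathlib.Path("tests/docker/config").glob("*.toml")):
    conf = tomllib.loads(path.read_text())
    for dotted in DEPRECATED_TABLES:
        head, _, tail = dotted.partition(".")
        # A dotted name means a nested table; a bare name is a top-level table.
        if (tail and tail in conf.get(head, {})) or (not tail and head in conf):
            print(f"{path}: deprecated table [{dotted}] still present")
    for table, key in DEPRECATED_KEYS:
        if key in conf.get(table, {}):
            print(f"{path}: deprecated key {table}.{key} still present")
```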
diff --git a/tests/docker/config/tiflash_dt_disable_local_tunnel.toml b/tests/docker/config/tiflash_dt_disable_local_tunnel.toml
index 23b82909776..1fb166a9a19 100644
--- a/tests/docker/config/tiflash_dt_disable_local_tunnel.toml
+++ b/tests/docker/config/tiflash_dt_disable_local_tunnel.toml
@@ -13,71 +13,15 @@
 # limitations under the License.
 
 tmp_path = "/tmp/tiflash/data/tmp"
-display_name = "TiFlash"
-## Deprecated storage path setting style. Check [storage] section for new style.
 path = "/tmp/tiflash/data/db"
 capacity = "10737418240"
-## Deprecated storage path setting style of multi-disks. Check [storage] section for new style.
-# path = "/tmp/tiflash/data/db,/tmp/tiflash1,/tmp/tiflash2"
-# capacity = "0"
 mark_cache_size = 5368709120
 minmax_index_cache_size = 5368709120
 tcp_port = 9000
 http_port = 8123
 
-## Storage paths settings.
-# [storage]
-## The storage format version in storage engine. Valid values: 1, 2 (experimental).
-## format_version = 1
-
-## If there are multiple SSD disks on the machine,
-## specify the path list on `storage.main.dir` can improve TiFlash performance.
-
-## If there are multiple disks with different IO metrics (e.g. one SSD and some HDDs)
-## on the machine,
-## set `storage.latest.dir` to store the latest data on SSD (disks with higher IOPS metrics)
-## set `storage.main.dir` to store the main data on HDD (disks with lower IOPS metrics)
-## can improve TiFlash performance.
-
-# [storage.main]
-## The path to store main data.
-# e.g.
-# dir = [ "/data0/tiflash" ]
-# or
-# dir = [ "/data0/tiflash", "/data1/tiflash" ]
-
-## Store capacity of each path, i.e. max data size allowed.
-## If it is not set, or is set to 0s, the actual disk capacity is used.
-## Note that we don't support human-readable big numbers(like "10GB") yet.
-## Please set in the specified number of bytes.
-# e.g.
-# capacity = [ 10737418240, 10737418240 ]
-
-# [storage.latest]
-## The path(s) to store latest data.
-## If not set, it will be the same with `storage.main.dir`.
-# dir = [ ]
-
-## Store capacity of each path, i.e. max data size allowed.
-## If it is not set, or is set to 0s, the actual disk capacity is used.
-# e.g.
-# capacity = [ 10737418240, 10737418240 ]
-
-# [storage.raft]
-## The path(s) to store Raft data.
-## If not set, it will be the paths in `storage.latest.dir` appended with "/kvstore".
-# dir = [ ]
-
-# [storage.io_rate_limit]
-## The max I/O bandwith. Default value is 0 and I/O rate limit is disabled.
-# max_bytes_per_sec = 268435456
-## max_read_bytes_per_sec and max_write_bytes_per_sec are the same meaning as max_bytes_per_sec,
-## but for disk that read bandwidth and write bandwith are calculated separatly, such as GCP's persistent disks.
-# max_read_bytes_per_sec = 0
-# max_write_bytes_per_sec = 0
-
 [flash]
 tidb_status_addr = "tidb0:10080"
 service_addr = "0.0.0.0:3930"
@@ -100,22 +44,9 @@ size = "1000M"
 log = "/tmp/tiflash/log/server.log"
 level = "trace"
 
-[application]
-runAsDaemon = true
-
 [raft]
 pd_addr = "pd0:2379"
 ignore_databases = "system,default"
-# specify which storage engine we use. tmt or dt
-storage_engine = "dt"
-# Deprecated Raft data storage path setting style. Check [storage.raft] section for new style.
-# If it is not set, it will be the first path of "path" appended with "/kvstore".
-# kvstore_path = ""
-
-[raft.snapshot]
-# The way to apply snapshot data
-# The value is one of "block" / "file1" / "file2".
-# method = "file1"
 
 [profiles]
 [profiles.default]
diff --git a/tests/fullstack-test/mpp/misc_join.test b/tests/fullstack-test/mpp/misc_join.test
new file mode 100644
index 00000000000..61a1de49925
--- /dev/null
+++ b/tests/fullstack-test/mpp/misc_join.test
@@ -0,0 +1,41 @@
+# Copyright 2022 PingCAP, Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Preparation.
+mysql> drop table if exists test.t1;
+mysql> create table test.t1 (id decimal(5,2), value bigint(20));
+mysql> insert into test.t1 values(1, 1),(2, 2);
+mysql> drop table if exists test.t2;
+mysql> create table test.t2 (id decimal(5,2), value bigint(20));
+mysql> insert into test.t2 values(1, 1),(2, 2),(3, 3),(4, 4);
+
+mysql> alter table test.t1 set tiflash replica 1
+mysql> alter table test.t2 set tiflash replica 1
+mysql> analyze table test.t1
+mysql> analyze table test.t2
+
+func> wait_table test t1
+func> wait_table test t2
+
+mysql> use test; set tidb_allow_mpp=1; set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select * from t1 left join t2 on cast(t1.id as decimal(7,2)) = cast(t2.id as decimal(7,2)) and t1.id + cast(t2.id as decimal(7,2)) + t1.id > 10;
++------+-------+------+-------+
+| id   | value | id   | value |
++------+-------+------+-------+
+| 1.00 |     1 | NULL |  NULL |
+| 2.00 |     2 | NULL |  NULL |
++------+-------+------+-------+
+
+# Clean up.
+mysql> drop table if exists test.t1
+mysql> drop table if exists test.t2
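A note on the expected result in the new test: the equi-condition matches the t1 and t2 rows with equal `id`, but the extra conjunct `t1.id + t2.id + t1.id > 10` evaluates to 3 and 6 for those two candidate pairs, so no match survives and the LEFT JOIN NULL-extends both t1 rows. A sketch of that evaluation, mirroring the decimal(7,2) casts with Decimal:

```python
# Sketch: replay the join predicate from misc_join.test on the inserted rows.
from decimal import Decimal

t1 = [(Decimal("1.00"), 1), (Decimal("2.00"), 2)]
t2 = [(Decimal("1.00"), 1), (Decimal("2.00"), 2),
      (Decimal("3.00"), 3), (Decimal("4.00"), 4)]

for l_id, l_val in t1:
    matches = [(r_id, r_val) for r_id, r_val in t2
               if l_id == r_id and l_id + r_id + l_id > 10]
    # 1+1+1 = 3 and 2+2+2 = 6 never exceed 10, so every t1 row is NULL-extended.
    print(l_id, l_val, matches if matches else "-> NULL, NULL")
```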